summaryrefslogtreecommitdiffstats
path: root/src/test/recovery
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/recovery')
-rw-r--r--src/test/recovery/.gitignore2
-rw-r--r--src/test/recovery/Makefile29
-rw-r--r--src/test/recovery/README27
-rw-r--r--src/test/recovery/t/001_stream_rep.pl534
-rw-r--r--src/test/recovery/t/002_archiving.pl144
-rw-r--r--src/test/recovery/t/003_recovery_targets.pl186
-rw-r--r--src/test/recovery/t/004_timeline_switch.pl110
-rw-r--r--src/test/recovery/t/005_replay_delay.pl114
-rw-r--r--src/test/recovery/t/006_logical_decoding.pl275
-rw-r--r--src/test/recovery/t/007_sync_rep.pl221
-rw-r--r--src/test/recovery/t/008_fsm_truncation.pl100
-rw-r--r--src/test/recovery/t/009_twophase.pl484
-rw-r--r--src/test/recovery/t/010_logical_decoding_timelines.pl202
-rw-r--r--src/test/recovery/t/012_subtransactions.pl218
-rw-r--r--src/test/recovery/t/013_crash_restart.pl250
-rw-r--r--src/test/recovery/t/014_unlogged_reinit.pl134
-rw-r--r--src/test/recovery/t/015_promotion_pages.pl89
-rw-r--r--src/test/recovery/t/016_min_consistency.pl143
-rw-r--r--src/test/recovery/t/017_shm.pl218
-rw-r--r--src/test/recovery/t/018_wal_optimize.pl401
-rw-r--r--src/test/recovery/t/019_replslot_limit.pl444
-rw-r--r--src/test/recovery/t/020_archive_status.pl251
-rw-r--r--src/test/recovery/t/021_row_visibility.pl205
-rw-r--r--src/test/recovery/t/022_crash_temp_files.pl277
-rw-r--r--src/test/recovery/t/023_pitr_prepared_xact.pl91
-rw-r--r--src/test/recovery/t/024_archive_recovery.pl105
-rw-r--r--src/test/recovery/t/025_stuck_on_old_timeline.pl112
-rw-r--r--src/test/recovery/t/026_overwrite_contrecord.pl109
-rw-r--r--src/test/recovery/t/027_stream_regress.pl113
-rw-r--r--src/test/recovery/t/028_pitr_timelines.pl176
-rw-r--r--src/test/recovery/t/029_stats_restart.pl344
-rw-r--r--src/test/recovery/t/030_stats_cleanup_replica.pl206
-rw-r--r--src/test/recovery/t/031_recovery_conflict.pl383
-rw-r--r--src/test/recovery/t/032_relfilenode_reuse.pl243
-rw-r--r--src/test/recovery/t/033_replay_tsp_drops.pl148
-rw-r--r--src/test/recovery/t/034_create_database.pl45
-rw-r--r--src/test/recovery/t/037_invalid_database.pl157
-rw-r--r--src/test/recovery/t/039_end_of_wal.pl485
-rw-r--r--src/test/recovery/t/cp_history_files10
39 files changed, 7785 insertions, 0 deletions
diff --git a/src/test/recovery/.gitignore b/src/test/recovery/.gitignore
new file mode 100644
index 0000000..871e943
--- /dev/null
+++ b/src/test/recovery/.gitignore
@@ -0,0 +1,2 @@
+# Generated by test suite
+/tmp_check/
diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile
new file mode 100644
index 0000000..c47eee2
--- /dev/null
+++ b/src/test/recovery/Makefile
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/recovery
+#
+# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/test/recovery/Makefile
+#
+#-------------------------------------------------------------------------
+
+EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm
+
+subdir = src/test/recovery
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+# required for 017_shm.pl and 027_stream_regress.pl
+REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)
+export REGRESS_SHLIB
+
+check:
+ $(prove_check)
+
+installcheck:
+ $(prove_installcheck)
+
+clean distclean maintainer-clean:
+ rm -rf tmp_check
diff --git a/src/test/recovery/README b/src/test/recovery/README
new file mode 100644
index 0000000..da11679
--- /dev/null
+++ b/src/test/recovery/README
@@ -0,0 +1,27 @@
+src/test/recovery/README
+
+Regression tests for recovery and replication
+=============================================
+
+This directory contains a test suite for recovery and replication.
+
+Running the tests
+=================
+
+NOTE: You must have given the --enable-tap-tests argument to configure.
+Also, to use "make installcheck", you must have built and installed
+contrib/pg_prewarm and contrib/test_decoding in addition to the core code.
+
+Run
+ make check
+or
+ make installcheck
+You can use "make installcheck" if you previously did "make install".
+In that case, the code in the installation tree is tested. With
+"make check", a temporary installation tree is built from the current
+sources and then tested.
+
+Either way, this test initializes, starts, and stops several test Postgres
+clusters.
+
+See src/test/perl/README for more info about running these tests.
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
new file mode 100644
index 0000000..8686409
--- /dev/null
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -0,0 +1,534 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Minimal test testing streaming replication
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+# A specific role is created to perform some tests related to replication,
+# and it needs proper authentication configuration.
+$node_primary->init(
+ allows_streaming => 1,
+ auth_extra => [ '--create-role', 'repl_role' ]);
+$node_primary->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1');
+$node_standby_1->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby_1->start;
+
+# Take backup of standby 1 (not mandatory, but useful to check if
+# pg_basebackup works on a standby).
+$node_standby_1->backup($backup_name);
+
+# Take a second backup of the standby while the primary is offline.
+$node_primary->stop;
+$node_standby_1->backup('my_backup_2');
+$node_primary->start;
+
+# Create second standby node linking to standby 1
+my $node_standby_2 = PostgreSQL::Test::Cluster->new('standby_2');
+$node_standby_2->init_from_backup($node_standby_1, $backup_name,
+ has_streaming => 1);
+$node_standby_2->start;
+
+# Create some content on primary and check its presence in standby nodes
+$node_primary->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a");
+
+# Wait for standbys to catch up
+my $primary_lsn = $node_primary->lsn('write');
+$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
+$node_standby_1->wait_for_catchup($node_standby_2, 'replay', $primary_lsn);
+
+my $result =
+ $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+print "standby 1: $result\n";
+is($result, qq(1002), 'check streamed content on standby 1');
+
+$result =
+ $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+print "standby 2: $result\n";
+is($result, qq(1002), 'check streamed content on standby 2');
+
+# Likewise, but for a sequence
+$node_primary->safe_psql('postgres',
+ "CREATE SEQUENCE seq1; SELECT nextval('seq1')");
+
+# Wait for standbys to catch up
+$primary_lsn = $node_primary->lsn('write');
+$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
+$node_standby_1->wait_for_catchup($node_standby_2, 'replay', $primary_lsn);
+
+$result = $node_standby_1->safe_psql('postgres', "SELECT * FROM seq1");
+print "standby 1: $result\n";
+is($result, qq(33|0|t), 'check streamed sequence content on standby 1');
+
+$result = $node_standby_2->safe_psql('postgres', "SELECT * FROM seq1");
+print "standby 2: $result\n";
+is($result, qq(33|0|t), 'check streamed sequence content on standby 2');
+
+# Check that only READ-only queries can run on standbys
+is($node_standby_1->psql('postgres', 'INSERT INTO tab_int VALUES (1)'),
+ 3, 'read-only queries on standby 1');
+is($node_standby_2->psql('postgres', 'INSERT INTO tab_int VALUES (1)'),
+ 3, 'read-only queries on standby 2');
+
+# Tests for connection parameter target_session_attrs
+note "testing connection parameter \"target_session_attrs\"";
+
+# Attempt to connect to $node1, then $node2, using target_session_attrs=$mode.
+# Expect to connect to $target_node (undef for failure) with given $status.
+sub test_target_session_attrs
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my $node1 = shift;
+ my $node2 = shift;
+ my $target_node = shift;
+ my $mode = shift;
+ my $status = shift;
+
+ my $node1_host = $node1->host;
+ my $node1_port = $node1->port;
+ my $node1_name = $node1->name;
+ my $node2_host = $node2->host;
+ my $node2_port = $node2->port;
+ my $node2_name = $node2->name;
+ my $target_port = undef;
+ $target_port = $target_node->port if (defined $target_node);
+ my $target_name = undef;
+ $target_name = $target_node->name if (defined $target_node);
+
+ # Build connection string for connection attempt.
+ my $connstr = "host=$node1_host,$node2_host ";
+ $connstr .= "port=$node1_port,$node2_port ";
+ $connstr .= "target_session_attrs=$mode";
+
+ # Attempt to connect, and if successful, get the server port number
+ # we connected to. Note we must pass the SQL command via the command
+ # line not stdin, else Perl may spit up trying to write to stdin of
+ # an already-failed psql process.
+ my ($ret, $stdout, $stderr) =
+ $node1->psql('postgres', undef,
+ extra_params => [ '-d', $connstr, '-c', 'SHOW port;' ]);
+ if ($status == 0)
+ {
+ is( $status == $ret && $stdout eq $target_port,
+ 1,
+ "connect to node $target_name with mode \"$mode\" and $node1_name,$node2_name listed"
+ );
+ }
+ else
+ {
+ print "status = $status\n";
+ print "ret = $ret\n";
+ print "stdout = $stdout\n";
+ print "stderr = $stderr\n";
+
+ is( $status == $ret && !defined $target_node,
+ 1,
+ "fail to connect with mode \"$mode\" and $node1_name,$node2_name listed"
+ );
+ }
+
+ return;
+}
+
+# Connect to primary in "read-write" mode with primary,standby1 list.
+test_target_session_attrs($node_primary, $node_standby_1, $node_primary,
+ "read-write", 0);
+
+# Connect to primary in "read-write" mode with standby1,primary list.
+test_target_session_attrs($node_standby_1, $node_primary, $node_primary,
+ "read-write", 0);
+
+# Connect to primary in "any" mode with primary,standby1 list.
+test_target_session_attrs($node_primary, $node_standby_1, $node_primary,
+ "any", 0);
+
+# Connect to standby1 in "any" mode with standby1,primary list.
+test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1,
+ "any", 0);
+
+# Connect to primary in "primary" mode with primary,standby1 list.
+test_target_session_attrs($node_primary, $node_standby_1, $node_primary,
+ "primary", 0);
+
+# Connect to primary in "primary" mode with standby1,primary list.
+test_target_session_attrs($node_standby_1, $node_primary, $node_primary,
+ "primary", 0);
+
+# Connect to standby1 in "read-only" mode with primary,standby1 list.
+test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1,
+ "read-only", 0);
+
+# Connect to standby1 in "read-only" mode with standby1,primary list.
+test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1,
+ "read-only", 0);
+
+# Connect to primary in "prefer-standby" mode with primary,primary list.
+test_target_session_attrs($node_primary, $node_primary, $node_primary,
+ "prefer-standby", 0);
+
+# Connect to standby1 in "prefer-standby" mode with primary,standby1 list.
+test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1,
+ "prefer-standby", 0);
+
+# Connect to standby1 in "prefer-standby" mode with standby1,primary list.
+test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1,
+ "prefer-standby", 0);
+
+# Connect to standby1 in "standby" mode with primary,standby1 list.
+test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1,
+ "standby", 0);
+
+# Connect to standby1 in "standby" mode with standby1,primary list.
+test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1,
+ "standby", 0);
+
+# Fail to connect in "read-write" mode with standby1,standby2 list.
+test_target_session_attrs($node_standby_1, $node_standby_2, undef,
+ "read-write", 2);
+
+# Fail to connect in "primary" mode with standby1,standby2 list.
+test_target_session_attrs($node_standby_1, $node_standby_2, undef,
+ "primary", 2);
+
+# Fail to connect in "read-only" mode with primary,primary list.
+test_target_session_attrs($node_primary, $node_primary, undef,
+ "read-only", 2);
+
+# Fail to connect in "standby" mode with primary,primary list.
+test_target_session_attrs($node_primary, $node_primary, undef, "standby", 2);
+
+# Test for SHOW commands using a WAL sender connection with a replication
+# role.
+note "testing SHOW commands for replication connection";
+
+$node_primary->psql(
+ 'postgres', "
+CREATE ROLE repl_role REPLICATION LOGIN;
+GRANT pg_read_all_settings TO repl_role;");
+my $primary_host = $node_primary->host;
+my $primary_port = $node_primary->port;
+my $connstr_common = "host=$primary_host port=$primary_port user=repl_role";
+my $connstr_rep = "$connstr_common replication=1";
+my $connstr_db = "$connstr_common replication=database dbname=postgres";
+
+# Test SHOW ALL
+my ($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres', 'SHOW ALL;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_rep ]);
+ok($ret == 0, "SHOW ALL with replication role and physical replication");
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres', 'SHOW ALL;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_db ]);
+ok($ret == 0, "SHOW ALL with replication role and logical replication");
+
+# Test SHOW with a user-settable parameter
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres', 'SHOW work_mem;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_rep ]);
+ok( $ret == 0,
+ "SHOW with user-settable parameter, replication role and physical replication"
+);
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres', 'SHOW work_mem;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_db ]);
+ok( $ret == 0,
+ "SHOW with user-settable parameter, replication role and logical replication"
+);
+
+# Test SHOW with a superuser-settable parameter
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres', 'SHOW primary_conninfo;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_rep ]);
+ok( $ret == 0,
+ "SHOW with superuser-settable parameter, replication role and physical replication"
+);
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres', 'SHOW primary_conninfo;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_db ]);
+ok( $ret == 0,
+ "SHOW with superuser-settable parameter, replication role and logical replication"
+);
+
+note "testing READ_REPLICATION_SLOT command for replication connection";
+
+my $slotname = 'test_read_replication_slot_physical';
+
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres',
+ 'READ_REPLICATION_SLOT non_existent_slot;',
+ extra_params => [ '-d', $connstr_rep ]);
+ok($ret == 0, "READ_REPLICATION_SLOT exit code 0 on success");
+like($stdout, qr/^\|\|$/,
+ "READ_REPLICATION_SLOT returns NULL values if slot does not exist");
+
+$node_primary->psql(
+ 'postgres',
+ "CREATE_REPLICATION_SLOT $slotname PHYSICAL RESERVE_WAL;",
+ extra_params => [ '-d', $connstr_rep ]);
+
+($ret, $stdout, $stderr) = $node_primary->psql(
+ 'postgres',
+ "READ_REPLICATION_SLOT $slotname;",
+ extra_params => [ '-d', $connstr_rep ]);
+ok($ret == 0, "READ_REPLICATION_SLOT success with existing slot");
+like($stdout, qr/^physical\|[^|]*\|1$/,
+ "READ_REPLICATION_SLOT returns tuple with slot information");
+
+$node_primary->psql(
+ 'postgres',
+ "DROP_REPLICATION_SLOT $slotname;",
+ extra_params => [ '-d', $connstr_rep ]);
+
+note "switching to physical replication slot";
+
+# Switch to using a physical replication slot. We can do this without a new
+# backup since physical slots can go backwards if needed. Do so on both
+# standbys. Since we're going to be testing things that affect the slot state,
+# also increase the standby feedback interval to ensure timely updates.
+my ($slotname_1, $slotname_2) = ('standby_1', 'standby_2');
+$node_primary->append_conf('postgresql.conf', "max_replication_slots = 4");
+$node_primary->restart;
+is( $node_primary->psql(
+ 'postgres',
+ qq[SELECT pg_create_physical_replication_slot('$slotname_1');]),
+ 0,
+ 'physical slot created on primary');
+$node_standby_1->append_conf('postgresql.conf',
+ "primary_slot_name = $slotname_1");
+$node_standby_1->append_conf('postgresql.conf',
+ "wal_receiver_status_interval = 1");
+$node_standby_1->append_conf('postgresql.conf', "max_replication_slots = 4");
+$node_standby_1->restart;
+is( $node_standby_1->psql(
+ 'postgres',
+ qq[SELECT pg_create_physical_replication_slot('$slotname_2');]),
+ 0,
+ 'physical slot created on intermediate replica');
+$node_standby_2->append_conf('postgresql.conf',
+ "primary_slot_name = $slotname_2");
+$node_standby_2->append_conf('postgresql.conf',
+ "wal_receiver_status_interval = 1");
+# should be able change primary_slot_name without restart
+# will wait effect in get_slot_xmins above
+$node_standby_2->reload;
+
+# Fetch xmin columns from slot's pg_replication_slots row, after waiting for
+# given boolean condition to be true to ensure we've reached a quiescent state
+sub get_slot_xmins
+{
+ my ($node, $slotname, $check_expr) = @_;
+
+ $node->poll_query_until(
+ 'postgres', qq[
+ SELECT $check_expr
+ FROM pg_catalog.pg_replication_slots
+ WHERE slot_name = '$slotname';
+ ]) or die "Timed out waiting for slot xmins to advance";
+
+ my $slotinfo = $node->slot($slotname);
+ return ($slotinfo->{'xmin'}, $slotinfo->{'catalog_xmin'});
+}
+
+# There's no hot standby feedback and there are no logical slots on either peer
+# so xmin and catalog_xmin should be null on both slots.
+my ($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of non-cascaded slot null with no hs_feedback');
+is($catalog_xmin, '',
+ 'catalog xmin of non-cascaded slot null with no hs_feedback');
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of cascaded slot null with no hs_feedback');
+is($catalog_xmin, '',
+ 'catalog xmin of cascaded slot null with no hs_feedback');
+
+# Replication still works?
+$node_primary->safe_psql('postgres', 'CREATE TABLE replayed(val integer);');
+
+sub replay_check
+{
+ my $newval = $node_primary->safe_psql('postgres',
+ 'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+ );
+ my $primary_lsn = $node_primary->lsn('write');
+ $node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn);
+ $node_standby_1->wait_for_catchup($node_standby_2, 'replay',
+ $primary_lsn);
+
+ $node_standby_1->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval])
+ or die "standby_1 didn't replay primary value $newval";
+ $node_standby_2->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval])
+ or die "standby_2 didn't replay standby_1 value $newval";
+ return;
+}
+
+replay_check();
+
+note "enabling hot_standby_feedback";
+
+# Enable hs_feedback. The slot should gain an xmin. We set the status interval
+# so we'll see the results promptly.
+$node_standby_1->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = on;');
+$node_standby_1->reload;
+$node_standby_2->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = on;');
+$node_standby_2->reload;
+replay_check();
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1,
+ "xmin IS NOT NULL AND catalog_xmin IS NULL");
+isnt($xmin, '', 'xmin of non-cascaded slot non-null with hs feedback');
+is($catalog_xmin, '',
+ 'catalog xmin of non-cascaded slot still null with hs_feedback');
+
+my ($xmin1, $catalog_xmin1) = get_slot_xmins($node_standby_1, $slotname_2,
+ "xmin IS NOT NULL AND catalog_xmin IS NULL");
+isnt($xmin1, '', 'xmin of cascaded slot non-null with hs feedback');
+is($catalog_xmin1, '',
+ 'catalog xmin of cascaded slot still null with hs_feedback');
+
+note "doing some work to advance xmin";
+$node_primary->safe_psql(
+ 'postgres', q{
+do $$
+begin
+ for i in 10000..11000 loop
+ -- use an exception block so that each iteration eats an XID
+ begin
+ insert into tab_int values (i);
+ exception
+ when division_by_zero then null;
+ end;
+ end loop;
+end$$;
+});
+
+$node_primary->safe_psql('postgres', 'VACUUM;');
+$node_primary->safe_psql('postgres', 'CHECKPOINT;');
+
+my ($xmin2, $catalog_xmin2) =
+ get_slot_xmins($node_primary, $slotname_1, "xmin <> '$xmin'");
+note "primary slot's new xmin $xmin2, old xmin $xmin";
+isnt($xmin2, $xmin, 'xmin of non-cascaded slot with hs feedback has changed');
+is($catalog_xmin2, '',
+ 'catalog xmin of non-cascaded slot still null with hs_feedback unchanged'
+);
+
+($xmin2, $catalog_xmin2) =
+ get_slot_xmins($node_standby_1, $slotname_2, "xmin <> '$xmin1'");
+note "standby_1 slot's new xmin $xmin2, old xmin $xmin1";
+isnt($xmin2, $xmin1, 'xmin of cascaded slot with hs feedback has changed');
+is($catalog_xmin2, '',
+ 'catalog xmin of cascaded slot still null with hs_feedback unchanged');
+
+note "disabling hot_standby_feedback";
+
+# Disable hs_feedback. Xmin should be cleared.
+$node_standby_1->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = off;');
+$node_standby_1->reload;
+$node_standby_2->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = off;');
+$node_standby_2->reload;
+replay_check();
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of non-cascaded slot null with hs feedback reset');
+is($catalog_xmin, '',
+ 'catalog xmin of non-cascaded slot still null with hs_feedback reset');
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
+is($catalog_xmin, '',
+ 'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf', "primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_primary);
+$node_standby_2->reload;
+
+# be sure do not streaming from cascade
+$node_standby_1->stop;
+
+my $newval = $node_primary->safe_psql('postgres',
+ 'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_primary->wait_for_catchup($node_standby_2);
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 didn't replay primary value $newval");
+
+# Drop any existing slots on the primary, for the follow-up tests.
+$node_primary->safe_psql('postgres',
+ "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;");
+
+# Test physical slot advancing and its durability. Create a new slot on
+# the primary, not used by any of the standbys. This reserves WAL at creation.
+my $phys_slot = 'phys_slot';
+$node_primary->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('$phys_slot', true);");
+# Generate some WAL, and switch to a new segment, used to check that
+# the previous segment is correctly getting recycled as the slot advancing
+# would recompute the minimum LSN calculated across all slots.
+my $segment_removed = $node_primary->safe_psql('postgres',
+ 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+chomp($segment_removed);
+$node_primary->psql(
+ 'postgres', "
+ CREATE TABLE tab_phys_slot (a int);
+ INSERT INTO tab_phys_slot VALUES (generate_series(1,10));
+ SELECT pg_switch_wal();");
+my $current_lsn =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_primary->psql('postgres',
+ "SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);"
+);
+is($psql_rc, '0', 'slot advancing with physical slot');
+my $phys_restart_lsn_pre = $node_primary->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"
+);
+chomp($phys_restart_lsn_pre);
+# Slot advance should persist across clean restarts.
+$node_primary->restart;
+my $phys_restart_lsn_post = $node_primary->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"
+);
+chomp($phys_restart_lsn_post);
+ok( ($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
+ "physical slot advance persists across restarts");
+
+# Check if the previous segment gets correctly recycled after the
+# server stopped cleanly, causing a shutdown checkpoint to be generated.
+my $primary_data = $node_primary->data_dir;
+ok(!-f "$primary_data/pg_wal/$segment_removed",
+ "WAL segment $segment_removed recycled after physical slot advancing");
+
+done_testing();
diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl
new file mode 100644
index 0000000..d69da4e
--- /dev/null
+++ b/src/test/recovery/t/002_archiving.pl
@@ -0,0 +1,144 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# test for archiving with hot standby
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Copy;
+
+# Initialize primary node, doing archives
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(
+ has_archiving => 1,
+ allows_streaming => 1);
+my $backup_name = 'my_backup';
+
+# Start it
+$node_primary->start;
+
+# Take backup for standby
+$node_primary->backup($backup_name);
+
+# Initialize standby node from backup, fetching WAL from archives
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+# Note that this makes the standby store its contents on the archives
+# of the primary.
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_restoring => 1);
+$node_standby->append_conf('postgresql.conf',
+ "wal_retrieve_retry_interval = '100ms'");
+
+# Set archive_cleanup_command and recovery_end_command, checking their
+# execution by the backend with dummy commands.
+my $data_dir = $node_standby->data_dir;
+my $archive_cleanup_command_file = "archive_cleanup_command.done";
+my $recovery_end_command_file = "recovery_end_command.done";
+$node_standby->append_conf(
+ 'postgresql.conf', qq(
+archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file'
+recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file'
+));
+$node_standby->start;
+
+# Create some content on primary
+$node_primary->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+
+# Note the presence of this checkpoint for the archive_cleanup_command
+# check done below, before switching to a new segment.
+$node_primary->safe_psql('postgres', "CHECKPOINT");
+
+# Done after the checkpoint to ensure that it is replayed on the standby,
+# for archive_cleanup_command.
+my $current_lsn =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Force archiving of WAL file to make it present on primary
+$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");
+
+# Add some more content, it should not be present on standby
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+
+# Wait until necessary replay has been done on standby
+my $caughtup_query =
+ "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+my $result =
+ $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result, qq(1000), 'check content from archives');
+
+# archive_cleanup_command is executed after generating a restart point,
+# with a checkpoint.
+$node_standby->safe_psql('postgres', q{CHECKPOINT});
+ok( -f "$data_dir/$archive_cleanup_command_file",
+ 'archive_cleanup_command executed on checkpoint');
+ok( !-f "$data_dir/$recovery_end_command_file",
+ 'recovery_end_command not executed yet');
+
+# Check the presence of temporary files specifically generated during
+# archive recovery. To ensure the presence of the temporary history
+# file, switch to a timeline large enough to allow a standby to recover
+# a history file from an archive. As this requires at least two timeline
+# switches, promote the existing standby first. Then create a second
+# standby based on the primary, using its archives. Finally, the second
+# standby is promoted.
+$node_standby->promote;
+
+# Wait until the history file has been stored on the archives of the
+# primary once the promotion of the standby completes. This ensures that
+# the second standby created below will be able to restore this file,
+# creating a RECOVERYHISTORY.
+my $primary_archive = $node_primary->archive_dir;
+$caughtup_query =
+ "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history')";
+$node_primary->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for archiving of 00000002.history";
+
+# recovery_end_command should have been triggered on promotion.
+ok( -f "$data_dir/$recovery_end_command_file",
+ 'recovery_end_command executed after promotion');
+
+my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2');
+$node_standby2->init_from_backup($node_primary, $backup_name,
+ has_restoring => 1);
+
+# Make execution of recovery_end_command fail. This should not affect
+# promotion, and its failure should be logged.
+$node_standby2->append_conf(
+ 'postgresql.conf', qq(
+recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'
+));
+
+$node_standby2->start;
+
+# Save the log location, to see the failure of recovery_end_command.
+my $log_location = -s $node_standby2->logfile;
+
+# Now promote standby2, and check that temporary files specifically
+# generated during archive recovery are removed by the end of recovery.
+$node_standby2->promote;
+
+# Check the logs of the standby to see that the commands have failed.
+my $log_contents = slurp_file($node_standby2->logfile, $log_location);
+my $node_standby2_data = $node_standby2->data_dir;
+
+like(
+ $log_contents,
+ qr/restored log file "00000002.history" from archive/s,
+ "00000002.history retrieved from the archives");
+ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY",
+ "RECOVERYHISTORY removed after promotion");
+ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG",
+ "RECOVERYXLOG removed after promotion");
+like(
+ $log_contents,
+ qr/WARNING:.*recovery_end_command/s,
+ "recovery_end_command failure detected in logs after promotion");
+
+done_testing();
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
new file mode 100644
index 0000000..e8e1a42
--- /dev/null
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -0,0 +1,186 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for recovery targets: name, timestamp, XID
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Time::HiRes qw(usleep);
+
+# Create and test a standby from given backup, with a certain recovery target.
+# Choose $until_lsn later than the transaction commit that causes the row
+# count to reach $num_rows, yet not later than the recovery target.
+sub test_recovery_standby
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my $test_name = shift;
+ my $node_name = shift;
+ my $node_primary = shift;
+ my $recovery_params = shift;
+ my $num_rows = shift;
+ my $until_lsn = shift;
+
+ my $node_standby = PostgreSQL::Test::Cluster->new($node_name);
+ $node_standby->init_from_backup($node_primary, 'my_backup',
+ has_restoring => 1);
+
+ foreach my $param_item (@$recovery_params)
+ {
+ $node_standby->append_conf('postgresql.conf', qq($param_item));
+ }
+
+ $node_standby->start;
+
+ # Wait until standby has replayed enough data
+ my $caughtup_query =
+ "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+ $node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+ # Create some content on primary and check its presence in standby
+ my $result =
+ $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+ is($result, qq($num_rows), "check standby content for $test_name");
+
+ # Stop standby node
+ $node_standby->teardown_node;
+
+ return;
+}
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(has_archiving => 1, allows_streaming => 1);
+
+# Bump the transaction ID epoch. This is useful to stress the portability
+# of recovery_target_xid parsing.
+system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir);
+
+# Start it
+$node_primary->start;
+
+# Create data before taking the backup, aimed at testing
+# recovery_target = 'immediate'
+$node_primary->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+my $lsn1 =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Take backup from which all operations will be run
+$node_primary->backup('my_backup');
+
+# Insert some data with used as a replay reference, with a recovery
+# target TXID.
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+my $ret = $node_primary->safe_psql('postgres',
+ "SELECT pg_current_wal_lsn(), pg_current_xact_id();");
+my ($lsn2, $recovery_txid) = split /\|/, $ret;
+
+# More data, with recovery target timestamp
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(2001,3000))");
+my $lsn3 =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+my $recovery_time = $node_primary->safe_psql('postgres', "SELECT now()");
+
+# Even more data, this time with a recovery target name
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(3001,4000))");
+my $recovery_name = "my_target";
+my $lsn4 =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+$node_primary->safe_psql('postgres',
+ "SELECT pg_create_restore_point('$recovery_name');");
+
+# And now for a recovery target LSN
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(4001,5000))");
+my $lsn5 = my $recovery_lsn =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(5001,6000))");
+
+# Force archiving of WAL file
+$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");
+
+# Test recovery targets
+my @recovery_params = ("recovery_target = 'immediate'");
+test_recovery_standby('immediate target',
+ 'standby_1', $node_primary, \@recovery_params, "1000", $lsn1);
+@recovery_params = ("recovery_target_xid = '$recovery_txid'");
+test_recovery_standby('XID', 'standby_2', $node_primary, \@recovery_params,
+ "2000", $lsn2);
+@recovery_params = ("recovery_target_time = '$recovery_time'");
+test_recovery_standby('time', 'standby_3', $node_primary, \@recovery_params,
+ "3000", $lsn3);
+@recovery_params = ("recovery_target_name = '$recovery_name'");
+test_recovery_standby('name', 'standby_4', $node_primary, \@recovery_params,
+ "4000", $lsn4);
+@recovery_params = ("recovery_target_lsn = '$recovery_lsn'");
+test_recovery_standby('LSN', 'standby_5', $node_primary, \@recovery_params,
+ "5000", $lsn5);
+
+# Multiple targets
+#
+# Multiple conflicting settings are not allowed, but setting the same
+# parameter multiple times or unsetting a parameter and setting a
+# different one is allowed.
+
+@recovery_params = (
+ "recovery_target_name = '$recovery_name'",
+ "recovery_target_name = ''",
+ "recovery_target_time = '$recovery_time'");
+test_recovery_standby('multiple overriding settings',
+ 'standby_6', $node_primary, \@recovery_params, "3000", $lsn3);
+
+my $node_standby = PostgreSQL::Test::Cluster->new('standby_7');
+$node_standby->init_from_backup($node_primary, 'my_backup',
+ has_restoring => 1);
+$node_standby->append_conf(
+ 'postgresql.conf', "recovery_target_name = '$recovery_name'
+recovery_target_time = '$recovery_time'");
+
+my $res = run_log(
+ [
+ 'pg_ctl', '-D', $node_standby->data_dir, '-l',
+ $node_standby->logfile, 'start'
+ ]);
+ok(!$res, 'invalid recovery startup fails');
+
+my $logfile = slurp_file($node_standby->logfile());
+ok($logfile =~ qr/multiple recovery targets specified/,
+ 'multiple conflicting settings');
+
+# Check behavior when recovery ends before target is reached
+
+$node_standby = PostgreSQL::Test::Cluster->new('standby_8');
+$node_standby->init_from_backup(
+ $node_primary, 'my_backup',
+ has_restoring => 1,
+ standby => 0);
+$node_standby->append_conf('postgresql.conf',
+ "recovery_target_name = 'does_not_exist'");
+
+run_log(
+ [
+ 'pg_ctl', '-D', $node_standby->data_dir, '-l',
+ $node_standby->logfile, 'start'
+ ]);
+
+# wait for postgres to terminate
+foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default)
+{
+ last if !-f $node_standby->data_dir . '/postmaster.pid';
+ usleep(100_000);
+}
+$logfile = slurp_file($node_standby->logfile());
+ok( $logfile =~
+ qr/FATAL: .* recovery ended before configured recovery target was reached/,
+ 'recovery end before target reached is a fatal error');
+
+done_testing();
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
new file mode 100644
index 0000000..3203d93
--- /dev/null
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -0,0 +1,110 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for timeline switch
+use strict;
+use warnings;
+use File::Path qw(rmtree);
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+$ENV{PGDATABASE} = 'postgres';
+
+# Ensure that a cascading standby is able to follow a newly-promoted standby
+# on a new timeline.
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create two standbys linking to it
+my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1');
+$node_standby_1->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby_1->start;
+my $node_standby_2 = PostgreSQL::Test::Cluster->new('standby_2');
+$node_standby_2->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby_2->start;
+
+# Create some content on primary
+$node_primary->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+
+# Wait until standby has replayed enough data on standby 1
+$node_primary->wait_for_catchup($node_standby_1);
+
+# Stop and remove primary
+$node_primary->teardown_node;
+
+# promote standby 1 using "pg_promote", switching it to a new timeline
+my $psql_out = '';
+$node_standby_1->psql(
+ 'postgres',
+ "SELECT pg_promote(wait_seconds => 300)",
+ stdout => \$psql_out);
+is($psql_out, 't', "promotion of standby with pg_promote");
+
+# Switch standby 2 to replay from standby 1
+my $connstr_1 = $node_standby_1->connstr;
+$node_standby_2->append_conf(
+ 'postgresql.conf', qq(
+primary_conninfo='$connstr_1'
+));
+$node_standby_2->restart;
+
+# Insert some data in standby 1 and check its presence in standby 2
+# to ensure that the timeline switch has been done.
+$node_standby_1->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+$node_standby_1->wait_for_catchup($node_standby_2);
+
+my $result =
+ $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result, qq(2000), 'check content of standby 2');
+
+
+# Ensure that a standby is able to follow a primary on a newer timeline
+# when WAL archiving is enabled.
+
+# Initialize primary node
+my $node_primary_2 = PostgreSQL::Test::Cluster->new('primary_2');
+$node_primary_2->init(allows_streaming => 1, has_archiving => 1);
+$node_primary_2->append_conf(
+ 'postgresql.conf', qq(
+wal_keep_size = 512MB
+));
+$node_primary_2->start;
+
+# Take backup
+$node_primary_2->backup($backup_name);
+
+# Create standby node
+my $node_standby_3 = PostgreSQL::Test::Cluster->new('standby_3');
+$node_standby_3->init_from_backup($node_primary_2, $backup_name,
+ has_streaming => 1);
+
+# Restart primary node in standby mode and promote it, switching it
+# to a new timeline.
+$node_primary_2->set_standby_mode;
+$node_primary_2->restart;
+$node_primary_2->promote;
+
+# Start standby node, create some content on primary and check its presence
+# in standby, to ensure that the timeline switch has been done.
+$node_standby_3->start;
+$node_primary_2->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT 1 AS a");
+$node_primary_2->wait_for_catchup($node_standby_3);
+
+my $result_2 =
+ $node_standby_3->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result_2, qq(1), 'check content of standby 3');
+
+done_testing();
diff --git a/src/test/recovery/t/005_replay_delay.pl b/src/test/recovery/t/005_replay_delay.pl
new file mode 100644
index 0000000..370fc9e
--- /dev/null
+++ b/src/test/recovery/t/005_replay_delay.pl
@@ -0,0 +1,114 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Checks for recovery_min_apply_delay and recovery pause
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+# And some content
+$node_primary->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1, 10) AS a");
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create streaming standby from backup
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+my $delay = 3;
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf(
+ 'postgresql.conf', qq(
+recovery_min_apply_delay = '${delay}s'
+));
+$node_standby->start;
+
+# Make new content on primary and check its presence in standby depending
+# on the delay applied above. Before doing the insertion, get the
+# current timestamp that will be used as a comparison base. Even on slow
+# machines, this allows to have a predictable behavior when comparing the
+# delay between data insertion moment on primary and replay time on standby.
+my $primary_insert_time = time();
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(11, 20))");
+
+# Now wait for replay to complete on standby. We're done waiting when the
+# standby has replayed up to the previously saved primary LSN.
+my $until_lsn =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+
+$node_standby->poll_query_until('postgres',
+ "SELECT (pg_last_wal_replay_lsn() - '$until_lsn'::pg_lsn) >= 0")
+ or die "standby never caught up";
+
+# This test is successful if and only if the LSN has been applied with at least
+# the configured apply delay.
+ok(time() - $primary_insert_time >= $delay,
+ "standby applies WAL only after replication delay");
+
+
+# Check that recovery can be paused or resumed expectedly.
+my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2');
+$node_standby2->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby2->start;
+
+# Recovery is not yet paused.
+is( $node_standby2->safe_psql(
+ 'postgres', "SELECT pg_get_wal_replay_pause_state()"),
+ 'not paused',
+ 'pg_get_wal_replay_pause_state() reports not paused');
+
+# Request to pause recovery and wait until it's actually paused.
+$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_pause()");
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(21,30))");
+$node_standby2->poll_query_until('postgres',
+ "SELECT pg_get_wal_replay_pause_state() = 'paused'")
+ or die "Timed out while waiting for recovery to be paused";
+
+# Even if new WAL records are streamed from the primary,
+# recovery in the paused state doesn't replay them.
+my $receive_lsn =
+ $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_receive_lsn()");
+my $replay_lsn =
+ $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_replay_lsn()");
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(31,40))");
+$node_standby2->poll_query_until('postgres',
+ "SELECT '$receive_lsn'::pg_lsn < pg_last_wal_receive_lsn()")
+ or die "Timed out while waiting for new WAL to be streamed";
+is( $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_replay_lsn()"),
+ qq($replay_lsn),
+ 'no WAL is replayed in the paused state');
+
+# Request to resume recovery and wait until it's actually resumed.
+$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_resume()");
+$node_standby2->poll_query_until('postgres',
+ "SELECT pg_get_wal_replay_pause_state() = 'not paused' AND pg_last_wal_replay_lsn() > '$replay_lsn'::pg_lsn"
+) or die "Timed out while waiting for recovery to be resumed";
+
+# Check that the paused state ends and promotion continues if a promotion
+# is triggered while recovery is paused.
+$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_pause()");
+$node_primary->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(41,50))");
+$node_standby2->poll_query_until('postgres',
+ "SELECT pg_get_wal_replay_pause_state() = 'paused'")
+ or die "Timed out while waiting for recovery to be paused";
+
+$node_standby2->promote;
+$node_standby2->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()")
+ or die "Timed out while waiting for promotion to finish";
+
+done_testing();
diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl
new file mode 100644
index 0000000..a9edd8c
--- /dev/null
+++ b/src/test/recovery/t/006_logical_decoding.pl
@@ -0,0 +1,275 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Testing of logical decoding using SQL interface and/or pg_recvlogical
+#
+# Most logical decoding tests are in contrib/test_decoding. This module
+# is for work that doesn't fit well there, like where server restarts
+# are required.
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Config;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', qq(
+wal_level = logical
+));
+$node_primary->start;
+my $backup_name = 'primary_backup';
+
+$node_primary->safe_psql('postgres',
+ qq[CREATE TABLE decoding_test(x integer, y text);]);
+
+$node_primary->safe_psql('postgres',
+ qq[SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');]
+);
+
+# Cover walsender error shutdown code
+my ($result, $stdout, $stderr) = $node_primary->psql(
+ 'template1',
+ qq[START_REPLICATION SLOT test_slot LOGICAL 0/0],
+ replication => 'database');
+ok( $stderr =~
+ m/replication slot "test_slot" was not created in this database/,
+ "Logical decoding correctly fails to start");
+
+($result, $stdout, $stderr) = $node_primary->psql(
+ 'template1',
+ qq[READ_REPLICATION_SLOT test_slot;],
+ replication => 'database');
+like(
+ $stderr,
+ qr/cannot use READ_REPLICATION_SLOT with a logical replication slot/,
+ 'READ_REPLICATION_SLOT not supported for logical slots');
+
+# Check case of walsender not using a database connection. Logical
+# decoding should not be allowed.
+($result, $stdout, $stderr) = $node_primary->psql(
+ 'template1',
+ qq[START_REPLICATION SLOT s1 LOGICAL 0/1],
+ replication => 'true');
+ok($stderr =~ /ERROR: logical decoding requires a database connection/,
+ "Logical decoding fails on non-database connection");
+
+$node_primary->safe_psql('postgres',
+ qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,10) s;]
+);
+
+# Basic decoding works
+$result = $node_primary->safe_psql('postgres',
+ qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]);
+is(scalar(my @foobar = split /^/m, $result),
+ 12, 'Decoding produced 12 rows inc BEGIN/COMMIT');
+
+# If we immediately crash the server we might lose the progress we just made
+# and replay the same changes again. But a clean shutdown should never repeat
+# the same changes when we use the SQL decoding interface.
+$node_primary->restart('fast');
+
+# There are no new writes, so the result should be empty.
+$result = $node_primary->safe_psql('postgres',
+ qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]);
+chomp($result);
+is($result, '', 'Decoding after fast restart repeats no rows');
+
+# Insert some rows and verify that we get the same results from pg_recvlogical
+# and the SQL interface.
+$node_primary->safe_psql('postgres',
+ qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,4) s;]
+);
+
+my $expected = q{BEGIN
+table public.decoding_test: INSERT: x[integer]:1 y[text]:'1'
+table public.decoding_test: INSERT: x[integer]:2 y[text]:'2'
+table public.decoding_test: INSERT: x[integer]:3 y[text]:'3'
+table public.decoding_test: INSERT: x[integer]:4 y[text]:'4'
+COMMIT};
+
+my $stdout_sql = $node_primary->safe_psql('postgres',
+ qq[SELECT data FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');]
+);
+is($stdout_sql, $expected, 'got expected output from SQL decoding session');
+
+my $endpos = $node_primary->safe_psql('postgres',
+ "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;"
+);
+print "waiting to replay $endpos\n";
+
+# Insert some rows after $endpos, which we won't read.
+$node_primary->safe_psql('postgres',
+ qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(5,50) s;]
+);
+
+my $stdout_recv = $node_primary->pg_recvlogical_upto(
+ 'postgres', 'test_slot', $endpos,
+ $PostgreSQL::Test::Utils::timeout_default,
+ 'include-xids' => '0',
+ 'skip-empty-xacts' => '1');
+chomp($stdout_recv);
+is($stdout_recv, $expected,
+ 'got same expected output from pg_recvlogical decoding session');
+
+$node_primary->poll_query_until('postgres',
+ "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'test_slot' AND active_pid IS NULL)"
+) or die "slot never became inactive";
+
+$stdout_recv = $node_primary->pg_recvlogical_upto(
+ 'postgres', 'test_slot', $endpos,
+ $PostgreSQL::Test::Utils::timeout_default,
+ 'include-xids' => '0',
+ 'skip-empty-xacts' => '1');
+chomp($stdout_recv);
+is($stdout_recv, '', 'pg_recvlogical acknowledged changes');
+
+$node_primary->safe_psql('postgres', 'CREATE DATABASE otherdb');
+
+is( $node_primary->psql(
+ 'otherdb',
+ "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;"
+ ),
+ 3,
+ 'replaying logical slot from another database fails');
+
+$node_primary->safe_psql('otherdb',
+ qq[SELECT pg_create_logical_replication_slot('otherdb_slot', 'test_decoding');]
+);
+
+# make sure you can't drop a slot while active
+SKIP:
+{
+
+ # some Windows Perls at least don't like IPC::Run's start/kill_kill regime.
+ skip "Test fails on Windows perl", 2 if $Config{osname} eq 'MSWin32';
+
+ my $pg_recvlogical = IPC::Run::start(
+ [
+ 'pg_recvlogical', '-d', $node_primary->connstr('otherdb'),
+ '-S', 'otherdb_slot', '-f', '-', '--start'
+ ]);
+ $node_primary->poll_query_until('otherdb',
+ "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NOT NULL)"
+ ) or die "slot never became active";
+ is($node_primary->psql('postgres', 'DROP DATABASE otherdb'),
+ 3, 'dropping a DB with active logical slots fails');
+ $pg_recvlogical->kill_kill;
+ is($node_primary->slot('otherdb_slot')->{'slot_name'},
+ undef, 'logical slot still exists');
+}
+
+$node_primary->poll_query_until('otherdb',
+ "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NULL)"
+) or die "slot never became inactive";
+
+is($node_primary->psql('postgres', 'DROP DATABASE otherdb'),
+ 0, 'dropping a DB with inactive logical slots succeeds');
+is($node_primary->slot('otherdb_slot')->{'slot_name'},
+ undef, 'logical slot was actually dropped with DB');
+
+# Test logical slot advancing and its durability.
+my $logical_slot = 'logical_slot';
+$node_primary->safe_psql('postgres',
+ "SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);"
+);
+$node_primary->psql(
+ 'postgres', "
+ CREATE TABLE tab_logical_slot (a int);
+ INSERT INTO tab_logical_slot VALUES (generate_series(1,10));");
+my $current_lsn =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_primary->psql('postgres',
+ "SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);"
+);
+is($psql_rc, '0', 'slot advancing with logical slot');
+my $logical_restart_lsn_pre = $node_primary->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';"
+);
+chomp($logical_restart_lsn_pre);
+# Slot advance should persist across clean restarts.
+$node_primary->restart;
+my $logical_restart_lsn_post = $node_primary->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';"
+);
+chomp($logical_restart_lsn_post);
+ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0,
+ "logical slot advance persists across restarts");
+
+my $stats_test_slot1 = 'test_slot';
+my $stats_test_slot2 = 'logical_slot';
+
+# Test that reset works for pg_stat_replication_slots
+
+# Stats exist for stats test slot 1
+is( $node_primary->safe_psql(
+ 'postgres',
+ qq(SELECT total_bytes > 0, stats_reset IS NULL FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1')
+ ),
+ qq(t|t),
+ qq(Total bytes is > 0 and stats_reset is NULL for slot '$stats_test_slot1'.)
+);
+
+# Do reset of stats for stats test slot 1
+$node_primary->safe_psql('postgres',
+ qq(SELECT pg_stat_reset_replication_slot('$stats_test_slot1')));
+
+# Get reset value after reset
+my $reset1 = $node_primary->safe_psql('postgres',
+ qq(SELECT stats_reset FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1')
+);
+
+# Do reset again
+$node_primary->safe_psql('postgres',
+ qq(SELECT pg_stat_reset_replication_slot('$stats_test_slot1')));
+
+is( $node_primary->safe_psql(
+ 'postgres',
+ qq(SELECT stats_reset > '$reset1'::timestamptz, total_bytes = 0 FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1')
+ ),
+ qq(t|t),
+ qq(Check that reset timestamp is later after the second reset of stats for slot '$stats_test_slot1' and confirm total_bytes was set to 0.)
+);
+
+# Check that test slot 2 has NULL in reset timestamp
+is( $node_primary->safe_psql(
+ 'postgres',
+ qq(SELECT stats_reset IS NULL FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot2')
+ ),
+ qq(t),
+ qq(Stats_reset is NULL for slot '$stats_test_slot2' before reset.));
+
+# Get reset value again for test slot 1
+$reset1 = $node_primary->safe_psql('postgres',
+ qq(SELECT stats_reset FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1')
+);
+
+# Reset stats for all replication slots
+$node_primary->safe_psql('postgres',
+ qq(SELECT pg_stat_reset_replication_slot(NULL)));
+
+# Check that test slot 2 reset timestamp is no longer NULL after reset
+is( $node_primary->safe_psql(
+ 'postgres',
+ qq(SELECT stats_reset IS NOT NULL FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot2')
+ ),
+ qq(t),
+ qq(Stats_reset is not NULL for slot '$stats_test_slot2' after reset all.)
+);
+
+is( $node_primary->safe_psql(
+ 'postgres',
+ qq(SELECT stats_reset > '$reset1'::timestamptz FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1')
+ ),
+ qq(t),
+ qq(Check that reset timestamp is later after resetting stats for slot '$stats_test_slot1' again.)
+);
+
+# done with the node
+$node_primary->stop;
+
+done_testing();
diff --git a/src/test/recovery/t/007_sync_rep.pl b/src/test/recovery/t/007_sync_rep.pl
new file mode 100644
index 0000000..86f89c6
--- /dev/null
+++ b/src/test/recovery/t/007_sync_rep.pl
@@ -0,0 +1,221 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Minimal test testing synchronous replication sync_state transition
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Query checking sync_priority and sync_state of each standby
+my $check_sql =
+ "SELECT application_name, sync_priority, sync_state FROM pg_stat_replication ORDER BY application_name;";
+
+# Check that sync_state of each standby is expected (waiting till it is).
+# If $setting is given, synchronous_standby_names is set to it and
+# the configuration file is reloaded before the test.
+sub test_sync_state
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my ($self, $expected, $msg, $setting) = @_;
+
+ if (defined($setting))
+ {
+ $self->safe_psql('postgres',
+ "ALTER SYSTEM SET synchronous_standby_names = '$setting';");
+ $self->reload;
+ }
+
+ ok($self->poll_query_until('postgres', $check_sql, $expected), $msg);
+ return;
+}
+
+# Start a standby and check that it is registered within the WAL sender
+# array of the given primary. This polls the primary's pg_stat_replication
+# until the standby is confirmed as registered.
+sub start_standby_and_wait
+{
+ my ($primary, $standby) = @_;
+ my $primary_name = $primary->name;
+ my $standby_name = $standby->name;
+ my $query =
+ "SELECT count(1) = 1 FROM pg_stat_replication WHERE application_name = '$standby_name'";
+
+ $standby->start;
+
+ print("### Waiting for standby \"$standby_name\" on \"$primary_name\"\n");
+ $primary->poll_query_until('postgres', $query);
+ return;
+}
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+my $backup_name = 'primary_backup';
+
+# Take backup
+$node_primary->backup($backup_name);
+
+# Create all the standbys. Their status on the primary is checked to ensure
+# the ordering of each one of them in the WAL sender array of the primary.
+
+# Create standby1 linking to primary
+my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby1');
+$node_standby_1->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+start_standby_and_wait($node_primary, $node_standby_1);
+
+# Create standby2 linking to primary
+my $node_standby_2 = PostgreSQL::Test::Cluster->new('standby2');
+$node_standby_2->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+start_standby_and_wait($node_primary, $node_standby_2);
+
+# Create standby3 linking to primary
+my $node_standby_3 = PostgreSQL::Test::Cluster->new('standby3');
+$node_standby_3->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+start_standby_and_wait($node_primary, $node_standby_3);
+
+# Check that sync_state is determined correctly when
+# synchronous_standby_names is specified in old syntax.
+test_sync_state(
+ $node_primary, qq(standby1|1|sync
+standby2|2|potential
+standby3|0|async),
+ 'old syntax of synchronous_standby_names',
+ 'standby1,standby2');
+
+# Check that all the standbys are considered as either sync or
+# potential when * is specified in synchronous_standby_names.
+# Note that standby1 is chosen as sync standby because
+# it's stored in the head of WalSnd array which manages
+# all the standbys though they have the same priority.
+test_sync_state(
+ $node_primary, qq(standby1|1|sync
+standby2|1|potential
+standby3|1|potential),
+ 'asterisk in synchronous_standby_names',
+ '*');
+
+# Stop and start standbys to rearrange the order of standbys
+# in WalSnd array. Now, if standbys have the same priority,
+# standby2 is selected preferentially and standby3 is next.
+$node_standby_1->stop;
+$node_standby_2->stop;
+$node_standby_3->stop;
+
+# Make sure that each standby reports back to the primary in the wanted
+# order.
+start_standby_and_wait($node_primary, $node_standby_2);
+start_standby_and_wait($node_primary, $node_standby_3);
+
+# Specify 2 as the number of sync standbys.
+# Check that two standbys are in 'sync' state.
+test_sync_state(
+ $node_primary, qq(standby2|2|sync
+standby3|3|sync),
+ '2 synchronous standbys',
+ '2(standby1,standby2,standby3)');
+
+# Start standby1
+start_standby_and_wait($node_primary, $node_standby_1);
+
+# Create standby4 linking to primary
+my $node_standby_4 = PostgreSQL::Test::Cluster->new('standby4');
+$node_standby_4->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby_4->start;
+
+# Check that standby1 and standby2 whose names appear earlier in
+# synchronous_standby_names are considered as sync. Also check that
+# standby3 appearing later represents potential, and standby4 is
+# in 'async' state because it's not in the list.
+test_sync_state(
+ $node_primary, qq(standby1|1|sync
+standby2|2|sync
+standby3|3|potential
+standby4|0|async),
+ '2 sync, 1 potential, and 1 async');
+
+# Check that sync_state of each standby is determined correctly
+# when num_sync exceeds the number of names of potential sync standbys
+# specified in synchronous_standby_names.
+test_sync_state(
+ $node_primary, qq(standby1|0|async
+standby2|4|sync
+standby3|3|sync
+standby4|1|sync),
+ 'num_sync exceeds the num of potential sync standbys',
+ '6(standby4,standby0,standby3,standby2)');
+
+# The setting that * comes before another standby name is acceptable
+# but does not make sense in most cases. Check that sync_state is
+# chosen properly even in case of that setting. standby1 is selected
+# as synchronous as it has the highest priority, and is followed by a
+# second standby listed first in the WAL sender array, which is
+# standby2 in this case.
+test_sync_state(
+ $node_primary, qq(standby1|1|sync
+standby2|2|sync
+standby3|2|potential
+standby4|2|potential),
+ 'asterisk before another standby name',
+ '2(standby1,*,standby2)');
+
+# Check that the setting of '2(*)' chooses standby2 and standby3 that are stored
+# earlier in WalSnd array as sync standbys.
+test_sync_state(
+ $node_primary, qq(standby1|1|potential
+standby2|1|sync
+standby3|1|sync
+standby4|1|potential),
+ 'multiple standbys having the same priority are chosen as sync',
+ '2(*)');
+
+# Stop Standby3 which is considered in 'sync' state.
+$node_standby_3->stop;
+
+# Check that the state of standby1 stored earlier in WalSnd array than
+# standby4 is transited from potential to sync.
+test_sync_state(
+ $node_primary, qq(standby1|1|sync
+standby2|1|sync
+standby4|1|potential),
+ 'potential standby found earlier in array is promoted to sync');
+
+# Check that standby1 and standby2 are chosen as sync standbys
+# based on their priorities.
+test_sync_state(
+ $node_primary, qq(standby1|1|sync
+standby2|2|sync
+standby4|0|async),
+ 'priority-based sync replication specified by FIRST keyword',
+ 'FIRST 2(standby1, standby2)');
+
+# Check that all the listed standbys are considered as candidates
+# for sync standbys in a quorum-based sync replication.
+test_sync_state(
+ $node_primary, qq(standby1|1|quorum
+standby2|1|quorum
+standby4|0|async),
+ '2 quorum and 1 async',
+ 'ANY 2(standby1, standby2)');
+
+# Start Standby3 which will be considered in 'quorum' state.
+$node_standby_3->start;
+
+# Check that the setting of 'ANY 2(*)' chooses all standbys as
+# candidates for quorum sync standbys.
+test_sync_state(
+ $node_primary, qq(standby1|1|quorum
+standby2|1|quorum
+standby3|1|quorum
+standby4|1|quorum),
+ 'all standbys are considered as candidates for quorum sync standbys',
+ 'ANY 2(*)');
+
+done_testing();
diff --git a/src/test/recovery/t/008_fsm_truncation.pl b/src/test/recovery/t/008_fsm_truncation.pl
new file mode 100644
index 0000000..5be2153
--- /dev/null
+++ b/src/test/recovery/t/008_fsm_truncation.pl
@@ -0,0 +1,100 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test WAL replay of FSM changes.
+#
+# FSM changes don't normally need to be WAL-logged, except for truncation.
+# The FSM mustn't return a page that doesn't exist (anymore).
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+
+$node_primary->append_conf(
+ 'postgresql.conf', qq{
+wal_log_hints = on
+max_prepared_transactions = 5
+autovacuum = off
+});
+
+# Create a primary node and its standby, initializing both with some data
+# at the same time.
+$node_primary->start;
+
+$node_primary->backup('primary_backup');
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, 'primary_backup',
+ has_streaming => 1);
+$node_standby->start;
+
+$node_primary->psql(
+ 'postgres', qq{
+create table testtab (a int, b char(100));
+insert into testtab select generate_series(1,1000), 'foo';
+insert into testtab select generate_series(1,1000), 'foo';
+delete from testtab where ctid > '(8,0)';
+});
+
+# Take a lock on the table to prevent following vacuum from truncating it
+$node_primary->psql(
+ 'postgres', qq{
+begin;
+lock table testtab in row share mode;
+prepare transaction 'p1';
+});
+
+# Vacuum, update FSM without truncation
+$node_primary->psql('postgres', 'vacuum verbose testtab');
+
+# Force a checkpoint
+$node_primary->psql('postgres', 'checkpoint');
+
+# Now do some more insert/deletes, another vacuum to ensure full-page writes
+# are done
+$node_primary->psql(
+ 'postgres', qq{
+insert into testtab select generate_series(1,1000), 'foo';
+delete from testtab where ctid > '(8,0)';
+vacuum verbose testtab;
+});
+
+# Ensure all buffers are now clean on the standby
+$node_standby->psql('postgres', 'checkpoint');
+
+# Release the lock, vacuum again which should lead to truncation
+$node_primary->psql(
+ 'postgres', qq{
+rollback prepared 'p1';
+vacuum verbose testtab;
+});
+
+$node_primary->psql('postgres', 'checkpoint');
+my $until_lsn =
+ $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Wait long enough for standby to receive and apply all WAL
+my $caughtup_query =
+ "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+# Promote the standby
+$node_standby->promote;
+$node_standby->psql('postgres', 'checkpoint');
+
+# Restart to discard in-memory copy of FSM
+$node_standby->restart;
+
+# Insert should work on standby
+is( $node_standby->psql(
+ 'postgres',
+ qq{insert into testtab select generate_series(1,1000), 'foo';}),
+ 0,
+ 'INSERT succeeds with truncated relation FSM');
+
+done_testing();
diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
new file mode 100644
index 0000000..3e25b8c
--- /dev/null
+++ b/src/test/recovery/t/009_twophase.pl
@@ -0,0 +1,484 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests dedicated to two-phase commit in recovery
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $psql_out = '';
+my $psql_rc = '';
+
+sub configure_and_reload
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my ($node, $parameter) = @_;
+ my $name = $node->name;
+
+ $node->append_conf(
+ 'postgresql.conf', qq(
+ $parameter
+ ));
+ $node->psql('postgres', "SELECT pg_reload_conf()", stdout => \$psql_out);
+ is($psql_out, 't', "reload node $name with $parameter");
+ return;
+}
+
+# Set up two nodes, which will alternately be primary and replication standby.
+
+# Setup london node
+my $node_london = PostgreSQL::Test::Cluster->new("london");
+$node_london->init(allows_streaming => 1);
+$node_london->append_conf(
+ 'postgresql.conf', qq(
+ max_prepared_transactions = 10
+ log_checkpoints = true
+));
+$node_london->start;
+$node_london->backup('london_backup');
+
+# Setup paris node
+my $node_paris = PostgreSQL::Test::Cluster->new('paris');
+$node_paris->init_from_backup($node_london, 'london_backup',
+ has_streaming => 1);
+$node_paris->start;
+
+# Switch to synchronous replication in both directions
+configure_and_reload($node_london, "synchronous_standby_names = 'paris'");
+configure_and_reload($node_paris, "synchronous_standby_names = 'london'");
+
+# Set up nonce names for current primary and standby nodes
+note "Initially, london is primary and paris is standby";
+my ($cur_primary, $cur_standby) = ($node_london, $node_paris);
+my $cur_primary_name = $cur_primary->name;
+
+# Create table we'll use in the test transactions
+$cur_primary->psql('postgres', "CREATE TABLE t_009_tbl (id int, msg text)");
+
+###############################################################################
+# Check that we can commit and abort transaction after soft restart.
+# Here checkpoint happens before shutdown and no WAL replay will occur at next
+# startup. In this case postgres re-creates shared-memory state from twophase
+# files.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (1, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (2, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_1';
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (3, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (4, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_2';");
+$cur_primary->stop;
+$cur_primary->start;
+
+$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_1'");
+is($psql_rc, '0', 'Commit prepared transaction after restart');
+
+$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_2'");
+is($psql_rc, '0', 'Rollback prepared transaction after restart');
+
+###############################################################################
+# Check that we can commit and abort after a hard restart.
+# At next startup, WAL replay will re-create shared memory state for prepared
+# transaction using dedicated WAL records.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ CHECKPOINT;
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (5, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (6, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_3';
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (7, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (8, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_4';");
+$cur_primary->teardown_node;
+$cur_primary->start;
+
+$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_3'");
+is($psql_rc, '0', 'Commit prepared transaction after teardown');
+
+$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_4'");
+is($psql_rc, '0', 'Rollback prepared transaction after teardown');
+
+###############################################################################
+# Check that WAL replay can handle several transactions with same GID name.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ CHECKPOINT;
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (9, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (10, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_5';
+ COMMIT PREPARED 'xact_009_5';
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (11, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (12, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_5';");
+$cur_primary->teardown_node;
+$cur_primary->start;
+
+$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_5'");
+is($psql_rc, '0', 'Replay several transactions with same GID');
+
+###############################################################################
+# Check that WAL replay cleans up its shared memory state and releases locks
+# while replaying transaction commits.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (13, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (14, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_6';
+ COMMIT PREPARED 'xact_009_6';");
+$cur_primary->teardown_node;
+$cur_primary->start;
+$psql_rc = $cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (15, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (16, 'issued to ${cur_primary_name}');
+ -- This prepare can fail due to conflicting GID or locks conflicts if
+ -- replay did not fully cleanup its state on previous commit.
+ PREPARE TRANSACTION 'xact_009_7';");
+is($psql_rc, '0', "Cleanup of shared memory state for 2PC commit");
+
+$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_7'");
+
+###############################################################################
+# Check that WAL replay will cleanup its shared memory state on running standby.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (17, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (18, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_8';
+ COMMIT PREPARED 'xact_009_8';");
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0',
+ "Cleanup of shared memory state on running standby without checkpoint");
+
+###############################################################################
+# Same as in previous case, but let's force checkpoint on standby between
+# prepare and commit to use on-disk twophase files.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (19, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (20, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_9';");
+$cur_standby->psql('postgres', "CHECKPOINT");
+$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_9'");
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0',
+ "Cleanup of shared memory state on running standby after checkpoint");
+
+###############################################################################
+# Check that prepared transactions can be committed on promoted standby.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (21, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_10';");
+$cur_primary->teardown_node;
+$cur_standby->promote;
+
+# change roles
+note "Now paris is primary and london is standby";
+($cur_primary, $cur_standby) = ($node_paris, $node_london);
+$cur_primary_name = $cur_primary->name;
+
+# because london is not running at this point, we can't use syncrep commit
+# on this command
+$psql_rc = $cur_primary->psql('postgres',
+ "SET synchronous_commit = off; COMMIT PREPARED 'xact_009_10'");
+is($psql_rc, '0', "Restore of prepared transaction on promoted standby");
+
+# restart old primary as new standby
+$cur_standby->enable_streaming($cur_primary);
+$cur_standby->start;
+
+###############################################################################
+# Check that prepared transactions are replayed after soft restart of standby
+# while primary is down. Since standby knows that primary is down it uses a
+# different code path on startup to ensure that the status of transactions is
+# consistent.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (23, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (24, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_11';");
+$cur_primary->stop;
+$cur_standby->restart;
+$cur_standby->promote;
+
+# change roles
+note "Now london is primary and paris is standby";
+($cur_primary, $cur_standby) = ($node_london, $node_paris);
+$cur_primary_name = $cur_primary->name;
+
+$cur_primary->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '1',
+ "Restore prepared transactions from files with primary down");
+
+# restart old primary as new standby
+$cur_standby->enable_streaming($cur_primary);
+$cur_standby->start;
+
+$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_11'");
+
+###############################################################################
+# Check that prepared transactions are correctly replayed after standby hard
+# restart while primary is down.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (25, 'issued to ${cur_primary_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (26, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_12';
+ ");
+$cur_primary->stop;
+$cur_standby->teardown_node;
+$cur_standby->start;
+$cur_standby->promote;
+
+# change roles
+note "Now paris is primary and london is standby";
+($cur_primary, $cur_standby) = ($node_paris, $node_london);
+$cur_primary_name = $cur_primary->name;
+
+$cur_primary->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '1',
+ "Restore prepared transactions from records with primary down");
+
+# restart old primary as new standby
+$cur_standby->enable_streaming($cur_primary);
+$cur_standby->start;
+
+$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
+
+###############################################################################
+# Check for a lock conflict between prepared transaction with DDL inside and
+# replay of XLOG_STANDBY_LOCK wal record.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE t_009_tbl2 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl2 VALUES (27, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_13';
+ -- checkpoint will issue XLOG_STANDBY_LOCK that can conflict with lock
+ -- held by 'create table' statement
+ CHECKPOINT;
+ COMMIT PREPARED 'xact_009_13';");
+
+# Ensure that last transaction is replayed on standby.
+my $cur_primary_lsn =
+ $cur_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+my $caughtup_query =
+ "SELECT '$cur_primary_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$cur_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM t_009_tbl2",
+ stdout => \$psql_out);
+is($psql_out, '1', "Replay prepared transaction with DDL");
+
+###############################################################################
+# Check recovery of prepared transaction with DDL inside after a hard restart
+# of the primary.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE t_009_tbl3 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl3 VALUES (28, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_14';
+ BEGIN;
+ CREATE TABLE t_009_tbl4 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl4 VALUES (29, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_15';");
+
+$cur_primary->teardown_node;
+$cur_primary->start;
+
+$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_14'");
+is($psql_rc, '0', 'Commit prepared transaction after teardown');
+
+$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_15'");
+is($psql_rc, '0', 'Rollback prepared transaction after teardown');
+
+###############################################################################
+# Check recovery of prepared transaction with DDL inside after a soft restart
+# of the primary.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE t_009_tbl5 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl5 VALUES (30, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_16';
+ BEGIN;
+ CREATE TABLE t_009_tbl6 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl6 VALUES (31, 'issued to ${cur_primary_name}');
+ PREPARE TRANSACTION 'xact_009_17';");
+
+$cur_primary->stop;
+$cur_primary->start;
+
+$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_16'");
+is($psql_rc, '0', 'Commit prepared transaction after restart');
+
+$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_17'");
+is($psql_rc, '0', 'Rollback prepared transaction after restart');
+
+###############################################################################
+# Verify expected data appears on both servers.
+###############################################################################
+
+$cur_primary->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0', "No uncommitted prepared transactions on primary");
+
+$cur_primary->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl ORDER BY id",
+ stdout => \$psql_out);
+is( $psql_out, qq{1|issued to london
+2|issued to london
+5|issued to london
+6|issued to london
+9|issued to london
+10|issued to london
+11|issued to london
+12|issued to london
+13|issued to london
+14|issued to london
+15|issued to london
+16|issued to london
+17|issued to london
+18|issued to london
+19|issued to london
+20|issued to london
+21|issued to london
+22|issued to london
+23|issued to paris
+24|issued to paris
+25|issued to london
+26|issued to london},
+ "Check expected t_009_tbl data on primary");
+
+$cur_primary->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl2",
+ stdout => \$psql_out);
+is( $psql_out,
+ qq{27|issued to paris},
+ "Check expected t_009_tbl2 data on primary");
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0', "No uncommitted prepared transactions on standby");
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl ORDER BY id",
+ stdout => \$psql_out);
+is( $psql_out, qq{1|issued to london
+2|issued to london
+5|issued to london
+6|issued to london
+9|issued to london
+10|issued to london
+11|issued to london
+12|issued to london
+13|issued to london
+14|issued to london
+15|issued to london
+16|issued to london
+17|issued to london
+18|issued to london
+19|issued to london
+20|issued to london
+21|issued to london
+22|issued to london
+23|issued to paris
+24|issued to paris
+25|issued to london
+26|issued to london},
+ "Check expected t_009_tbl data on standby");
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl2",
+ stdout => \$psql_out);
+is( $psql_out,
+ qq{27|issued to paris},
+ "Check expected t_009_tbl2 data on standby");
+
+done_testing();
diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl
new file mode 100644
index 0000000..135fb1a
--- /dev/null
+++ b/src/test/recovery/t/010_logical_decoding_timelines.pl
@@ -0,0 +1,202 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Demonstrate that logical can follow timeline switches.
+#
+# Logical replication slots can follow timeline switches but it's
+# normally not possible to have a logical slot on a replica where
+# promotion and a timeline switch can occur. The only ways
+# we can create that circumstance are:
+#
+# * By doing a filesystem-level copy of the DB, since pg_basebackup
+# excludes pg_replslot but we can copy it directly; or
+#
+# * by creating a slot directly at the C level on the replica and
+# advancing it as we go using the low level APIs. It can't be done
+# from SQL since logical decoding isn't allowed on replicas.
+#
+# This module uses the first approach to show that timeline following
+# on a logical slot works.
+#
+# (For convenience, it also tests some recovery-related operations
+# on logical slots).
+#
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Copy;
+use IPC::Run ();
+use Scalar::Util qw(blessed);
+
+my ($stdout, $stderr, $ret);
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1, has_archiving => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', q[
+wal_level = 'logical'
+max_replication_slots = 3
+max_wal_senders = 2
+log_min_messages = 'debug2'
+hot_standby_feedback = on
+wal_receiver_status_interval = 1
+]);
+$node_primary->dump_info;
+$node_primary->start;
+
+note "testing logical timeline following with a filesystem-level copy";
+
+$node_primary->safe_psql('postgres',
+ "SELECT pg_create_logical_replication_slot('before_basebackup', 'test_decoding');"
+);
+$node_primary->safe_psql('postgres', "CREATE TABLE decoding(blah text);");
+$node_primary->safe_psql('postgres',
+ "INSERT INTO decoding(blah) VALUES ('beforebb');");
+
+# We also want to verify that DROP DATABASE on a standby with a logical
+# slot works. This isn't strictly related to timeline following, but
+# the only way to get a logical slot on a standby right now is to use
+# the same physical copy trick, so:
+$node_primary->safe_psql('postgres', 'CREATE DATABASE dropme;');
+$node_primary->safe_psql('dropme',
+ "SELECT pg_create_logical_replication_slot('dropme_slot', 'test_decoding');"
+);
+
+$node_primary->safe_psql('postgres', 'CHECKPOINT;');
+
+my $backup_name = 'b1';
+$node_primary->stop();
+$node_primary->backup_fs_cold($backup_name);
+$node_primary->start();
+
+$node_primary->safe_psql('postgres',
+ q[SELECT pg_create_physical_replication_slot('phys_slot');]);
+
+my $node_replica = PostgreSQL::Test::Cluster->new('replica');
+$node_replica->init_from_backup(
+ $node_primary, $backup_name,
+ has_streaming => 1,
+ has_restoring => 1);
+$node_replica->append_conf('postgresql.conf',
+ q[primary_slot_name = 'phys_slot']);
+
+$node_replica->start;
+
+# If we drop 'dropme' on the primary, the standby should drop the
+# db and associated slot.
+is($node_primary->psql('postgres', 'DROP DATABASE dropme'),
+ 0, 'dropped DB with logical slot OK on primary');
+$node_primary->wait_for_catchup($node_replica);
+is( $node_replica->safe_psql(
+ 'postgres', q[SELECT 1 FROM pg_database WHERE datname = 'dropme']),
+ '',
+ 'dropped DB dropme on standby');
+is($node_primary->slot('dropme_slot')->{'slot_name'},
+ undef, 'logical slot was actually dropped on standby');
+
+# Back to testing failover...
+$node_primary->safe_psql('postgres',
+ "SELECT pg_create_logical_replication_slot('after_basebackup', 'test_decoding');"
+);
+$node_primary->safe_psql('postgres',
+ "INSERT INTO decoding(blah) VALUES ('afterbb');");
+$node_primary->safe_psql('postgres', 'CHECKPOINT;');
+
+# Verify that only the before base_backup slot is on the replica
+$stdout = $node_replica->safe_psql('postgres',
+ 'SELECT slot_name FROM pg_replication_slots ORDER BY slot_name');
+is($stdout, 'before_basebackup',
+ 'Expected to find only slot before_basebackup on replica');
+
+# Examine the physical slot the replica uses to stream changes
+# from the primary to make sure its hot_standby_feedback
+# has locked in a catalog_xmin on the physical slot, and that
+# any xmin is >= the catalog_xmin
+$node_primary->poll_query_until(
+ 'postgres', q[
+ SELECT catalog_xmin IS NOT NULL
+ FROM pg_replication_slots
+ WHERE slot_name = 'phys_slot'
+ ]) or die "slot's catalog_xmin never became set";
+
+my $phys_slot = $node_primary->slot('phys_slot');
+isnt($phys_slot->{'xmin'}, '', 'xmin assigned on physical slot of primary');
+isnt($phys_slot->{'catalog_xmin'},
+ '', 'catalog_xmin assigned on physical slot of primary');
+
+# Ignore wrap-around here, we're on a new cluster:
+cmp_ok(
+ $phys_slot->{'xmin'}, '>=',
+ $phys_slot->{'catalog_xmin'},
+ 'xmin on physical slot must not be lower than catalog_xmin');
+
+$node_primary->safe_psql('postgres', 'CHECKPOINT');
+$node_primary->wait_for_catchup($node_replica, 'write');
+
+# Boom, crash
+$node_primary->stop('immediate');
+
+$node_replica->promote;
+
+$node_replica->safe_psql('postgres',
+ "INSERT INTO decoding(blah) VALUES ('after failover');");
+
+# Shouldn't be able to read from slot created after base backup
+($ret, $stdout, $stderr) = $node_replica->psql('postgres',
+ "SELECT data FROM pg_logical_slot_peek_changes('after_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');"
+);
+is($ret, 3, 'replaying from after_basebackup slot fails');
+like(
+ $stderr,
+ qr/replication slot "after_basebackup" does not exist/,
+ 'after_basebackup slot missing');
+
+# Should be able to read from slot created before base backup
+($ret, $stdout, $stderr) = $node_replica->psql(
+ 'postgres',
+ "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');",
+ timeout => $PostgreSQL::Test::Utils::timeout_default);
+is($ret, 0, 'replay from slot before_basebackup succeeds');
+
+my $final_expected_output_bb = q(BEGIN
+table public.decoding: INSERT: blah[text]:'beforebb'
+COMMIT
+BEGIN
+table public.decoding: INSERT: blah[text]:'afterbb'
+COMMIT
+BEGIN
+table public.decoding: INSERT: blah[text]:'after failover'
+COMMIT);
+is($stdout, $final_expected_output_bb,
+ 'decoded expected data from slot before_basebackup');
+is($stderr, '', 'replay from slot before_basebackup produces no stderr');
+
+# So far we've peeked the slots, so when we fetch the same info over
+# pg_recvlogical we should get complete results. First, find out the commit lsn
+# of the last transaction. There's no max(pg_lsn), so:
+
+my $endpos = $node_replica->safe_psql('postgres',
+ "SELECT lsn FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL) ORDER BY lsn DESC LIMIT 1;"
+);
+
+# now use the walsender protocol to peek the slot changes and make sure we see
+# the same results.
+
+$stdout = $node_replica->pg_recvlogical_upto(
+ 'postgres', 'before_basebackup',
+ $endpos, $PostgreSQL::Test::Utils::timeout_default,
+ 'include-xids' => '0',
+ 'skip-empty-xacts' => '1');
+
+# walsender likes to add a newline
+chomp($stdout);
+is($stdout, $final_expected_output_bb,
+ 'got same output from walsender via pg_recvlogical on before_basebackup');
+
+$node_replica->teardown_node();
+
+done_testing();
diff --git a/src/test/recovery/t/012_subtransactions.pl b/src/test/recovery/t/012_subtransactions.pl
new file mode 100644
index 0000000..f807509
--- /dev/null
+++ b/src/test/recovery/t/012_subtransactions.pl
@@ -0,0 +1,218 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests dedicated to subtransactions in recovery
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Setup primary node
+my $node_primary = PostgreSQL::Test::Cluster->new("primary");
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', qq(
+ max_prepared_transactions = 10
+ log_checkpoints = true
+));
+$node_primary->start;
+$node_primary->backup('primary_backup');
+$node_primary->psql('postgres', "CREATE TABLE t_012_tbl (id int)");
+
+# Setup standby node
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, 'primary_backup',
+ has_streaming => 1);
+$node_standby->start;
+
+# Switch to synchronous replication
+$node_primary->append_conf(
+ 'postgresql.conf', qq(
+ synchronous_standby_names = '*'
+));
+$node_primary->psql('postgres', "SELECT pg_reload_conf()");
+
+my $psql_out = '';
+my $psql_rc = '';
+
+###############################################################################
+# Check that replay will correctly set SUBTRANS and properly advance nextXid
+# so that it won't conflict with savepoint xids.
+###############################################################################
+
+$node_primary->psql(
+ 'postgres', "
+ BEGIN;
+ DELETE FROM t_012_tbl;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s1;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s2;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s3;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s4;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s5;
+ INSERT INTO t_012_tbl VALUES (43);
+ PREPARE TRANSACTION 'xact_012_1';
+ CHECKPOINT;");
+
+$node_primary->stop;
+$node_primary->start;
+$node_primary->psql(
+ 'postgres', "
+ -- here we can get xid of previous savepoint if nextXid
+ -- wasn't properly advanced
+ BEGIN;
+ INSERT INTO t_012_tbl VALUES (142);
+ ROLLBACK;
+ COMMIT PREPARED 'xact_012_1';");
+
+$node_primary->psql(
+ 'postgres',
+ "SELECT count(*) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '6', "Check nextXid handling for prepared subtransactions");
+
+###############################################################################
+# Check that replay will correctly set 2PC with more than
+# PGPROC_MAX_CACHED_SUBXIDS subtransactions and also show data properly
+# on promotion
+###############################################################################
+$node_primary->psql('postgres', "DELETE FROM t_012_tbl");
+
+# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql
+$node_primary->psql(
+ 'postgres', "
+ CREATE OR REPLACE FUNCTION hs_subxids (n integer)
+ RETURNS void
+ LANGUAGE plpgsql
+ AS \$\$
+ BEGIN
+ IF n <= 0 THEN RETURN; END IF;
+ INSERT INTO t_012_tbl VALUES (n);
+ PERFORM hs_subxids(n - 1);
+ RETURN;
+ EXCEPTION WHEN raise_exception THEN NULL; END;
+ \$\$;");
+$node_primary->psql(
+ 'postgres', "
+ BEGIN;
+ SELECT hs_subxids(127);
+ COMMIT;");
+$node_primary->wait_for_catchup($node_standby);
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+$node_primary->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+# restore state
+($node_primary, $node_standby) = ($node_standby, $node_primary);
+$node_standby->enable_streaming($node_primary);
+$node_standby->start;
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+$node_primary->psql('postgres', "DELETE FROM t_012_tbl");
+
+# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql
+$node_primary->psql(
+ 'postgres', "
+ CREATE OR REPLACE FUNCTION hs_subxids (n integer)
+ RETURNS void
+ LANGUAGE plpgsql
+ AS \$\$
+ BEGIN
+ IF n <= 0 THEN RETURN; END IF;
+ INSERT INTO t_012_tbl VALUES (n);
+ PERFORM hs_subxids(n - 1);
+ RETURN;
+ EXCEPTION WHEN raise_exception THEN NULL; END;
+ \$\$;");
+$node_primary->psql(
+ 'postgres', "
+ BEGIN;
+ SELECT hs_subxids(127);
+ PREPARE TRANSACTION 'xact_012_1';");
+$node_primary->wait_for_catchup($node_standby);
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+$node_primary->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+
+# restore state
+($node_primary, $node_standby) = ($node_standby, $node_primary);
+$node_standby->enable_streaming($node_primary);
+$node_standby->start;
+$psql_rc = $node_primary->psql('postgres', "COMMIT PREPARED 'xact_012_1'");
+is($psql_rc, '0',
+ "Restore of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby"
+);
+
+$node_primary->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+$node_primary->psql('postgres', "DELETE FROM t_012_tbl");
+$node_primary->psql(
+ 'postgres', "
+ BEGIN;
+ SELECT hs_subxids(201);
+ PREPARE TRANSACTION 'xact_012_1';");
+$node_primary->wait_for_catchup($node_standby);
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+$node_primary->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+
+# restore state
+($node_primary, $node_standby) = ($node_standby, $node_primary);
+$node_standby->enable_streaming($node_primary);
+$node_standby->start;
+$psql_rc = $node_primary->psql('postgres', "ROLLBACK PREPARED 'xact_012_1'");
+is($psql_rc, '0',
+ "Rollback of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby"
+);
+
+$node_primary->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+
+done_testing();
diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl
new file mode 100644
index 0000000..c22844d
--- /dev/null
+++ b/src/test/recovery/t/013_crash_restart.pl
@@ -0,0 +1,250 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+#
+# Tests restarts of postgres due to crashes of a subprocess.
+#
+# Two longer-running psql subprocesses are used: One to kill a
+# backend, triggering a crash-restart cycle, one to detect when
+# postmaster noticed the backend died. The second backend is
+# necessary because it's otherwise hard to determine if postmaster is
+# still accepting new sessions (because it hasn't noticed that the
+# backend died), or because it's already restarted.
+#
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+$node->start();
+
+# by default PostgreSQL::Test::Cluster doesn't restart after a crash
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET restart_after_crash = 1;
+ ALTER SYSTEM SET log_connections = 1;
+ SELECT pg_reload_conf();]);
+
+# Run psql, keeping session alive, so we have an alive backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$killme_stdin,
+ '>',
+ \$killme_stdout,
+ '2>',
+ \$killme_stderr,
+ $psql_timeout);
+
+# Need a second psql to check if crash-restart happened.
+my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+my $monitor = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$monitor_stdin,
+ '>',
+ \$monitor_stdout,
+ '2>',
+ \$monitor_stderr,
+ $psql_timeout);
+
+#create table, insert row that should survive
+$killme_stdin .= q[
+CREATE TABLE alive(status text);
+INSERT INTO alive VALUES($$committed-before-sigquit$$);
+SELECT pg_backend_pid();
+];
+ok( pump_until(
+ $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGQUIT');
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+#insert a row that should *not* survive, due to in-progress xact
+$killme_stdin .= q[
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status;
+];
+ok( pump_until(
+ $killme, $psql_timeout,
+ \$killme_stdout, qr/in-progress-before-sigquit/m),
+ 'inserted in-progress-before-sigquit');
+$killme_stdout = '';
+$killme_stderr = '';
+
+
+# Start longrunning query in second session; its failure will signal that
+# crash-restart has occurred. The initial wait for the trivial select is to
+# be sure that psql successfully connected to backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok( pump_until(
+ $monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+# kill once with QUIT - we expect psql to exit, while emitting error message first
+my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'QUIT', $pid);
+
+# Exactly process should have been alive to be killed
+is($ret, 0, "killed process with SIGQUIT");
+
+# Check that psql sees the killed backend as having been terminated
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ $psql_timeout,
+ \$killme_stderr,
+ qr/WARNING: terminating connection because of unexpected SIGQUIT signal|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "psql query died successfully after SIGQUIT");
+$killme_stderr = '';
+$killme_stdout = '';
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
+ok( pump_until(
+ $monitor,
+ $psql_timeout,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "psql monitor died successfully after SIGQUIT");
+$monitor->finish;
+
+# Wait till server restarts
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGQUIT");
+
+
+# restart psql processes, now that the crash cycle finished
+($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+$killme->run();
+($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+$monitor->run();
+
+
+# Acquire pid of new backend
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok( pump_until(
+ $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ "acquired pid for SIGKILL");
+$pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Insert test rows
+$killme_stdin .= q[
+INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status;
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status;
+];
+ok( pump_until(
+ $killme, $psql_timeout,
+ \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'inserted in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Re-start longrunning query in second session; its failure will signal that
+# crash-restart has occurred. The initial wait for the trivial select is to
+# be sure that psql successfully connected to backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok( pump_until(
+ $monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+
+# kill with SIGKILL this time - we expect the backend to exit, without
+# being able to emit an error message
+$ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, "killed process with KILL");
+
+# Check that psql sees the server as being terminated. No WARNING,
+# because signal handlers aren't being run on SIGKILL.
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ $psql_timeout,
+ \$killme_stderr,
+ qr/server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "psql query died successfully after SIGKILL");
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
+ok( pump_until(
+ $monitor,
+ $psql_timeout,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "psql monitor died successfully after SIGKILL");
+$monitor->finish;
+
+# Wait till server restarts
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGKILL");
+
+# Make sure the committed rows survived, in-progress ones not
+is( $node->safe_psql('postgres', 'SELECT * FROM alive'),
+ "committed-before-sigquit\ncommitted-before-sigkill",
+ 'data survived');
+
+is( $node->safe_psql(
+ 'postgres',
+ 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status'
+ ),
+ 'before-orderly-restart',
+ 'can still write after crash restart');
+
+# Just to be sure, check that an orderly restart now still works
+$node->restart();
+
+is( $node->safe_psql('postgres', 'SELECT * FROM alive'),
+ "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart",
+ 'data survived');
+
+is( $node->safe_psql(
+ 'postgres',
+ 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status'
+ ),
+ 'after-orderly-restart',
+ 'can still write after orderly restart');
+
+$node->stop();
+
+done_testing();
diff --git a/src/test/recovery/t/014_unlogged_reinit.pl b/src/test/recovery/t/014_unlogged_reinit.pl
new file mode 100644
index 0000000..7289510
--- /dev/null
+++ b/src/test/recovery/t/014_unlogged_reinit.pl
@@ -0,0 +1,134 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests that unlogged tables are properly reinitialized after a crash.
+#
+# The behavior should be the same when restoring from a backup, but
+# that is not tested here.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('main');
+
+$node->init;
+$node->start;
+my $pgdata = $node->data_dir;
+
+# Create an unlogged table and an unlogged sequence to test that forks
+# other than init are not copied.
+$node->safe_psql('postgres', 'CREATE UNLOGGED TABLE base_unlogged (id int)');
+$node->safe_psql('postgres', 'CREATE UNLOGGED SEQUENCE seq_unlogged');
+
+my $baseUnloggedPath = $node->safe_psql('postgres',
+ q{select pg_relation_filepath('base_unlogged')});
+my $seqUnloggedPath = $node->safe_psql('postgres',
+ q{select pg_relation_filepath('seq_unlogged')});
+
+# Test that main and init forks exist.
+ok(-f "$pgdata/${baseUnloggedPath}_init", 'table init fork exists');
+ok(-f "$pgdata/$baseUnloggedPath", 'table main fork exists');
+ok(-f "$pgdata/${seqUnloggedPath}_init", 'sequence init fork exists');
+ok(-f "$pgdata/$seqUnloggedPath", 'sequence main fork exists');
+
+# Test the sequence
+is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"),
+ 1, 'sequence nextval');
+is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"),
+ 2, 'sequence nextval again');
+
+# Create an unlogged table in a tablespace.
+
+my $tablespaceDir = PostgreSQL::Test::Utils::tempdir;
+
+$node->safe_psql('postgres',
+ "CREATE TABLESPACE ts1 LOCATION '$tablespaceDir'");
+$node->safe_psql('postgres',
+ 'CREATE UNLOGGED TABLE ts1_unlogged (id int) TABLESPACE ts1');
+
+my $ts1UnloggedPath = $node->safe_psql('postgres',
+ q{select pg_relation_filepath('ts1_unlogged')});
+
+# Test that main and init forks exist.
+ok(-f "$pgdata/${ts1UnloggedPath}_init", 'init fork in tablespace exists');
+ok(-f "$pgdata/$ts1UnloggedPath", 'main fork in tablespace exists');
+
+# Create more unlogged sequences for testing.
+$node->safe_psql('postgres', 'CREATE UNLOGGED SEQUENCE seq_unlogged2');
+# This rewrites the sequence relation in AlterSequence().
+$node->safe_psql('postgres', 'ALTER SEQUENCE seq_unlogged2 INCREMENT 2');
+$node->safe_psql('postgres', "SELECT nextval('seq_unlogged2')");
+
+$node->safe_psql('postgres',
+ 'CREATE UNLOGGED TABLE tab_seq_unlogged3 (a int GENERATED ALWAYS AS IDENTITY)'
+);
+# This rewrites the sequence relation in ResetSequence().
+$node->safe_psql('postgres', 'TRUNCATE tab_seq_unlogged3 RESTART IDENTITY');
+$node->safe_psql('postgres', 'INSERT INTO tab_seq_unlogged3 DEFAULT VALUES');
+
+# Crash the postmaster.
+$node->stop('immediate');
+
+# Write fake forks to test that they are removed during recovery.
+append_to_file("$pgdata/${baseUnloggedPath}_vm", 'TEST_VM');
+append_to_file("$pgdata/${baseUnloggedPath}_fsm", 'TEST_FSM');
+
+# Remove main fork to test that it is recopied from init.
+unlink("$pgdata/${baseUnloggedPath}")
+ or BAIL_OUT("could not remove \"${baseUnloggedPath}\": $!");
+unlink("$pgdata/${seqUnloggedPath}")
+ or BAIL_OUT("could not remove \"${seqUnloggedPath}\": $!");
+
+# the same for the tablespace
+append_to_file("$pgdata/${ts1UnloggedPath}_vm", 'TEST_VM');
+append_to_file("$pgdata/${ts1UnloggedPath}_fsm", 'TEST_FSM');
+unlink("$pgdata/${ts1UnloggedPath}")
+ or BAIL_OUT("could not remove \"${ts1UnloggedPath}\": $!");
+
+$node->start;
+
+# check unlogged table in base
+ok( -f "$pgdata/${baseUnloggedPath}_init",
+ 'table init fork in base still exists');
+ok(-f "$pgdata/$baseUnloggedPath",
+ 'table main fork in base recreated at startup');
+ok(!-f "$pgdata/${baseUnloggedPath}_vm",
+ 'vm fork in base removed at startup');
+ok( !-f "$pgdata/${baseUnloggedPath}_fsm",
+ 'fsm fork in base removed at startup');
+
+# check unlogged sequence
+ok(-f "$pgdata/${seqUnloggedPath}_init", 'sequence init fork still exists');
+ok(-f "$pgdata/$seqUnloggedPath", 'sequence main fork recreated at startup');
+
+# Test the sequence after restart
+is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"),
+ 1, 'sequence nextval after restart');
+is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"),
+ 2, 'sequence nextval after restart again');
+
+# check unlogged table in tablespace
+ok( -f "$pgdata/${ts1UnloggedPath}_init",
+ 'init fork still exists in tablespace');
+ok(-f "$pgdata/$ts1UnloggedPath",
+ 'main fork in tablespace recreated at startup');
+ok( !-f "$pgdata/${ts1UnloggedPath}_vm",
+ 'vm fork in tablespace removed at startup');
+ok( !-f "$pgdata/${ts1UnloggedPath}_fsm",
+ 'fsm fork in tablespace removed at startup');
+
+# Test other sequences
+is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged2')"),
+ 1, 'altered sequence nextval after restart');
+is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged2')"),
+ 3, 'altered sequence nextval after restart again');
+
+$node->safe_psql('postgres',
+ "INSERT INTO tab_seq_unlogged3 VALUES (DEFAULT), (DEFAULT)");
+is($node->safe_psql('postgres', "SELECT * FROM tab_seq_unlogged3"),
+ "1\n2", 'reset sequence nextval after restart');
+
+done_testing();
diff --git a/src/test/recovery/t/015_promotion_pages.pl b/src/test/recovery/t/015_promotion_pages.pl
new file mode 100644
index 0000000..8d57b1b
--- /dev/null
+++ b/src/test/recovery/t/015_promotion_pages.pl
@@ -0,0 +1,89 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for promotion handling with WAL records generated post-promotion
+# before the first checkpoint is generated. This test case checks for
+# invalid page references at replay based on the minimum consistent
+# recovery point defined.
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize primary node
+my $alpha = PostgreSQL::Test::Cluster->new('alpha');
+$alpha->init(allows_streaming => 1);
+# Setting wal_log_hints to off is important to get invalid page
+# references.
+$alpha->append_conf("postgresql.conf", <<EOF);
+wal_log_hints = off
+EOF
+
+# Start the primary
+$alpha->start;
+
+# setup/start a standby
+$alpha->backup('bkp');
+my $bravo = PostgreSQL::Test::Cluster->new('bravo');
+$bravo->init_from_backup($alpha, 'bkp', has_streaming => 1);
+$bravo->append_conf('postgresql.conf', <<EOF);
+checkpoint_timeout=1h
+EOF
+$bravo->start;
+
+# Dummy table for the upcoming tests.
+$alpha->safe_psql('postgres', 'create table test1 (a int)');
+$alpha->safe_psql('postgres',
+ 'insert into test1 select generate_series(1, 10000)');
+
+# take a checkpoint
+$alpha->safe_psql('postgres', 'checkpoint');
+
+# The following vacuum will set visibility map bits and create
+# problematic WAL records.
+$alpha->safe_psql('postgres', 'vacuum verbose test1');
+# Wait for last record to have been replayed on the standby.
+$alpha->wait_for_catchup($bravo);
+
+# Now force a checkpoint on the standby. This seems unnecessary but for "some"
+# reason, the previous checkpoint on the primary does not reflect on the standby
+# and without an explicit checkpoint, it may start redo recovery from a much
+# older point, which includes even create table and initial page additions.
+$bravo->safe_psql('postgres', 'checkpoint');
+
+# Now just use a dummy table and run some operations to move minRecoveryPoint
+# beyond the previous vacuum.
+$alpha->safe_psql('postgres', 'create table test2 (a int, b text)');
+$alpha->safe_psql('postgres',
+ 'insert into test2 select generate_series(1,10000), md5(random()::text)');
+$alpha->safe_psql('postgres', 'truncate test2');
+
+# Wait again for all records to be replayed.
+$alpha->wait_for_catchup($bravo);
+
+# Do the promotion, which reinitializes minRecoveryPoint in the control
+# file so as WAL is replayed up to the end.
+$bravo->promote;
+
+# Truncate the table on the promoted standby, vacuum and extend it
+# again to create new page references. The first post-recovery checkpoint
+# has not happened yet.
+$bravo->safe_psql('postgres', 'truncate test1');
+$bravo->safe_psql('postgres', 'vacuum verbose test1');
+$bravo->safe_psql('postgres',
+ 'insert into test1 select generate_series(1,1000)');
+
+# Now crash-stop the promoted standby and restart. This makes sure that
+# replay does not see invalid page references because of an invalid
+# minimum consistent recovery point.
+$bravo->stop('immediate');
+$bravo->start;
+
+# Check state of the table after full crash recovery. All its data should
+# be here.
+my $psql_out;
+$bravo->psql('postgres', "SELECT count(*) FROM test1", stdout => \$psql_out);
+is($psql_out, '1000', "Check that table state is correct");
+
+done_testing();
diff --git a/src/test/recovery/t/016_min_consistency.pl b/src/test/recovery/t/016_min_consistency.pl
new file mode 100644
index 0000000..5e0655c
--- /dev/null
+++ b/src/test/recovery/t/016_min_consistency.pl
@@ -0,0 +1,143 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for checking consistency of on-disk pages for a cluster with
+# the minimum recovery LSN, ensuring that the updates happen across
+# all processes. In this test, the updates from the startup process
+# and the checkpointer (which triggers non-startup code paths) are
+# both checked.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Find the largest LSN in the set of pages part of the given relation
+# file. This is used for offline checks of page consistency. The LSN
+# is historically stored as a set of two numbers of 4 byte-length
+# located at the beginning of each page.
+sub find_largest_lsn
+{
+ my $blocksize = int(shift);
+ my $filename = shift;
+ my ($max_hi, $max_lo) = (0, 0);
+ open(my $fh, "<:raw", $filename)
+ or die "failed to open $filename: $!";
+ my ($buf, $len);
+ while ($len = read($fh, $buf, $blocksize))
+ {
+ $len == $blocksize
+ or die "read only $len of $blocksize bytes from $filename";
+ my ($hi, $lo) = unpack("LL", $buf);
+
+ if ($hi > $max_hi or ($hi == $max_hi and $lo > $max_lo))
+ {
+ ($max_hi, $max_lo) = ($hi, $lo);
+ }
+ }
+ defined($len) or die "read error on $filename: $!";
+ close($fh);
+
+ return sprintf("%X/%X", $max_hi, $max_lo);
+}
+
+# Initialize primary node
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(allows_streaming => 1);
+
+# Set shared_buffers to a very low value to enforce discard and flush
+# of PostgreSQL buffers on standby, enforcing other processes than the
+# startup process to update the minimum recovery LSN in the control
+# file. Autovacuum is disabled so as there is no risk of having other
+# processes than the checkpointer doing page flushes.
+$primary->append_conf("postgresql.conf", <<EOF);
+shared_buffers = 128kB
+autovacuum = off
+EOF
+
+# Start the primary
+$primary->start;
+
+# setup/start a standby
+$primary->backup('bkp');
+my $standby = PostgreSQL::Test::Cluster->new('standby');
+$standby->init_from_backup($primary, 'bkp', has_streaming => 1);
+$standby->start;
+
+# Create base table whose data consistency is checked.
+$primary->safe_psql(
+ 'postgres', "
+CREATE TABLE test1 (a int) WITH (fillfactor = 10);
+INSERT INTO test1 SELECT generate_series(1, 10000);");
+
+# Take a checkpoint and enforce post-checkpoint full page writes
+# which makes the startup process replay those pages, updating
+# minRecoveryPoint.
+$primary->safe_psql('postgres', 'CHECKPOINT;');
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Wait for last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby);
+
+# Fill in the standby's shared buffers with the data filled in
+# previously.
+$standby->safe_psql('postgres', 'SELECT count(*) FROM test1;');
+
+# Update the table again, this does not generate full page writes so
+# the standby will replay records associated with it, but the startup
+# process will not flush those pages.
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Extract from the relation the last block created and its relation
+# file, this will be used at the end of the test for sanity checks.
+my $blocksize = $primary->safe_psql('postgres',
+ "SELECT setting::int FROM pg_settings WHERE name = 'block_size';");
+my $last_block = $primary->safe_psql('postgres',
+ "SELECT pg_relation_size('test1')::int / $blocksize - 1;");
+my $relfilenode = $primary->safe_psql('postgres',
+ "SELECT pg_relation_filepath('test1'::regclass);");
+
+# Wait for last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby);
+
+# Issue a restart point on the standby now, which makes the checkpointer
+# update minRecoveryPoint.
+$standby->safe_psql('postgres', 'CHECKPOINT;');
+
+# Now shut down the primary violently so as the standby does not
+# receive the shutdown checkpoint, making sure that the startup
+# process does not flush any pages on its side. The standby is
+# cleanly stopped, which makes the checkpointer update minRecoveryPoint
+# with the restart point created at shutdown.
+$primary->stop('immediate');
+$standby->stop('fast');
+
+# Check the data consistency of the instance while offline. This is
+# done by directly scanning the on-disk relation blocks and what
+# pg_controldata lets know.
+my $standby_data = $standby->data_dir;
+my $offline_max_lsn =
+ find_largest_lsn($blocksize, "$standby_data/$relfilenode");
+
+# Fetch minRecoveryPoint from the control file itself
+my ($stdout, $stderr) = run_command([ 'pg_controldata', $standby_data ]);
+my @control_data = split("\n", $stdout);
+my $offline_recovery_lsn = undef;
+foreach (@control_data)
+{
+ if ($_ =~ /^Minimum recovery ending location:\s*(.*)$/mg)
+ {
+ $offline_recovery_lsn = $1;
+ last;
+ }
+}
+die "No minRecoveryPoint in control file found\n"
+ unless defined($offline_recovery_lsn);
+
+# minRecoveryPoint should never be older than the maximum LSN for all
+# the pages on disk.
+ok($offline_recovery_lsn ge $offline_max_lsn,
+ "Check offline that table data is consistent with minRecoveryPoint");
+
+done_testing();
diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl
new file mode 100644
index 0000000..875657b
--- /dev/null
+++ b/src/test/recovery/t/017_shm.pl
@@ -0,0 +1,218 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+#
+# Tests of pg_shmem.h functions
+#
+use strict;
+use warnings;
+use File::stat qw(stat);
+use IPC::Run 'run';
+use PostgreSQL::Test::Cluster;
+use Test::More;
+use PostgreSQL::Test::Utils;
+use Time::HiRes qw(usleep);
+
+# If we don't have shmem support, skip the whole thing
+eval {
+ require IPC::SharedMem;
+ IPC::SharedMem->import;
+ require IPC::SysV;
+ IPC::SysV->import(qw(IPC_CREAT IPC_EXCL S_IRUSR S_IWUSR));
+};
+if ($@ || $windows_os)
+{
+ plan skip_all => 'SysV shared memory not supported by this platform';
+}
+
+my $tempdir = PostgreSQL::Test::Utils::tempdir;
+
+# Log "ipcs" diffs on a best-effort basis, swallowing any error.
+my $ipcs_before = "$tempdir/ipcs_before";
+eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };
+
+sub log_ipcs
+{
+ eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] };
+ return;
+}
+
+# Node setup.
+my $gnat = PostgreSQL::Test::Cluster->new('gnat');
+$gnat->init;
+
+# Create a shmem segment that will conflict with gnat's first choice
+# of shmem key. (If we fail to create it because something else is
+# already using that key, that's perfectly fine, though the test will
+# exercise a different scenario than it usually does.)
+my $gnat_dir_stat = stat($gnat->data_dir);
+defined($gnat_dir_stat) or die('unable to stat ' . $gnat->data_dir);
+my $gnat_inode = $gnat_dir_stat->ino;
+note "gnat's datadir inode = $gnat_inode";
+
+# Note: must reference IPC::SysV's constants as functions, or this file
+# fails to compile when that module is not available.
+my $gnat_conflict_shm =
+ IPC::SharedMem->new($gnat_inode, 1024,
+ IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR());
+note "could not create conflicting shmem" if !defined($gnat_conflict_shm);
+log_ipcs();
+
+$gnat->start;
+log_ipcs();
+
+$gnat->restart; # should keep same shmem key
+log_ipcs();
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+poll_start($gnat); # gnat recycles its former shm key.
+log_ipcs();
+
+note "removing the conflicting shmem ...";
+$gnat_conflict_shm->remove if $gnat_conflict_shm;
+log_ipcs();
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+
+# In this start, gnat will use its normal shmem key, and fail to remove
+# the higher-keyed segment that the previous postmaster was using.
+# That's not great, but key collisions should be rare enough to not
+# make this a big problem.
+poll_start($gnat);
+log_ipcs();
+$gnat->stop;
+log_ipcs();
+
+# Re-create the conflicting segment, and start/stop normally, just so
+# this test script doesn't leak the higher-keyed segment.
+note "re-creating conflicting shmem ...";
+$gnat_conflict_shm =
+ IPC::SharedMem->new($gnat_inode, 1024,
+ IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR());
+note "could not create conflicting shmem" if !defined($gnat_conflict_shm);
+log_ipcs();
+
+$gnat->start;
+log_ipcs();
+$gnat->stop;
+log_ipcs();
+
+note "removing the conflicting shmem ...";
+$gnat_conflict_shm->remove if $gnat_conflict_shm;
+log_ipcs();
+
+# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
+# Use a regress.c function to emulate the responsiveness of a backend working
+# through a CPU-intensive task.
+$gnat->start;
+log_ipcs();
+
+my $regress_shlib = $ENV{REGRESS_SHLIB};
+$gnat->safe_psql('postgres', <<EOSQL);
+CREATE FUNCTION wait_pid(int)
+ RETURNS void
+ AS '$regress_shlib'
+ LANGUAGE C STRICT;
+EOSQL
+my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
+my ($stdout, $stderr);
+my $slow_client = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
+ '-c', $slow_query
+ ],
+ '<',
+ \undef,
+ '>',
+ \$stdout,
+ '2>',
+ \$stderr,
+ IPC::Run::timeout(5 * $PostgreSQL::Test::Utils::timeout_default));
+ok( $gnat->poll_query_until(
+ 'postgres',
+ "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'),
+ 'slow query started');
+my $slow_pid = $gnat->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
+$gnat->kill9;
+unlink($gnat->data_dir . '/postmaster.pid');
+$gnat->rotate_logfile; # on Windows, can't open old log for writing
+log_ipcs();
+# Reject ordinary startup. Retry for the same reasons poll_start() does,
+# every 0.1s for at least $PostgreSQL::Test::Utils::timeout_default seconds.
+my $pre_existing_msg = qr/pre-existing shared memory block/;
+{
+ my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default;
+ my $attempts = 0;
+ while ($attempts < $max_attempts)
+ {
+ last
+ if $gnat->start(fail_ok => 1)
+ || slurp_file($gnat->logfile) =~ $pre_existing_msg;
+ usleep(100_000);
+ $attempts++;
+ }
+}
+like(slurp_file($gnat->logfile),
+ $pre_existing_msg, 'detected live backend via shared memory');
+# Reject single-user startup.
+my $single_stderr;
+ok( !run_log(
+ [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
+ '<', \undef, '2>', \$single_stderr),
+ 'live query blocks --single');
+print STDERR $single_stderr;
+like($single_stderr, $pre_existing_msg,
+ 'single-user mode detected live backend via shared memory');
+log_ipcs();
+
+# cleanup slow backend
+PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
+$slow_client->finish; # client has detected backend termination
+log_ipcs();
+
+# now startup should work
+poll_start($gnat);
+log_ipcs();
+
+# finish testing
+$gnat->stop;
+log_ipcs();
+
+
+# We may need retries to start a new postmaster. Causes:
+# - kernel is slow to deliver SIGKILL
+# - postmaster parent is slow to waitpid()
+# - postmaster child is slow to exit in response to SIGQUIT
+# - postmaster child is slow to exit after postmaster death
+sub poll_start
+{
+ my ($node) = @_;
+
+ my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default;
+ my $attempts = 0;
+
+ while ($attempts < $max_attempts)
+ {
+ $node->start(fail_ok => 1) && return 1;
+
+ # Wait 0.1 second before retrying.
+ usleep(100_000);
+
+ # Clean up in case the start attempt just timed out or some such.
+ $node->stop('fast', fail_ok => 1);
+
+ $attempts++;
+ }
+
+ # Try one last time without fail_ok, which will BAIL_OUT unless it
+ # succeeds.
+ $node->start && return 1;
+ return 0;
+}
+
+done_testing();
diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl
new file mode 100644
index 0000000..4700d49
--- /dev/null
+++ b/src/test/recovery/t/018_wal_optimize.pl
@@ -0,0 +1,401 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test WAL replay when some operation has skipped WAL.
+#
+# These tests exercise code that once violated the mandate described in
+# src/backend/access/transam/README section "Skipping WAL for New
+# RelFileNode". The tests work by committing some transactions, initiating an
+# immediate shutdown, and confirming that the expected data survives recovery.
+# For many years, individual commands made the decision to skip WAL, hence the
+# frequent appearance of COPY in these tests.
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+sub check_orphan_relfilenodes
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my ($node, $test_name) = @_;
+
+ my $db_oid = $node->safe_psql('postgres',
+ "SELECT oid FROM pg_database WHERE datname = 'postgres'");
+ my $prefix = "base/$db_oid/";
+ my $filepaths_referenced = $node->safe_psql(
+ 'postgres', "
+ SELECT pg_relation_filepath(oid) FROM pg_class
+ WHERE reltablespace = 0 AND relpersistence <> 't' AND
+ pg_relation_filepath(oid) IS NOT NULL;");
+ is_deeply(
+ [
+ sort(map { "$prefix$_" }
+ grep(/^[0-9]+$/, slurp_dir($node->data_dir . "/$prefix")))
+ ],
+ [ sort split /\n/, $filepaths_referenced ],
+ $test_name);
+ return;
+}
+
+# We run this same test suite for both wal_level=minimal and replica.
+sub run_wal_optimize
+{
+ my $wal_level = shift;
+
+ my $node = PostgreSQL::Test::Cluster->new("node_$wal_level");
+ $node->init;
+ $node->append_conf(
+ 'postgresql.conf', qq(
+wal_level = $wal_level
+max_prepared_transactions = 1
+wal_log_hints = on
+wal_skip_threshold = 0
+#wal_debug = on
+));
+ $node->start;
+
+ # Setup
+ my $tablespace_dir = $node->basedir . '/tablespace_other';
+ mkdir($tablespace_dir);
+ my $result;
+
+ # Test redo of CREATE TABLESPACE.
+ $node->safe_psql(
+ 'postgres', "
+ CREATE TABLE moved (id int);
+ INSERT INTO moved VALUES (1);
+ CREATE TABLESPACE other LOCATION '$tablespace_dir';
+ BEGIN;
+ ALTER TABLE moved SET TABLESPACE other;
+ CREATE TABLE originated (id int);
+ INSERT INTO originated VALUES (1);
+ CREATE UNIQUE INDEX ON originated(id) TABLESPACE other;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM moved;");
+ is($result, qq(1), "wal_level = $wal_level, CREATE+SET TABLESPACE");
+ $result = $node->safe_psql(
+ 'postgres', "
+ INSERT INTO originated VALUES (1) ON CONFLICT (id)
+ DO UPDATE set id = originated.id + 1
+ RETURNING id;");
+ is($result, qq(2),
+ "wal_level = $wal_level, CREATE TABLESPACE, CREATE INDEX");
+
+ # Test direct truncation optimization. No tuples.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc (id serial PRIMARY KEY);
+ TRUNCATE trunc;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
+ is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table");
+
+ # Test truncation with inserted tuples within the same transaction.
+ # Tuples inserted after the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc_ins (id serial PRIMARY KEY);
+ INSERT INTO trunc_ins VALUES (DEFAULT);
+ TRUNCATE trunc_ins;
+ INSERT INTO trunc_ins VALUES (DEFAULT);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres',
+ "SELECT count(*), min(id) FROM trunc_ins;");
+ is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT");
+
+ # Same for prepared transaction.
+ # Tuples inserted after the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE twophase (id serial PRIMARY KEY);
+ INSERT INTO twophase VALUES (DEFAULT);
+ TRUNCATE twophase;
+ INSERT INTO twophase VALUES (DEFAULT);
+ PREPARE TRANSACTION 't';
+ COMMIT PREPARED 't';");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres',
+ "SELECT count(*), min(id) FROM trunc_ins;");
+ is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE");
+
+ # Writing WAL at end of xact, instead of syncing.
+ $node->safe_psql(
+ 'postgres', "
+ SET wal_skip_threshold = '1GB';
+ BEGIN;
+ CREATE TABLE noskip (id serial PRIMARY KEY);
+ INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a) ;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM noskip;");
+ is($result, qq(20000), "wal_level = $wal_level, end-of-xact WAL");
+
+ # Data file for COPY query in subsequent tests
+ my $basedir = $node->basedir;
+ my $copy_file = "$basedir/copy_data.txt";
+ PostgreSQL::Test::Utils::append_to_file(
+ $copy_file, qq(20000,30000
+20001,30001
+20002,30002));
+
+ # Test truncation with inserted tuples using both INSERT and COPY. Tuples
+ # inserted after the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int);
+ INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000));
+ TRUNCATE ins_trunc;
+ INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
+ COPY ins_trunc FROM '$copy_file' DELIMITER ',';
+ INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trunc;");
+ is($result, qq(5), "wal_level = $wal_level, TRUNCATE COPY INSERT");
+
+ # Test truncation with inserted tuples using COPY. Tuples copied after
+ # the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int);
+ INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE trunc_copy;
+ COPY trunc_copy FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result =
+ $node->safe_psql('postgres', "SELECT count(*) FROM trunc_copy;");
+ is($result, qq(3), "wal_level = $wal_level, TRUNCATE COPY");
+
+ # Like previous test, but rollback SET TABLESPACE in a subtransaction.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int);
+ INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE spc_abort;
+ SAVEPOINT s;
+ ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s;
+ COPY spc_abort FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_abort;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE abort subtransaction");
+
+ # in different subtransaction patterns
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int);
+ INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE spc_commit;
+ SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s;
+ COPY spc_commit FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result =
+ $node->safe_psql('postgres', "SELECT count(*) FROM spc_commit;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE commit subtransaction");
+
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int);
+ INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE spc_nest;
+ SAVEPOINT s;
+ ALTER TABLE spc_nest SET TABLESPACE other;
+ SAVEPOINT s2;
+ ALTER TABLE spc_nest SET TABLESPACE pg_default;
+ ROLLBACK TO s2;
+ SAVEPOINT s2;
+ ALTER TABLE spc_nest SET TABLESPACE pg_default;
+ RELEASE s2;
+ ROLLBACK TO s;
+ COPY spc_nest FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_nest;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE nested subtransaction");
+
+ $node->safe_psql(
+ 'postgres', "
+ CREATE TABLE spc_hint (id int);
+ INSERT INTO spc_hint VALUES (1);
+ BEGIN;
+ ALTER TABLE spc_hint SET TABLESPACE other;
+ CHECKPOINT;
+ SELECT * FROM spc_hint; -- set hint bit
+ INSERT INTO spc_hint VALUES (2);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_hint;");
+ is($result, qq(2), "wal_level = $wal_level, SET TABLESPACE, hint bit");
+
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE idx_hint (c int PRIMARY KEY);
+ SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q;
+ CHECKPOINT;
+ INSERT INTO idx_hint VALUES (1); -- set index hint bit
+ INSERT INTO idx_hint VALUES (2);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->psql('postgres',);
+ my ($ret, $stdout, $stderr) =
+ $node->psql('postgres', "INSERT INTO idx_hint VALUES (2);");
+ is($ret, qq(3), "wal_level = $wal_level, unique index LP_DEAD");
+ like(
+ $stderr,
+ qr/violates unique/,
+ "wal_level = $wal_level, unique index LP_DEAD message");
+
+ # UPDATE touches two buffers for one row.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE upd (id serial PRIMARY KEY, id2 int);
+ INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000));
+ COPY upd FROM '$copy_file' DELIMITER ',';
+ UPDATE upd SET id2 = id2 + 1;
+ DELETE FROM upd;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM upd;");
+ is($result, qq(0),
+ "wal_level = $wal_level, UPDATE touches two buffers for one row");
+
+ # Test consistency of COPY with INSERT for table created in the same
+ # transaction.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int);
+ INSERT INTO ins_copy VALUES (DEFAULT, 1);
+ COPY ins_copy FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_copy;");
+ is($result, qq(4), "wal_level = $wal_level, INSERT COPY");
+
+ # Test consistency of COPY that inserts more to the same table using
+ # triggers. If the INSERTS from the trigger go to the same block data
+ # is copied to, and the INSERTs are WAL-logged, WAL replay will fail when
+ # it tries to replay the WAL record but the "before" image doesn't match,
+ # because not all changes were WAL-logged.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text);
+ CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ IF new.id2 NOT LIKE 'triggered%' THEN
+ INSERT INTO ins_trig
+ VALUES (DEFAULT, 'triggered row before' || NEW.id2);
+ END IF;
+ RETURN NEW;
+ END; \$\$;
+ CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ IF new.id2 NOT LIKE 'triggered%' THEN
+ INSERT INTO ins_trig
+ VALUES (DEFAULT, 'triggered row after' || NEW.id2);
+ END IF;
+ RETURN NEW;
+ END; \$\$;
+ CREATE TRIGGER ins_trig_before_row_insert
+ BEFORE INSERT ON ins_trig
+ FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig();
+ CREATE TRIGGER ins_trig_after_row_insert
+ AFTER INSERT ON ins_trig
+ FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig();
+ COPY ins_trig FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trig;");
+ is($result, qq(9), "wal_level = $wal_level, COPY with INSERT triggers");
+
+ # Test consistency of INSERT, COPY and TRUNCATE in same transaction block
+ # with TRUNCATE triggers.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text);
+ CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
+ RETURN NULL;
+ END; \$\$;
+ CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
+ RETURN NULL;
+ END; \$\$;
+ CREATE TRIGGER trunc_trig_before_stat_truncate
+ BEFORE TRUNCATE ON trunc_trig
+ FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig();
+ CREATE TRIGGER trunc_trig_after_stat_truncate
+ AFTER TRUNCATE ON trunc_trig
+ FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig();
+ INSERT INTO trunc_trig VALUES (DEFAULT, 1);
+ TRUNCATE trunc_trig;
+ COPY trunc_trig FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result =
+ $node->safe_psql('postgres', "SELECT count(*) FROM trunc_trig;");
+ is($result, qq(4),
+ "wal_level = $wal_level, TRUNCATE COPY with TRUNCATE triggers");
+
+ # Test redo of temp table creation.
+ $node->safe_psql(
+ 'postgres', "
+ CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);");
+ $node->stop('immediate');
+ $node->start;
+ check_orphan_relfilenodes($node,
+ "wal_level = $wal_level, no orphan relfilenode remains");
+
+ return;
+}
+
+# Run same test suite for multiple wal_level values.
+run_wal_optimize("minimal");
+run_wal_optimize("replica");
+
+done_testing();
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl
new file mode 100644
index 0000000..4ec1a9a
--- /dev/null
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -0,0 +1,444 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for replication slot limit
+# Ensure that max_slot_wal_keep_size limits the number of WAL files to
+# be kept by replication slots.
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Utils;
+use PostgreSQL::Test::Cluster;
+
+use File::Path qw(rmtree);
+use Test::More;
+use Time::HiRes qw(usleep);
+
+$ENV{PGDATABASE} = 'postgres';
+
+# Initialize primary node, setting wal-segsize to 1MB
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1, extra => ['--wal-segsize=1']);
+$node_primary->append_conf(
+ 'postgresql.conf', qq(
+min_wal_size = 2MB
+max_wal_size = 4MB
+log_checkpoints = yes
+));
+$node_primary->start;
+$node_primary->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('rep1')");
+
+# The slot state and remain should be null before the first connection
+my $result = $node_primary->safe_psql('postgres',
+ "SELECT restart_lsn IS NULL, wal_status is NULL, safe_wal_size is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "t|t|t", 'check the state of non-reserved slot is "unknown"');
+
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create a standby linking to it using the replication slot
+my $node_standby = PostgreSQL::Test::Cluster->new('standby_1');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'");
+
+$node_standby->start;
+
+# Wait until standby has replayed enough data
+$node_primary->wait_for_catchup($node_standby);
+
+# Stop standby
+$node_standby->stop;
+
+# Preparation done, the slot is the state "reserved" now
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "reserved|t", 'check the catching-up state');
+
+# Advance WAL by five segments (= 5MB) on primary
+advance_wal($node_primary, 1);
+$node_primary->safe_psql('postgres', "CHECKPOINT;");
+
+# The slot is always "safe" when fitting max_wal_size
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "reserved|t",
+ 'check that it is safe if WAL fits in max_wal_size');
+
+advance_wal($node_primary, 4);
+$node_primary->safe_psql('postgres', "CHECKPOINT;");
+
+# The slot is always "safe" when max_slot_wal_keep_size is not set
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "reserved|t", 'check that slot is working');
+
+# The standby can reconnect to primary
+$node_standby->start;
+
+$node_primary->wait_for_catchup($node_standby);
+
+$node_standby->stop;
+
+# Set max_slot_wal_keep_size on primary
+my $max_slot_wal_keep_size_mb = 6;
+$node_primary->append_conf(
+ 'postgresql.conf', qq(
+max_slot_wal_keep_size = ${max_slot_wal_keep_size_mb}MB
+));
+$node_primary->reload;
+
+# The slot is in safe state.
+
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
+is($result, "reserved", 'check that max_slot_wal_keep_size is working');
+
+# Advance WAL again then checkpoint, reducing remain by 2 MB.
+advance_wal($node_primary, 2);
+$node_primary->safe_psql('postgres', "CHECKPOINT;");
+
+# The slot is still working
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
+is($result, "reserved",
+ 'check that safe_wal_size gets close to the current LSN');
+
+# The standby can reconnect to primary
+$node_standby->start;
+$node_primary->wait_for_catchup($node_standby);
+$node_standby->stop;
+
+# wal_keep_size overrides max_slot_wal_keep_size
+$result = $node_primary->safe_psql('postgres',
+ "ALTER SYSTEM SET wal_keep_size to '8MB'; SELECT pg_reload_conf();");
+# Advance WAL again then checkpoint, reducing remain by 6 MB.
+advance_wal($node_primary, 6);
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "extended",
+ 'check that wal_keep_size overrides max_slot_wal_keep_size');
+# restore wal_keep_size
+$result = $node_primary->safe_psql('postgres',
+ "ALTER SYSTEM SET wal_keep_size to 0; SELECT pg_reload_conf();");
+
+# The standby can reconnect to primary
+$node_standby->start;
+$node_primary->wait_for_catchup($node_standby);
+$node_standby->stop;
+
+# Advance WAL again without checkpoint, reducing remain by 6 MB.
+advance_wal($node_primary, 6);
+
+# Slot gets into 'reserved' state
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
+is($result, "extended", 'check that the slot state changes to "extended"');
+
+# do checkpoint so that the next checkpoint runs too early
+$node_primary->safe_psql('postgres', "CHECKPOINT;");
+
+# Advance WAL again without checkpoint; remain goes to 0.
+advance_wal($node_primary, 1);
+
+# Slot gets into 'unreserved' state and safe_wal_size is negative
+$result = $node_primary->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size <= 0 FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "unreserved|t",
+ 'check that the slot state changes to "unreserved"');
+
+# The standby still can connect to primary before a checkpoint
+$node_standby->start;
+
+$node_primary->wait_for_catchup($node_standby);
+
+$node_standby->stop;
+
+ok( !$node_standby->log_contains(
+ "requested WAL segment [0-9A-F]+ has already been removed"),
+ 'check that required WAL segments are still available');
+
+# Create one checkpoint, to improve stability of the next steps
+$node_primary->safe_psql('postgres', "CHECKPOINT;");
+
+# Prevent other checkpoints from occurring while advancing WAL segments
+$node_primary->safe_psql('postgres',
+ "ALTER SYSTEM SET max_wal_size='40MB'; SELECT pg_reload_conf()");
+
+# Advance WAL again. The slot loses the oldest segment by the next checkpoint
+my $logstart = get_log_size($node_primary);
+advance_wal($node_primary, 7);
+
+# Now create another checkpoint and wait until the WARNING is issued
+$node_primary->safe_psql('postgres',
+ 'ALTER SYSTEM RESET max_wal_size; SELECT pg_reload_conf()');
+$node_primary->safe_psql('postgres', "CHECKPOINT;");
+my $invalidated = 0;
+for (my $i = 0; $i < 10000; $i++)
+{
+ if ($node_primary->log_contains(
+ "invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size",
+ $logstart))
+ {
+ $invalidated = 1;
+ last;
+ }
+ usleep(100_000);
+}
+ok($invalidated, 'check that slot invalidation has been logged');
+
+$result = $node_primary->safe_psql(
+ 'postgres',
+ qq[
+ SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size
+ FROM pg_replication_slots WHERE slot_name = 'rep1']);
+is($result, "rep1|f|t|lost|",
+ 'check that the slot became inactive and the state "lost" persists');
+
+# Wait until current checkpoint ends
+my $checkpoint_ended = 0;
+for (my $i = 0; $i < 10000; $i++)
+{
+ if ($node_primary->log_contains("checkpoint complete: ", $logstart))
+ {
+ $checkpoint_ended = 1;
+ last;
+ }
+ usleep(100_000);
+}
+ok($checkpoint_ended, 'waited for checkpoint to end');
+
+# The invalidated slot shouldn't keep the old-segment horizon back;
+# see bug #17103: https://postgr.es/m/17103-004130e8f27782c9@postgresql.org
+# Test for this by creating a new slot and comparing its restart LSN
+# to the oldest existing file.
+my $redoseg = $node_primary->safe_psql('postgres',
+ "SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)"
+);
+my $oldestseg = $node_primary->safe_psql('postgres',
+ "SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' ORDER BY 1 LIMIT 1"
+);
+$node_primary->safe_psql('postgres',
+ qq[SELECT pg_drop_replication_slot('s2')]);
+is($oldestseg, $redoseg, "check that segments have been removed");
+
+# The standby no longer can connect to the primary
+$logstart = get_log_size($node_standby);
+$node_standby->start;
+
+my $failed = 0;
+for (my $i = 0; $i < 10000; $i++)
+{
+ if ($node_standby->log_contains(
+ "requested WAL segment [0-9A-F]+ has already been removed",
+ $logstart))
+ {
+ $failed = 1;
+ last;
+ }
+ usleep(100_000);
+}
+ok($failed, 'check that replication has been broken');
+
+$node_primary->stop;
+$node_standby->stop;
+
+my $node_primary2 = PostgreSQL::Test::Cluster->new('primary2');
+$node_primary2->init(allows_streaming => 1);
+$node_primary2->append_conf(
+ 'postgresql.conf', qq(
+min_wal_size = 32MB
+max_wal_size = 32MB
+log_checkpoints = yes
+));
+$node_primary2->start;
+$node_primary2->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('rep1')");
+$backup_name = 'my_backup2';
+$node_primary2->backup($backup_name);
+
+$node_primary2->stop;
+$node_primary2->append_conf(
+ 'postgresql.conf', qq(
+max_slot_wal_keep_size = 0
+));
+$node_primary2->start;
+
+$node_standby = PostgreSQL::Test::Cluster->new('standby_2');
+$node_standby->init_from_backup($node_primary2, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'");
+$node_standby->start;
+my @result =
+ split(
+ '\n',
+ $node_primary2->safe_psql(
+ 'postgres',
+ "CREATE TABLE tt();
+ DROP TABLE tt;
+ SELECT pg_switch_wal();
+ CHECKPOINT;
+ SELECT 'finished';",
+ timeout => $PostgreSQL::Test::Utils::timeout_default));
+is($result[1], 'finished', 'check if checkpoint command is not blocked');
+
+$node_primary2->stop;
+$node_standby->stop;
+
+# The next test depends on Perl's `kill`, which apparently is not
+# portable to Windows. (It would be nice to use Test::More's `subtest`,
+# but that's not in the ancient version we require.)
+if ($PostgreSQL::Test::Utils::windows_os)
+{
+ done_testing();
+ exit;
+}
+
+# Get a slot terminated while the walsender is active
+# We do this by sending SIGSTOP to the walsender. Skip this on Windows.
+my $node_primary3 = PostgreSQL::Test::Cluster->new('primary3');
+$node_primary3->init(allows_streaming => 1, extra => ['--wal-segsize=1']);
+$node_primary3->append_conf(
+ 'postgresql.conf', qq(
+ min_wal_size = 2MB
+ max_wal_size = 2MB
+ log_checkpoints = yes
+ max_slot_wal_keep_size = 1MB
+ ));
+$node_primary3->start;
+$node_primary3->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('rep3')");
+# Take backup
+$backup_name = 'my_backup';
+$node_primary3->backup($backup_name);
+# Create standby
+my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3');
+$node_standby3->init_from_backup($node_primary3, $backup_name,
+ has_streaming => 1);
+$node_standby3->append_conf('postgresql.conf', "primary_slot_name = 'rep3'");
+$node_standby3->start;
+$node_primary3->wait_for_catchup($node_standby3);
+
+my $senderpid;
+
+# We've seen occasional cases where multiple walsender pids are still active
+# at this point, apparently just due to process shutdown being slow. To avoid
+# spurious failures, retry a couple times.
+my $i = 0;
+while (1)
+{
+ my ($stdout, $stderr);
+
+ $senderpid = $node_primary3->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'");
+
+ last if $senderpid =~ qr/^[0-9]+$/;
+
+ diag "multiple walsenders active in iteration $i";
+
+ # show information about all active connections
+ $node_primary3->psql(
+ 'postgres',
+ "\\a\\t\nSELECT * FROM pg_stat_activity",
+ stdout => \$stdout,
+ stderr => \$stderr);
+ diag $stdout, $stderr;
+
+ # unlikely that the problem would resolve after 15s, so give up at point
+ if ($i++ == 150)
+ {
+ # An immediate shutdown may hide evidence of a locking bug. If
+ # retrying didn't resolve the issue, shut down in fast mode.
+ $node_primary3->stop('fast');
+ $node_standby3->stop('fast');
+ die "could not determine walsender pid, can't continue";
+ }
+
+ usleep(100_000);
+}
+
+like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid");
+
+my $receiverpid = $node_standby3->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walreceiver'");
+like($receiverpid, qr/^[0-9]+$/, "have walreceiver pid $receiverpid");
+
+$logstart = get_log_size($node_primary3);
+# freeze walsender and walreceiver. Slot will still be active, but walreceiver
+# won't get anything anymore.
+kill 'STOP', $senderpid, $receiverpid;
+advance_wal($node_primary3, 2);
+
+my $max_attempts = $PostgreSQL::Test::Utils::timeout_default;
+while ($max_attempts-- >= 0)
+{
+ if ($node_primary3->log_contains(
+ "terminating process $senderpid to release replication slot \"rep3\"",
+ $logstart))
+ {
+ ok(1, "walsender termination logged");
+ last;
+ }
+ sleep 1;
+}
+
+# Now let the walsender continue; slot should be killed now.
+# (Must not let walreceiver run yet; otherwise the standby could start another
+# one before the slot can be killed)
+kill 'CONT', $senderpid;
+$node_primary3->poll_query_until('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep3'",
+ "lost")
+ or die "timed out waiting for slot to be lost";
+
+$max_attempts = $PostgreSQL::Test::Utils::timeout_default;
+while ($max_attempts-- >= 0)
+{
+ if ($node_primary3->log_contains(
+ 'invalidating slot "rep3" because its restart_lsn', $logstart))
+ {
+ ok(1, "slot invalidation logged");
+ last;
+ }
+ sleep 1;
+}
+
+# Now let the walreceiver continue, so that the node can be stopped cleanly
+kill 'CONT', $receiverpid;
+
+$node_primary3->stop;
+$node_standby3->stop;
+
+#####################################
+# Advance WAL of $node by $n segments
+sub advance_wal
+{
+ my ($node, $n) = @_;
+
+ # Advance by $n segments (= (16 * $n) MB) on primary
+ for (my $i = 0; $i < $n; $i++)
+ {
+ $node->safe_psql('postgres',
+ "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();");
+ }
+ return;
+}
+
+# return the size of logfile of $node in bytes
+sub get_log_size
+{
+ my ($node) = @_;
+
+ return (stat $node->logfile)[7];
+}
+
+done_testing();
diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl
new file mode 100644
index 0000000..2108d50
--- /dev/null
+++ b/src/test/recovery/t/020_archive_status.pl
@@ -0,0 +1,251 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+#
+# Tests related to WAL archiving and recovery.
+#
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $primary = PostgreSQL::Test::Cluster->new('primary');
+$primary->init(
+ has_archiving => 1,
+ allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'autovacuum = off');
+$primary->start;
+my $primary_data = $primary->data_dir;
+
+# Temporarily use an archive_command value to make the archiver fail,
+# knowing that archiving is enabled. Note that we cannot use a command
+# that does not exist as in this case the archiver process would just exit
+# without reporting the failure to pg_stat_archiver. This also cannot
+# use a plain "false" as that's unportable on Windows. So, instead, as
+# a portable solution, use an archive command based on a command known to
+# work but will fail: copy with an incorrect original path.
+my $incorrect_command =
+ $PostgreSQL::Test::Utils::windows_os
+ ? qq{copy "%p_does_not_exist" "%f_does_not_exist"}
+ : qq{cp "%p_does_not_exist" "%f_does_not_exist"};
+$primary->safe_psql(
+ 'postgres', qq{
+ ALTER SYSTEM SET archive_command TO '$incorrect_command';
+ SELECT pg_reload_conf();
+});
+
+# Save the WAL segment currently in use and switch to a new segment.
+# This will be used to track the activity of the archiver.
+my $segment_name_1 = $primary->safe_psql('postgres',
+ q{SELECT pg_walfile_name(pg_current_wal_lsn())});
+my $segment_path_1 = "pg_wal/archive_status/$segment_name_1";
+my $segment_path_1_ready = "$segment_path_1.ready";
+my $segment_path_1_done = "$segment_path_1.done";
+$primary->safe_psql(
+ 'postgres', q{
+ CREATE TABLE mine AS SELECT generate_series(1,10) AS x;
+ SELECT pg_switch_wal();
+ CHECKPOINT;
+});
+
+# Wait for an archive failure.
+$primary->poll_query_until('postgres',
+ q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't')
+ or die "Timed out while waiting for archiving to fail";
+ok( -f "$primary_data/$segment_path_1_ready",
+ ".ready file exists for WAL segment $segment_name_1 waiting to be archived"
+);
+ok( !-f "$primary_data/$segment_path_1_done",
+ ".done file does not exist for WAL segment $segment_name_1 waiting to be archived"
+);
+
+is( $primary->safe_psql(
+ 'postgres', q{
+ SELECT archived_count, last_failed_wal
+ FROM pg_stat_archiver
+ }),
+ "0|$segment_name_1",
+ "pg_stat_archiver failed to archive $segment_name_1");
+
+# Crash the cluster for the next test in charge of checking that non-archived
+# WAL segments are not removed.
+$primary->stop('immediate');
+
+# Recovery tests for the archiving with a standby partially check
+# the recovery behavior when restoring a backup taken using a
+# snapshot with no pg_backup_start/stop. In this situation,
+# the recovered standby should enter first crash recovery then
+# switch to regular archive recovery. Note that the base backup
+# is taken here so as archive_command will fail. This is necessary
+# for the assumptions of the tests done with the standbys below.
+$primary->backup_fs_cold('backup');
+
+$primary->start;
+ok( -f "$primary_data/$segment_path_1_ready",
+ ".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
+);
+
+# Allow WAL archiving again and wait for a success.
+$primary->safe_psql(
+ 'postgres', q{
+ ALTER SYSTEM RESET archive_command;
+ SELECT pg_reload_conf();
+});
+
+$primary->poll_query_until('postgres',
+ q{SELECT archived_count FROM pg_stat_archiver}, '1')
+ or die "Timed out while waiting for archiving to finish";
+
+ok(!-f "$primary_data/$segment_path_1_ready",
+ ".ready file for archived WAL segment $segment_name_1 removed");
+
+ok(-f "$primary_data/$segment_path_1_done",
+ ".done file for archived WAL segment $segment_name_1 exists");
+
+is( $primary->safe_psql(
+ 'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }),
+ $segment_name_1,
+ "archive success reported in pg_stat_archiver for WAL segment $segment_name_1"
+);
+
+# Create some WAL activity and a new checkpoint so as the next standby can
+# create a restartpoint. As this standby starts in crash recovery because
+# of the cold backup taken previously, it needs a clean restartpoint to deal
+# with existing status files.
+my $segment_name_2 = $primary->safe_psql('postgres',
+ q{SELECT pg_walfile_name(pg_current_wal_lsn())});
+my $segment_path_2 = "pg_wal/archive_status/$segment_name_2";
+my $segment_path_2_ready = "$segment_path_2.ready";
+my $segment_path_2_done = "$segment_path_2.done";
+$primary->safe_psql(
+ 'postgres', q{
+ INSERT INTO mine SELECT generate_series(10,20) AS x;
+ CHECKPOINT;
+});
+
+# Switch to a new segment and use the returned LSN to make sure that
+# standbys have caught up to this point.
+my $primary_lsn = $primary->safe_psql(
+ 'postgres', q{
+ SELECT pg_switch_wal();
+});
+
+$primary->poll_query_until('postgres',
+ q{ SELECT last_archived_wal FROM pg_stat_archiver },
+ $segment_name_2)
+ or die "Timed out while waiting for archiving to finish";
+
+# Test standby with archive_mode = on.
+my $standby1 = PostgreSQL::Test::Cluster->new('standby');
+$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby1->append_conf('postgresql.conf', "archive_mode = on");
+my $standby1_data = $standby1->data_dir;
+$standby1->start;
+
+# Wait for the replay of the segment switch done previously, ensuring
+# that all segments needed are restored from the archives.
+$standby1->poll_query_until('postgres',
+ qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 }
+) or die "Timed out while waiting for xlog replay on standby1";
+
+$standby1->safe_psql('postgres', q{CHECKPOINT});
+
+# Recovery with archive_mode=on does not keep .ready signal files inherited
+# from backup. Note that this WAL segment existed in the backup.
+ok( !-f "$standby1_data/$segment_path_1_ready",
+ ".ready file for WAL segment $segment_name_1 present in backup got removed with archive_mode=on on standby"
+);
+
+# Recovery with archive_mode=on should not create .ready files.
+# Note that this segment did not exist in the backup.
+ok( !-f "$standby1_data/$segment_path_2_ready",
+ ".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby"
+);
+
+# Recovery with archive_mode = on creates .done files.
+ok( -f "$standby1_data/$segment_path_2_done",
+ ".done file for WAL segment $segment_name_2 created when archive_mode=on on standby"
+);
+
+# Test recovery with archive_mode = always, which should always keep
+# .ready files if archiving is enabled, though here we want the archive
+# command to fail to persist the .ready files. Note that this node
+# has inherited the archive command of the previous cold backup that
+# will cause archiving failures.
+my $standby2 = PostgreSQL::Test::Cluster->new('standby2');
+$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby2->append_conf('postgresql.conf', 'archive_mode = always');
+my $standby2_data = $standby2->data_dir;
+$standby2->start;
+
+# Wait for the replay of the segment switch done previously, ensuring
+# that all segments needed are restored from the archives.
+$standby2->poll_query_until('postgres',
+ qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 }
+) or die "Timed out while waiting for xlog replay on standby2";
+
+$standby2->safe_psql('postgres', q{CHECKPOINT});
+
+ok( -f "$standby2_data/$segment_path_1_ready",
+ ".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby"
+);
+
+ok( -f "$standby2_data/$segment_path_2_ready",
+ ".ready file for WAL segment $segment_name_2 created with archive_mode=always on standby"
+);
+
+# Reset statistics of the archiver for the next checks.
+$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});
+
+# Now crash the cluster to check that recovery step does not
+# remove non-archived WAL segments on a standby where archiving
+# is enabled.
+$standby2->stop('immediate');
+$standby2->start;
+
+ok( -f "$standby2_data/$segment_path_1_ready",
+ "WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
+);
+
+# Allow WAL archiving again, and wait for the segments to be archived.
+$standby2->safe_psql(
+ 'postgres', q{
+ ALTER SYSTEM RESET archive_command;
+ SELECT pg_reload_conf();
+});
+$standby2->poll_query_until('postgres',
+ q{SELECT last_archived_wal FROM pg_stat_archiver},
+ $segment_name_2)
+ or die "Timed out while waiting for archiving to finish";
+
+is( $standby2->safe_psql(
+ 'postgres', q{SELECT archived_count FROM pg_stat_archiver}),
+ '2',
+ "correct number of WAL segments archived from standby");
+
+ok( !-f "$standby2_data/$segment_path_1_ready"
+ && !-f "$standby2_data/$segment_path_2_ready",
+ ".ready files removed after archive success with archive_mode=always on standby"
+);
+
+ok( -f "$standby2_data/$segment_path_1_done"
+ && -f "$standby2_data/$segment_path_2_done",
+ ".done files created after archive success with archive_mode=always on standby"
+);
+
+# Check that the archiver process calls the shell archive module's shutdown
+# callback.
+$standby2->append_conf('postgresql.conf', "log_min_messages = debug1");
+$standby2->reload;
+
+# Run a query to make sure that the reload has taken effect.
+$standby2->safe_psql('postgres', q{SELECT 1});
+my $log_location = -s $standby2->logfile;
+
+$standby2->stop;
+my $logfile = slurp_file($standby2->logfile, $log_location);
+ok( $logfile =~ qr/archiver process shutting down/,
+ 'check shutdown callback of shell archive module');
+
+done_testing();
diff --git a/src/test/recovery/t/021_row_visibility.pl b/src/test/recovery/t/021_row_visibility.pl
new file mode 100644
index 0000000..aeaf37c
--- /dev/null
+++ b/src/test/recovery/t/021_row_visibility.pl
@@ -0,0 +1,205 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Checks that snapshots on standbys behave in a minimally reasonable
+# way.
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf('postgresql.conf', 'max_prepared_transactions=10');
+$node_primary->start;
+
+# Initialize with empty test table
+$node_primary->safe_psql('postgres',
+ 'CREATE TABLE public.test_visibility (data text not null)');
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create streaming standby from backup
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf', 'max_prepared_transactions=10');
+$node_standby->start;
+
+my $psql_timeout =
+ IPC::Run::timer(2 * $PostgreSQL::Test::Utils::timeout_default);
+
+# One psql to primary and standby each, for all queries. That allows
+# to check uncommitted changes being replicated and such.
+my %psql_primary = (stdin => '', stdout => '', stderr => '');
+$psql_primary{run} = IPC::Run::start(
+ [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ],
+ '<',
+ \$psql_primary{stdin},
+ '>',
+ \$psql_primary{stdout},
+ '2>',
+ \$psql_primary{stderr},
+ $psql_timeout);
+
+my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => '');
+$psql_standby{run} = IPC::Run::start(
+ [ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ],
+ '<',
+ \$psql_standby{stdin},
+ '>',
+ \$psql_standby{stdout},
+ '2>',
+ \$psql_standby{stderr},
+ $psql_timeout);
+
+#
+# 1. Check initial data is the same
+#
+ok( send_query_and_wait(
+ \%psql_standby,
+ q/SELECT * FROM test_visibility ORDER BY data;/,
+ qr/^\(0 rows\)$/m),
+ 'data not visible');
+
+#
+# 2. Check if an INSERT is replayed and visible
+#
+$node_primary->psql('postgres',
+ "INSERT INTO test_visibility VALUES ('first insert')");
+$node_primary->wait_for_catchup($node_standby);
+
+ok( send_query_and_wait(
+ \%psql_standby,
+ q[SELECT * FROM test_visibility ORDER BY data;],
+ qr/first insert.*\n\(1 row\)/m),
+ 'insert visible');
+
+#
+# 3. Verify that uncommitted changes aren't visible.
+#
+ok( send_query_and_wait(
+ \%psql_primary,
+ q[
+BEGIN;
+UPDATE test_visibility SET data = 'first update' RETURNING data;
+ ],
+ qr/^UPDATE 1$/m),
+ 'UPDATE');
+
+$node_primary->psql('postgres', "SELECT txid_current();"); # ensure WAL flush
+$node_primary->wait_for_catchup($node_standby);
+
+ok( send_query_and_wait(
+ \%psql_standby,
+ q[SELECT * FROM test_visibility ORDER BY data;],
+ qr/first insert.*\n\(1 row\)/m),
+ 'uncommitted update invisible');
+
+#
+# 4. That a commit turns 3. visible
+#
+ok(send_query_and_wait(\%psql_primary, q[COMMIT;], qr/^COMMIT$/m), 'COMMIT');
+
+$node_primary->wait_for_catchup($node_standby);
+
+ok( send_query_and_wait(
+ \%psql_standby,
+ q[SELECT * FROM test_visibility ORDER BY data;],
+ qr/first update\n\(1 row\)$/m),
+ 'committed update visible');
+
+#
+# 5. Check that changes in prepared xacts is invisible
+#
+ok( send_query_and_wait(
+ \%psql_primary, q[
+DELETE from test_visibility; -- delete old data, so we start with clean slate
+BEGIN;
+INSERT INTO test_visibility VALUES('inserted in prepared will_commit');
+PREPARE TRANSACTION 'will_commit';],
+ qr/^PREPARE TRANSACTION$/m),
+ 'prepared will_commit');
+
+ok( send_query_and_wait(
+ \%psql_primary, q[
+BEGIN;
+INSERT INTO test_visibility VALUES('inserted in prepared will_abort');
+PREPARE TRANSACTION 'will_abort';
+ ],
+ qr/^PREPARE TRANSACTION$/m),
+ 'prepared will_abort');
+
+$node_primary->wait_for_catchup($node_standby);
+
+ok( send_query_and_wait(
+ \%psql_standby,
+ q[SELECT * FROM test_visibility ORDER BY data;],
+ qr/^\(0 rows\)$/m),
+ 'uncommitted prepared invisible');
+
+# For some variation, finish prepared xacts via separate connections
+$node_primary->safe_psql('postgres', "COMMIT PREPARED 'will_commit';");
+$node_primary->safe_psql('postgres', "ROLLBACK PREPARED 'will_abort';");
+$node_primary->wait_for_catchup($node_standby);
+
+ok( send_query_and_wait(
+ \%psql_standby,
+ q[SELECT * FROM test_visibility ORDER BY data;],
+ qr/will_commit.*\n\(1 row\)$/m),
+ 'finished prepared visible');
+
+# explicitly shut down psql instances gracefully - to avoid hangs
+# or worse on windows
+$psql_primary{stdin} .= "\\q\n";
+$psql_primary{run}->finish;
+$psql_standby{stdin} .= "\\q\n";
+$psql_standby{run}->finish;
+
+$node_primary->stop;
+$node_standby->stop;
+
+# Send query, wait until string matches
+sub send_query_and_wait
+{
+ my ($psql, $query, $untl) = @_;
+ my $ret;
+
+ # send query
+ $$psql{stdin} .= $query;
+ $$psql{stdin} .= "\n";
+
+ # wait for query results
+ $$psql{run}->pump_nb();
+ while (1)
+ {
+ last if $$psql{stdout} =~ /$untl/;
+
+ if ($psql_timeout->is_expired)
+ {
+ BAIL_OUT("aborting wait: program timed out\n"
+ . "stream contents: >>$$psql{stdout}<<\n"
+ . "pattern searched for: $untl\n");
+ return 0;
+ }
+ if (not $$psql{run}->pumpable())
+ {
+ BAIL_OUT("aborting wait: program died\n"
+ . "stream contents: >>$$psql{stdout}<<\n"
+ . "pattern searched for: $untl\n");
+ return 0;
+ }
+ $$psql{run}->pump();
+ }
+
+ $$psql{stdout} = '';
+
+ return 1;
+}
+
+done_testing();
diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl
new file mode 100644
index 0000000..53a55c7
--- /dev/null
+++ b/src/test/recovery/t/022_crash_temp_files.pl
@@ -0,0 +1,277 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test remove of temporary files after a crash.
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Config;
+
+if ($Config{osname} eq 'MSWin32')
+{
+ plan skip_all => 'tests hang on Windows';
+ exit;
+}
+
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+
+my $node = PostgreSQL::Test::Cluster->new('node_crash');
+$node->init();
+$node->start();
+
+# By default, PostgreSQL::Test::Cluster doesn't restart after crash
+# Reduce work_mem to generate temporary file with a few number of rows
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET remove_temp_files_after_crash = on;
+ ALTER SYSTEM SET log_connections = 1;
+ ALTER SYSTEM SET work_mem = '64kB';
+ ALTER SYSTEM SET restart_after_crash = on;
+ SELECT pg_reload_conf();]);
+
+# create table, insert rows
+$node->safe_psql('postgres', q[CREATE TABLE tab_crash (a integer UNIQUE);]);
+
+# Run psql, keeping session alive, so we have an alive backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$killme_stdin,
+ '>',
+ \$killme_stdout,
+ '2>',
+ \$killme_stderr,
+ $psql_timeout);
+
+# Get backend pid
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok( pump_until(
+ $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGKILL');
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Open a 2nd session that will block the 1st one, using the UNIQUE constraint.
+# This will prevent removal of the temporary file created by the 1st session.
+my ($killme_stdin2, $killme_stdout2, $killme_stderr2) = ('', '', '');
+my $killme2 = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$killme_stdin2,
+ '>',
+ \$killme_stdout2,
+ '2>',
+ \$killme_stderr2,
+ $psql_timeout);
+
+# Insert one tuple and leave the transaction open
+$killme_stdin2 .= q[
+BEGIN;
+INSERT INTO tab_crash (a) VALUES(1);
+SELECT $$insert-tuple-to-lock-next-insert$$;
+];
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+ qr/insert-tuple-to-lock-next-insert/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Run the query that generates a temporary file and that will be killed before
+# it finishes. Since the query that generates the temporary file does not
+# return before the connection is killed, use a SELECT before to trigger
+# pump_until.
+$killme_stdin .= q[
+BEGIN;
+SELECT $$in-progress-before-sigkill$$;
+INSERT INTO tab_crash (a) SELECT i FROM generate_series(1, 5000) s(i);
+];
+ok( pump_until(
+ $killme, $psql_timeout,
+ \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'insert in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Wait until the batch insert gets stuck on the lock.
+$killme_stdin2 .= q[
+DO $c$
+DECLARE
+ c INT;
+BEGIN
+ LOOP
+ SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = ] . $pid
+ . q[ AND NOT granted;
+ IF c > 0 THEN
+ EXIT;
+ END IF;
+ END LOOP;
+END; $c$;
+SELECT $$insert-tuple-lock-waiting$$;
+];
+
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+ qr/insert-tuple-lock-waiting/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Kill with SIGKILL
+my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+
+# Close that psql session
+$killme->finish;
+
+# Wait till the other session reports failure, ensuring that the postmaster
+# has noticed its dead child and begun a restart cycle.
+$killme_stdin2 .= qq[
+SELECT pg_sleep($PostgreSQL::Test::Utils::timeout_default);
+];
+ok( pump_until(
+ $killme2,
+ $psql_timeout,
+ \$killme_stderr2,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "second psql session died successfully after SIGKILL");
+$killme2->finish;
+
+# Wait till server finishes restarting
+$node->poll_query_until('postgres', undef, '');
+
+# Check for temporary files
+is( $node->safe_psql(
+ 'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+ qq(0),
+ 'no temporary files');
+
+#
+# Test old behavior (don't remove temporary files after crash)
+#
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET remove_temp_files_after_crash = off;
+ SELECT pg_reload_conf();]);
+
+# Restart psql session
+($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+$killme->run();
+
+# Get backend pid
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok( pump_until(
+ $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGKILL');
+$pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Restart the 2nd psql session
+($killme_stdin2, $killme_stdout2, $killme_stderr2) = ('', '', '');
+$killme2->run();
+
+# Insert one tuple and leave the transaction open
+$killme_stdin2 .= q[
+BEGIN;
+INSERT INTO tab_crash (a) VALUES(1);
+SELECT $$insert-tuple-to-lock-next-insert$$;
+];
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+ qr/insert-tuple-to-lock-next-insert/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Run the query that generates a temporary file and that will be killed before
+# it finishes. Since the query that generates the temporary file does not
+# return before the connection is killed, use a SELECT before to trigger
+# pump_until.
+$killme_stdin .= q[
+BEGIN;
+SELECT $$in-progress-before-sigkill$$;
+INSERT INTO tab_crash (a) SELECT i FROM generate_series(1, 5000) s(i);
+];
+ok( pump_until(
+ $killme, $psql_timeout,
+ \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'insert in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Wait until the batch insert gets stuck on the lock.
+$killme_stdin2 .= q[
+DO $c$
+DECLARE
+ c INT;
+BEGIN
+ LOOP
+ SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = ] . $pid
+ . q[ AND NOT granted;
+ IF c > 0 THEN
+ EXIT;
+ END IF;
+ END LOOP;
+END; $c$;
+SELECT $$insert-tuple-lock-waiting$$;
+];
+
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+ qr/insert-tuple-lock-waiting/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Kill with SIGKILL
+$ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+
+# Close that psql session
+$killme->finish;
+
+# Wait till the other session reports failure, ensuring that the postmaster
+# has noticed its dead child and begun a restart cycle.
+$killme_stdin2 .= qq[
+SELECT pg_sleep($PostgreSQL::Test::Utils::timeout_default);
+];
+ok( pump_until(
+ $killme2,
+ $psql_timeout,
+ \$killme_stderr2,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+ ),
+ "second psql session died successfully after SIGKILL");
+$killme2->finish;
+
+# Wait till server finishes restarting
+$node->poll_query_until('postgres', undef, '');
+
+# Check for temporary files -- should be there
+is( $node->safe_psql(
+ 'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+ qq(1),
+ 'one temporary file');
+
+# Restart should remove the temporary files
+$node->restart();
+
+# Check the temporary files -- should be gone
+is( $node->safe_psql(
+ 'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+ qq(0),
+ 'temporary file was removed');
+
+$node->stop();
+
+done_testing();
diff --git a/src/test/recovery/t/023_pitr_prepared_xact.pl b/src/test/recovery/t/023_pitr_prepared_xact.pl
new file mode 100644
index 0000000..39e8a8f
--- /dev/null
+++ b/src/test/recovery/t/023_pitr_prepared_xact.pl
@@ -0,0 +1,91 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for point-in-time-recovery (PITR) with prepared transactions
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Compare;
+
+# Initialize and start primary node with WAL archiving
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(has_archiving => 1, allows_streaming => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', qq{
+max_prepared_transactions = 10});
+$node_primary->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Initialize node for PITR targeting a very specific restore point, just
+# after a PREPARE TRANSACTION is issued so as we finish with a promoted
+# node where this 2PC transaction needs an explicit COMMIT PREPARED.
+my $node_pitr = PostgreSQL::Test::Cluster->new('node_pitr');
+$node_pitr->init_from_backup(
+ $node_primary, $backup_name,
+ standby => 0,
+ has_restoring => 1);
+$node_pitr->append_conf(
+ 'postgresql.conf', qq{
+recovery_target_name = 'rp'
+recovery_target_action = 'promote'});
+
+# Workload with a prepared transaction and the target restore point.
+$node_primary->psql(
+ 'postgres', qq{
+CREATE TABLE foo(i int);
+BEGIN;
+INSERT INTO foo VALUES(1);
+PREPARE TRANSACTION 'fooinsert';
+SELECT pg_create_restore_point('rp');
+INSERT INTO foo VALUES(2);
+});
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_primary->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+my $archive_wait_query =
+ "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
+$node_primary->poll_query_until('postgres', $archive_wait_query)
+ or die "Timed out while waiting for WAL segment to be archived";
+my $last_archived_wal_file = $walfile_to_be_archived;
+
+# Now start the PITR node.
+$node_pitr->start;
+
+# Wait until the PITR node exits recovery.
+$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
+ or die "Timed out while waiting for PITR promotion";
+
+# Commit the prepared transaction in the latest timeline and check its
+# result. There should only be one row in the table, coming from the
+# prepared transaction. The row from the INSERT after the restore point
+# should not show up, since our recovery target was older than the second
+# INSERT done.
+$node_pitr->psql('postgres', qq{COMMIT PREPARED 'fooinsert';});
+my $result = $node_pitr->safe_psql('postgres', "SELECT * FROM foo;");
+is($result, qq{1}, "check table contents after COMMIT PREPARED");
+
+# Insert more data and do a checkpoint. These should be generated on the
+# timeline chosen after the PITR promotion.
+$node_pitr->psql(
+ 'postgres', qq{
+INSERT INTO foo VALUES(3);
+CHECKPOINT;
+});
+
+# Enforce recovery, the checkpoint record generated previously should
+# still be found.
+$node_pitr->stop('immediate');
+$node_pitr->start;
+
+done_testing();
diff --git a/src/test/recovery/t/024_archive_recovery.pl b/src/test/recovery/t/024_archive_recovery.pl
new file mode 100644
index 0000000..ce347e0
--- /dev/null
+++ b/src/test/recovery/t/024_archive_recovery.pl
@@ -0,0 +1,105 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test for archive recovery of WAL generated with wal_level=minimal
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Time::HiRes qw(usleep);
+
+# Initialize and start node with wal_level = replica and WAL archiving
+# enabled.
+my $node = PostgreSQL::Test::Cluster->new('orig');
+$node->init(has_archiving => 1, allows_streaming => 1);
+my $replica_config = q[
+wal_level = replica
+archive_mode = on
+max_wal_senders = 10
+hot_standby = off
+];
+$node->append_conf('postgresql.conf', $replica_config);
+$node->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node->backup($backup_name);
+
+# Restart node with wal_level = minimal and WAL archiving disabled
+# to generate WAL with that setting. Note that such WAL has not been
+# archived yet at this moment because WAL archiving is not enabled.
+$node->append_conf(
+ 'postgresql.conf', q[
+wal_level = minimal
+archive_mode = off
+max_wal_senders = 0
+]);
+$node->restart;
+
+# Restart node with wal_level = replica and WAL archiving enabled
+# to archive WAL previously generated with wal_level = minimal.
+# We ensure the WAL file containing the record indicating the change
+# of wal_level to minimal is archived by checking pg_stat_archiver.
+$node->append_conf('postgresql.conf', $replica_config);
+$node->restart;
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node->safe_psql('postgres', 'SELECT pg_switch_wal()');
+my $archive_wait_query =
+ "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
+
+# Wait until the WAL segment has been archived.
+$node->poll_query_until('postgres', $archive_wait_query)
+ or die "Timed out while waiting for WAL segment to be archived";
+
+$node->stop;
+
+# Initialize new node from backup, and start archive recovery. Check that
+# archive recovery fails with an error when it detects the WAL record
+# indicating the change of wal_level to minimal and node stops.
+sub test_recovery_wal_level_minimal
+{
+ my ($node_name, $node_text, $standby_setting) = @_;
+
+ my $recovery_node = PostgreSQL::Test::Cluster->new($node_name);
+ $recovery_node->init_from_backup(
+ $node, $backup_name,
+ has_restoring => 1,
+ standby => $standby_setting);
+
+ # Use run_log instead of recovery_node->start because this test expects
+ # that the server ends with an error during recovery.
+ run_log(
+ [
+ 'pg_ctl', '-D',
+ $recovery_node->data_dir, '-l',
+ $recovery_node->logfile, 'start'
+ ]);
+
+ # wait for postgres to terminate
+ foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default)
+ {
+ last if !-f $recovery_node->data_dir . '/postmaster.pid';
+ usleep(100_000);
+ }
+
+ # Confirm that the archive recovery fails with an expected error
+ my $logfile = slurp_file($recovery_node->logfile());
+ ok( $logfile =~
+ qr/FATAL: .* WAL was generated with wal_level=minimal, cannot continue recovering/,
+ "$node_text ends with an error because it finds WAL generated with wal_level=minimal"
+ );
+}
+
+# Test for archive recovery
+test_recovery_wal_level_minimal('archive_recovery', 'archive recovery', 0);
+
+# Test for standby server
+test_recovery_wal_level_minimal('standby', 'standby', 1);
+
+done_testing();
diff --git a/src/test/recovery/t/025_stuck_on_old_timeline.pl b/src/test/recovery/t/025_stuck_on_old_timeline.pl
new file mode 100644
index 0000000..fd82124
--- /dev/null
+++ b/src/test/recovery/t/025_stuck_on_old_timeline.pl
@@ -0,0 +1,112 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Testing streaming replication where standby is promoted and a new cascading
+# standby (without WAL) is connected to the promoted standby. Both archiving
+# and streaming are enabled, but only the history file is available from the
+# archive, so the WAL files all have to be streamed. Test that the cascading
+# standby can follow the new primary (promoted standby).
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+
+use File::Basename;
+use FindBin;
+use Test::More;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+
+# Set up an archive command that will copy the history file but not the WAL
+# files. No real archive command should behave this way; the point is to
+# simulate a race condition where the new cascading standby starts up after
+# the timeline history file reaches the archive but before any of the WAL files
+# get there.
+$node_primary->init(allows_streaming => 1, has_archiving => 1);
+
+# Note: consistent use of forward slashes here avoids any escaping problems
+# that arise from use of backslashes. That means we need to double-quote all
+# the paths in the archive_command
+my $perlbin = $^X;
+$perlbin =~ s!\\!/!g if $PostgreSQL::Test::Utils::windows_os;
+my $archivedir_primary = $node_primary->archive_dir;
+$archivedir_primary =~ s!\\!/!g if $PostgreSQL::Test::Utils::windows_os;
+$node_primary->append_conf(
+ 'postgresql.conf', qq(
+archive_command = '"$perlbin" "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"'
+wal_keep_size=128MB
+));
+# Make sure that Msys perl doesn't complain about difficulty in setting locale
+# when called from the archive_command.
+local $ENV{PERL_BADLANG} = 0;
+$node_primary->start;
+
+# Take backup from primary
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup(
+ $node_primary, $backup_name,
+ allows_streaming => 1,
+ has_streaming => 1,
+ has_archiving => 1);
+$node_standby->start;
+
+# Take backup of standby, use -Xnone so that pg_wal is empty.
+$node_standby->backup($backup_name, backup_options => ['-Xnone']);
+
+# Create cascading standby but don't start it yet.
+# Must set up both streaming and archiving.
+my $node_cascade = PostgreSQL::Test::Cluster->new('cascade');
+$node_cascade->init_from_backup($node_standby, $backup_name,
+ has_streaming => 1);
+$node_cascade->enable_restoring($node_primary);
+$node_cascade->append_conf(
+ 'postgresql.conf', qq(
+recovery_target_timeline='latest'
+));
+
+# Promote the standby.
+$node_standby->promote;
+
+# Wait for promotion to complete
+$node_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery();")
+ or die "Timed out while waiting for promotion";
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+# Since the history file gets created on promotion and is archived before any
+# WAL segment, this is enough to guarantee that the history file was
+# archived.
+my $archive_wait_query =
+ "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver";
+$node_standby->poll_query_until('postgres', $archive_wait_query)
+ or die "Timed out while waiting for WAL segment to be archived";
+my $last_archived_wal_file = $walfile_to_be_archived;
+
+# Start cascade node
+$node_cascade->start;
+
+# Create some content on promoted standby and check its presence on the
+# cascading standby.
+$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a");
+
+# Wait for the replication to catch up
+$node_standby->wait_for_catchup($node_cascade);
+
+# Check that cascading standby has the new content
+my $result =
+ $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+print "cascade: $result\n";
+is($result, 1, 'check streamed content on cascade standby');
+
+done_testing();
diff --git a/src/test/recovery/t/026_overwrite_contrecord.pl b/src/test/recovery/t/026_overwrite_contrecord.pl
new file mode 100644
index 0000000..78feccd
--- /dev/null
+++ b/src/test/recovery/t/026_overwrite_contrecord.pl
@@ -0,0 +1,109 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests for already-propagated WAL segments ending in incomplete WAL records.
+
+use strict;
+use warnings;
+
+use FindBin;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Test: Create a physical replica that's missing the last WAL file,
+# then restart the primary to create a divergent WAL file and observe
+# that the replica replays the "overwrite contrecord" from that new
+# file and the standby promotes successfully.
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+# We need these settings for stability of WAL behavior.
+$node->append_conf(
+ 'postgresql.conf', qq(
+autovacuum = off
+wal_keep_size = 1GB
+));
+$node->start;
+
+$node->safe_psql('postgres', 'create table filler (a int, b text)');
+
+# Now consume all remaining room in the current WAL segment, leaving
+# space enough only for the start of a largish record.
+$node->safe_psql(
+ 'postgres', q{
+DO $$
+DECLARE
+ wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size';
+ remain int;
+ iters int := 0;
+BEGIN
+ LOOP
+ INSERT into filler
+ select g, repeat(md5(g::text), (random() * 60 + 1)::int)
+ from generate_series(1, 10) g;
+
+ remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize;
+ IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN
+ RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain;
+ EXIT;
+ END IF;
+ iters := iters + 1;
+ END LOOP;
+END
+$$;
+});
+
+my $initfile = $node->safe_psql('postgres',
+ 'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
+$node->safe_psql('postgres',
+ qq{SELECT pg_logical_emit_message(true, 'test 026', repeat('xyzxz', 123456))}
+);
+#$node->safe_psql('postgres', qq{create table foo ()});
+my $endfile = $node->safe_psql('postgres',
+ 'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
+ok($initfile ne $endfile, "$initfile differs from $endfile");
+
+# Now stop abruptly, to avoid a stop checkpoint. We can remove the tail file
+# afterwards, and on startup the large message should be overwritten with new
+# contents
+$node->stop('immediate');
+
+unlink $node->basedir . "/pgdata/pg_wal/$endfile"
+ or die "could not unlink " . $node->basedir . "/pgdata/pg_wal/$endfile: $!";
+
+# OK, create a standby at this spot.
+$node->backup_fs_cold('backup');
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node, 'backup', has_streaming => 1);
+
+$node_standby->start;
+$node->start;
+
+$node->safe_psql('postgres',
+ qq{create table foo (a text); insert into foo values ('hello')});
+$node->safe_psql('postgres',
+ qq{SELECT pg_logical_emit_message(true, 'test 026', 'AABBCC')});
+
+my $until_lsn = $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+my $caughtup_query =
+ "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+ok($node_standby->safe_psql('postgres', 'select * from foo') eq 'hello',
+ 'standby replays past overwritten contrecord');
+
+# Verify message appears in standby's log
+my $log = slurp_file($node_standby->logfile);
+like(
+ $log,
+ qr[successfully skipped missing contrecord at],
+ "found log line in standby");
+
+# Verify promotion is successful
+$node_standby->promote;
+
+$node->stop;
+$node_standby->stop;
+
+done_testing();
diff --git a/src/test/recovery/t/027_stream_regress.pl b/src/test/recovery/t/027_stream_regress.pl
new file mode 100644
index 0000000..69d6ddf
--- /dev/null
+++ b/src/test/recovery/t/027_stream_regress.pl
@@ -0,0 +1,113 @@
+# Run the standard regression tests with streaming replication
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Basename;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+
+# Increase some settings that Cluster->new makes too low by default.
+$node_primary->adjust_conf('postgresql.conf', 'max_connections', '25');
+$node_primary->append_conf('postgresql.conf',
+ 'max_prepared_transactions = 10');
+# We'll stick with Cluster->new's small default shared_buffers, but since that
+# makes synchronized seqscans more probable, it risks changing the results of
+# some test queries. Disable synchronized seqscans to prevent that.
+$node_primary->append_conf('postgresql.conf', 'synchronize_seqscans = off');
+
+# WAL consistency checking is resource intensive so require opt-in with the
+# PG_TEST_EXTRA environment variable.
+if ( $ENV{PG_TEST_EXTRA}
+ && $ENV{PG_TEST_EXTRA} =~ m/\bwal_consistency_checking\b/)
+{
+ $node_primary->append_conf('postgresql.conf',
+ 'wal_consistency_checking = all');
+}
+
+$node_primary->start;
+is( $node_primary->psql(
+ 'postgres',
+ qq[SELECT pg_create_physical_replication_slot('standby_1');]),
+ 0,
+ 'physical slot created on primary');
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1');
+$node_standby_1->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby_1->append_conf('postgresql.conf',
+ "primary_slot_name = standby_1");
+$node_standby_1->append_conf('postgresql.conf',
+ 'max_standby_streaming_delay = 600s');
+$node_standby_1->start;
+
+my $dlpath = dirname($ENV{REGRESS_SHLIB});
+my $outputdir = $PostgreSQL::Test::Utils::tmp_check;
+
+# Run the regression tests against the primary.
+my $extra_opts = $ENV{EXTRA_REGRESS_OPTS} || "";
+my $rc =
+ system($ENV{PG_REGRESS}
+ . " $extra_opts "
+ . "--dlpath=\"$dlpath\" "
+ . "--bindir= "
+ . "--host="
+ . $node_primary->host . " "
+ . "--port="
+ . $node_primary->port . " "
+ . "--schedule=../regress/parallel_schedule "
+ . "--max-concurrent-tests=20 "
+ . "--inputdir=../regress "
+ . "--outputdir=\"$outputdir\"");
+if ($rc != 0)
+{
+ # Dump out the regression diffs file, if there is one
+ my $diffs = "$outputdir/regression.diffs";
+ if (-e $diffs)
+ {
+ print "=== dumping $diffs ===\n";
+ print slurp_file($diffs);
+ print "=== EOF ===\n";
+ }
+}
+is($rc, 0, 'regression tests pass');
+
+# Clobber all sequences with their next value, so that we don't have
+# differences between nodes due to caching.
+$node_primary->psql('regression',
+ "select setval(seqrelid, nextval(seqrelid)) from pg_sequence");
+
+# Wait for standby to catch up
+$node_primary->wait_for_catchup($node_standby_1, 'replay',
+ $node_primary->lsn('insert'));
+
+# Perform a logical dump of primary and standby, and check that they match
+command_ok(
+ [
+ 'pg_dumpall', '-f', $outputdir . '/primary.dump',
+ '--no-sync', '-p', $node_primary->port,
+ '--no-unlogged-table-data' # if unlogged, standby has schema only
+ ],
+ 'dump primary server');
+command_ok(
+ [
+ 'pg_dumpall', '-f', $outputdir . '/standby.dump',
+ '--no-sync', '-p', $node_standby_1->port
+ ],
+ 'dump standby server');
+command_ok(
+ [ 'diff', $outputdir . '/primary.dump', $outputdir . '/standby.dump' ],
+ 'compare primary and standby dumps');
+
+$node_standby_1->stop;
+$node_primary->stop;
+
+done_testing();
diff --git a/src/test/recovery/t/028_pitr_timelines.pl b/src/test/recovery/t/028_pitr_timelines.pl
new file mode 100644
index 0000000..bad02ed
--- /dev/null
+++ b/src/test/recovery/t/028_pitr_timelines.pl
@@ -0,0 +1,176 @@
+# Copyright (c) 2022, PostgreSQL Global Development Group
+
+# Test recovering to a point-in-time using WAL archive, such that the
+# target point is physically in a WAL segment with a higher TLI than
+# the target point's TLI. For example, imagine that the following WAL
+# segments exist in the WAL archive:
+#
+# 000000010000000000000001
+# 000000010000000000000002
+# 000000020000000000000003
+#
+# The timeline switch happened in the middle of WAL segment 3, but it
+# was never archived on timeline 1. The first half of
+# 000000020000000000000003 contains the WAL from timeline 1 up to the
+# point where the timeline switch happened. If you now perform
+# archive recovery with recovery target point in that first half of
+# segment 3, archive recovery will find the WAL up to that point in
+# segment 000000020000000000000003, but it will not follow the
+# timeline switch to timeline 2, and creates a timeline switching
+# end-of-recovery record with TLI 1 -> 3. That's what this test case
+# tests.
+#
+# The comments below contain lists of WAL segments at different points
+# in the tests, to make it easier to follow along. They are correct
+# as of this writing, but the exact WAL segment numbers could change
+# if the backend logic for when it switches to a new segment changes.
+# The actual checks are not sensitive to that.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Compare;
+
+# Initialize and start primary node with WAL archiving
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(has_archiving => 1, allows_streaming => 1);
+$node_primary->start;
+
+# Take a backup.
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Workload with some transactions, and the target restore point.
+$node_primary->psql(
+ 'postgres', qq{
+CREATE TABLE foo(i int);
+INSERT INTO foo VALUES(1);
+SELECT pg_create_restore_point('rp');
+INSERT INTO foo VALUES(2);
+});
+
+# Contents of the WAL archive at this point:
+#
+# 000000010000000000000001
+# 000000010000000000000002
+# 000000010000000000000002.00000028.backup
+#
+# The operations on the test table and the restore point went into WAL
+# segment 3, but it hasn't been archived yet.
+
+# Start a standby node, and wait for it to catch up.
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup(
+ $node_primary, $backup_name,
+ standby => 1,
+ has_streaming => 1,
+ has_archiving => 1,
+ has_restoring => 0);
+$node_standby->append_conf('postgresql.conf', 'archive_mode = always');
+$node_standby->start;
+$node_primary->wait_for_catchup($node_standby);
+
+# Check that it's really caught up.
+my $result = $node_standby->safe_psql('postgres', "SELECT max(i) FROM foo;");
+is($result, qq{2}, "check table contents after archive recovery");
+
+# Kill the old primary, before it archives the most recent WAL segment that
+# contains all the INSERTs.
+$node_primary->stop('immediate');
+
+# Promote the standby, and switch WAL so that it archives a WAL segment
+# that contains all the INSERTs, on a new timeline.
+$node_standby->promote;
+
+# Find next WAL segment to be archived.
+my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# We don't need the standby anymore, request shutdown. The server will
+# finish archiving all the WAL on timeline 2 before it exits.
+$node_standby->stop;
+
+# Contents of the WAL archive at this point:
+#
+# 000000010000000000000001
+# 000000010000000000000002
+# 000000010000000000000002.00000028.backup
+# 000000010000000000000003.partial
+# 000000020000000000000003
+# 00000002.history
+#
+# The operations on the test table and the restore point are in
+# segment 3. They are part of timeline 1, but were not archived by
+# the primary yet. However, they were copied into the beginning of
+# segment 000000020000000000000003, before the timeline switching
+# record. (They are also present in the
+# 000000010000000000000003.partial file, but .partial files are not
+# used automatically.)
+
+# Now test PITR to the recovery target. It should find the WAL in
+# segment 000000020000000000000003, but not follow the timeline switch
+# to timeline 2.
+my $node_pitr = PostgreSQL::Test::Cluster->new('node_pitr');
+$node_pitr->init_from_backup(
+ $node_primary, $backup_name,
+ standby => 0,
+ has_restoring => 1);
+$node_pitr->append_conf(
+ 'postgresql.conf', qq{
+recovery_target_name = 'rp'
+recovery_target_action = 'promote'
+});
+
+$node_pitr->start;
+
+# Wait until recovery finishes.
+$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
+ or die "Timed out while waiting for PITR promotion";
+
+# Check that we see the data we expect.
+$result = $node_pitr->safe_psql('postgres', "SELECT max(i) FROM foo;");
+is($result, qq{1}, "check table contents after point-in-time recovery");
+
+# Insert a row so that we can check later that we successfully recover
+# back to this timeline.
+$node_pitr->safe_psql('postgres', "INSERT INTO foo VALUES(3);");
+
+# Wait for the archiver to be running. The startup process might have yet to
+# exit, in which case the postmaster has not started the archiver. If we
+# stop() without an archiver, the archive will be incomplete.
+$node_pitr->poll_query_until('postgres',
+ "SELECT true FROM pg_stat_activity WHERE backend_type = 'archiver';")
+ or die "Timed out while waiting for archiver to start";
+
+# Stop the node. This archives the last segment.
+$node_pitr->stop();
+
+# Test archive recovery on the timeline created by the PITR. This
+# replays the end-of-recovery record that switches from timeline 1 to
+# 3.
+my $node_pitr2 = PostgreSQL::Test::Cluster->new('node_pitr2');
+$node_pitr2->init_from_backup(
+ $node_primary, $backup_name,
+ standby => 0,
+ has_restoring => 1);
+$node_pitr2->append_conf(
+ 'postgresql.conf', qq{
+recovery_target_action = 'promote'
+});
+
+$node_pitr2->start;
+
+# Wait until recovery finishes.
+$node_pitr2->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
+ or die "Timed out while waiting for PITR promotion";
+
+# Verify that we can see the row inserted after the PITR.
+$result = $node_pitr2->safe_psql('postgres', "SELECT max(i) FROM foo;");
+is($result, qq{3}, "check table contents after point-in-time recovery");
+
+done_testing();
diff --git a/src/test/recovery/t/029_stats_restart.pl b/src/test/recovery/t/029_stats_restart.pl
new file mode 100644
index 0000000..1bf7b56
--- /dev/null
+++ b/src/test/recovery/t/029_stats_restart.pl
@@ -0,0 +1,344 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests statistics handling around restarts, including handling of crashes and
+# invalid stats files, as well as restorting stats after "normal" restarts.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Copy;
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+$node->append_conf('postgresql.conf', "track_functions = 'all'");
+$node->start;
+
+my $connect_db = 'postgres';
+my $db_under_test = 'test';
+
+# create test objects
+$node->safe_psql($connect_db, "CREATE DATABASE $db_under_test");
+$node->safe_psql($db_under_test,
+ "CREATE TABLE tab_stats_crash_discard_test1 AS SELECT generate_series(1,100) AS a"
+);
+$node->safe_psql($db_under_test,
+ "CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE"
+);
+
+# collect object oids
+my $dboid = $node->safe_psql($db_under_test,
+ "SELECT oid FROM pg_database WHERE datname = '$db_under_test'");
+my $funcoid = $node->safe_psql($db_under_test,
+ "SELECT 'func_stats_crash_discard1()'::regprocedure::oid");
+my $tableoid = $node->safe_psql($db_under_test,
+ "SELECT 'tab_stats_crash_discard_test1'::regclass::oid");
+
+# generate stats and flush them
+trigger_funcrel_stat();
+
+# verify stats objects exist
+my $sect = "initial";
+is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
+is(have_stats('function', $dboid, $funcoid),
+ 't', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 't', "$sect: relation stats do exist");
+
+# regular shutdown
+$node->stop();
+
+# backup stats files
+my $statsfile = $PostgreSQL::Test::Utils::tmp_check . '/' . "discard_stats1";
+ok(!-f "$statsfile", "backup statsfile cannot already exist");
+
+my $datadir = $node->data_dir();
+my $og_stats = "$datadir/pg_stat/pgstat.stat";
+ok(-f "$og_stats", "origin stats file must exist");
+copy($og_stats, $statsfile) or die "Copy failed: $!";
+
+
+## test discarding of stats file after crash etc
+
+$node->start;
+
+$sect = "copy";
+is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
+is(have_stats('function', $dboid, $funcoid),
+ 't', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 't', "$sect: relation stats do exist");
+
+$node->stop('immediate');
+
+ok(!-f "$og_stats", "no stats file should exist after immediate shutdown");
+
+# copy the old stats back to test we discard stats after crash restart
+copy($statsfile, $og_stats) or die "Copy failed: $!";
+
+$node->start;
+
+# stats should have been discarded
+$sect = "post immediate";
+is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
+is(have_stats('function', $dboid, $funcoid),
+ 'f', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 'f', "$sect: relation stats do not exist");
+
+# get rid of backup statsfile
+unlink $statsfile or die "cannot unlink $statsfile $!";
+
+
+# generate new stats and flush them
+trigger_funcrel_stat();
+
+$sect = "post immediate, new";
+is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
+is(have_stats('function', $dboid, $funcoid),
+ 't', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 't', "$sect: relation stats do exist");
+
+# regular shutdown
+$node->stop();
+
+
+## check an invalid stats file is handled
+
+overwrite_file($og_stats, "ZZZZZZZZZZZZZ");
+
+# normal startup and no issues despite invalid stats file
+$node->start;
+
+# no stats present due to invalid stats file
+$sect = "invalid_overwrite";
+is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
+is(have_stats('function', $dboid, $funcoid),
+ 'f', "$sect: function stats do not exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 'f', "$sect: relation stats do not exist");
+
+
+## check invalid stats file starting with valid contents, but followed by
+## invalid content is handled.
+
+trigger_funcrel_stat();
+$node->stop;
+append_file($og_stats, "XYZ");
+$node->start;
+
+$sect = "invalid_append";
+is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
+is(have_stats('function', $dboid, $funcoid),
+ 'f', "$sect: function stats do not exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 'f', "$sect: relation stats do not exist");
+
+
+## checks related to stats persistency around restarts and resets
+
+# Ensure enough checkpoints to protect against races for test after reset,
+# even on very slow machines.
+$node->safe_psql($connect_db, "CHECKPOINT; CHECKPOINT;");
+
+
+## check checkpoint and wal stats are incremented due to restart
+
+my $ckpt_start = checkpoint_stats();
+my $wal_start = wal_stats();
+$node->restart;
+
+$sect = "post restart";
+my $ckpt_restart = checkpoint_stats();
+my $wal_restart = wal_stats();
+
+cmp_ok(
+ $ckpt_start->{count}, '<',
+ $ckpt_restart->{count},
+ "$sect: increased checkpoint count");
+cmp_ok(
+ $wal_start->{records}, '<',
+ $wal_restart->{records},
+ "$sect: increased wal record count");
+cmp_ok($wal_start->{bytes}, '<', $wal_restart->{bytes},
+ "$sect: increased wal bytes");
+is( $ckpt_start->{reset},
+ $ckpt_restart->{reset},
+ "$sect: checkpoint stats_reset equal");
+is($wal_start->{reset}, $wal_restart->{reset},
+ "$sect: wal stats_reset equal");
+
+
+## Check that checkpoint stats are reset, WAL stats aren't affected
+
+$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('bgwriter')");
+
+$sect = "post ckpt reset";
+my $ckpt_reset = checkpoint_stats();
+my $wal_ckpt_reset = wal_stats();
+
+cmp_ok($ckpt_restart->{count},
+ '>', $ckpt_reset->{count}, "$sect: checkpoint count smaller");
+cmp_ok($ckpt_start->{reset}, 'lt', $ckpt_reset->{reset},
+ "$sect: stats_reset newer");
+
+cmp_ok(
+ $wal_restart->{records},
+ '<=',
+ $wal_ckpt_reset->{records},
+ "$sect: wal record count not affected by reset");
+is( $wal_start->{reset},
+ $wal_ckpt_reset->{reset},
+ "$sect: wal stats_reset equal");
+
+
+## check that checkpoint stats stay reset after restart
+
+$node->restart;
+
+$sect = "post ckpt reset & restart";
+my $ckpt_restart_reset = checkpoint_stats();
+my $wal_restart2 = wal_stats();
+
+# made sure above there's enough checkpoints that this will be stable even on slow machines
+cmp_ok(
+ $ckpt_restart_reset->{count},
+ '<',
+ $ckpt_restart->{count},
+ "$sect: checkpoint still reset");
+is($ckpt_restart_reset->{reset},
+ $ckpt_reset->{reset}, "$sect: stats_reset same");
+
+cmp_ok(
+ $wal_ckpt_reset->{records},
+ '<',
+ $wal_restart2->{records},
+ "$sect: increased wal record count");
+cmp_ok(
+ $wal_ckpt_reset->{bytes},
+ '<',
+ $wal_restart2->{bytes},
+ "$sect: increased wal bytes");
+is( $wal_start->{reset},
+ $wal_restart2->{reset},
+ "$sect: wal stats_reset equal");
+
+
+## check WAL stats stay reset
+
+$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('wal')");
+
+$sect = "post wal reset";
+my $wal_reset = wal_stats();
+
+cmp_ok(
+ $wal_reset->{records}, '<',
+ $wal_restart2->{records},
+ "$sect: smaller record count");
+cmp_ok(
+ $wal_reset->{bytes}, '<',
+ $wal_restart2->{bytes},
+ "$sect: smaller bytes");
+cmp_ok(
+ $wal_reset->{reset}, 'gt',
+ $wal_restart2->{reset},
+ "$sect: newer stats_reset");
+
+$node->restart;
+
+$sect = "post wal reset & restart";
+my $wal_reset_restart = wal_stats();
+
+# enough WAL generated during prior tests and initdb to make this not racy
+cmp_ok(
+ $wal_reset_restart->{records},
+ '<',
+ $wal_restart2->{records},
+ "$sect: smaller record count");
+cmp_ok(
+ $wal_reset->{bytes}, '<',
+ $wal_restart2->{bytes},
+ "$sect: smaller bytes");
+cmp_ok(
+ $wal_reset->{reset}, 'gt',
+ $wal_restart2->{reset},
+ "$sect: newer stats_reset");
+
+$node->stop('immediate');
+$node->start;
+
+$sect = "post immediate restart";
+my $wal_restart_immediate = wal_stats();
+
+cmp_ok(
+ $wal_reset_restart->{reset},
+ 'lt',
+ $wal_restart_immediate->{reset},
+ "$sect: reset timestamp is new");
+
+$node->stop;
+done_testing();
+
+sub trigger_funcrel_stat
+{
+ $node->safe_psql(
+ $db_under_test, q[
+ SELECT * FROM tab_stats_crash_discard_test1;
+ SELECT func_stats_crash_discard1();
+ SELECT pg_stat_force_next_flush();]);
+}
+
+sub have_stats
+{
+ my ($kind, $dboid, $objoid) = @_;
+
+ return $node->safe_psql($connect_db,
+ "SELECT pg_stat_have_stats('$kind', $dboid, $objoid)");
+}
+
+sub overwrite_file
+{
+ my ($filename, $str) = @_;
+ open my $fh, ">", $filename
+ or die "could not overwrite \"$filename\": $!";
+ print $fh $str;
+ close $fh;
+ return;
+}
+
+sub append_file
+{
+ my ($filename, $str) = @_;
+ open my $fh, ">>", $filename
+ or die "could not append to \"$filename\": $!";
+ print $fh $str;
+ close $fh;
+ return;
+}
+
+sub checkpoint_stats
+{
+ my %results;
+
+ $results{count} = $node->safe_psql($connect_db,
+ "SELECT checkpoints_timed + checkpoints_req FROM pg_stat_bgwriter");
+ $results{reset} = $node->safe_psql($connect_db,
+ "SELECT stats_reset FROM pg_stat_bgwriter");
+
+ return \%results;
+}
+
+sub wal_stats
+{
+ my %results;
+ $results{records} =
+ $node->safe_psql($connect_db, "SELECT wal_records FROM pg_stat_wal");
+ $results{bytes} =
+ $node->safe_psql($connect_db, "SELECT wal_bytes FROM pg_stat_wal");
+ $results{reset} =
+ $node->safe_psql($connect_db, "SELECT stats_reset FROM pg_stat_wal");
+
+ return \%results;
+}
diff --git a/src/test/recovery/t/030_stats_cleanup_replica.pl b/src/test/recovery/t/030_stats_cleanup_replica.pl
new file mode 100644
index 0000000..cc92ddb
--- /dev/null
+++ b/src/test/recovery/t/030_stats_cleanup_replica.pl
@@ -0,0 +1,206 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests that standbys:
+# - drop stats for objects when the those records are replayed
+# - persist stats across graceful restarts
+# - discard stats after immediate / crash restarts
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf('postgresql.conf', "track_functions = 'all'");
+$node_primary->start;
+
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->start;
+
+
+## Test that stats are cleaned up on standby after dropping table or function
+
+my $sect = 'initial';
+
+my ($dboid, $tableoid, $funcoid) =
+ populate_standby_stats('postgres', 'public');
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 't');
+
+drop_table_by_oid('postgres', $tableoid);
+drop_function_by_oid('postgres', $funcoid);
+
+$sect = 'post drop';
+my $primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 'f');
+
+
+## Test that stats are cleaned up on standby after dropping indirectly
+
+$sect = "schema creation";
+
+$node_primary->safe_psql('postgres', "CREATE SCHEMA drop_schema_test1");
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+($dboid, $tableoid, $funcoid) =
+ populate_standby_stats('postgres', 'drop_schema_test1');
+
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 't');
+$node_primary->safe_psql('postgres', "DROP SCHEMA drop_schema_test1 CASCADE");
+
+$sect = "post schema drop";
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+# verify table and function stats removed from standby
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 'f');
+
+
+## Test that stats are cleaned up on standby after dropping database
+
+$sect = "createdb";
+
+$node_primary->safe_psql('postgres', "CREATE DATABASE test");
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+($dboid, $tableoid, $funcoid) = populate_standby_stats('test', 'public');
+
+# verify stats are present
+test_standby_func_tab_stats_status('test', $dboid, $tableoid, $funcoid, 't');
+test_standby_db_stats_status('test', $dboid, 't');
+
+$node_primary->safe_psql('postgres', "DROP DATABASE test");
+$sect = "post dropdb";
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+# Test that the stats were cleaned up on standby
+# Note that this connects to 'postgres' but provides the dboid of dropped db
+# 'test' which we acquired previously
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 'f');
+
+test_standby_db_stats_status('postgres', $dboid, 'f');
+
+
+## verify that stats persist across graceful restarts on a replica
+
+# NB: Can't test database stats, they're immediately repopulated when
+# reconnecting...
+$sect = "pre restart";
+($dboid, $tableoid, $funcoid) = populate_standby_stats('postgres', 'public');
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 't');
+
+$node_standby->restart();
+
+$sect = "post non-immediate";
+
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 't');
+
+# but gone after an immediate restart
+$node_standby->stop('immediate');
+$node_standby->start();
+
+$sect = "post immediate restart";
+
+test_standby_func_tab_stats_status('postgres',
+ $dboid, $tableoid, $funcoid, 'f');
+
+
+done_testing();
+
+
+sub populate_standby_stats
+{
+ my ($connect_db, $schema) = @_;
+
+ # create objects on primary
+ $node_primary->safe_psql($connect_db,
+ "CREATE TABLE $schema.drop_tab_test1 AS SELECT generate_series(1,100) AS a"
+ );
+ $node_primary->safe_psql($connect_db,
+ "CREATE FUNCTION $schema.drop_func_test1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE"
+ );
+ my $primary_lsn = $node_primary->lsn('flush');
+ $node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+ # collect object oids
+ my $dboid = $node_standby->safe_psql($connect_db,
+ "SELECT oid FROM pg_database WHERE datname = '$connect_db'");
+ my $tableoid = $node_standby->safe_psql($connect_db,
+ "SELECT '$schema.drop_tab_test1'::regclass::oid");
+ my $funcoid = $node_standby->safe_psql($connect_db,
+ "SELECT '$schema.drop_func_test1()'::regprocedure::oid");
+
+ # generate stats on standby
+ $node_standby->safe_psql($connect_db,
+ "SELECT * FROM $schema.drop_tab_test1");
+ $node_standby->safe_psql($connect_db, "SELECT $schema.drop_func_test1()");
+
+ return ($dboid, $tableoid, $funcoid);
+}
+
+sub drop_function_by_oid
+{
+ my ($connect_db, $funcoid) = @_;
+
+ # Get function name from returned oid
+ my $func_name = $node_primary->safe_psql($connect_db,
+ "SELECT '$funcoid'::regprocedure");
+ $node_primary->safe_psql($connect_db, "DROP FUNCTION $func_name");
+}
+
+sub drop_table_by_oid
+{
+ my ($connect_db, $tableoid) = @_;
+
+ # Get table name from returned oid
+ my $table_name =
+ $node_primary->safe_psql($connect_db, "SELECT '$tableoid'::regclass");
+ $node_primary->safe_psql($connect_db, "DROP TABLE $table_name");
+}
+
+sub test_standby_func_tab_stats_status
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+ my ($connect_db, $dboid, $tableoid, $funcoid, $present) = @_;
+
+ my %expected = (rel => $present, func => $present);
+ my %stats;
+
+ $stats{rel} = $node_standby->safe_psql($connect_db,
+ "SELECT pg_stat_have_stats('relation', $dboid, $tableoid)");
+ $stats{func} = $node_standby->safe_psql($connect_db,
+ "SELECT pg_stat_have_stats('function', $dboid, $funcoid)");
+
+ is_deeply(\%stats, \%expected, "$sect: standby stats as expected");
+
+ return;
+}
+
+sub test_standby_db_stats_status
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+ my ($connect_db, $dboid, $present) = @_;
+
+ is( $node_standby->safe_psql(
+ $connect_db, "SELECT pg_stat_have_stats('database', $dboid, 0)"),
+ $present,
+ "$sect: standby db stats as expected");
+}
diff --git a/src/test/recovery/t/031_recovery_conflict.pl b/src/test/recovery/t/031_recovery_conflict.pl
new file mode 100644
index 0000000..99f154a
--- /dev/null
+++ b/src/test/recovery/t/031_recovery_conflict.pl
@@ -0,0 +1,383 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test that connections to a hot standby are correctly canceled when a
+# recovery conflict is detected Also, test that statistics in
+# pg_stat_database_conflicts are populated correctly
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+plan skip_all => "disabled due to instability";
+
+# Set up nodes
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+
+my $tablespace1 = "test_recovery_conflict_tblspc";
+
+$node_primary->append_conf(
+ 'postgresql.conf', qq[
+allow_in_place_tablespaces = on
+log_temp_files = 0
+
+# for deadlock test
+max_prepared_transactions = 10
+
+# wait some to test the wait paths as well, but not long for obvious reasons
+max_standby_streaming_delay = 50ms
+
+temp_tablespaces = $tablespace1
+# Some of the recovery conflict logging code only gets exercised after
+# deadlock_timeout. The test doesn't rely on that additional output, but it's
+# nice to get some minimal coverage of that code.
+log_recovery_conflict_waits = on
+deadlock_timeout = 10ms
+]);
+$node_primary->start;
+
+my $backup_name = 'my_backup';
+
+$node_primary->safe_psql('postgres',
+ qq[CREATE TABLESPACE $tablespace1 LOCATION '']);
+
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+
+$node_standby->start;
+
+my $test_db = "test_db";
+
+# use a new database, to trigger database recovery conflict
+$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");
+
+# test schema / data
+my $table1 = "test_recovery_conflict_table1";
+my $table2 = "test_recovery_conflict_table2";
+$node_primary->safe_psql(
+ $test_db, qq[
+CREATE TABLE ${table1}(a int, b int);
+INSERT INTO $table1 SELECT i % 3, 0 FROM generate_series(1,20) i;
+CREATE TABLE ${table2}(a int, b int);
+]);
+my $primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+
+# a longrunning psql that we can use to trigger conflicts
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+my %psql_standby = ('stdin' => '', 'stdout' => '');
+$psql_standby{run} =
+ $node_standby->background_psql($test_db, \$psql_standby{stdin},
+ \$psql_standby{stdout},
+ $psql_timeout);
+$psql_standby{stdout} = '';
+
+my $expected_conflicts = 0;
+
+
+## RECOVERY CONFLICT 1: Buffer pin conflict
+my $sect = "buffer pin conflict";
+$expected_conflicts++;
+
+# Aborted INSERT on primary that will be cleaned up by vacuum. Has to be old
+# enough so that there's not a snapshot conflict before the buffer pin
+# conflict.
+
+$node_primary->safe_psql(
+ $test_db,
+ qq[
+ BEGIN;
+ INSERT INTO $table1 VALUES (1,0);
+ ROLLBACK;
+ -- ensure flush, rollback doesn't do so
+ BEGIN; LOCK $table1; COMMIT;
+ ]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+my $cursor1 = "test_recovery_conflict_cursor";
+
+# DECLARE and use a cursor on standby, causing buffer with the only block of
+# the relation to be pinned on the standby
+$psql_standby{stdin} .= qq[
+ BEGIN;
+ DECLARE $cursor1 CURSOR FOR SELECT b FROM $table1;
+ FETCH FORWARD FROM $cursor1;
+ ];
+# FETCH FORWARD should have returned a 0 since all values of b in the table
+# are 0
+ok(pump_until_standby(qr/^0$/m),
+ "$sect: cursor with conflicting pin established");
+
+# to check the log starting now for recovery conflict messages
+my $log_location = -s $node_standby->logfile;
+
+# VACUUM on the primary
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
+
+# Wait for catchup. Existing connection will be terminated before replay is
+# finished, so waiting for catchup ensures that there is no race between
+# encountering the recovery conflict which causes the disconnect and checking
+# the logfile for the terminated connection.
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User was holding shared buffer pin for too long");
+reconnect_and_clear();
+check_conflict_stat("bufferpin");
+
+
+## RECOVERY CONFLICT 2: Snapshot conflict
+$sect = "snapshot conflict";
+$expected_conflicts++;
+
+$node_primary->safe_psql($test_db,
+ qq[INSERT INTO $table1 SELECT i, 0 FROM generate_series(1,20) i]);
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+# DECLARE and FETCH from cursor on the standby
+$psql_standby{stdin} .= qq[
+ BEGIN;
+ DECLARE $cursor1 CURSOR FOR SELECT b FROM $table1;
+ FETCH FORWARD FROM $cursor1;
+ ];
+ok( pump_until(
+ $psql_standby{run}, $psql_timeout,
+ \$psql_standby{stdout}, qr/^0$/m,),
+ "$sect: cursor with conflicting snapshot established");
+
+# Do some HOT updates
+$node_primary->safe_psql($test_db,
+ qq[UPDATE $table1 SET a = a + 1 WHERE a > 2;]);
+
+# VACUUM, pruning those dead tuples
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
+
+# Wait for attempted replay of PRUNE records
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log(
+ "User query might have needed to see row versions that must be removed");
+reconnect_and_clear();
+check_conflict_stat("snapshot");
+
+
+## RECOVERY CONFLICT 3: Lock conflict
+$sect = "lock conflict";
+$expected_conflicts++;
+
+# acquire lock to conflict with
+$psql_standby{stdin} .= qq[
+ BEGIN;
+ LOCK TABLE $table1 IN ACCESS SHARE MODE;
+ SELECT 1;
+ ];
+ok(pump_until_standby(qr/^1$/m), "$sect: conflicting lock acquired");
+
+# DROP TABLE containing block which standby has in a pinned buffer
+$node_primary->safe_psql($test_db, qq[DROP TABLE $table1;]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User was holding a relation lock for too long");
+reconnect_and_clear();
+check_conflict_stat("lock");
+
+
+## RECOVERY CONFLICT 4: Tablespace conflict
+$sect = "tablespace conflict";
+$expected_conflicts++;
+
+# DECLARE a cursor for a query which, with sufficiently low work_mem, will
+# spill tuples into temp files in the temporary tablespace created during
+# setup.
+$psql_standby{stdin} .= qq[
+ BEGIN;
+ SET work_mem = '64kB';
+ DECLARE $cursor1 CURSOR FOR
+ SELECT count(*) FROM generate_series(1,6000);
+ FETCH FORWARD FROM $cursor1;
+ ];
+ok(pump_until_standby(qr/^6000$/m),
+ "$sect: cursor with conflicting temp file established");
+
+# Drop the tablespace currently containing spill files for the query on the
+# standby
+$node_primary->safe_psql($test_db, qq[DROP TABLESPACE $tablespace1;]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log(
+ "User was or might have been using tablespace that must be dropped");
+reconnect_and_clear();
+check_conflict_stat("tablespace");
+
+
+## RECOVERY CONFLICT 5: Deadlock
+$sect = "startup deadlock";
+$expected_conflicts++;
+
+# Want to test recovery deadlock conflicts, not buffer pin conflicts. Without
+# changing max_standby_streaming_delay it'd be timing dependent what we hit
+# first
+$node_standby->adjust_conf(
+ 'postgresql.conf',
+ 'max_standby_streaming_delay',
+ "${PostgreSQL::Test::Utils::timeout_default}s");
+$node_standby->restart();
+reconnect_and_clear();
+
+# Generate a few dead rows, to later be cleaned up by vacuum. Then acquire a
+# lock on another relation in a prepared xact, so it's held continuously by
+# the startup process. The standby psql will block acquiring that lock while
+# holding a pin that vacuum needs, triggering the deadlock.
+$node_primary->safe_psql(
+ $test_db,
+ qq[
+CREATE TABLE $table1(a int, b int);
+INSERT INTO $table1 VALUES (1);
+BEGIN;
+INSERT INTO $table1(a) SELECT generate_series(1, 100) i;
+ROLLBACK;
+BEGIN;
+LOCK TABLE $table2;
+PREPARE TRANSACTION 'lock';
+INSERT INTO $table1(a) VALUES (170);
+SELECT txid_current();
+]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+$psql_standby{stdin} .= qq[
+ BEGIN;
+ -- hold pin
+ DECLARE $cursor1 CURSOR FOR SELECT a FROM $table1;
+ FETCH FORWARD FROM $cursor1;
+ -- wait for lock held by prepared transaction
+ SELECT * FROM $table2;
+ ];
+ok( pump_until(
+ $psql_standby{run}, $psql_timeout,
+ \$psql_standby{stdout}, qr/^1$/m,),
+ "$sect: cursor holding conflicting pin, also waiting for lock, established"
+);
+
+# just to make sure we're waiting for lock already
+ok( $node_standby->poll_query_until(
+ 'postgres', qq[
+SELECT 'waiting' FROM pg_locks WHERE locktype = 'relation' AND NOT granted;
+], 'waiting'),
+ "$sect: lock acquisition is waiting");
+
+# VACUUM will prune away rows, causing a buffer pin conflict, while standby
+# psql is waiting on lock
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User transaction caused buffer deadlock with recovery.");
+reconnect_and_clear();
+check_conflict_stat("deadlock");
+
+# clean up for next tests
+$node_primary->safe_psql($test_db, qq[ROLLBACK PREPARED 'lock';]);
+$node_standby->adjust_conf('postgresql.conf', 'max_standby_streaming_delay',
+ '50ms');
+$node_standby->restart();
+reconnect_and_clear();
+
+
+# Check that expected number of conflicts show in pg_stat_database. Needs to
+# be tested before database is dropped, for obvious reasons.
+is( $node_standby->safe_psql(
+ $test_db,
+ qq[SELECT conflicts FROM pg_stat_database WHERE datname='$test_db';]),
+ $expected_conflicts,
+ qq[$expected_conflicts recovery conflicts shown in pg_stat_database]);
+
+
+## RECOVERY CONFLICT 6: Database conflict
+$sect = "database conflict";
+
+$node_primary->safe_psql('postgres', qq[DROP DATABASE $test_db;]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User was connected to a database that must be dropped");
+
+
+# explicitly shut down psql instances gracefully - to avoid hangs or worse on
+# windows
+$psql_standby{stdin} .= "\\q\n";
+$psql_standby{run}->finish;
+
+$node_standby->stop();
+$node_primary->stop();
+
+
+done_testing();
+
+
+sub pump_until_standby
+{
+ my $match = shift;
+
+ return pump_until($psql_standby{run}, $psql_timeout,
+ \$psql_standby{stdout}, $match);
+}
+
+sub reconnect_and_clear
+{
+ # If psql isn't dead already, tell it to quit as \q, when already dead,
+ # causes IPC::Run to unhelpfully error out with "ack Broken pipe:".
+ $psql_standby{run}->pump_nb();
+ if ($psql_standby{run}->pumpable())
+ {
+ $psql_standby{stdin} .= "\\q\n";
+ }
+ $psql_standby{run}->finish;
+
+ # restart
+ $psql_standby{run}->run();
+ $psql_standby{stdin} = '';
+ $psql_standby{stdout} = '';
+
+ # Run query to ensure connection has finished re-establishing
+ $psql_standby{stdin} .= qq[SELECT 1;\n];
+ die unless pump_until_standby(qr/^1$/m);
+ $psql_standby{stdout} = '';
+}
+
+sub check_conflict_log
+{
+ my $message = shift;
+ my $old_log_location = $log_location;
+
+ $log_location = $node_standby->wait_for_log(qr/$message/, $log_location);
+
+ cmp_ok($log_location, '>', $old_log_location,
+ "$sect: logfile contains terminated connection due to recovery conflict"
+ );
+}
+
+sub check_conflict_stat
+{
+ my $conflict_type = shift;
+ my $count = $node_standby->safe_psql($test_db,
+ qq[SELECT confl_$conflict_type FROM pg_stat_database_conflicts WHERE datname='$test_db';]
+ );
+
+ is($count, 1, "$sect: stats show conflict on standby");
+}
diff --git a/src/test/recovery/t/032_relfilenode_reuse.pl b/src/test/recovery/t/032_relfilenode_reuse.pl
new file mode 100644
index 0000000..92ec510
--- /dev/null
+++ b/src/test/recovery/t/032_relfilenode_reuse.pl
@@ -0,0 +1,243 @@
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Basename;
+
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', q[
+allow_in_place_tablespaces = true
+log_connections=on
+# to avoid "repairing" corruption
+full_page_writes=off
+log_min_messages=debug2
+shared_buffers=1MB
+]);
+$node_primary->start;
+
+
+# Create streaming standby linking to primary
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->start;
+
+# We'll reset this timeout for each individual query we run.
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+
+my %psql_primary = (stdin => '', stdout => '', stderr => '');
+$psql_primary{run} = IPC::Run::start(
+ [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ],
+ '<',
+ \$psql_primary{stdin},
+ '>',
+ \$psql_primary{stdout},
+ '2>',
+ \$psql_primary{stderr},
+ $psql_timeout);
+
+my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => '');
+$psql_standby{run} = IPC::Run::start(
+ [ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ],
+ '<',
+ \$psql_standby{stdin},
+ '>',
+ \$psql_standby{stdout},
+ '2>',
+ \$psql_standby{stderr},
+ $psql_timeout);
+
+
+# Create template database with a table that we'll update, to trigger dirty
+# rows. Using a template database + preexisting rows makes it a bit easier to
+# reproduce, because there's no cache invalidations generated.
+
+$node_primary->safe_psql('postgres',
+ "CREATE DATABASE conflict_db_template OID = 50000;");
+$node_primary->safe_psql(
+ 'conflict_db_template', q[
+ CREATE TABLE large(id serial primary key, dataa text, datab text);
+ INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);]
+);
+$node_primary->safe_psql('postgres',
+ "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
+
+$node_primary->safe_psql(
+ 'postgres', q[
+ CREATE EXTENSION pg_prewarm;
+ CREATE TABLE replace_sb(data text);
+ INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);]
+);
+
+$node_primary->wait_for_catchup($node_standby);
+
+# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files
+send_query_and_wait(\%psql_primary, q[BEGIN;], qr/BEGIN/m);
+send_query_and_wait(\%psql_standby, q[BEGIN;], qr/BEGIN/m);
+
+# Cause lots of dirty rows in shared_buffers
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 1;");
+
+# Now do a bunch of work in another database. That will end up needing to
+# write back dirty data from the previous step, opening the relevant file
+# descriptors
+cause_eviction(\%psql_primary, \%psql_standby);
+
+# drop and recreate database
+$node_primary->safe_psql('postgres', "DROP DATABASE conflict_db;");
+$node_primary->safe_psql('postgres',
+ "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;");
+
+verify($node_primary, $node_standby, 1, "initial contents as expected");
+
+# Again cause lots of dirty rows in shared_buffers, but use a different update
+# value so we can check everything is OK
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 2;");
+
+# Again cause a lot of IO. That'll again write back dirty data, but uses newly
+# opened file descriptors, so we don't confuse old files with new files despite
+# recycling relfilenodes.
+cause_eviction(\%psql_primary, \%psql_standby);
+
+verify($node_primary, $node_standby, 2,
+ "update to reused relfilenode (due to DB oid conflict) is not lost");
+
+
+$node_primary->safe_psql('conflict_db', "VACUUM FULL large;");
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 3;");
+
+verify($node_primary, $node_standby, 3, "restored contents as expected");
+
+# Test for old filehandles after moving a database in / out of tablespace
+$node_primary->safe_psql('postgres',
+ q[CREATE TABLESPACE test_tablespace LOCATION '']);
+
+# cause dirty buffers
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 4;");
+# cause files to be opened in backend in other database
+cause_eviction(\%psql_primary, \%psql_standby);
+
+# move database back / forth
+$node_primary->safe_psql('postgres',
+ 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
+$node_primary->safe_psql('postgres',
+ 'ALTER DATABASE conflict_db SET TABLESPACE pg_default');
+
+# cause dirty buffers
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 5;");
+cause_eviction(\%psql_primary, \%psql_standby);
+
+verify($node_primary, $node_standby, 5, "post move contents as expected");
+
+$node_primary->safe_psql('postgres',
+ 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace');
+
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 7;");
+cause_eviction(\%psql_primary, \%psql_standby);
+$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 8;");
+$node_primary->safe_psql('postgres', 'DROP DATABASE conflict_db');
+$node_primary->safe_psql('postgres', 'DROP TABLESPACE test_tablespace');
+
+$node_primary->safe_psql('postgres', 'REINDEX TABLE pg_database');
+
+
+# explicitly shut down psql instances gracefully - to avoid hangs
+# or worse on windows
+$psql_primary{stdin} .= "\\q\n";
+$psql_primary{run}->finish;
+$psql_standby{stdin} .= "\\q\n";
+$psql_standby{run}->finish;
+
+$node_primary->stop();
+$node_standby->stop();
+
+# Make sure that there weren't crashes during shutdown
+
+command_like(
+ [ 'pg_controldata', $node_primary->data_dir ],
+ qr/Database cluster state:\s+shut down\n/,
+ 'primary shut down ok');
+command_like(
+ [ 'pg_controldata', $node_standby->data_dir ],
+ qr/Database cluster state:\s+shut down in recovery\n/,
+ 'standby shut down ok');
+done_testing();
+
+sub verify
+{
+ my ($primary, $standby, $counter, $message) = @_;
+
+ my $query =
+ "SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10";
+ is($primary->safe_psql('conflict_db', $query),
+ "$counter|4000", "primary: $message");
+
+ $primary->wait_for_catchup($standby);
+ is($standby->safe_psql('conflict_db', $query),
+ "$counter|4000", "standby: $message");
+}
+
+sub cause_eviction
+{
+ my ($psql_primary, $psql_standby) = @_;
+
+ send_query_and_wait(
+ $psql_primary,
+ q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
+ qr/warmed_buffers/m);
+
+ send_query_and_wait(
+ $psql_standby,
+ q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;],
+ qr/warmed_buffers/m);
+}
+
+# Send query, wait until string matches
+sub send_query_and_wait
+{
+ my ($psql, $query, $untl) = @_;
+ my $ret;
+
+ # For each query we run, we'll restart the timeout. Otherwise the timeout
+ # would apply to the whole test script, and would need to be set very high
+ # to survive when running under Valgrind.
+ $psql_timeout->reset();
+ $psql_timeout->start();
+
+ # send query
+ $$psql{stdin} .= $query;
+ $$psql{stdin} .= "\n";
+
+ # wait for query results
+ $$psql{run}->pump_nb();
+ while (1)
+ {
+ last if $$psql{stdout} =~ /$untl/;
+
+ if ($psql_timeout->is_expired)
+ {
+ BAIL_OUT("aborting wait: program timed out\n"
+ . "stream contents: >>$$psql{stdout}<<\n"
+ . "pattern searched for: $untl\n");
+ return 0;
+ }
+ if (not $$psql{run}->pumpable())
+ {
+ BAIL_OUT("aborting wait: program died\n"
+ . "stream contents: >>$$psql{stdout}<<\n"
+ . "pattern searched for: $untl\n");
+ return 0;
+ }
+ $$psql{run}->pump();
+ }
+
+ $$psql{stdout} = '';
+
+ return 1;
+}
diff --git a/src/test/recovery/t/033_replay_tsp_drops.pl b/src/test/recovery/t/033_replay_tsp_drops.pl
new file mode 100644
index 0000000..64c3296
--- /dev/null
+++ b/src/test/recovery/t/033_replay_tsp_drops.pl
@@ -0,0 +1,148 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test replay of tablespace/database creation/drop
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Time::HiRes qw(usleep);
+
+sub test_tablespace
+{
+ my ($strategy) = @_;
+
+ my $node_primary = PostgreSQL::Test::Cluster->new("primary1_$strategy");
+ $node_primary->init(allows_streaming => 1);
+ $node_primary->start;
+ $node_primary->psql(
+ 'postgres',
+ qq[
+ SET allow_in_place_tablespaces=on;
+ CREATE TABLESPACE dropme_ts1 LOCATION '';
+ CREATE TABLESPACE dropme_ts2 LOCATION '';
+ CREATE TABLESPACE source_ts LOCATION '';
+ CREATE TABLESPACE target_ts LOCATION '';
+ CREATE DATABASE template_db IS_TEMPLATE = true;
+ SELECT pg_create_physical_replication_slot('slot', true);
+ ]);
+ my $backup_name = 'my_backup';
+ $node_primary->backup($backup_name);
+
+ my $node_standby = PostgreSQL::Test::Cluster->new("standby2_$strategy");
+ $node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+ $node_standby->append_conf('postgresql.conf',
+ "allow_in_place_tablespaces = on");
+ $node_standby->append_conf('postgresql.conf',
+ "primary_slot_name = slot");
+ $node_standby->start;
+
+ # Make sure the connection is made
+ $node_primary->wait_for_catchup($node_standby, 'write',
+ $node_primary->lsn('write'));
+
+ # Do immediate shutdown just after a sequence of CREATE DATABASE / DROP
+ # DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records
+ # to be applied to already-removed directories.
+ my $query = q[
+ CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts1 STRATEGY=<STRATEGY>;
+ CREATE TABLE t (a int) TABLESPACE dropme_ts2;
+ CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts2 STRATEGY=<STRATEGY>;
+ CREATE DATABASE moveme_db TABLESPACE source_ts STRATEGY=<STRATEGY>;
+ ALTER DATABASE moveme_db SET TABLESPACE target_ts;
+ CREATE DATABASE newdb TEMPLATE template_db STRATEGY=<STRATEGY>;
+ ALTER DATABASE template_db IS_TEMPLATE = false;
+ DROP DATABASE dropme_db1;
+ DROP TABLE t;
+ DROP DATABASE dropme_db2; DROP TABLESPACE dropme_ts2;
+ DROP TABLESPACE source_ts;
+ DROP DATABASE template_db;
+ ];
+ $query =~ s/<STRATEGY>/$strategy/g;
+
+ $node_primary->safe_psql('postgres', $query);
+ $node_primary->wait_for_catchup($node_standby, 'write',
+ $node_primary->lsn('write'));
+
+ # show "create missing directory" log message
+ $node_standby->safe_psql('postgres',
+ "ALTER SYSTEM SET log_min_messages TO debug1;");
+ $node_standby->stop('immediate');
+ # Should restart ignoring directory creation error.
+ is($node_standby->start(fail_ok => 1),
+ 1, "standby node started for $strategy");
+ $node_standby->stop('immediate');
+}
+
+test_tablespace("FILE_COPY");
+test_tablespace("WAL_LOG");
+
+# Ensure that a missing tablespace directory during create database
+# replay immediately causes panic if the standby has already reached
+# consistent state (archive recovery is in progress). This is
+# effective only for CREATE DATABASE WITH STRATEGY=FILE_COPY.
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary2');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+# Create tablespace
+$node_primary->safe_psql(
+ 'postgres', q[
+ SET allow_in_place_tablespaces=on;
+ CREATE TABLESPACE ts1 LOCATION ''
+ ]);
+$node_primary->safe_psql('postgres',
+ "CREATE DATABASE db1 WITH TABLESPACE ts1 STRATEGY=FILE_COPY");
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby3');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf',
+ "allow_in_place_tablespaces = on");
+$node_standby->start;
+
+# Make sure standby reached consistency and starts accepting connections
+$node_standby->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Remove standby tablespace directory so it will be missing when
+# replay resumes.
+my $tspoid = $node_standby->safe_psql('postgres',
+ "SELECT oid FROM pg_tablespace WHERE spcname = 'ts1';");
+my $tspdir = $node_standby->data_dir . "/pg_tblspc/$tspoid";
+File::Path::rmtree($tspdir);
+
+my $logstart = -s $node_standby->logfile;
+
+# Create a database in the tablespace and a table in default tablespace
+$node_primary->safe_psql(
+ 'postgres',
+ q[
+ CREATE TABLE should_not_replay_insertion(a int);
+ CREATE DATABASE db2 WITH TABLESPACE ts1 STRATEGY=FILE_COPY;
+ INSERT INTO should_not_replay_insertion VALUES (1);
+ ]);
+
+# Standby should fail and should not silently skip replaying the wal
+# In this test, PANIC turns into WARNING by allow_in_place_tablespaces.
+# Check the log messages instead of confirming standby failure.
+my $max_attempts = $PostgreSQL::Test::Utils::timeout_default * 10;
+while ($max_attempts-- >= 0)
+{
+ last
+ if (
+ $node_standby->log_contains(
+ qr!WARNING: ( [A-Z0-9]+:)? creating missing directory: pg_tblspc/!,
+ $logstart));
+ usleep(100_000);
+}
+ok($max_attempts > 0, "invalid directory creation is detected");
+
+done_testing();
diff --git a/src/test/recovery/t/034_create_database.pl b/src/test/recovery/t/034_create_database.pl
new file mode 100644
index 0000000..4698cbc
--- /dev/null
+++ b/src/test/recovery/t/034_create_database.pl
@@ -0,0 +1,45 @@
+
+# Copyright (c) 2023, PostgreSQL Global Development Group
+
+# Test WAL replay for CREATE DATABASE .. STRATEGY WAL_LOG.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init;
+$node->start;
+
+# This checks that any DDLs run on the template database that modify pg_class
+# are persisted after creating a database from it using the WAL_LOG strategy,
+# as a direct copy of the template database's pg_class is used in this case.
+my $db_template = "template1";
+my $db_new = "test_db_1";
+
+# Create table. It should persist on the template database.
+$node->safe_psql("postgres",
+ "CREATE DATABASE $db_new STRATEGY WAL_LOG TEMPLATE $db_template;");
+
+$node->safe_psql($db_template, "CREATE TABLE tab_db_after_create_1 (a INT);");
+
+# Flush the changes affecting the template database, then replay them.
+$node->safe_psql("postgres", "CHECKPOINT;");
+
+$node->stop('immediate');
+$node->start;
+my $result = $node->safe_psql($db_template,
+ "SELECT count(*) FROM pg_class WHERE relname LIKE 'tab_db_%';");
+is($result, "1",
+ "check that table exists on template after crash, with checkpoint");
+
+# The new database should have no tables.
+$result = $node->safe_psql($db_new,
+ "SELECT count(*) FROM pg_class WHERE relname LIKE 'tab_db_%';");
+is($result, "0",
+ "check that there are no tables from template on new database after crash"
+);
+
+done_testing();
diff --git a/src/test/recovery/t/037_invalid_database.pl b/src/test/recovery/t/037_invalid_database.pl
new file mode 100644
index 0000000..a061fab
--- /dev/null
+++ b/src/test/recovery/t/037_invalid_database.pl
@@ -0,0 +1,157 @@
+# Copyright (c) 2023, PostgreSQL Global Development Group
+#
+# Test we handle interrupted DROP DATABASE correctly.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+my $node = PostgreSQL::Test::Cluster->new('node');
+$node->init;
+$node->append_conf(
+ "postgresql.conf", qq(
+autovacuum = off
+max_prepared_transactions=5
+log_min_duration_statement=0
+log_connections=on
+log_disconnections=on
+));
+
+$node->start;
+
+
+# First verify that we can't connect to or ALTER an invalid database. Just
+# mark the database as invalid ourselves, that's more reliable than hitting the
+# required race conditions (see testing further down)...
+
+$node->safe_psql(
+ "postgres", qq(
+CREATE DATABASE regression_invalid;
+UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid';
+));
+
+my $psql_stdout = '';
+my $psql_stderr = '';
+
+is($node->psql('regression_invalid', '', stderr => \$psql_stderr),
+ 2, "can't connect to invalid database - error code");
+like(
+ $psql_stderr,
+ qr/FATAL:\s+cannot connect to invalid database "regression_invalid"/,
+ "can't connect to invalid database - error message");
+
+is($node->psql('postgres', 'ALTER DATABASE regression_invalid CONNECTION LIMIT 10'),
+ 2, "can't ALTER invalid database");
+
+# check invalid database can't be used as a template
+is( $node->psql('postgres', 'CREATE DATABASE copy_invalid TEMPLATE regression_invalid'),
+ 3,
+ "can't use invalid database as template");
+
+
+# Verify that VACUUM ignores an invalid database when computing how much of
+# the clog is needed (vac_truncate_clog()). For that we modify the pg_database
+# row of the invalid database to have an outdated datfrozenxid.
+$psql_stderr = '';
+$node->psql(
+ 'postgres',
+ qq(
+UPDATE pg_database SET datfrozenxid = '123456' WHERE datname = 'regression_invalid';
+DROP TABLE IF EXISTS foo_tbl; CREATE TABLE foo_tbl();
+VACUUM FREEZE;),
+ stderr => \$psql_stderr);
+unlike(
+ $psql_stderr,
+ qr/some databases have not been vacuumed in over 2 billion transactions/,
+ "invalid databases are ignored by vac_truncate_clog");
+
+
+# But we need to be able to drop an invalid database.
+is( $node->psql(
+ 'postgres', 'DROP DATABASE regression_invalid',
+ stdout => \$psql_stdout,
+ stderr => \$psql_stderr),
+ 0,
+ "can DROP invalid database");
+
+# Ensure database is gone
+is($node->psql('postgres', 'DROP DATABASE regression_invalid'),
+ 3, "can't drop already dropped database");
+
+
+# Test that interruption of DROP DATABASE is handled properly. To ensure the
+# interruption happens at the appropriate moment, we lock pg_tablespace. DROP
+# DATABASE scans pg_tablespace once it has reached the "irreversible" part of
+# dropping the database, making it a suitable point to wait.
+my $bgpsql_in = '';
+my $bgpsql_out = '';
+my $bgpsql_err = '';
+my $bgpsql_timer = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+my $bgpsql = $node->background_psql('postgres', \$bgpsql_in, \$bgpsql_out,
+ $bgpsql_timer, on_error_stop => 0);
+$bgpsql_out = '';
+$bgpsql_in .= "SELECT pg_backend_pid();\n";
+
+pump_until($bgpsql, $bgpsql_timer, \$bgpsql_out, qr/\d/);
+
+my $pid = $bgpsql_out;
+$bgpsql_out = '';
+
+# create the database, prevent drop database via lock held by a 2PC transaction
+$bgpsql_in .= qq(
+ CREATE DATABASE regression_invalid_interrupt;
+ BEGIN;
+ LOCK pg_tablespace;
+ PREPARE TRANSACTION 'lock_tblspc';
+ \\echo done
+);
+
+ok(pump_until($bgpsql, $bgpsql_timer, \$bgpsql_out, qr/done/),
+ "blocked DROP DATABASE completion");
+$bgpsql_out = '';
+
+# Try to drop. This will wait due to the still held lock.
+$bgpsql_in .= qq(
+ DROP DATABASE regression_invalid_interrupt;
+ \\echo DROP DATABASE completed
+);
+$bgpsql->pump_nb;
+
+# Ensure we're waiting for the lock
+$node->poll_query_until('postgres',
+ qq(SELECT EXISTS(SELECT * FROM pg_locks WHERE NOT granted AND relation = 'pg_tablespace'::regclass AND mode = 'AccessShareLock');)
+);
+
+# and finally interrupt the DROP DATABASE
+ok($node->safe_psql('postgres', "SELECT pg_cancel_backend($pid)"),
+ "canceling DROP DATABASE");
+
+# wait for cancellation to be processed
+ok( pump_until(
+ $bgpsql, $bgpsql_timer, \$bgpsql_out, qr/DROP DATABASE completed/),
+ "cancel processed");
+$bgpsql_out = '';
+
+# verify that connection to the database aren't allowed
+is($node->psql('regression_invalid_interrupt', ''),
+ 2, "can't connect to invalid_interrupt database");
+
+# To properly drop the database, we need to release the lock previously preventing
+# doing so.
+$bgpsql_in .= qq(
+ ROLLBACK PREPARED 'lock_tblspc';
+ \\echo ROLLBACK PREPARED
+);
+ok(pump_until($bgpsql, $bgpsql_timer, \$bgpsql_out, qr/ROLLBACK PREPARED/),
+ "unblock DROP DATABASE");
+$bgpsql_out = '';
+
+is($node->psql('postgres', "DROP DATABASE regression_invalid_interrupt"),
+ 0, "DROP DATABASE invalid_interrupt");
+
+$bgpsql_in .= "\\q\n";
+$bgpsql->finish();
+
+done_testing();
diff --git a/src/test/recovery/t/039_end_of_wal.pl b/src/test/recovery/t/039_end_of_wal.pl
new file mode 100644
index 0000000..b3b813d
--- /dev/null
+++ b/src/test/recovery/t/039_end_of_wal.pl
@@ -0,0 +1,485 @@
+# Copyright (c) 2023, PostgreSQL Global Development Group
+#
+# Test detecting end-of-WAL conditions. This test suite generates
+# fake defective page and record headers to trigger various failure
+# scenarios.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Fcntl qw(SEEK_SET);
+
+use integer; # causes / operator to use integer math
+
+# Is this a big-endian system ("network" byte order)? We can't use 'Q' in
+# pack() calls because it's not available in some perl builds, so we need to
+# break 64 bit LSN values into two 'I' values. Fortunately we don't need to
+# deal with high values, so we can just write 0 for the high order 32 bits, but
+# we need to know the endianness to do that.
+my $BIG_ENDIAN = pack("L", 0x12345678) eq pack("N", 0x12345678);
+
+# Header size of record header.
+my $RECORD_HEADER_SIZE = 24;
+
+# Fields retrieved from code headers.
+my @scan_result = scan_server_header('access/xlog_internal.h',
+ '#define\s+XLOG_PAGE_MAGIC\s+(\w+)');
+my $XLP_PAGE_MAGIC = hex($scan_result[0]);
+@scan_result = scan_server_header('access/xlog_internal.h',
+ '#define\s+XLP_FIRST_IS_CONTRECORD\s+(\w+)');
+my $XLP_FIRST_IS_CONTRECORD = hex($scan_result[0]);
+
+# Values queried from the server
+my $WAL_SEGMENT_SIZE;
+my $WAL_BLOCK_SIZE;
+my $TLI;
+
+# Build path of a WAL segment.
+sub wal_segment_path
+{
+ my $node = shift;
+ my $tli = shift;
+ my $segment = shift;
+ my $wal_path =
+ sprintf("%s/pg_wal/%08X%08X%08X", $node->data_dir, $tli, 0, $segment);
+ return $wal_path;
+}
+
+# Calculate from a LSN (in bytes) its segment number and its offset.
+sub lsn_to_segment_and_offset
+{
+ my $lsn = shift;
+ return ($lsn / $WAL_SEGMENT_SIZE, $lsn % $WAL_SEGMENT_SIZE);
+}
+
+# Write some arbitrary data in WAL for the given segment at LSN.
+# This should be called while the cluster is not running.
+sub write_wal
+{
+ my $node = shift;
+ my $tli = shift;
+ my $lsn = shift;
+ my $data = shift;
+
+ my ($segment, $offset) = lsn_to_segment_and_offset($lsn);
+ my $path = wal_segment_path($node, $tli, $segment);
+
+ open my $fh, "+<:raw", $path or die;
+ seek($fh, $offset, SEEK_SET) or die;
+ print $fh $data;
+ close $fh;
+}
+
+# Emit a WAL record of arbitrary size. Returns the end LSN of the
+# record inserted, in bytes.
+sub emit_message
+{
+ my $node = shift;
+ my $size = shift;
+ return int(
+ $node->safe_psql(
+ 'postgres',
+ "SELECT pg_logical_emit_message(true, '', repeat('a', $size)) - '0/0'"
+ ));
+}
+
+# Get the current insert LSN of a node, in bytes.
+sub get_insert_lsn
+{
+ my $node = shift;
+ return int(
+ $node->safe_psql(
+ 'postgres', "SELECT pg_current_wal_insert_lsn() - '0/0'"));
+}
+
+# Get GUC value, converted to an int.
+sub get_int_setting
+{
+ my $node = shift;
+ my $name = shift;
+ return int(
+ $node->safe_psql(
+ 'postgres',
+ "SELECT setting FROM pg_settings WHERE name = '$name'"));
+}
+
+sub start_of_page
+{
+ my $lsn = shift;
+ return $lsn & ~($WAL_BLOCK_SIZE - 1);
+}
+
+sub start_of_next_page
+{
+ my $lsn = shift;
+ return start_of_page($lsn) + $WAL_BLOCK_SIZE;
+}
+
+# Build a fake WAL record header based on the data given by the caller.
+# This needs to follow the format of the C structure XLogRecord. To
+# be inserted with write_wal().
+sub build_record_header
+{
+ my $xl_tot_len = shift;
+ my $xl_xid = shift || 0;
+ my $xl_prev = shift || 0;
+ my $xl_info = shift || 0;
+ my $xl_rmid = shift || 0;
+ my $xl_crc = shift || 0;
+
+ # This needs to follow the structure XLogRecord:
+ # I for xl_tot_len
+ # I for xl_xid
+ # II for xl_prev
+ # C for xl_info
+ # C for xl_rmid
+ # BB for two bytes of padding
+ # I for xl_crc
+ return pack("IIIICCBBI",
+ $xl_tot_len, $xl_xid,
+ $BIG_ENDIAN ? 0 : $xl_prev,
+ $BIG_ENDIAN ? $xl_prev : 0,
+ $xl_info, $xl_rmid, 0, 0, $xl_crc);
+}
+
+# Build a fake WAL page header, based on the data given by the caller
+# This needs to follow the format of the C structure XLogPageHeaderData.
+# To be inserted with write_wal().
+sub build_page_header
+{
+ my $xlp_magic = shift;
+ my $xlp_info = shift || 0;
+ my $xlp_tli = shift || 0;
+ my $xlp_pageaddr = shift || 0;
+ my $xlp_rem_len = shift || 0;
+
+ # This needs to follow the structure XLogPageHeaderData:
+ # S for xlp_magic
+ # S for xlp_info
+ # I for xlp_tli
+ # II for xlp_pageaddr
+ # I for xlp_rem_len
+ return pack("SSIIII",
+ $xlp_magic, $xlp_info, $xlp_tli,
+ $BIG_ENDIAN ? 0 : $xlp_pageaddr,
+ $BIG_ENDIAN ? $xlp_pageaddr : 0, $xlp_rem_len);
+}
+
+# Make sure we are far away enough from the end of a page that we could insert
+# a couple of small records. This inserts a few records of a fixed size, until
+# the threshold gets close enough to the end of the WAL page inserting records
+# to.
+sub advance_out_of_record_splitting_zone
+{
+ my $node = shift;
+
+ my $page_threshold = $WAL_BLOCK_SIZE / 4;
+ my $end_lsn = get_insert_lsn($node);
+ my $page_offset = $end_lsn % $WAL_BLOCK_SIZE;
+ while ($page_offset >= $WAL_BLOCK_SIZE - $page_threshold)
+ {
+ emit_message($node, $page_threshold);
+ $end_lsn = get_insert_lsn($node);
+ $page_offset = $end_lsn % $WAL_BLOCK_SIZE;
+ }
+ return $end_lsn;
+}
+
+# Advance so close to the end of a page that an XLogRecordHeader would not
+# fit on it.
+sub advance_to_record_splitting_zone
+{
+ my $node = shift;
+
+ my $end_lsn = get_insert_lsn($node);
+ my $page_offset = $end_lsn % $WAL_BLOCK_SIZE;
+
+ # Get fairly close to the end of a page in big steps
+ while ($page_offset <= $WAL_BLOCK_SIZE - 512)
+ {
+ emit_message($node, $WAL_BLOCK_SIZE - $page_offset - 256);
+ $end_lsn = get_insert_lsn($node);
+ $page_offset = $end_lsn % $WAL_BLOCK_SIZE;
+ }
+
+ # Calibrate our message size so that we can get closer 8 bytes at
+ # a time.
+ my $message_size = $WAL_BLOCK_SIZE - 80;
+ while ($page_offset <= $WAL_BLOCK_SIZE - $RECORD_HEADER_SIZE)
+ {
+ emit_message($node, $message_size);
+ $end_lsn = get_insert_lsn($node);
+
+ my $old_offset = $page_offset;
+ $page_offset = $end_lsn % $WAL_BLOCK_SIZE;
+
+ # Adjust the message size until it causes 8 bytes changes in
+ # offset, enough to be able to split a record header.
+ my $delta = $page_offset - $old_offset;
+ if ($delta > 8)
+ {
+ $message_size -= 8;
+ }
+ elsif ($delta <= 0)
+ {
+ $message_size += 8;
+ }
+ }
+ return $end_lsn;
+}
+
+# Setup a new node. The configuration chosen here minimizes the number
+# of arbitrary records that could get generated in a cluster. Enlarging
+# checkpoint_timeout avoids noise with checkpoint activity. wal_level
+# set to "minimal" avoids random standby snapshot records. Autovacuum
+# could also trigger randomly, generating random WAL activity of its own.
+my $node = PostgreSQL::Test::Cluster->new("node");
+$node->init;
+$node->append_conf(
+ 'postgresql.conf',
+ q[wal_level = minimal
+ autovacuum = off
+ checkpoint_timeout = '30min'
+]);
+$node->start;
+$node->safe_psql('postgres', "CREATE TABLE t AS SELECT 42");
+
+$WAL_SEGMENT_SIZE = get_int_setting($node, 'wal_segment_size');
+$WAL_BLOCK_SIZE = get_int_setting($node, 'wal_block_size');
+$TLI = $node->safe_psql('postgres',
+ "SELECT timeline_id FROM pg_control_checkpoint();");
+
+my $end_lsn;
+my $prev_lsn;
+
+###########################################################################
+note "Single-page end-of-WAL detection";
+###########################################################################
+
+# xl_tot_len is 0 (a common case, we hit trailing zeroes).
+emit_message($node, 0);
+$end_lsn = advance_out_of_record_splitting_zone($node);
+$node->stop('immediate');
+my $log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "invalid record length at .*: wanted 24, got 0", $log_size
+ ),
+ "xl_tot_len zero");
+
+# xl_tot_len is < 24 (presumably recycled garbage).
+emit_message($node, 0);
+$end_lsn = advance_out_of_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn, build_record_header(23));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "invalid record length at .*: wanted 24, got 23",
+ $log_size),
+ "xl_tot_len short");
+
+# xl_tot_len in final position, not big enough to span into a new page but
+# also not eligible for regular record header validation
+emit_message($node, 0);
+$end_lsn = advance_to_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn, build_record_header(1));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "invalid record length at .*: wanted 24, got 1", $log_size
+ ),
+ "xl_tot_len short at end-of-page");
+
+# Need more pages, but xl_prev check fails first.
+emit_message($node, 0);
+$end_lsn = advance_out_of_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "record with incorrect prev-link 0/DEADBEEF at .*", $log_size),
+ "xl_prev bad");
+
+# xl_crc check fails.
+emit_message($node, 0);
+advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 10);
+$node->stop('immediate');
+# Corrupt a byte in that record, breaking its CRC.
+write_wal($node, $TLI, $end_lsn - 8, '!');
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "incorrect resource manager data checksum in record at .*", $log_size
+ ),
+ "xl_crc bad");
+
+
+###########################################################################
+note "Multi-page end-of-WAL detection, header is not split";
+###########################################################################
+
+# This series of tests requires a valid xl_prev set in the record header
+# written to WAL.
+
+# Good xl_prev, we hit zero page next (zero magic).
+emit_message($node, 0);
+$prev_lsn = advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 0);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, $prev_lsn));
+$log_size = -s $node->logfile;
+$node->start;
+ok($node->log_contains("invalid magic number 0000 ", $log_size),
+ "xlp_magic zero");
+
+# Good xl_prev, we hit garbage page next (bad magic).
+emit_message($node, 0);
+$prev_lsn = advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 0);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, $prev_lsn));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header(0xcafe, 0, 1, 0));
+$log_size = -s $node->logfile;
+$node->start;
+ok($node->log_contains("invalid magic number CAFE ", $log_size),
+ "xlp_magic bad");
+
+# Good xl_prev, we hit typical recycled page (good xlp_magic, bad
+# xlp_pageaddr).
+emit_message($node, 0);
+$prev_lsn = advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 0);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, $prev_lsn));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header($XLP_PAGE_MAGIC, 0, 1, 0xbaaaaaad));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "unexpected pageaddr 0/BAAAAAAD ", $log_size),
+ "xlp_pageaddr bad");
+
+# Good xl_prev, xlp_magic, xlp_pageaddr, but bogus xlp_info.
+emit_message($node, 0);
+$prev_lsn = advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 0);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 42, $prev_lsn));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header(
+ $XLP_PAGE_MAGIC, 0x1234, 1, start_of_next_page($end_lsn)));
+$log_size = -s $node->logfile;
+$node->start;
+ok($node->log_contains("invalid info bits 1234 ", $log_size),
+ "xlp_info bad");
+
+# Good xl_prev, xlp_magic, xlp_pageaddr, but xlp_info doesn't mention
+# continuation record.
+emit_message($node, 0);
+$prev_lsn = advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 0);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 42, $prev_lsn));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header($XLP_PAGE_MAGIC, 0, 1, start_of_next_page($end_lsn)));
+$log_size = -s $node->logfile;
+$node->start;
+ok($node->log_contains("there is no contrecord flag at .*", $log_size),
+ "xlp_info lacks XLP_FIRST_IS_CONTRECORD");
+
+# Good xl_prev, xlp_magic, xlp_pageaddr, xlp_info but xlp_rem_len doesn't add
+# up.
+emit_message($node, 0);
+$prev_lsn = advance_out_of_record_splitting_zone($node);
+$end_lsn = emit_message($node, 0);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 42, $prev_lsn));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header(
+ $XLP_PAGE_MAGIC, $XLP_FIRST_IS_CONTRECORD,
+ 1, start_of_next_page($end_lsn),
+ 123456));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "invalid contrecord length 123456 .* at .*", $log_size),
+ "xlp_rem_len bad");
+
+
+###########################################################################
+note "Multi-page, but header is split, so page checks are done first";
+###########################################################################
+
+# xl_prev is bad and xl_tot_len is too big, but we'll check xlp_magic first.
+emit_message($node, 0);
+$end_lsn = advance_to_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef));
+$log_size = -s $node->logfile;
+$node->start;
+ok($node->log_contains("invalid magic number 0000 ", $log_size),
+ "xlp_magic zero (split record header)");
+
+# And we'll also check xlp_pageaddr before any header checks.
+emit_message($node, 0);
+$end_lsn = advance_to_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header(
+ $XLP_PAGE_MAGIC, $XLP_FIRST_IS_CONTRECORD, 1, 0xbaaaaaad));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "unexpected pageaddr 0/BAAAAAAD ", $log_size),
+ "xlp_pageaddr bad (split record header)");
+
+# We'll also discover that xlp_rem_len doesn't add up before any
+# header checks,
+emit_message($node, 0);
+$end_lsn = advance_to_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+ build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef));
+write_wal(
+ $node, $TLI,
+ start_of_next_page($end_lsn),
+ build_page_header(
+ $XLP_PAGE_MAGIC, $XLP_FIRST_IS_CONTRECORD,
+ 1, start_of_next_page($end_lsn),
+ 123456));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+ "invalid contrecord length 123456 .* at .*", $log_size),
+ "xlp_rem_len bad (split record header)");
+
+done_testing();
diff --git a/src/test/recovery/t/cp_history_files b/src/test/recovery/t/cp_history_files
new file mode 100644
index 0000000..cfeea41
--- /dev/null
+++ b/src/test/recovery/t/cp_history_files
@@ -0,0 +1,10 @@
+#!/usr/bin/perl
+
+use File::Copy;
+use strict;
+use warnings;
+
+die "wrong number of arguments" if @ARGV != 2;
+my ($source, $target) = @ARGV;
+exit if $source !~ /history/;
+copy($source, $target) or die "couldn't copy $source to $target: $!";