summaryrefslogtreecommitdiffstats
path: root/src/test/recovery
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/test/recovery/.gitignore2
-rw-r--r--src/test/recovery/Makefile29
-rw-r--r--src/test/recovery/README25
-rw-r--r--src/test/recovery/t/001_stream_rep.pl411
-rw-r--r--src/test/recovery/t/002_archiving.pl74
-rw-r--r--src/test/recovery/t/003_recovery_targets.pl179
-rw-r--r--src/test/recovery/t/004_timeline_switch.pl108
-rw-r--r--src/test/recovery/t/005_replay_delay.pl54
-rw-r--r--src/test/recovery/t/006_logical_decoding.pl190
-rw-r--r--src/test/recovery/t/007_sync_rep.pl214
-rw-r--r--src/test/recovery/t/008_fsm_truncation.pl96
-rw-r--r--src/test/recovery/t/009_twophase.pl477
-rw-r--r--src/test/recovery/t/010_logical_decoding_timelines.pl196
-rw-r--r--src/test/recovery/t/011_crash_recovery.pl68
-rw-r--r--src/test/recovery/t/012_subtransactions.pl216
-rw-r--r--src/test/recovery/t/013_crash_restart.pl272
-rw-r--r--src/test/recovery/t/014_unlogged_reinit.pl81
-rw-r--r--src/test/recovery/t/015_promotion_pages.pl85
-rw-r--r--src/test/recovery/t/016_min_consistency.pl138
-rw-r--r--src/test/recovery/t/017_shm.pl214
-rw-r--r--src/test/recovery/t/018_wal_optimize.pl373
-rw-r--r--src/test/recovery/t/019_replslot_limit.pl334
-rw-r--r--src/test/recovery/t/020_archive_status.pl233
-rw-r--r--src/test/recovery/t/023_pitr_prepared_xact.pl86
-rw-r--r--src/test/recovery/t/025_stuck_on_old_timeline.pl107
-rw-r--r--src/test/recovery/t/cp_history_files17
26 files changed, 4279 insertions, 0 deletions
diff --git a/src/test/recovery/.gitignore b/src/test/recovery/.gitignore
new file mode 100644
index 0000000..871e943
--- /dev/null
+++ b/src/test/recovery/.gitignore
@@ -0,0 +1,2 @@
+# Generated by test suite
+/tmp_check/
diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile
new file mode 100644
index 0000000..04ff514
--- /dev/null
+++ b/src/test/recovery/Makefile
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/recovery
+#
+# Portions Copyright (c) 1996-2020, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/test/recovery/Makefile
+#
+#-------------------------------------------------------------------------
+
+EXTRA_INSTALL=contrib/test_decoding
+
+subdir = src/test/recovery
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+# required for 017_shm.pl
+REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)
+export REGRESS_SHLIB
+
+check:
+ $(prove_check)
+
+installcheck:
+ $(prove_installcheck)
+
+clean distclean maintainer-clean:
+ rm -rf tmp_check
diff --git a/src/test/recovery/README b/src/test/recovery/README
new file mode 100644
index 0000000..632e720
--- /dev/null
+++ b/src/test/recovery/README
@@ -0,0 +1,25 @@
+src/test/recovery/README
+
+Regression tests for recovery and replication
+=============================================
+
+This directory contains a test suite for recovery and replication.
+
+Running the tests
+=================
+
+NOTE: You must have given the --enable-tap-tests argument to configure.
+Also, to use "make installcheck", you must have built and installed
+contrib/test_decoding in addition to the core code.
+
+Run
+ make check
+or
+ make installcheck
+You can use "make installcheck" if you previously did "make install".
+In that case, the code in the installation tree is tested. With
+"make check", a temporary installation tree is built from the current
+sources and then tested.
+
+Either way, this test initializes, starts, and stops several test Postgres
+clusters.
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
new file mode 100644
index 0000000..778f11b
--- /dev/null
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -0,0 +1,411 @@
+# Minimal test testing streaming replication
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 36;
+
+# Initialize master node
+my $node_master = get_new_node('master');
+# A specific role is created to perform some tests related to replication,
+# and it needs proper authentication configuration.
+$node_master->init(
+ allows_streaming => 1,
+ auth_extra => [ '--create-role', 'repl_role' ]);
+$node_master->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_master->backup($backup_name);
+
+# Create streaming standby linking to master
+my $node_standby_1 = get_new_node('standby_1');
+$node_standby_1->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby_1->start;
+
+# Take backup of standby 1 (not mandatory, but useful to check if
+# pg_basebackup works on a standby).
+$node_standby_1->backup($backup_name);
+
+# Take a second backup of the standby while the master is offline.
+$node_master->stop;
+$node_standby_1->backup('my_backup_2');
+$node_master->start;
+
+# Create second standby node linking to standby 1
+my $node_standby_2 = get_new_node('standby_2');
+$node_standby_2->init_from_backup($node_standby_1, $backup_name,
+ has_streaming => 1);
+$node_standby_2->start;
+
+# Create some content on master and check its presence in standby 1
+$node_master->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a");
+
+# Wait for standbys to catch up
+$node_master->wait_for_catchup($node_standby_1, 'replay',
+ $node_master->lsn('insert'));
+$node_standby_1->wait_for_catchup($node_standby_2, 'replay',
+ $node_standby_1->lsn('replay'));
+
+my $result =
+ $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+print "standby 1: $result\n";
+is($result, qq(1002), 'check streamed content on standby 1');
+
+$result =
+ $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+print "standby 2: $result\n";
+is($result, qq(1002), 'check streamed content on standby 2');
+
+# Check that only READ-only queries can run on standbys
+is($node_standby_1->psql('postgres', 'INSERT INTO tab_int VALUES (1)'),
+ 3, 'read-only queries on standby 1');
+is($node_standby_2->psql('postgres', 'INSERT INTO tab_int VALUES (1)'),
+ 3, 'read-only queries on standby 2');
+
+# Tests for connection parameter target_session_attrs
+note "testing connection parameter \"target_session_attrs\"";
+
+# Connect with a connection string listing $node1 then $node2 and the given
+# target_session_attrs $mode; expect psql status $status on $target_node's port.
+sub test_target_session_attrs
+{
+ my $node1 = shift;
+ my $node2 = shift;
+ my $target_node = shift;
+ my $mode = shift;
+ my $status = shift;
+
+ my $node1_host = $node1->host;
+ my $node1_port = $node1->port;
+ my $node1_name = $node1->name;
+ my $node2_host = $node2->host;
+ my $node2_port = $node2->port;
+ my $node2_name = $node2->name;
+
+ my $target_name = $target_node->name;
+
+ # Build a multi-host connection string; hosts and ports are listed in the same order.
+ my $connstr = "host=$node1_host,$node2_host ";
+ $connstr .= "port=$node1_port,$node2_port ";
+ $connstr .= "target_session_attrs=$mode";
+
+ # Which node object invokes psql does not matter; the test only checks
+ # the psql status and which backend port answers.
+ my ($ret, $stdout, $stderr) =
+ $node1->psql('postgres', 'SHOW port;',
+ extra_params => [ '-d', $connstr ]);
+ is( $status == $ret && $stdout eq $target_node->port,
+ 1,
+ "connect to node $target_name if mode \"$mode\" and $node1_name,$node2_name listed"
+ );
+
+ return;
+}
+
+# Connect to master in "read-write" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_master,
+ "read-write", 0);
+
+# Connect to master in "read-write" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_master,
+ "read-write", 0);
+
+# Connect to master in "any" mode with master,standby1 list.
+test_target_session_attrs($node_master, $node_standby_1, $node_master, "any",
+ 0);
+
+# Connect to standby1 in "any" mode with standby1,master list.
+test_target_session_attrs($node_standby_1, $node_master, $node_standby_1,
+ "any", 0);
+
+# Test for SHOW commands using a WAL sender connection with a replication
+# role.
+note "testing SHOW commands for replication connection";
+
+$node_master->psql(
+ 'postgres', "
+CREATE ROLE repl_role REPLICATION LOGIN;
+GRANT pg_read_all_settings TO repl_role;");
+my $master_host = $node_master->host;
+my $master_port = $node_master->port;
+my $connstr_common = "host=$master_host port=$master_port user=repl_role";
+my $connstr_rep = "$connstr_common replication=1";
+my $connstr_db = "$connstr_common replication=database dbname=postgres";
+
+# Test SHOW ALL
+my ($ret, $stdout, $stderr) = $node_master->psql(
+ 'postgres', 'SHOW ALL;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_rep ]);
+ok($ret == 0, "SHOW ALL with replication role and physical replication");
+($ret, $stdout, $stderr) = $node_master->psql(
+ 'postgres', 'SHOW ALL;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_db ]);
+ok($ret == 0, "SHOW ALL with replication role and logical replication");
+
+# Test SHOW with a user-settable parameter
+($ret, $stdout, $stderr) = $node_master->psql(
+ 'postgres', 'SHOW work_mem;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_rep ]);
+ok( $ret == 0,
+ "SHOW with user-settable parameter, replication role and physical replication"
+);
+($ret, $stdout, $stderr) = $node_master->psql(
+ 'postgres', 'SHOW work_mem;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_db ]);
+ok( $ret == 0,
+ "SHOW with user-settable parameter, replication role and logical replication"
+);
+
+# Test SHOW with a superuser-settable parameter
+($ret, $stdout, $stderr) = $node_master->psql(
+ 'postgres', 'SHOW primary_conninfo;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_rep ]);
+ok( $ret == 0,
+ "SHOW with superuser-settable parameter, replication role and physical replication"
+);
+($ret, $stdout, $stderr) = $node_master->psql(
+ 'postgres', 'SHOW primary_conninfo;',
+ on_error_die => 1,
+ extra_params => [ '-d', $connstr_db ]);
+ok( $ret == 0,
+ "SHOW with superuser-settable parameter, replication role and logical replication"
+);
+
+note "switching to physical replication slot";
+
+# Switch to using a physical replication slot. We can do this without a new
+# backup since physical slots can go backwards if needed. Do so on both
+# standbys. Since we're going to be testing things that affect the slot state,
+# also shorten wal_receiver_status_interval to ensure timely updates.
+my ($slotname_1, $slotname_2) = ('standby_1', 'standby_2');
+$node_master->append_conf('postgresql.conf', "max_replication_slots = 4");
+$node_master->restart;
+is( $node_master->psql(
+ 'postgres',
+ qq[SELECT pg_create_physical_replication_slot('$slotname_1');]),
+ 0,
+ 'physical slot created on master');
+$node_standby_1->append_conf('postgresql.conf',
+ "primary_slot_name = $slotname_1");
+$node_standby_1->append_conf('postgresql.conf',
+ "wal_receiver_status_interval = 1");
+$node_standby_1->append_conf('postgresql.conf', "max_replication_slots = 4");
+$node_standby_1->restart;
+is( $node_standby_1->psql(
+ 'postgres',
+ qq[SELECT pg_create_physical_replication_slot('$slotname_2');]),
+ 0,
+ 'physical slot created on intermediate replica');
+$node_standby_2->append_conf('postgresql.conf',
+ "primary_slot_name = $slotname_2");
+$node_standby_2->append_conf('postgresql.conf',
+ "wal_receiver_status_interval = 1");
+# It should be possible to change primary_slot_name without a restart;
+# the get_slot_xmins calls below wait for the change to take effect.
+$node_standby_2->reload;
+
+# Wait for boolean expression $check_expr to hold for slot $slotname on $node
+# (dying if the poll fails), then return the slot's (xmin, catalog_xmin).
+sub get_slot_xmins
+{
+ my ($node, $slotname, $check_expr) = @_;
+
+ $node->poll_query_until(
+ 'postgres', qq[
+ SELECT $check_expr
+ FROM pg_catalog.pg_replication_slots
+ WHERE slot_name = '$slotname';
+ ]) or die "Timed out waiting for slot xmins to advance";
+
+ my $slotinfo = $node->slot($slotname);
+ return ($slotinfo->{'xmin'}, $slotinfo->{'catalog_xmin'});
+}
+
+# There's no hot standby feedback and there are no logical slots on either peer
+# so xmin and catalog_xmin should be null on both slots.
+my ($xmin, $catalog_xmin) = get_slot_xmins($node_master, $slotname_1,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of non-cascaded slot null with no hs_feedback');
+is($catalog_xmin, '',
+ 'catalog xmin of non-cascaded slot null with no hs_feedback');
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of cascaded slot null with no hs_feedback');
+is($catalog_xmin, '',
+ 'catalog xmin of cascaded slot null with no hs_feedback');
+
+# Replication still works? replay_check() adds a row on master and dies unless both standbys have it.
+$node_master->safe_psql('postgres', 'CREATE TABLE replayed(val integer);');
+
+sub replay_check
+{
+ my $newval = $node_master->safe_psql('postgres',
+ 'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+ );
+ $node_master->wait_for_catchup($node_standby_1, 'replay',
+ $node_master->lsn('insert'));
+ $node_standby_1->wait_for_catchup($node_standby_2, 'replay',
+ $node_standby_1->lsn('replay'));
+ $node_standby_1->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval])
+ or die "standby_1 didn't replay master value $newval";
+ $node_standby_2->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval])
+ or die "standby_2 didn't replay standby_1 value $newval";
+ return;
+}
+
+replay_check();
+
+note "enabling hot_standby_feedback";
+
+# Enable hs_feedback. The slot should gain an xmin. We set the status interval
+# so we'll see the results promptly.
+$node_standby_1->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = on;');
+$node_standby_1->reload;
+$node_standby_2->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = on;');
+$node_standby_2->reload;
+replay_check();
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_master, $slotname_1,
+ "xmin IS NOT NULL AND catalog_xmin IS NULL");
+isnt($xmin, '', 'xmin of non-cascaded slot non-null with hs feedback');
+is($catalog_xmin, '',
+ 'catalog xmin of non-cascaded slot still null with hs_feedback');
+
+my ($xmin1, $catalog_xmin1) = get_slot_xmins($node_standby_1, $slotname_2,
+ "xmin IS NOT NULL AND catalog_xmin IS NULL");
+isnt($xmin1, '', 'xmin of cascaded slot non-null with hs feedback');
+is($catalog_xmin1, '',
+ 'catalog xmin of cascaded slot still null with hs_feedback');
+
+note "doing some work to advance xmin";
+$node_master->safe_psql(
+ 'postgres', q{
+do $$
+begin
+ for i in 10000..11000 loop
+ -- use an exception block so that each iteration eats an XID
+ begin
+ insert into tab_int values (i);
+ exception
+ when division_by_zero then null;
+ end;
+ end loop;
+end$$;
+});
+
+$node_master->safe_psql('postgres', 'VACUUM;');
+$node_master->safe_psql('postgres', 'CHECKPOINT;');
+
+my ($xmin2, $catalog_xmin2) =
+ get_slot_xmins($node_master, $slotname_1, "xmin <> '$xmin'");
+note "master slot's new xmin $xmin2, old xmin $xmin";
+isnt($xmin2, $xmin, 'xmin of non-cascaded slot with hs feedback has changed');
+is($catalog_xmin2, '',
+ 'catalog xmin of non-cascaded slot still null with hs_feedback unchanged'
+);
+
+($xmin2, $catalog_xmin2) =
+ get_slot_xmins($node_standby_1, $slotname_2, "xmin <> '$xmin1'");
+note "standby_1 slot's new xmin $xmin2, old xmin $xmin1";
+isnt($xmin2, $xmin1, 'xmin of cascaded slot with hs feedback has changed');
+is($catalog_xmin2, '',
+ 'catalog xmin of cascaded slot still null with hs_feedback unchanged');
+
+note "disabling hot_standby_feedback";
+
+# Disable hs_feedback. Xmin should be cleared.
+$node_standby_1->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = off;');
+$node_standby_1->reload;
+$node_standby_2->safe_psql('postgres',
+ 'ALTER SYSTEM SET hot_standby_feedback = off;');
+$node_standby_2->reload;
+replay_check();
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_master, $slotname_1,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of non-cascaded slot null with hs feedback reset');
+is($catalog_xmin, '',
+ 'catalog xmin of non-cascaded slot still null with hs_feedback reset');
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2,
+ "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
+is($catalog_xmin, '',
+ 'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "check change primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf', "primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_master);
+$node_standby_2->reload;
+
+# Stop standby_1 to be sure standby_2 is no longer streaming from the cascade.
+$node_standby_1->stop;
+
+my $newval = $node_master->safe_psql('postgres',
+ 'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_master->wait_for_catchup($node_standby_2, 'replay',
+ $node_master->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+ qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 replayed master value $newval");
+
+# Drop any existing slots on the primary, for the follow-up tests.
+$node_master->safe_psql('postgres',
+ "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;");
+
+# Test physical slot advancing and its durability. Create a new slot on
+# the primary, not used by any of the standbys. This reserves WAL at creation.
+my $phys_slot = 'phys_slot';
+$node_master->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('$phys_slot', true);");
+# Generate some WAL, and switch to a new segment, used to check that
+# the previous segment is correctly getting recycled as the slot advancing
+# would recompute the minimum LSN calculated across all slots.
+my $segment_removed = $node_master->safe_psql('postgres',
+ 'SELECT pg_walfile_name(pg_current_wal_lsn())');
+chomp($segment_removed);
+$node_master->psql(
+ 'postgres', "
+ CREATE TABLE tab_phys_slot (a int);
+ INSERT INTO tab_phys_slot VALUES (generate_series(1,10));
+ SELECT pg_switch_wal();");
+my $current_lsn =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_master->psql('postgres',
+ "SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);"
+);
+is($psql_rc, '0', 'slot advancing with physical slot');
+my $phys_restart_lsn_pre = $node_master->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"
+);
+chomp($phys_restart_lsn_pre);
+# Slot advance should persist across clean restarts.
+$node_master->restart;
+my $phys_restart_lsn_post = $node_master->safe_psql('postgres',
+ "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"
+);
+chomp($phys_restart_lsn_post);
+ok( ($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
+ "physical slot advance persists across restarts");
+
+# Check if the previous segment gets correctly recycled after the
+# server stopped cleanly, causing a shutdown checkpoint to be generated.
+my $master_data = $node_master->data_dir;
+ok(!-f "$master_data/pg_wal/$segment_removed",
+ "WAL segment $segment_removed recycled after physical slot advancing");
diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl
new file mode 100644
index 0000000..683c33b
--- /dev/null
+++ b/src/test/recovery/t/002_archiving.pl
@@ -0,0 +1,74 @@
+# test for archiving with hot standby
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 3;
+use File::Copy;
+
+# Initialize master node, doing archives
+my $node_master = get_new_node('master');
+$node_master->init(
+ has_archiving => 1,
+ allows_streaming => 1);
+my $backup_name = 'my_backup';
+
+# Start it
+$node_master->start;
+
+# Take backup for standby
+$node_master->backup($backup_name);
+
+# Initialize standby node from backup, fetching WAL from archives
+my $node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_master, $backup_name,
+ has_restoring => 1);
+$node_standby->append_conf('postgresql.conf',
+ "wal_retrieve_retry_interval = '100ms'");
+$node_standby->start;
+
+# Create some content on master
+$node_master->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+my $current_lsn =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Force archiving of WAL file to make it present on master
+$node_master->safe_psql('postgres', "SELECT pg_switch_wal()");
+
+# Add some more content, it should not be present on standby
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+
+# Wait until necessary replay has been done on standby
+my $caughtup_query =
+ "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+my $result =
+ $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result, qq(1000), 'check content from archives');
+
+# Check the presence of temporary files specifically generated during
+# archive recovery. To ensure the presence of the temporary history
+# file, switch to a timeline large enough to allow a standby to recover
+# a history file from an archive. As this requires at least two timeline
+# switches, promote the existing standby first. Then create a second
+# standby based on the promoted one. Finally, the second standby is
+# promoted.
+$node_standby->promote;
+
+my $node_standby2 = get_new_node('standby2');
+$node_standby2->init_from_backup($node_master, $backup_name,
+ has_restoring => 1);
+$node_standby2->start;
+
+# Now promote standby2, and check that temporary files specifically
+# generated during archive recovery are removed by the end of recovery.
+$node_standby2->promote;
+my $node_standby2_data = $node_standby2->data_dir;
+ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY",
+ "RECOVERYHISTORY removed after promotion");
+ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG",
+ "RECOVERYXLOG removed after promotion");
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
new file mode 100644
index 0000000..2b4360a
--- /dev/null
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -0,0 +1,179 @@
+# Test for recovery targets: name, timestamp, XID
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 9;
+use Time::HiRes qw(usleep);
+
+# Create and test a standby from backup 'my_backup', with the given recovery
+# parameters.  Choose $until_lsn later than the transaction commit that causes
+# the row count to reach $num_rows, yet not later than the recovery target.
+sub test_recovery_standby
+{
+ my $test_name = shift;
+ my $node_name = shift;
+ my $node_master = shift;
+ my $recovery_params = shift;
+ my $num_rows = shift;
+ my $until_lsn = shift;
+
+ my $node_standby = get_new_node($node_name);
+ $node_standby->init_from_backup($node_master, 'my_backup',
+ has_restoring => 1);
+
+ foreach my $param_item (@$recovery_params)
+ {
+ $node_standby->append_conf('postgresql.conf', qq($param_item));
+ }
+
+ $node_standby->start;
+
+ # Wait until the standby has replayed up to $until_lsn
+ my $caughtup_query =
+ "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+ $node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+ # Check that tab_int on the standby holds the expected number of rows
+ my $result =
+ $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+ is($result, qq($num_rows), "check standby content for $test_name");
+
+ # Stop standby node
+ $node_standby->teardown_node;
+
+ return;
+}
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(has_archiving => 1, allows_streaming => 1);
+
+# Bump the transaction ID epoch. This is useful to stress the portability
+# of recovery_target_xid parsing.
+system_or_bail('pg_resetwal', '--epoch', '1', $node_master->data_dir);
+
+# Start it
+$node_master->start;
+
+# Create data before taking the backup, aimed at testing
+# recovery_target = 'immediate'
+$node_master->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+my $lsn1 =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Take backup from which all operations will be run
+$node_master->backup('my_backup');
+
+# Insert some data to be used as a replay reference, with a recovery
+# target TXID.
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+my $ret = $node_master->safe_psql('postgres',
+ "SELECT pg_current_wal_lsn(), pg_current_xact_id();");
+my ($lsn2, $recovery_txid) = split /\|/, $ret;
+
+# More data, with recovery target timestamp
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(2001,3000))");
+my $lsn3 =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+my $recovery_time = $node_master->safe_psql('postgres', "SELECT now()");
+
+# Even more data, this time with a recovery target name
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(3001,4000))");
+my $recovery_name = "my_target";
+my $lsn4 =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+$node_master->safe_psql('postgres',
+ "SELECT pg_create_restore_point('$recovery_name');");
+
+# And now for a recovery target LSN
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(4001,5000))");
+my $lsn5 = my $recovery_lsn =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(5001,6000))");
+
+# Force archiving of WAL file
+$node_master->safe_psql('postgres', "SELECT pg_switch_wal()");
+
+# Test recovery targets
+my @recovery_params = ("recovery_target = 'immediate'");
+test_recovery_standby('immediate target',
+ 'standby_1', $node_master, \@recovery_params, "1000", $lsn1);
+@recovery_params = ("recovery_target_xid = '$recovery_txid'");
+test_recovery_standby('XID', 'standby_2', $node_master, \@recovery_params,
+ "2000", $lsn2);
+@recovery_params = ("recovery_target_time = '$recovery_time'");
+test_recovery_standby('time', 'standby_3', $node_master, \@recovery_params,
+ "3000", $lsn3);
+@recovery_params = ("recovery_target_name = '$recovery_name'");
+test_recovery_standby('name', 'standby_4', $node_master, \@recovery_params,
+ "4000", $lsn4);
+@recovery_params = ("recovery_target_lsn = '$recovery_lsn'");
+test_recovery_standby('LSN', 'standby_5', $node_master, \@recovery_params,
+ "5000", $lsn5);
+
+# Multiple targets
+#
+# Multiple conflicting settings are not allowed, but setting the same
+# parameter multiple times or unsetting a parameter and setting a
+# different one is allowed.
+
+@recovery_params = (
+ "recovery_target_name = '$recovery_name'",
+ "recovery_target_name = ''",
+ "recovery_target_time = '$recovery_time'");
+test_recovery_standby('multiple overriding settings',
+ 'standby_6', $node_master, \@recovery_params, "3000", $lsn3);
+
+my $node_standby = get_new_node('standby_7');
+$node_standby->init_from_backup($node_master, 'my_backup',
+ has_restoring => 1);
+$node_standby->append_conf(
+ 'postgresql.conf', "recovery_target_name = '$recovery_name'
+recovery_target_time = '$recovery_time'");
+
+my $res = run_log(
+ [
+ 'pg_ctl', '-D', $node_standby->data_dir, '-l',
+ $node_standby->logfile, 'start'
+ ]);
+ok(!$res, 'invalid recovery startup fails');
+
+my $logfile = slurp_file($node_standby->logfile());
+ok($logfile =~ qr/multiple recovery targets specified/,
+ 'multiple conflicting settings');
+
+# Check behavior when recovery ends before target is reached
+
+$node_standby = get_new_node('standby_8');
+$node_standby->init_from_backup(
+ $node_master, 'my_backup',
+ has_restoring => 1,
+ standby => 0);
+$node_standby->append_conf('postgresql.conf',
+ "recovery_target_name = 'does_not_exist'");
+
+run_log(
+ [
+ 'pg_ctl', '-D', $node_standby->data_dir, '-l',
+ $node_standby->logfile, 'start'
+ ]);
+
+# wait up to 180s for postgres to terminate
+foreach my $i (0 .. 1800)
+{
+ last if !-f $node_standby->data_dir . '/postmaster.pid';
+ usleep(100_000);
+}
+$logfile = slurp_file($node_standby->logfile());
+ok( $logfile =~
+ qr/FATAL: .* recovery ended before configured recovery target was reached/,
+ 'recovery end before target reached is a fatal error');
diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl
new file mode 100644
index 0000000..91a63f4
--- /dev/null
+++ b/src/test/recovery/t/004_timeline_switch.pl
@@ -0,0 +1,108 @@
+# Test for timeline switch
+use strict;
+use warnings;
+use File::Path qw(rmtree);
+use PostgresNode;
+use TestLib;
+use Test::More tests => 3;
+
+$ENV{PGDATABASE} = 'postgres';
+
+# Ensure that a cascading standby is able to follow a newly-promoted standby
+# on a new timeline.
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_master->backup($backup_name);
+
+# Create two standbys linking to it
+my $node_standby_1 = get_new_node('standby_1');
+$node_standby_1->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby_1->start;
+my $node_standby_2 = get_new_node('standby_2');
+$node_standby_2->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby_2->start;
+
+# Create some content on master
+$node_master->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+
+# Wait until standby 1 has replayed enough data
+$node_master->wait_for_catchup($node_standby_1, 'replay',
+ $node_master->lsn('write'));
+
+# Stop and remove master
+$node_master->teardown_node;
+
+# promote standby 1 using "pg_promote", switching it to a new timeline
+my $psql_out = '';
+$node_standby_1->psql(
+ 'postgres',
+ "SELECT pg_promote(wait_seconds => 300)",
+ stdout => \$psql_out);
+is($psql_out, 't', "promotion of standby with pg_promote");
+
+# Switch standby 2 to replay from standby 1
+my $connstr_1 = $node_standby_1->connstr;
+$node_standby_2->append_conf(
+ 'postgresql.conf', qq(
+primary_conninfo='$connstr_1'
+));
+$node_standby_2->restart;
+
+# Insert some data in standby 1 and check its presence in standby 2
+# to ensure that the timeline switch has been done.
+$node_standby_1->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+$node_standby_1->wait_for_catchup($node_standby_2, 'replay',
+ $node_standby_1->lsn('write'));
+
+my $result =
+ $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result, qq(2000), 'check content of standby 2');
+
+
+# Ensure that a standby is able to follow a master on a newer timeline
+# when WAL archiving is enabled.
+
+# Initialize master node
+my $node_master_2 = get_new_node('master_2');
+$node_master_2->init(allows_streaming => 1, has_archiving => 1);
+$node_master_2->append_conf(
+ 'postgresql.conf', qq(
+wal_keep_size = 512MB
+));
+$node_master_2->start;
+
+# Take backup
+$node_master_2->backup($backup_name);
+
+# Create standby node
+my $node_standby_3 = get_new_node('standby_3');
+$node_standby_3->init_from_backup($node_master_2, $backup_name,
+ has_streaming => 1);
+
+# Restart master node in standby mode and promote it, switching it
+# to a new timeline.
+$node_master_2->set_standby_mode;
+$node_master_2->restart;
+$node_master_2->promote;
+
+# Start standby node, create some content on master and check its presence
+# in standby, to ensure that the timeline switch has been done.
+$node_standby_3->start;
+$node_master_2->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT 1 AS a");
+$node_master_2->wait_for_catchup($node_standby_3, 'replay',
+ $node_master_2->lsn('write'));
+
+my $result_2 =
+ $node_standby_3->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result_2, qq(1), 'check content of standby 3');
diff --git a/src/test/recovery/t/005_replay_delay.pl b/src/test/recovery/t/005_replay_delay.pl
new file mode 100644
index 0000000..6c85c92
--- /dev/null
+++ b/src/test/recovery/t/005_replay_delay.pl
@@ -0,0 +1,54 @@
+# Checks for recovery_min_apply_delay
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+
+# And some content
+$node_master->safe_psql('postgres',
+ "CREATE TABLE tab_int AS SELECT generate_series(1, 10) AS a");
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_master->backup($backup_name);
+
+# Create streaming standby from backup
+my $node_standby = get_new_node('standby');
+my $delay = 3;
+$node_standby->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf(
+ 'postgresql.conf', qq(
+recovery_min_apply_delay = '${delay}s'
+));
+$node_standby->start;
+
+# Make new content on master and check its presence in standby depending
+# on the delay applied above. Before doing the insertion, get the
+# current timestamp that will be used as a comparison base. Even on slow
+# machines, this gives predictable behavior when comparing the delay
+# between the moment of data insertion on master and replay time on standby.
+my $master_insert_time = time();
+$node_master->safe_psql('postgres',
+ "INSERT INTO tab_int VALUES (generate_series(11, 20))");
+
+# Now wait for replay to complete on standby. We're done waiting when the
+# standby has replayed up to the previously saved master LSN.
+my $until_lsn =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+
+$node_standby->poll_query_until('postgres',
+ "SELECT (pg_last_wal_replay_lsn() - '$until_lsn'::pg_lsn) >= 0")
+ or die "standby never caught up";
+
+# This test is successful if and only if the LSN has been applied with at least
+# the configured apply delay.
+ok(time() - $master_insert_time >= $delay,
+ "standby applies WAL only after replication delay");
diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl
new file mode 100644
index 0000000..78229a7
--- /dev/null
+++ b/src/test/recovery/t/006_logical_decoding.pl
@@ -0,0 +1,190 @@
+# Testing of logical decoding using SQL interface and/or pg_recvlogical
+#
+# Most logical decoding tests are in contrib/test_decoding. This module
+# is for work that doesn't fit well there, like where server restarts
+# are required.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 14;
+use Config;
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->append_conf(
+ 'postgresql.conf', qq(
+wal_level = logical
+));
+$node_master->start;
+my $backup_name = 'master_backup';
+
+$node_master->safe_psql('postgres',
+ qq[CREATE TABLE decoding_test(x integer, y text);]);
+
+$node_master->safe_psql('postgres',
+ qq[SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');]
+);
+
+# Cover walsender error shutdown code. The slot was created through a
+# connection to 'postgres', so starting logical replication on it over a
+# connection to 'template1' must be rejected. like() is used rather than
+# ok() so that a failure reports the stderr that was actually received.
+my ($result, $stdout, $stderr) = $node_master->psql(
+	'template1',
+	qq[START_REPLICATION SLOT test_slot LOGICAL 0/0],
+	replication => 'database');
+like(
+	$stderr,
+	qr/replication slot "test_slot" was not created in this database/,
+	"Logical decoding correctly fails to start");
+
+# Check case of walsender not using a database connection. Logical
+# decoding should not be allowed.
+($result, $stdout, $stderr) = $node_master->psql(
+	'template1',
+	qq[START_REPLICATION SLOT s1 LOGICAL 0/1],
+	replication => 'true');
+like(
+	$stderr,
+	qr/ERROR: logical decoding requires a database connection/,
+	"Logical decoding fails on non-database connection");
+
+$node_master->safe_psql('postgres',
+ qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,10) s;]
+);
+
+# Basic decoding works
+$result = $node_master->safe_psql('postgres',
+ qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]);
+is(scalar(my @foobar = split /^/m, $result),
+ 12, 'Decoding produced 12 rows inc BEGIN/COMMIT');
+
+# If we immediately crash the server we might lose the progress we just made
+# and replay the same changes again. But a clean shutdown should never repeat
+# the same changes when we use the SQL decoding interface.
+$node_master->restart('fast');
+
+# There are no new writes, so the result should be empty.
+$result = $node_master->safe_psql('postgres',
+ qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]);
+chomp($result);
+is($result, '', 'Decoding after fast restart repeats no rows');
+
+# Insert some rows and verify that we get the same results from pg_recvlogical
+# and the SQL interface.
+$node_master->safe_psql('postgres',
+ qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,4) s;]
+);
+
+my $expected = q{BEGIN
+table public.decoding_test: INSERT: x[integer]:1 y[text]:'1'
+table public.decoding_test: INSERT: x[integer]:2 y[text]:'2'
+table public.decoding_test: INSERT: x[integer]:3 y[text]:'3'
+table public.decoding_test: INSERT: x[integer]:4 y[text]:'4'
+COMMIT};
+
+my $stdout_sql = $node_master->safe_psql('postgres',
+ qq[SELECT data FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');]
+);
+is($stdout_sql, $expected, 'got expected output from SQL decoding session');
+
+my $endpos = $node_master->safe_psql('postgres',
+ "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;"
+);
+print "waiting to replay $endpos\n";
+
+# Insert some rows after $endpos, which we won't read.
+$node_master->safe_psql('postgres',
+ qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(5,50) s;]
+);
+
+my $stdout_recv = $node_master->pg_recvlogical_upto(
+ 'postgres', 'test_slot', $endpos, 180,
+ 'include-xids' => '0',
+ 'skip-empty-xacts' => '1');
+chomp($stdout_recv);
+is($stdout_recv, $expected,
+ 'got same expected output from pg_recvlogical decoding session');
+
+$node_master->poll_query_until('postgres',
+ "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'test_slot' AND active_pid IS NULL)"
+) or die "slot never became inactive";
+
+$stdout_recv = $node_master->pg_recvlogical_upto(
+ 'postgres', 'test_slot', $endpos, 180,
+ 'include-xids' => '0',
+ 'skip-empty-xacts' => '1');
+chomp($stdout_recv);
+is($stdout_recv, '', 'pg_recvlogical acknowledged changes');
+
+$node_master->safe_psql('postgres', 'CREATE DATABASE otherdb');
+
+is( $node_master->psql(
+ 'otherdb',
+ "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;"
+ ),
+ 3,
+ 'replaying logical slot from another database fails');
+
+$node_master->safe_psql('otherdb',
+ qq[SELECT pg_create_logical_replication_slot('otherdb_slot', 'test_decoding');]
+);
+
+# Make sure a database cannot be dropped while one of its logical slots is
+# active, and that the slot is still there after the failed attempt.
+SKIP:
+{
+
+	# some Windows Perls at least don't like IPC::Run's start/kill_kill regime.
+	skip "Test fails on Windows perl", 2 if $Config{osname} eq 'MSWin32';
+
+	# Stream from the slot in the background so that it becomes active.
+	my $pg_recvlogical = IPC::Run::start(
+		[
+			'pg_recvlogical', '-d', $node_master->connstr('otherdb'),
+			'-S', 'otherdb_slot', '-f', '-', '--start'
+		]);
+	$node_master->poll_query_until('otherdb',
+		"SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NOT NULL)"
+	) or die "slot never became active";
+	is($node_master->psql('postgres', 'DROP DATABASE otherdb'),
+		3, 'dropping a DB with active logical slots fails');
+	$pg_recvlogical->kill_kill;
+
+	# Look the slot up directly in pg_replication_slots so that this check
+	# depends on the slot actually being present.
+	my $slot_count = $node_master->safe_psql('postgres',
+		"SELECT count(*) FROM pg_replication_slots WHERE slot_name = 'otherdb_slot'"
+	);
+	is($slot_count, '1', 'logical slot still exists');
+}
+
+# Wait for the slot to be released before trying to drop its database.
+$node_master->poll_query_until('otherdb',
+	"SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NULL)"
+) or die "slot never became inactive";
+
+is($node_master->psql('postgres', 'DROP DATABASE otherdb'),
+	0, 'dropping a DB with inactive logical slots succeeds');
+
+# Look the slot up directly in pg_replication_slots so that this check
+# depends on the slot actually being gone.
+my $dropped_slot_count = $node_master->safe_psql('postgres',
+	"SELECT count(*) FROM pg_replication_slots WHERE slot_name = 'otherdb_slot'"
+);
+is($dropped_slot_count, '0', 'logical slot was actually dropped with DB');
+
+# Test logical slot advancing and its durability.
+my $logical_slot = 'logical_slot';
+$node_master->safe_psql('postgres',
+	"SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);"
+);
+
+# Generate some WAL for the slot to be advanced over. safe_psql is used so
+# that a failure in this setup step aborts the test instead of being ignored.
+$node_master->safe_psql(
+ 'postgres', "
+ CREATE TABLE tab_logical_slot (a int);
+ INSERT INTO tab_logical_slot VALUES (generate_series(1,10));");
+my $current_lsn =
+  $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_master->psql('postgres',
+	"SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);"
+);
+is($psql_rc, '0', 'slot advancing with logical slot');
+
+# Remember restart_lsn as reported before the restart.
+my $logical_restart_lsn_pre = $node_master->safe_psql('postgres',
+	"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';"
+);
+chomp($logical_restart_lsn_pre);
+
+# Slot advance should persist across clean restarts.
+$node_master->restart;
+my $logical_restart_lsn_post = $node_master->safe_psql('postgres',
+	"SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';"
+);
+chomp($logical_restart_lsn_post);
+
+# is() reports both values on mismatch, unlike ok() on a cmp result.
+is($logical_restart_lsn_post, $logical_restart_lsn_pre,
+	"logical slot advance persists across restarts");
+
+# done with the node
+$node_master->stop;
diff --git a/src/test/recovery/t/007_sync_rep.pl b/src/test/recovery/t/007_sync_rep.pl
new file mode 100644
index 0000000..05803be
--- /dev/null
+++ b/src/test/recovery/t/007_sync_rep.pl
@@ -0,0 +1,214 @@
+# Minimal test testing synchronous replication sync_state transition
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 11;
+
+# Query checking sync_priority and sync_state of each standby
+my $check_sql =
+ "SELECT application_name, sync_priority, sync_state FROM pg_stat_replication ORDER BY application_name;";
+
+# Poll node $self with $check_sql until its output equals $expected, and
+# record the outcome as a single test named $msg.
+# When $setting is defined, synchronous_standby_names is first set to it
+# with ALTER SYSTEM and the configuration is reloaded before polling.
+sub test_sync_state
+{
+	my ($self, $expected, $msg, $setting) = @_;
+
+	# Install the requested configuration, if any, before checking state.
+	if (defined $setting)
+	{
+		my $alter_sql =
+		  "ALTER SYSTEM SET synchronous_standby_names = '$setting';";
+		$self->safe_psql('postgres', $alter_sql);
+		$self->reload;
+	}
+
+	ok($self->poll_query_until('postgres', $check_sql, $expected), $msg);
+	return;
+}
+
+# Start a standby and check that it is registered within the WAL sender
+# array of the given primary. This polls the primary's pg_stat_replication
+# until exactly one row carries the standby's name as application_name.
+# Dies if polling gives up first, so that a standby that never connects is
+# reported here rather than through a later, unrelated test failure.
+sub start_standby_and_wait
+{
+	my ($master, $standby) = @_;
+	my $master_name = $master->name;
+	my $standby_name = $standby->name;
+	my $query =
+	  "SELECT count(1) = 1 FROM pg_stat_replication WHERE application_name = '$standby_name'";
+
+	$standby->start;
+
+	print("### Waiting for standby \"$standby_name\" on \"$master_name\"\n");
+	$master->poll_query_until('postgres', $query)
+	  or die
+	  "Timed out while waiting for standby \"$standby_name\" to register on \"$master_name\"";
+	return;
+}
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+$node_master->start;
+my $backup_name = 'master_backup';
+
+# Take backup
+$node_master->backup($backup_name);
+
+# Create all the standbys. Their status on the primary is checked to ensure
+# the ordering of each one of them in the WAL sender array of the primary.
+
+# Create standby1 linking to master
+my $node_standby_1 = get_new_node('standby1');
+$node_standby_1->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+start_standby_and_wait($node_master, $node_standby_1);
+
+# Create standby2 linking to master
+my $node_standby_2 = get_new_node('standby2');
+$node_standby_2->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+start_standby_and_wait($node_master, $node_standby_2);
+
+# Create standby3 linking to master
+my $node_standby_3 = get_new_node('standby3');
+$node_standby_3->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+start_standby_and_wait($node_master, $node_standby_3);
+
+# Check that sync_state is determined correctly when
+# synchronous_standby_names is specified in old syntax.
+test_sync_state(
+ $node_master, qq(standby1|1|sync
+standby2|2|potential
+standby3|0|async),
+ 'old syntax of synchronous_standby_names',
+ 'standby1,standby2');
+
+# Check that all the standbys are considered as either sync or
+# potential when * is specified in synchronous_standby_names.
+# Note that standby1 is chosen as sync standby because
+# it's stored in the head of WalSnd array which manages
+# all the standbys though they have the same priority.
+test_sync_state(
+ $node_master, qq(standby1|1|sync
+standby2|1|potential
+standby3|1|potential),
+ 'asterisk in synchronous_standby_names',
+ '*');
+
+# Stop and start standbys to rearrange the order of standbys
+# in WalSnd array. Now, if standbys have the same priority,
+# standby2 is selected preferentially and standby3 is next.
+$node_standby_1->stop;
+$node_standby_2->stop;
+$node_standby_3->stop;
+
+# Make sure that each standby reports back to the primary in the wanted
+# order.
+start_standby_and_wait($node_master, $node_standby_2);
+start_standby_and_wait($node_master, $node_standby_3);
+
+# Specify 2 as the number of sync standbys.
+# Check that two standbys are in 'sync' state.
+test_sync_state(
+ $node_master, qq(standby2|2|sync
+standby3|3|sync),
+ '2 synchronous standbys',
+ '2(standby1,standby2,standby3)');
+
+# Start standby1
+start_standby_and_wait($node_master, $node_standby_1);
+
+# Create standby4 linking to master
+my $node_standby_4 = get_new_node('standby4');
+$node_standby_4->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby_4->start;
+
+# Check that standby1 and standby2 whose names appear earlier in
+# synchronous_standby_names are considered as sync. Also check that
+# standby3 appearing later represents potential, and standby4 is
+# in 'async' state because it's not in the list.
+test_sync_state(
+ $node_master, qq(standby1|1|sync
+standby2|2|sync
+standby3|3|potential
+standby4|0|async),
+ '2 sync, 1 potential, and 1 async');
+
+# Check that sync_state of each standby is determined correctly
+# when num_sync exceeds the number of names of potential sync standbys
+# specified in synchronous_standby_names.
+test_sync_state(
+ $node_master, qq(standby1|0|async
+standby2|4|sync
+standby3|3|sync
+standby4|1|sync),
+ 'num_sync exceeds the num of potential sync standbys',
+ '6(standby4,standby0,standby3,standby2)');
+
+# The setting that * comes before another standby name is acceptable
+# but does not make sense in most cases. Check that sync_state is
+# chosen properly even in case of that setting. standby1 is selected
+# as synchronous as it has the highest priority, and is followed by a
+# second standby listed first in the WAL sender array, which is
+# standby2 in this case.
+test_sync_state(
+ $node_master, qq(standby1|1|sync
+standby2|2|sync
+standby3|2|potential
+standby4|2|potential),
+ 'asterisk before another standby name',
+ '2(standby1,*,standby2)');
+
+# Check that the setting of '2(*)' chooses standby2 and standby3 that are stored
+# earlier in WalSnd array as sync standbys.
+test_sync_state(
+ $node_master, qq(standby1|1|potential
+standby2|1|sync
+standby3|1|sync
+standby4|1|potential),
+ 'multiple standbys having the same priority are chosen as sync',
+ '2(*)');
+
+# Stop Standby3 which is considered in 'sync' state.
+$node_standby_3->stop;
+
+# Check that standby1, which is stored earlier in the WalSnd array than
+# standby4, transitions from potential to sync.
+test_sync_state(
+ $node_master, qq(standby1|1|sync
+standby2|1|sync
+standby4|1|potential),
+ 'potential standby found earlier in array is promoted to sync');
+
+# Check that standby1 and standby2 are chosen as sync standbys
+# based on their priorities.
+test_sync_state(
+ $node_master, qq(standby1|1|sync
+standby2|2|sync
+standby4|0|async),
+ 'priority-based sync replication specified by FIRST keyword',
+ 'FIRST 2(standby1, standby2)');
+
+# Check that all the listed standbys are considered as candidates
+# for sync standbys in a quorum-based sync replication.
+test_sync_state(
+ $node_master, qq(standby1|1|quorum
+standby2|1|quorum
+standby4|0|async),
+ '2 quorum and 1 async',
+ 'ANY 2(standby1, standby2)');
+
+# Start Standby3 which will be considered in 'quorum' state.
+$node_standby_3->start;
+
+# Check that the setting of 'ANY 2(*)' chooses all standbys as
+# candidates for quorum sync standbys.
+test_sync_state(
+ $node_master, qq(standby1|1|quorum
+standby2|1|quorum
+standby3|1|quorum
+standby4|1|quorum),
+ 'all standbys are considered as candidates for quorum sync standbys',
+ 'ANY 2(*)');
diff --git a/src/test/recovery/t/008_fsm_truncation.pl b/src/test/recovery/t/008_fsm_truncation.pl
new file mode 100644
index 0000000..ddab464
--- /dev/null
+++ b/src/test/recovery/t/008_fsm_truncation.pl
@@ -0,0 +1,96 @@
+# Test WAL replay of FSM changes.
+#
+# FSM changes don't normally need to be WAL-logged, except for truncation.
+# The FSM mustn't return a page that doesn't exist (anymore).
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1);
+
+$node_master->append_conf(
+ 'postgresql.conf', qq{
+fsync = on
+wal_log_hints = on
+max_prepared_transactions = 5
+autovacuum = off
+});
+
+# Create a master node and its standby, initializing both with some data
+# at the same time.
+$node_master->start;
+
+$node_master->backup('master_backup');
+my $node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_master, 'master_backup',
+ has_streaming => 1);
+$node_standby->start;
+
+$node_master->psql(
+ 'postgres', qq{
+create table testtab (a int, b char(100));
+insert into testtab select generate_series(1,1000), 'foo';
+insert into testtab select generate_series(1,1000), 'foo';
+delete from testtab where ctid > '(8,0)';
+});
+
+# Take a lock on the table to prevent following vacuum from truncating it
+$node_master->psql(
+ 'postgres', qq{
+begin;
+lock table testtab in row share mode;
+prepare transaction 'p1';
+});
+
+# Vacuum, update FSM without truncation
+$node_master->psql('postgres', 'vacuum verbose testtab');
+
+# Force a checkpoint
+$node_master->psql('postgres', 'checkpoint');
+
+# Now do some more insert/deletes, another vacuum to ensure full-page writes
+# are done
+$node_master->psql(
+ 'postgres', qq{
+insert into testtab select generate_series(1,1000), 'foo';
+delete from testtab where ctid > '(8,0)';
+vacuum verbose testtab;
+});
+
+# Ensure all buffers are now clean on the standby
+$node_standby->psql('postgres', 'checkpoint');
+
+# Release the lock, vacuum again which should lead to truncation
+$node_master->psql(
+ 'postgres', qq{
+rollback prepared 'p1';
+vacuum verbose testtab;
+});
+
+$node_master->psql('postgres', 'checkpoint');
+my $until_lsn =
+ $node_master->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Wait long enough for standby to receive and apply all WAL
+my $caughtup_query =
+ "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+# Promote the standby
+$node_standby->promote;
+$node_standby->psql('postgres', 'checkpoint');
+
+# Restart to discard in-memory copy of FSM
+$node_standby->restart;
+
+# Insert should work on standby
+is( $node_standby->psql(
+ 'postgres',
+ qq{insert into testtab select generate_series(1,1000), 'foo';}),
+ 0,
+ 'INSERT succeeds with truncated relation FSM');
diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl
new file mode 100644
index 0000000..1b748ad
--- /dev/null
+++ b/src/test/recovery/t/009_twophase.pl
@@ -0,0 +1,477 @@
+# Tests dedicated to two-phase commit in recovery
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 24;
+
+my $psql_out = '';
+my $psql_rc = '';
+
+# Append $parameter to the postgresql.conf of $node, ask the server to
+# reload its configuration, and record one test checking that
+# pg_reload_conf() printed 't'.
+sub configure_and_reload
+{
+	my ($node, $parameter) = @_;
+
+	$node->append_conf(
+ 'postgresql.conf', qq(
+ $parameter
+ ));
+
+	# The query output is captured in the file-level $psql_out.
+	my $reload_sql = "SELECT pg_reload_conf()";
+	$node->psql('postgres', $reload_sql, stdout => \$psql_out);
+
+	my $node_name = $node->name;
+	is($psql_out, 't', "reload node $node_name with $parameter");
+	return;
+}
+
+# Set up two nodes, which will alternately be master and replication standby.
+
+# Setup london node
+my $node_london = get_new_node("london");
+$node_london->init(allows_streaming => 1);
+$node_london->append_conf(
+ 'postgresql.conf', qq(
+ max_prepared_transactions = 10
+ log_checkpoints = true
+));
+$node_london->start;
+$node_london->backup('london_backup');
+
+# Setup paris node
+my $node_paris = get_new_node('paris');
+$node_paris->init_from_backup($node_london, 'london_backup',
+ has_streaming => 1);
+$node_paris->start;
+
+# Switch to synchronous replication in both directions
+configure_and_reload($node_london, "synchronous_standby_names = 'paris'");
+configure_and_reload($node_paris, "synchronous_standby_names = 'london'");
+
+# Set up nonce names for current master and standby nodes
+note "Initially, london is master and paris is standby";
+my ($cur_master, $cur_standby) = ($node_london, $node_paris);
+my $cur_master_name = $cur_master->name;
+
+# Create table we'll use in the test transactions
+$cur_master->psql('postgres', "CREATE TABLE t_009_tbl (id int, msg text)");
+
+###############################################################################
+# Check that we can commit and abort transaction after soft restart.
+# Here checkpoint happens before shutdown and no WAL replay will occur at next
+# startup. In this case postgres re-creates shared-memory state from twophase
+# files.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (1, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (2, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_1';
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (3, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (4, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_2';");
+$cur_master->stop;
+$cur_master->start;
+
+$psql_rc = $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_1'");
+is($psql_rc, '0', 'Commit prepared transaction after restart');
+
+$psql_rc = $cur_master->psql('postgres', "ROLLBACK PREPARED 'xact_009_2'");
+is($psql_rc, '0', 'Rollback prepared transaction after restart');
+
+###############################################################################
+# Check that we can commit and abort after a hard restart.
+# At next startup, WAL replay will re-create shared memory state for prepared
+# transaction using dedicated WAL records.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ CHECKPOINT;
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (5, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (6, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_3';
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (7, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (8, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_4';");
+$cur_master->teardown_node;
+$cur_master->start;
+
+$psql_rc = $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_3'");
+is($psql_rc, '0', 'Commit prepared transaction after teardown');
+
+$psql_rc = $cur_master->psql('postgres', "ROLLBACK PREPARED 'xact_009_4'");
+is($psql_rc, '0', 'Rollback prepared transaction after teardown');
+
+###############################################################################
+# Check that WAL replay can handle several transactions with same GID name.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ CHECKPOINT;
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (9, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (10, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_5';
+ COMMIT PREPARED 'xact_009_5';
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (11, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (12, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_5';");
+$cur_master->teardown_node;
+$cur_master->start;
+
+$psql_rc = $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_5'");
+is($psql_rc, '0', 'Replay several transactions with same GID');
+
+###############################################################################
+# Check that WAL replay cleans up its shared memory state and releases locks
+# while replaying transaction commits.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (13, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (14, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_6';
+ COMMIT PREPARED 'xact_009_6';");
+$cur_master->teardown_node;
+$cur_master->start;
+$psql_rc = $cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (15, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (16, 'issued to ${cur_master_name}');
+ -- This prepare can fail due to conflicting GID or locks conflicts if
+ -- replay did not fully cleanup its state on previous commit.
+ PREPARE TRANSACTION 'xact_009_7';");
+is($psql_rc, '0', "Cleanup of shared memory state for 2PC commit");
+
+$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_7'");
+
+###############################################################################
+# Check that WAL replay will clean up its shared memory state on running standby.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (17, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (18, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_8';
+ COMMIT PREPARED 'xact_009_8';");
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0',
+ "Cleanup of shared memory state on running standby without checkpoint");
+
+###############################################################################
+# Same as in previous case, but let's force checkpoint on standby between
+# prepare and commit to use on-disk twophase files.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (19, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (20, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_9';");
+$cur_standby->psql('postgres', "CHECKPOINT");
+$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_9'");
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0',
+ "Cleanup of shared memory state on running standby after checkpoint");
+
+###############################################################################
+# Check that prepared transactions can be committed on promoted standby.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (21, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_10';");
+$cur_master->teardown_node;
+$cur_standby->promote;
+
+# change roles
+note "Now paris is master and london is standby";
+($cur_master, $cur_standby) = ($node_paris, $node_london);
+$cur_master_name = $cur_master->name;
+
+# because london is not running at this point, we can't use syncrep commit
+# on this command
+$psql_rc = $cur_master->psql('postgres',
+ "SET synchronous_commit = off; COMMIT PREPARED 'xact_009_10'");
+is($psql_rc, '0', "Restore of prepared transaction on promoted standby");
+
+# restart old master as new standby
+$cur_standby->enable_streaming($cur_master);
+$cur_standby->start;
+
+###############################################################################
+# Check that prepared transactions are replayed after soft restart of standby
+# while master is down. Since standby knows that master is down it uses a
+# different code path on startup to ensure that the status of transactions is
+# consistent.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (23, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (24, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_11';");
+$cur_master->stop;
+$cur_standby->restart;
+$cur_standby->promote;
+
+# change roles
+note "Now london is master and paris is standby";
+($cur_master, $cur_standby) = ($node_london, $node_paris);
+$cur_master_name = $cur_master->name;
+
+$cur_master->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '1',
+ "Restore prepared transactions from files with master down");
+
+# restart old master as new standby
+$cur_standby->enable_streaming($cur_master);
+$cur_standby->start;
+
+$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_11'");
+
+###############################################################################
+# Check that prepared transactions are correctly replayed after standby hard
+# restart while master is down.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ INSERT INTO t_009_tbl VALUES (25, 'issued to ${cur_master_name}');
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl VALUES (26, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_12';
+ ");
+$cur_master->stop;
+$cur_standby->teardown_node;
+$cur_standby->start;
+$cur_standby->promote;
+
+# change roles
+note "Now paris is master and london is standby";
+($cur_master, $cur_standby) = ($node_paris, $node_london);
+$cur_master_name = $cur_master->name;
+
+$cur_master->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '1',
+ "Restore prepared transactions from records with master down");
+
+# restart old master as new standby
+$cur_standby->enable_streaming($cur_master);
+$cur_standby->start;
+
+$cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_12'");
+
+###############################################################################
+# Check for a lock conflict between prepared transaction with DDL inside and
+# replay of XLOG_STANDBY_LOCK wal record.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE t_009_tbl2 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl2 VALUES (27, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_13';
+ -- checkpoint will issue XLOG_STANDBY_LOCK that can conflict with lock
+ -- held by 'create table' statement
+ CHECKPOINT;
+ COMMIT PREPARED 'xact_009_13';");
+
+# Ensure that last transaction is replayed on standby.
+my $cur_master_lsn =
+ $cur_master->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+my $caughtup_query =
+ "SELECT '$cur_master_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$cur_standby->poll_query_until('postgres', $caughtup_query)
+ or die "Timed out while waiting for standby to catch up";
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM t_009_tbl2",
+ stdout => \$psql_out);
+is($psql_out, '1', "Replay prepared transaction with DDL");
+
+###############################################################################
+# Check recovery of prepared transaction with DDL inside after a hard restart
+# of the master.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE t_009_tbl3 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl3 VALUES (28, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_14';
+ BEGIN;
+ CREATE TABLE t_009_tbl4 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl4 VALUES (29, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_15';");
+
+$cur_master->teardown_node;
+$cur_master->start;
+
+$psql_rc = $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_14'");
+is($psql_rc, '0', 'Commit prepared transaction after teardown');
+
+$psql_rc = $cur_master->psql('postgres', "ROLLBACK PREPARED 'xact_009_15'");
+is($psql_rc, '0', 'Rollback prepared transaction after teardown');
+
+###############################################################################
+# Check recovery of prepared transaction with DDL inside after a soft restart
+# of the master.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE t_009_tbl5 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl5 VALUES (30, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_16';
+ BEGIN;
+ CREATE TABLE t_009_tbl6 (id int, msg text);
+ SAVEPOINT s1;
+ INSERT INTO t_009_tbl6 VALUES (31, 'issued to ${cur_master_name}');
+ PREPARE TRANSACTION 'xact_009_17';");
+
+$cur_master->stop;
+$cur_master->start;
+
+$psql_rc = $cur_master->psql('postgres', "COMMIT PREPARED 'xact_009_16'");
+is($psql_rc, '0', 'Commit prepared transaction after restart');
+
+$psql_rc = $cur_master->psql('postgres', "ROLLBACK PREPARED 'xact_009_17'");
+is($psql_rc, '0', 'Rollback prepared transaction after restart');
+
+###############################################################################
+# Verify expected data appears on both servers.
+###############################################################################
+
+$cur_master->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0', "No uncommitted prepared transactions on master");
+
+$cur_master->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl ORDER BY id",
+ stdout => \$psql_out);
+is( $psql_out, qq{1|issued to london
+2|issued to london
+5|issued to london
+6|issued to london
+9|issued to london
+10|issued to london
+11|issued to london
+12|issued to london
+13|issued to london
+14|issued to london
+15|issued to london
+16|issued to london
+17|issued to london
+18|issued to london
+19|issued to london
+20|issued to london
+21|issued to london
+22|issued to london
+23|issued to paris
+24|issued to paris
+25|issued to london
+26|issued to london},
+ "Check expected t_009_tbl data on master");
+
+$cur_master->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl2",
+ stdout => \$psql_out);
+is( $psql_out,
+ qq{27|issued to paris},
+ "Check expected t_009_tbl2 data on master");
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT count(*) FROM pg_prepared_xacts",
+ stdout => \$psql_out);
+is($psql_out, '0', "No uncommitted prepared transactions on standby");
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl ORDER BY id",
+ stdout => \$psql_out);
+is( $psql_out, qq{1|issued to london
+2|issued to london
+5|issued to london
+6|issued to london
+9|issued to london
+10|issued to london
+11|issued to london
+12|issued to london
+13|issued to london
+14|issued to london
+15|issued to london
+16|issued to london
+17|issued to london
+18|issued to london
+19|issued to london
+20|issued to london
+21|issued to london
+22|issued to london
+23|issued to paris
+24|issued to paris
+25|issued to london
+26|issued to london},
+ "Check expected t_009_tbl data on standby");
+
+$cur_standby->psql(
+ 'postgres',
+ "SELECT * FROM t_009_tbl2",
+ stdout => \$psql_out);
+is( $psql_out,
+ qq{27|issued to paris},
+ "Check expected t_009_tbl2 data on standby");
diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl
new file mode 100644
index 0000000..1007666
--- /dev/null
+++ b/src/test/recovery/t/010_logical_decoding_timelines.pl
@@ -0,0 +1,196 @@
+# Demonstrate that logical decoding can follow timeline switches.
+#
+# Logical replication slots can follow timeline switches but it's
+# normally not possible to have a logical slot on a replica where
+# promotion and a timeline switch can occur. The only ways
+# we can create that circumstance are:
+#
+# * By doing a filesystem-level copy of the DB, since pg_basebackup
+# excludes pg_replslot but we can copy it directly; or
+#
+# * by creating a slot directly at the C level on the replica and
+# advancing it as we go using the low level APIs. It can't be done
+# from SQL since logical decoding isn't allowed on replicas.
+#
+# This module uses the first approach to show that timeline following
+# on a logical slot works.
+#
+# (For convenience, it also tests some recovery-related operations
+# on logical slots).
+#
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 13;
+use File::Copy;
+use IPC::Run ();
+use Scalar::Util qw(blessed);
+
+my ($stdout, $stderr, $ret);
+
+# Initialize master node
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1, has_archiving => 1);
+$node_master->append_conf(
+ 'postgresql.conf', q[
+wal_level = 'logical'
+max_replication_slots = 3
+max_wal_senders = 2
+log_min_messages = 'debug2'
+hot_standby_feedback = on
+wal_receiver_status_interval = 1
+]);
+$node_master->dump_info;
+$node_master->start;
+
+note "testing logical timeline following with a filesystem-level copy";
+
+$node_master->safe_psql('postgres',
+ "SELECT pg_create_logical_replication_slot('before_basebackup', 'test_decoding');"
+);
+$node_master->safe_psql('postgres', "CREATE TABLE decoding(blah text);");
+$node_master->safe_psql('postgres',
+ "INSERT INTO decoding(blah) VALUES ('beforebb');");
+
+# We also want to verify that DROP DATABASE on a standby with a logical
+# slot works. This isn't strictly related to timeline following, but
+# the only way to get a logical slot on a standby right now is to use
+# the same physical copy trick, so:
+$node_master->safe_psql('postgres', 'CREATE DATABASE dropme;');
+$node_master->safe_psql('dropme',
+ "SELECT pg_create_logical_replication_slot('dropme_slot', 'test_decoding');"
+);
+
+$node_master->safe_psql('postgres', 'CHECKPOINT;');
+
+my $backup_name = 'b1';
+$node_master->backup_fs_hot($backup_name);
+
+$node_master->safe_psql('postgres',
+ q[SELECT pg_create_physical_replication_slot('phys_slot');]);
+
+my $node_replica = get_new_node('replica');
+$node_replica->init_from_backup(
+ $node_master, $backup_name,
+ has_streaming => 1,
+ has_restoring => 1);
+$node_replica->append_conf('postgresql.conf',
+ q[primary_slot_name = 'phys_slot']);
+
+$node_replica->start;
+
+# If we drop 'dropme' on the master, the standby should drop the
+# db and associated slot; both checks below query the replica.
+is($node_master->psql('postgres', 'DROP DATABASE dropme'),
+    0, 'dropped DB with logical slot OK on master');
+$node_master->wait_for_catchup($node_replica, 'replay',
+    $node_master->lsn('insert'));
+is( $node_replica->safe_psql(
+        'postgres', q[SELECT 1 FROM pg_database WHERE datname = 'dropme']),
+    '', 'dropped DB dropme on standby');
+is( $node_replica->safe_psql('postgres',
+        q[SELECT 1 FROM pg_replication_slots WHERE slot_name = 'dropme_slot']),
+    '', 'logical slot was actually dropped on standby');
+
+# Back to testing failover...
+$node_master->safe_psql('postgres',
+ "SELECT pg_create_logical_replication_slot('after_basebackup', 'test_decoding');"
+);
+$node_master->safe_psql('postgres',
+ "INSERT INTO decoding(blah) VALUES ('afterbb');");
+$node_master->safe_psql('postgres', 'CHECKPOINT;');
+
+# Verify that only the before base_backup slot is on the replica
+$stdout = $node_replica->safe_psql('postgres',
+ 'SELECT slot_name FROM pg_replication_slots ORDER BY slot_name');
+is($stdout, 'before_basebackup',
+ 'Expected to find only slot before_basebackup on replica');
+
+# Examine the physical slot the replica uses to stream changes
+# from the master to make sure its hot_standby_feedback
+# has locked in a catalog_xmin on the physical slot, and that
+# any xmin is >= the catalog_xmin
+$node_master->poll_query_until(
+ 'postgres', q[
+ SELECT catalog_xmin IS NOT NULL
+ FROM pg_replication_slots
+ WHERE slot_name = 'phys_slot'
+ ]) or die "slot's catalog_xmin never became set";
+
+my $phys_slot = $node_master->slot('phys_slot');
+isnt($phys_slot->{'xmin'}, '', 'xmin assigned on physical slot of master');
+isnt($phys_slot->{'catalog_xmin'},
+ '', 'catalog_xmin assigned on physical slot of master');
+
+# Ignore wrap-around here, we're on a new cluster:
+cmp_ok(
+ $phys_slot->{'xmin'}, '>=',
+ $phys_slot->{'catalog_xmin'},
+ 'xmin on physical slot must not be lower than catalog_xmin');
+
+$node_master->safe_psql('postgres', 'CHECKPOINT');
+$node_master->wait_for_catchup($node_replica, 'write');
+
+# Boom, crash
+$node_master->stop('immediate');
+
+$node_replica->promote;
+
+$node_replica->safe_psql('postgres',
+ "INSERT INTO decoding(blah) VALUES ('after failover');");
+
+# Shouldn't be able to read from slot created after base backup
+($ret, $stdout, $stderr) = $node_replica->psql('postgres',
+ "SELECT data FROM pg_logical_slot_peek_changes('after_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');"
+);
+is($ret, 3, 'replaying from after_basebackup slot fails');
+like(
+ $stderr,
+ qr/replication slot "after_basebackup" does not exist/,
+ 'after_basebackup slot missing');
+
+# Should be able to read from slot created before base backup
+($ret, $stdout, $stderr) = $node_replica->psql(
+ 'postgres',
+ "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');",
+ timeout => 180);
+is($ret, 0, 'replay from slot before_basebackup succeeds');
+
+my $final_expected_output_bb = q(BEGIN
+table public.decoding: INSERT: blah[text]:'beforebb'
+COMMIT
+BEGIN
+table public.decoding: INSERT: blah[text]:'afterbb'
+COMMIT
+BEGIN
+table public.decoding: INSERT: blah[text]:'after failover'
+COMMIT);
+is($stdout, $final_expected_output_bb,
+ 'decoded expected data from slot before_basebackup');
+is($stderr, '', 'replay from slot before_basebackup produces no stderr');
+
+# So far we've peeked the slots, so when we fetch the same info over
+# pg_recvlogical we should get complete results. First, find out the commit lsn
+# of the last transaction. There's no max(pg_lsn), so:
+
+my $endpos = $node_replica->safe_psql('postgres',
+ "SELECT lsn FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL) ORDER BY lsn DESC LIMIT 1;"
+);
+
+# now use the walsender protocol to peek the slot changes and make sure we see
+# the same results.
+
+$stdout = $node_replica->pg_recvlogical_upto(
+ 'postgres', 'before_basebackup',
+ $endpos, 180,
+ 'include-xids' => '0',
+ 'skip-empty-xacts' => '1');
+
+# walsender likes to add a newline
+chomp($stdout);
+is($stdout, $final_expected_output_bb,
+ 'got same output from walsender via pg_recvlogical on before_basebackup');
+
+$node_replica->teardown_node();
diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl
new file mode 100644
index 0000000..ca6e92b
--- /dev/null
+++ b/src/test/recovery/t/011_crash_recovery.pl
@@ -0,0 +1,68 @@
+#
+# Tests relating to PostgreSQL crash recovery and redo
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+use Config;
+if ($Config{osname} eq 'MSWin32')
+{
+
+ # some Windows Perls at least don't like IPC::Run's start/kill_kill regime.
+ plan skip_all => "Test fails on Windows perl";
+}
+else
+{
+ plan tests => 3;
+}
+
+my $node = get_new_node('master');
+$node->init(allows_streaming => 1);
+$node->start;
+
+my ($stdin, $stdout, $stderr) = ('', '', '');
+
+# Ensure that pg_xact_status reports 'aborted' for xacts
+# that were in-progress during crash. To do that, we need
+# an xact to be in-progress when we crash and we need to know
+# its xid.
+my $tx = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$stdin,
+ '>',
+ \$stdout,
+ '2>',
+ \$stderr);
+$stdin .= q[
+BEGIN;
+CREATE TABLE mine(x integer);
+SELECT pg_current_xact_id();
+];
+$tx->pump until $stdout =~ /[[:digit:]]+[\r\n]$/;
+
+# Status should be in-progress
+my $xid = $stdout;
+chomp($xid);
+
+is($node->safe_psql('postgres', qq[SELECT pg_xact_status('$xid');]),
+ 'in progress', 'own xid is in-progress');
+
+# Crash and restart the postmaster
+$node->stop('immediate');
+$node->start;
+
+# Make sure we really got a new xid
+cmp_ok($node->safe_psql('postgres', 'SELECT pg_current_xact_id()'),
+ '>', $xid, 'new xid after restart is greater');
+
+# and make sure we show the in-progress xact as aborted
+is($node->safe_psql('postgres', qq[SELECT pg_xact_status('$xid');]),
+ 'aborted', 'xid is aborted after crash');
+
+$tx->kill_kill;
diff --git a/src/test/recovery/t/012_subtransactions.pl b/src/test/recovery/t/012_subtransactions.pl
new file mode 100644
index 0000000..292cd40
--- /dev/null
+++ b/src/test/recovery/t/012_subtransactions.pl
@@ -0,0 +1,216 @@
+# Tests dedicated to subtransactions in recovery
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 12;
+
+# Setup master node
+my $node_master = get_new_node("master");
+$node_master->init(allows_streaming => 1);
+$node_master->append_conf(
+ 'postgresql.conf', qq(
+ max_prepared_transactions = 10
+ log_checkpoints = true
+));
+$node_master->start;
+$node_master->backup('master_backup');
+$node_master->psql('postgres', "CREATE TABLE t_012_tbl (id int)");
+
+# Setup standby node
+my $node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_master, 'master_backup',
+ has_streaming => 1);
+$node_standby->start;
+
+# Switch to synchronous replication
+$node_master->append_conf(
+ 'postgresql.conf', qq(
+ synchronous_standby_names = '*'
+));
+$node_master->psql('postgres', "SELECT pg_reload_conf()");
+
+my $psql_out = '';
+my $psql_rc = '';
+
+###############################################################################
+# Check that replay will correctly set SUBTRANS and properly advance nextXid
+# so that it won't conflict with savepoint xids.
+###############################################################################
+
+$node_master->psql(
+ 'postgres', "
+ BEGIN;
+ DELETE FROM t_012_tbl;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s1;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s2;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s3;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s4;
+ INSERT INTO t_012_tbl VALUES (43);
+ SAVEPOINT s5;
+ INSERT INTO t_012_tbl VALUES (43);
+ PREPARE TRANSACTION 'xact_012_1';
+ CHECKPOINT;");
+
+$node_master->stop;
+$node_master->start;
+$node_master->psql(
+ 'postgres', "
+ -- here we can get xid of previous savepoint if nextXid
+ -- wasn't properly advanced
+ BEGIN;
+ INSERT INTO t_012_tbl VALUES (142);
+ ROLLBACK;
+ COMMIT PREPARED 'xact_012_1';");
+
+$node_master->psql(
+ 'postgres',
+ "SELECT count(*) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '6', "Check nextXid handling for prepared subtransactions");
+
+###############################################################################
+# Check that replay will correctly set 2PC with more than
+# PGPROC_MAX_CACHED_SUBXIDS subtransactions and also show data properly
+# on promotion
+###############################################################################
+$node_master->psql('postgres', "DELETE FROM t_012_tbl");
+
+# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql
+$node_master->psql(
+ 'postgres', "
+ CREATE OR REPLACE FUNCTION hs_subxids (n integer)
+ RETURNS void
+ LANGUAGE plpgsql
+ AS \$\$
+ BEGIN
+ IF n <= 0 THEN RETURN; END IF;
+ INSERT INTO t_012_tbl VALUES (n);
+ PERFORM hs_subxids(n - 1);
+ RETURN;
+ EXCEPTION WHEN raise_exception THEN NULL; END;
+ \$\$;");
+$node_master->psql(
+ 'postgres', "
+ BEGIN;
+ SELECT hs_subxids(127);
+ COMMIT;");
+$node_master->wait_for_catchup($node_standby, 'replay',
+ $node_master->lsn('insert'));
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+$node_master->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+# restore state
+($node_master, $node_standby) = ($node_standby, $node_master);
+$node_standby->enable_streaming($node_master);
+$node_standby->start;
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+$node_master->psql('postgres', "DELETE FROM t_012_tbl");
+
+# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql
+$node_master->psql(
+ 'postgres', "
+ CREATE OR REPLACE FUNCTION hs_subxids (n integer)
+ RETURNS void
+ LANGUAGE plpgsql
+ AS \$\$
+ BEGIN
+ IF n <= 0 THEN RETURN; END IF;
+ INSERT INTO t_012_tbl VALUES (n);
+ PERFORM hs_subxids(n - 1);
+ RETURN;
+ EXCEPTION WHEN raise_exception THEN NULL; END;
+ \$\$;");
+$node_master->psql(
+ 'postgres', "
+ BEGIN;
+ SELECT hs_subxids(127);
+ PREPARE TRANSACTION 'xact_012_1';");
+$node_master->wait_for_catchup($node_standby, 'replay',
+ $node_master->lsn('insert'));
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+$node_master->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+
+# restore state
+($node_master, $node_standby) = ($node_standby, $node_master);
+$node_standby->enable_streaming($node_master);
+$node_standby->start;
+$psql_rc = $node_master->psql('postgres', "COMMIT PREPARED 'xact_012_1'");
+is($psql_rc, '0',
+ "Restore of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby"
+);
+
+$node_master->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+$node_master->psql('postgres', "DELETE FROM t_012_tbl");
+$node_master->psql(
+ 'postgres', "
+ BEGIN;
+ SELECT hs_subxids(201);
+ PREPARE TRANSACTION 'xact_012_1';");
+$node_master->wait_for_catchup($node_standby, 'replay',
+ $node_master->lsn('insert'));
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+$node_master->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+
+# restore state
+($node_master, $node_standby) = ($node_standby, $node_master);
+$node_standby->enable_streaming($node_master);
+$node_standby->start;
+$psql_rc = $node_master->psql('postgres', "ROLLBACK PREPARED 'xact_012_1'");
+is($psql_rc, '0',
+ "Rollback of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby"
+);
+
+$node_master->psql(
+ 'postgres',
+ "SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+ stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl
new file mode 100644
index 0000000..3e3d50b
--- /dev/null
+++ b/src/test/recovery/t/013_crash_restart.pl
@@ -0,0 +1,272 @@
+#
+# Tests restarts of postgres due to crashes of a subprocess.
+#
+# Two longer-running psql subprocesses are used: One to kill a
+# backend, triggering a crash-restart cycle, one to detect when
+# postmaster noticed the backend died. The second backend is
+# necessary because it's otherwise hard to determine if postmaster is
+# still accepting new sessions (because it hasn't noticed that the
+# backend died), or because it's already restarted.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+use Config;
+use Time::HiRes qw(usleep);
+
+plan tests => 18;
+
+
+# To avoid hanging while expecting some specific input from a psql
+# instance being driven by us, add a timeout high enough that it
+# should never trigger even on very slow machines, unless something
+# is really wrong.
+my $psql_timeout = IPC::Run::timer(60);
+
+my $node = get_new_node('master');
+$node->init(allows_streaming => 1);
+$node->start();
+
+# by default PostgresNode doesn't restart after a crash
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET restart_after_crash = 1;
+ ALTER SYSTEM SET log_connections = 1;
+ SELECT pg_reload_conf();]);
+
+# Run psql, keeping session alive, so we have an alive backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$killme_stdin,
+ '>',
+ \$killme_stdout,
+ '2>',
+ \$killme_stderr,
+ $psql_timeout);
+
+# Need a second psql to check if crash-restart happened.
+my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+my $monitor = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$monitor_stdin,
+ '>',
+ \$monitor_stdout,
+ '2>',
+ \$monitor_stderr,
+ $psql_timeout);
+
+#create table, insert row that should survive
+$killme_stdin .= q[
+CREATE TABLE alive(status text);
+INSERT INTO alive VALUES($$committed-before-sigquit$$);
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGQUIT');
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+#insert a row that should *not* survive, due to in-progress xact
+$killme_stdin .= q[
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status;
+];
+ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigquit/m),
+ 'inserted in-progress-before-sigquit');
+$killme_stdout = '';
+$killme_stderr = '';
+
+
+# Start longrunning query in second session; its failure will signal that
+# crash-restart has occurred. The initial wait for the trivial select is to
+# be sure that psql successfully connected to backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok(pump_until($monitor, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+# kill once with QUIT - we expect psql to exit, while emitting error message first
+my $ret = TestLib::system_log('pg_ctl', 'kill', 'QUIT', $pid);
+
+# Exactly one process should have been alive to be killed
+is($ret, 0, "killed process with SIGQUIT");
+
+# Check that psql sees the killed backend as having been terminated
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ \$killme_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql query died successfully after SIGQUIT");
+$killme_stderr = '';
+$killme_stdout = '';
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
+ok( pump_until(
+ $monitor,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql monitor died successfully after SIGQUIT");
+$monitor->finish;
+
+# Wait till server restarts
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGQUIT");
+
+
+# restart psql processes, now that the crash cycle finished
+($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+$killme->run();
+($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+$monitor->run();
+
+
+# Acquire pid of new backend
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ "acquired pid for SIGKILL");
+$pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Insert test rows
+$killme_stdin .= q[
+INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status;
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status;
+];
+ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'inserted in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Re-start longrunning query in second session; its failure will signal that
+# crash-restart has occurred. The initial wait for the trivial select is to
+# be sure that psql successfully connected to backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok(pump_until($monitor, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+
+# kill with SIGKILL this time - we expect the backend to exit, without
+# being able to emit an error message
+$ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, "killed process with KILL");
+
+# Check that psql sees the server as being terminated. No WARNING,
+# because signal handlers aren't being run on SIGKILL.
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ \$killme_stderr,
+ qr/server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql query died successfully after SIGKILL");
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
+ok( pump_until(
+ $monitor,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql monitor died successfully after SIGKILL");
+$monitor->finish;
+
+# Wait till server restarts
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGKILL");
+
+# Make sure the committed rows survived, in-progress ones not
+is( $node->safe_psql('postgres', 'SELECT * FROM alive'),
+ "committed-before-sigquit\ncommitted-before-sigkill",
+ 'data survived');
+
+is( $node->safe_psql(
+ 'postgres',
+ 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status'
+ ),
+ 'before-orderly-restart',
+ 'can still write after crash restart');
+
+# Just to be sure, check that an orderly restart now still works
+$node->restart();
+
+is( $node->safe_psql('postgres', 'SELECT * FROM alive'),
+ "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart",
+ 'data survived');
+
+is( $node->safe_psql(
+ 'postgres',
+ 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status'
+ ),
+ 'after-orderly-restart',
+ 'can still write after orderly restart');
+
+$node->stop();
+
+# Pump $proc until the text behind $stream matches $untl; returns 1 on a match.
+# Returns 0, after reporting the stream and pattern through diag(), when
+# $psql_timeout expires or $proc stops being pumpable before a match.
+sub pump_until
+{
+    my ($proc, $stream, $untl) = @_;
+    $proc->pump_nb();
+    until ($$stream =~ /$untl/)
+    {
+        my $failure;
+        if ($psql_timeout->is_expired)
+        {
+            $failure = "aborting wait: program timed out";
+        }
+        elsif (not $proc->pumpable())
+        {
+            $failure = "aborting wait: program died";
+        }
+        if (defined $failure)
+        {
+            diag($failure);
+            diag("stream contents: >>", $$stream, "<<");
+            diag("pattern searched for: ", $untl);
+            return 0;
+        }
+        $proc->pump();
+    }
+    return 1;
+}
diff --git a/src/test/recovery/t/014_unlogged_reinit.pl b/src/test/recovery/t/014_unlogged_reinit.pl
new file mode 100644
index 0000000..ee05e1a
--- /dev/null
+++ b/src/test/recovery/t/014_unlogged_reinit.pl
@@ -0,0 +1,81 @@
+# Tests that unlogged tables are properly reinitialized after a crash.
+#
+# The behavior should be the same when restoring from a backup, but
+# that is not tested here.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 12;
+
+my $node = get_new_node('main');
+
+$node->init;
+$node->start;
+my $pgdata = $node->data_dir;
+
+# Create an unlogged table to test that forks other than init are not
+# copied.
+$node->safe_psql('postgres', 'CREATE UNLOGGED TABLE base_unlogged (id int)');
+
+my $baseUnloggedPath = $node->safe_psql('postgres',
+ q{select pg_relation_filepath('base_unlogged')});
+
+# Test that main and init forks exist.
+ok(-f "$pgdata/${baseUnloggedPath}_init", 'init fork in base exists');
+ok(-f "$pgdata/$baseUnloggedPath", 'main fork in base exists');
+
+# Create an unlogged table in a tablespace.
+
+my $tablespaceDir = TestLib::tempdir;
+
+my $realTSDir = TestLib::perl2host($tablespaceDir);
+
+$node->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$realTSDir'");
+$node->safe_psql('postgres',
+ 'CREATE UNLOGGED TABLE ts1_unlogged (id int) TABLESPACE ts1');
+
+my $ts1UnloggedPath = $node->safe_psql('postgres',
+ q{select pg_relation_filepath('ts1_unlogged')});
+
+# Test that main and init forks exist.
+ok(-f "$pgdata/${ts1UnloggedPath}_init", 'init fork in tablespace exists');
+ok(-f "$pgdata/$ts1UnloggedPath", 'main fork in tablespace exists');
+
+# Crash the postmaster.
+$node->stop('immediate');
+
+# Write fake forks to test that they are removed during recovery.
+append_to_file("$pgdata/${baseUnloggedPath}_vm", 'TEST_VM');
+append_to_file("$pgdata/${baseUnloggedPath}_fsm", 'TEST_FSM');
+
+# Remove main fork to test that it is recopied from init.
+unlink("$pgdata/${baseUnloggedPath}")
+ or BAIL_OUT("could not remove \"${baseUnloggedPath}\": $!");
+
+# the same for the tablespace
+append_to_file("$pgdata/${ts1UnloggedPath}_vm", 'TEST_VM');
+append_to_file("$pgdata/${ts1UnloggedPath}_fsm", 'TEST_FSM');
+unlink("$pgdata/${ts1UnloggedPath}")
+ or BAIL_OUT("could not remove \"${ts1UnloggedPath}\": $!");
+
+$node->start;
+
+# check unlogged table in base
+ok(-f "$pgdata/${baseUnloggedPath}_init", 'init fork in base still exists');
+ok(-f "$pgdata/$baseUnloggedPath", 'main fork in base recreated at startup');
+ok(!-f "$pgdata/${baseUnloggedPath}_vm",
+ 'vm fork in base removed at startup');
+ok( !-f "$pgdata/${baseUnloggedPath}_fsm",
+ 'fsm fork in base removed at startup');
+
+# check unlogged table in tablespace
+ok( -f "$pgdata/${ts1UnloggedPath}_init",
+ 'init fork still exists in tablespace');
+ok(-f "$pgdata/$ts1UnloggedPath",
+ 'main fork in tablespace recreated at startup');
+ok( !-f "$pgdata/${ts1UnloggedPath}_vm",
+ 'vm fork in tablespace removed at startup');
+ok( !-f "$pgdata/${ts1UnloggedPath}_fsm",
+ 'fsm fork in tablespace removed at startup');
diff --git a/src/test/recovery/t/015_promotion_pages.pl b/src/test/recovery/t/015_promotion_pages.pl
new file mode 100644
index 0000000..6fb70b5
--- /dev/null
+++ b/src/test/recovery/t/015_promotion_pages.pl
@@ -0,0 +1,85 @@
+# Test for promotion handling with WAL records generated post-promotion
+# before the first checkpoint is generated. This test case checks for
+# invalid page references at replay based on the minimum consistent
+# recovery point defined.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+# Initialize primary node
+my $alpha = get_new_node('alpha');
+$alpha->init(allows_streaming => 1);
+# Setting wal_log_hints to off is important to get invalid page
+# references.
+$alpha->append_conf("postgresql.conf", <<EOF);
+wal_log_hints = off
+EOF
+
+# Start the primary
+$alpha->start;
+
+# Set up and start a standby from a backup of the primary
+$alpha->backup('bkp');
+my $bravo = get_new_node('bravo');
+$bravo->init_from_backup($alpha, 'bkp', has_streaming => 1);
+$bravo->append_conf('postgresql.conf', <<EOF);
+checkpoint_timeout=1h
+checkpoint_completion_target=0.9
+EOF
+$bravo->start;
+
+# Dummy table for the upcoming tests.
+$alpha->safe_psql('postgres', 'create table test1 (a int)');
+$alpha->safe_psql('postgres',
+ 'insert into test1 select generate_series(1, 10000)');
+
+# Take a checkpoint on the primary
+$alpha->safe_psql('postgres', 'checkpoint');
+
+# The following vacuum will set visibility map bits and create
+# problematic WAL records.
+$alpha->safe_psql('postgres', 'vacuum verbose test1');
+# Wait for last record to have been replayed on the standby.
+$alpha->wait_for_catchup($bravo, 'replay', $alpha->lsn('insert'));
+
+# Now force a checkpoint on the standby. This seems unnecessary, but for "some"
+# reason the previous checkpoint on the primary is not reflected on the standby,
+# and without an explicit checkpoint it may start redo recovery from a much
+# older point, which includes even create table and initial page additions.
+$bravo->safe_psql('postgres', 'checkpoint');
+
+# Now just use a dummy table and run some operations to move minRecoveryPoint
+# beyond the previous vacuum.
+$alpha->safe_psql('postgres', 'create table test2 (a int, b text)');
+$alpha->safe_psql('postgres',
+ 'insert into test2 select generate_series(1,10000), md5(random()::text)');
+$alpha->safe_psql('postgres', 'truncate test2');
+
+# Wait again for all records to be replayed.
+$alpha->wait_for_catchup($bravo, 'replay', $alpha->lsn('insert'));
+
+# Do the promotion, which reinitializes minRecoveryPoint in the control
+# file so that WAL is replayed up to the end.
+$bravo->promote;
+
+# Truncate the table on the promoted standby, vacuum and extend it
+# again to create new page references. The first post-recovery checkpoint
+# has not happened yet.
+$bravo->safe_psql('postgres', 'truncate test1');
+$bravo->safe_psql('postgres', 'vacuum verbose test1');
+$bravo->safe_psql('postgres',
+ 'insert into test1 select generate_series(1,1000)');
+
+# Now crash-stop the promoted standby and restart. This makes sure that
+# replay does not see invalid page references because of an invalid
+# minimum consistent recovery point.
+$bravo->stop('immediate');
+$bravo->start;
+
+# Check state of the table after full crash recovery. All its data should
+# be here.
+my $psql_out;
+$bravo->psql('postgres', "SELECT count(*) FROM test1", stdout => \$psql_out);
+is($psql_out, '1000', "Check that table state is correct");
diff --git a/src/test/recovery/t/016_min_consistency.pl b/src/test/recovery/t/016_min_consistency.pl
new file mode 100644
index 0000000..707538b
--- /dev/null
+++ b/src/test/recovery/t/016_min_consistency.pl
@@ -0,0 +1,138 @@
+# Test for checking consistency of on-disk pages for a cluster with
+# the minimum recovery LSN, ensuring that the updates happen across
+# all processes. In this test, the updates from the startup process
+# and the checkpointer (which triggers non-startup code paths) are
+# both checked.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+# Scan a relation file one page at a time and report the highest page
+# LSN found, formatted as "hi/lo" in hexadecimal.  This is used for
+# offline checks of page consistency.  Each page starts with its LSN,
+# stored as two consecutive 4-byte numbers.
+sub find_largest_lsn
+{
+	my ($blocksize, $filename) = @_;
+	$blocksize = int($blocksize);
+
+	# Highest (high word, low word) pair seen so far.
+	my @largest = (0, 0);
+
+	open(my $fh, "<:raw", $filename)
+	  or die "failed to open $filename: $!";
+
+	while (1)
+	{
+		my $page;
+		my $nread = read($fh, $page, $blocksize);
+
+		# undef means a read error, zero means end of file; anything
+		# else must be exactly one full page.
+		defined($nread) or die "read error on $filename: $!";
+		last if !$nread;
+		die "read only $nread of $blocksize bytes from $filename"
+		  if $nread != $blocksize;
+
+		my ($hi, $lo) = unpack("LL", $page);
+
+		# Skip pages whose LSN does not exceed the current maximum.
+		next if $hi < $largest[0];
+		next if $hi == $largest[0] && $lo <= $largest[1];
+
+		@largest = ($hi, $lo);
+	}
+	close($fh);
+
+	return sprintf("%X/%X", $largest[0], $largest[1]);
+}
+
+# Initialize primary node
+my $primary = get_new_node('primary');
+$primary->init(allows_streaming => 1);
+
+# Set shared_buffers to a very low value to force PostgreSQL buffers to
+# be discarded and flushed on the standby, so that processes other than
+# the startup process update the minimum recovery LSN in the control
+# file. Autovacuum is disabled so that there is no risk of processes
+# other than the checkpointer doing page flushes.
+$primary->append_conf("postgresql.conf", <<EOF);
+shared_buffers = 128kB
+autovacuum = off
+EOF
+
+# Start the primary
+$primary->start;
+
+# setup/start a standby
+$primary->backup('bkp');
+my $standby = get_new_node('standby');
+$standby->init_from_backup($primary, 'bkp', has_streaming => 1);
+$standby->start;
+
+# Create base table whose data consistency is checked.
+$primary->safe_psql(
+ 'postgres', "
+CREATE TABLE test1 (a int) WITH (fillfactor = 10);
+INSERT INTO test1 SELECT generate_series(1, 10000);");
+
+# Take a checkpoint and enforce post-checkpoint full page writes
+# which makes the startup process replay those pages, updating
+# minRecoveryPoint.
+$primary->safe_psql('postgres', 'CHECKPOINT;');
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Wait for last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby, 'replay', $primary->lsn('insert'));
+
+# Fill in the standby's shared buffers with the data filled in
+# previously.
+$standby->safe_psql('postgres', 'SELECT count(*) FROM test1;');
+
+# Update the table again, this does not generate full page writes so
+# the standby will replay records associated with it, but the startup
+# process will not flush those pages.
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Extract from the relation the last block created and its relation
+# file, this will be used at the end of the test for sanity checks.
+my $blocksize = $primary->safe_psql('postgres',
+ "SELECT setting::int FROM pg_settings WHERE name = 'block_size';");
+my $last_block = $primary->safe_psql('postgres',
+ "SELECT pg_relation_size('test1')::int / $blocksize - 1;");
+my $relfilenode = $primary->safe_psql('postgres',
+ "SELECT pg_relation_filepath('test1'::regclass);");
+
+# Wait for last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby, 'replay', $primary->lsn('insert'));
+
+# Issue a restart point on the standby now, which makes the checkpointer
+# update minRecoveryPoint.
+$standby->safe_psql('postgres', 'CHECKPOINT;');
+
+# Now shut down the primary violently so that the standby does not
+# receive the shutdown checkpoint, making sure that the startup
+# process does not flush any pages on its side. The standby is
+# cleanly stopped, which makes the checkpointer update minRecoveryPoint
+# with the restart point created at shutdown.
+$primary->stop('immediate');
+$standby->stop('fast');
+
+# Check the data consistency of the instance while offline.  This is
+# done by scanning the on-disk relation blocks directly and comparing
+# the result with what pg_controldata reports.
+my $standby_data = $standby->data_dir;
+my $offline_max_lsn =
+  find_largest_lsn($blocksize, "$standby_data/$relfilenode");
+
+# Fetch minRecoveryPoint from the control file itself.  Only the LSN is
+# captured, so stray trailing characters (such as a carriage return)
+# cannot end up in the value.
+my ($stdout, $stderr) = run_command([ 'pg_controldata', $standby_data ]);
+my $offline_recovery_lsn = undef;
+foreach my $line (split("\n", $stdout))
+{
+	if ($line =~
+		m{^Minimum recovery ending location:\s*([0-9A-Fa-f]+/[0-9A-Fa-f]+)})
+	{
+		$offline_recovery_lsn = $1;
+		last;
+	}
+}
+die "No minRecoveryPoint in control file found\n"
+  unless defined($offline_recovery_lsn);
+
+# minRecoveryPoint should never be older than the maximum LSN for all
+# the pages on disk.  Both LSNs are written as "hi/lo" in hexadecimal
+# without zero padding, so comparing them as strings gives wrong answers
+# as soon as the halves differ in length (for instance "0/A000000" sorts
+# after "0/10000000").  Compare the two halves numerically instead.
+my ($recovery_hi, $recovery_lo) =
+  map { hex($_) } split(m{/}, $offline_recovery_lsn);
+my ($page_hi, $page_lo) = map { hex($_) } split(m{/}, $offline_max_lsn);
+ok( $recovery_hi > $page_hi
+	  || ($recovery_hi == $page_hi && $recovery_lo >= $page_lo),
+	"Check offline that table data is consistent with minRecoveryPoint");
diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl
new file mode 100644
index 0000000..dc0dcd3
--- /dev/null
+++ b/src/test/recovery/t/017_shm.pl
@@ -0,0 +1,214 @@
+#
+# Tests of pg_shmem.h functions
+#
+use strict;
+use warnings;
+use Config;
+use File::stat qw(stat);
+use IPC::Run 'run';
+use PostgresNode;
+use Test::More;
+use TestLib;
+use Time::HiRes qw(usleep);
+
+# If we don't have shmem support, skip the whole thing
+eval {
+ require IPC::SharedMem;
+ IPC::SharedMem->import;
+ require IPC::SysV;
+ IPC::SysV->import(qw(IPC_CREAT IPC_EXCL S_IRUSR S_IWUSR));
+};
+if ($@ || $windows_os)
+{
+ plan skip_all => 'SysV shared memory not supported by this platform';
+}
+else
+{
+ plan tests => 4;
+}
+
+my $tempdir = TestLib::tempdir;
+
+# Log "ipcs" diffs on a best-effort basis, swallowing any error.
+my $ipcs_before = "$tempdir/ipcs_before";
+eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; };
+
+# Log how the current "ipcs -am" output differs from the snapshot saved
+# in $ipcs_before.  Best effort only: any error is swallowed.
+sub log_ipcs
+{
+	my @ipcs_cmd = ('ipcs', '-am');
+	my @diff_cmd = ('diff', $ipcs_before, '-');
+
+	eval { run_log(\@ipcs_cmd, '|', \@diff_cmd); };
+	return;
+}
+
+# Node setup.
+my $gnat = PostgresNode->get_new_node('gnat');
+$gnat->init;
+
+# Create a shmem segment that will conflict with gnat's first choice
+# of shmem key. (If we fail to create it because something else is
+# already using that key, that's perfectly fine, though the test will
+# exercise a different scenario than it usually does.)
+my $gnat_dir_stat = stat($gnat->data_dir);
+defined($gnat_dir_stat) or die('unable to stat ' . $gnat->data_dir);
+my $gnat_inode = $gnat_dir_stat->ino;
+note "gnat's datadir inode = $gnat_inode";
+
+# Note: must reference IPC::SysV's constants as functions, or this file
+# fails to compile when that module is not available.
+my $gnat_conflict_shm =
+ IPC::SharedMem->new($gnat_inode, 1024,
+ IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR());
+note "could not create conflicting shmem" if !defined($gnat_conflict_shm);
+log_ipcs();
+
+$gnat->start;
+log_ipcs();
+
+$gnat->restart; # should keep same shmem key
+log_ipcs();
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+poll_start($gnat); # gnat recycles its former shm key.
+log_ipcs();
+
+note "removing the conflicting shmem ...";
+$gnat_conflict_shm->remove if $gnat_conflict_shm;
+log_ipcs();
+
+# Upon postmaster death, postmaster children exit automatically.
+$gnat->kill9;
+log_ipcs();
+
+# In this start, gnat will use its normal shmem key, and fail to remove
+# the higher-keyed segment that the previous postmaster was using.
+# That's not great, but key collisions should be rare enough to not
+# make this a big problem.
+poll_start($gnat);
+log_ipcs();
+$gnat->stop;
+log_ipcs();
+
+# Re-create the conflicting segment, and start/stop normally, just so
+# this test script doesn't leak the higher-keyed segment.
+note "re-creating conflicting shmem ...";
+$gnat_conflict_shm =
+ IPC::SharedMem->new($gnat_inode, 1024,
+ IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR());
+note "could not create conflicting shmem" if !defined($gnat_conflict_shm);
+log_ipcs();
+
+$gnat->start;
+log_ipcs();
+$gnat->stop;
+log_ipcs();
+
+note "removing the conflicting shmem ...";
+$gnat_conflict_shm->remove if $gnat_conflict_shm;
+log_ipcs();
+
+# Scenarios involving no postmaster.pid, dead postmaster, and a live backend.
+# Use a regress.c function to emulate the responsiveness of a backend working
+# through a CPU-intensive task.
+$gnat->start;
+log_ipcs();
+
+my $regress_shlib = TestLib::perl2host($ENV{REGRESS_SHLIB});
+$gnat->safe_psql('postgres', <<EOSQL);
+CREATE FUNCTION wait_pid(int)
+ RETURNS void
+ AS '$regress_shlib'
+ LANGUAGE C STRICT;
+EOSQL
+my $slow_query = 'SELECT wait_pid(pg_backend_pid())';
+my ($stdout, $stderr);
+my $slow_client = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'),
+ '-c', $slow_query
+ ],
+ '<',
+ \undef,
+ '>',
+ \$stdout,
+ '2>',
+ \$stderr,
+ IPC::Run::timeout(900)); # five times the poll_query_until timeout
+ok( $gnat->poll_query_until(
+ 'postgres',
+ "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'),
+ 'slow query started');
+my $slow_pid = $gnat->safe_psql('postgres',
+ "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'");
+$gnat->kill9;
+unlink($gnat->data_dir . '/postmaster.pid');
+$gnat->rotate_logfile; # on Windows, can't open old log for writing
+log_ipcs();
+# Reject ordinary startup. Retry for the same reasons poll_start() does.
+my $pre_existing_msg = qr/pre-existing shared memory block/;
+{
+ my $max_attempts = 180 * 10; # Retry every 0.1s for at least 180s.
+ my $attempts = 0;
+ while ($attempts < $max_attempts)
+ {
+ last
+ if $gnat->start(fail_ok => 1)
+ || slurp_file($gnat->logfile) =~ $pre_existing_msg;
+ usleep(100_000);
+ $attempts++;
+ }
+}
+like(slurp_file($gnat->logfile),
+ $pre_existing_msg, 'detected live backend via shared memory');
+# Reject single-user startup.
+my $single_stderr;
+ok( !run_log(
+ [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ],
+ '<', \undef, '2>', \$single_stderr),
+ 'live query blocks --single');
+print STDERR $single_stderr;
+like($single_stderr, $pre_existing_msg,
+ 'single-user mode detected live backend via shared memory');
+log_ipcs();
+
+# cleanup slow backend
+TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid);
+$slow_client->finish; # client has detected backend termination
+log_ipcs();
+
+# now startup should work
+poll_start($gnat);
+log_ipcs();
+
+# finish testing
+$gnat->stop;
+log_ipcs();
+
+
+# Starting a new postmaster can take several attempts, because:
+# - the kernel may be slow to deliver SIGKILL
+# - the postmaster's parent may be slow to waitpid()
+# - a postmaster child may be slow to exit in response to SIGQUIT
+# - a postmaster child may be slow to exit after postmaster death
+#
+# Returns 1 as soon as a start attempt succeeds, or 0 if the final
+# attempt reports failure.
+sub poll_start
+{
+	my ($node) = @_;
+
+	# One attempt every 0.1 second, for 180 seconds in total.
+	my $attempts_left = 180 * 10;
+
+	while ($attempts_left-- > 0)
+	{
+		return 1 if $node->start(fail_ok => 1);
+
+		usleep(100_000);
+	}
+
+	# Still not running.  Try one last time without fail_ok, which will
+	# BAIL_OUT unless it succeeds.
+	return $node->start ? 1 : 0;
+}
diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl
new file mode 100644
index 0000000..1bc01b5
--- /dev/null
+++ b/src/test/recovery/t/018_wal_optimize.pl
@@ -0,0 +1,373 @@
+# Test WAL replay when some operation has skipped WAL.
+#
+# These tests exercise code that once violated the mandate described in
+# src/backend/access/transam/README section "Skipping WAL for New
+# RelFileNode". The tests work by committing some transactions, initiating an
+# immediate shutdown, and confirming that the expected data survives recovery.
+# For many years, individual commands made the decision to skip WAL, hence the
+# frequent appearance of COPY in these tests.
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 34;
+
+# Check that the purely numeric file names found in the "postgres"
+# database directory are exactly the relation file paths that pg_class
+# reports for relations with reltablespace = 0 and relpersistence <> 't'.
+sub check_orphan_relfilenodes
+{
+	my ($node, $test_name) = @_;
+
+	my $db_oid = $node->safe_psql('postgres',
+		"SELECT oid FROM pg_database WHERE datname = 'postgres'");
+	my $prefix = "base/$db_oid/";
+
+	# File paths the catalogs know about, one per output line.
+	my $filepaths_referenced = $node->safe_psql(
+		'postgres', "
+ SELECT pg_relation_filepath(oid) FROM pg_class
+ WHERE reltablespace = 0 AND relpersistence <> 't' AND
+ pg_relation_filepath(oid) IS NOT NULL;");
+	my @referenced = split(/\n/, $filepaths_referenced);
+
+	# Files actually present on disk, as paths relative to the data
+	# directory.  Only all-digit names are considered.
+	my @on_disk;
+	foreach my $entry (slurp_dir($node->data_dir . "/$prefix"))
+	{
+		push @on_disk, "$prefix$entry" if $entry =~ /^[0-9]+$/;
+	}
+
+	is_deeply([ sort @on_disk ], [ sort @referenced ], $test_name);
+	return;
+}
+
+# Run the WAL-skipping recovery scenarios on a fresh node configured with
+# the given wal_level.  Each scenario commits some work, stops the node in
+# immediate mode, restarts it, and checks that the expected data is there.
+# We run this same test suite for both wal_level=minimal and replica.
+sub run_wal_optimize
+{
+ my $wal_level = shift;
+
+ my $node = get_new_node("node_$wal_level");
+ $node->init;
+ $node->append_conf(
+ 'postgresql.conf', qq(
+wal_level = $wal_level
+max_prepared_transactions = 1
+wal_log_hints = on
+wal_skip_threshold = 0
+#wal_debug = on
+));
+ $node->start;
+
+ # Setup
+ my $tablespace_dir = $node->basedir . '/tablespace_other';
+ mkdir($tablespace_dir);
+ $tablespace_dir = TestLib::perl2host($tablespace_dir);
+ $node->safe_psql('postgres',
+ "CREATE TABLESPACE other LOCATION '$tablespace_dir';");
+
+ # Test direct truncation optimization. No tuples.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc (id serial PRIMARY KEY);
+ TRUNCATE trunc;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ my $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;");
+ is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table");
+
+ # Test truncation with inserted tuples within the same transaction.
+ # Tuples inserted after the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc_ins (id serial PRIMARY KEY);
+ INSERT INTO trunc_ins VALUES (DEFAULT);
+ TRUNCATE trunc_ins;
+ INSERT INTO trunc_ins VALUES (DEFAULT);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres',
+ "SELECT count(*), min(id) FROM trunc_ins;");
+ is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT");
+
+ # Same for prepared transaction.
+ # Tuples inserted after the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE twophase (id serial PRIMARY KEY);
+ INSERT INTO twophase VALUES (DEFAULT);
+ TRUNCATE twophase;
+ INSERT INTO twophase VALUES (DEFAULT);
+ PREPARE TRANSACTION 't';
+ COMMIT PREPARED 't';");
+ $node->stop('immediate');
+ $node->start;
+ # Check the table written by the prepared transaction itself, not the
+ # one left over from the previous scenario.
+ $result = $node->safe_psql('postgres',
+ "SELECT count(*), min(id) FROM twophase;");
+ is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE");
+
+ # Writing WAL at end of xact, instead of syncing.
+ $node->safe_psql(
+ 'postgres', "
+ SET wal_skip_threshold = '1GB';
+ BEGIN;
+ CREATE TABLE noskip (id serial PRIMARY KEY);
+ INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a) ;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM noskip;");
+ is($result, qq(20000), "wal_level = $wal_level, end-of-xact WAL");
+
+ # Data file for COPY query in subsequent tests
+ my $basedir = $node->basedir;
+ my $copy_file = "$basedir/copy_data.txt";
+ TestLib::append_to_file(
+ $copy_file, qq(20000,30000
+20001,30001
+20002,30002));
+ $copy_file = TestLib::perl2host($copy_file);
+
+ # Test truncation with inserted tuples using both INSERT and COPY. Tuples
+ # inserted after the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int);
+ INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000));
+ TRUNCATE ins_trunc;
+ INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
+ COPY ins_trunc FROM '$copy_file' DELIMITER ',';
+ INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trunc;");
+ is($result, qq(5), "wal_level = $wal_level, TRUNCATE COPY INSERT");
+
+ # Test truncation with inserted tuples using COPY. Tuples copied after
+ # the truncation should be seen.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int);
+ INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE trunc_copy;
+ COPY trunc_copy FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result =
+ $node->safe_psql('postgres', "SELECT count(*) FROM trunc_copy;");
+ is($result, qq(3), "wal_level = $wal_level, TRUNCATE COPY");
+
+ # Like previous test, but rollback SET TABLESPACE in a subtransaction.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int);
+ INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE spc_abort;
+ SAVEPOINT s;
+ ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s;
+ COPY spc_abort FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_abort;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE abort subtransaction");
+
+ # in different subtransaction patterns
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int);
+ INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE spc_commit;
+ SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s;
+ COPY spc_commit FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result =
+ $node->safe_psql('postgres', "SELECT count(*) FROM spc_commit;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE commit subtransaction");
+
+ # Nested savepoints mixing rolled-back and released SET TABLESPACE.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int);
+ INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000));
+ TRUNCATE spc_nest;
+ SAVEPOINT s;
+ ALTER TABLE spc_nest SET TABLESPACE other;
+ SAVEPOINT s2;
+ ALTER TABLE spc_nest SET TABLESPACE pg_default;
+ ROLLBACK TO s2;
+ SAVEPOINT s2;
+ ALTER TABLE spc_nest SET TABLESPACE pg_default;
+ RELEASE s2;
+ ROLLBACK TO s;
+ COPY spc_nest FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_nest;");
+ is($result, qq(3),
+ "wal_level = $wal_level, SET TABLESPACE nested subtransaction");
+
+ # Move an existing table to another tablespace, then read and insert
+ # after a checkpoint inside the same transaction.
+ $node->safe_psql(
+ 'postgres', "
+ CREATE TABLE spc_hint (id int);
+ INSERT INTO spc_hint VALUES (1);
+ BEGIN;
+ ALTER TABLE spc_hint SET TABLESPACE other;
+ CHECKPOINT;
+ SELECT * FROM spc_hint; -- set hint bit
+ INSERT INTO spc_hint VALUES (2);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_hint;");
+ is($result, qq(2), "wal_level = $wal_level, SET TABLESPACE, hint bit");
+
+ # Leave an index entry behind from a rolled-back subtransaction, then
+ # insert the same key again after a checkpoint.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE idx_hint (c int PRIMARY KEY);
+ SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q;
+ CHECKPOINT;
+ INSERT INTO idx_hint VALUES (1); -- set index hint bit
+ INSERT INTO idx_hint VALUES (2);
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ # After recovery, inserting a duplicate key must still be rejected:
+ # psql is expected to return 3 and report a unique violation.
+ my ($ret, $stdout, $stderr) =
+ $node->psql('postgres', "INSERT INTO idx_hint VALUES (2);");
+ is($ret, qq(3), "wal_level = $wal_level, unique index LP_DEAD");
+ like(
+ $stderr,
+ qr/violates unique/,
+ "wal_level = $wal_level, unique index LP_DEAD message");
+
+ # UPDATE touches two buffers for one row.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE upd (id serial PRIMARY KEY, id2 int);
+ INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000));
+ COPY upd FROM '$copy_file' DELIMITER ',';
+ UPDATE upd SET id2 = id2 + 1;
+ DELETE FROM upd;
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM upd;");
+ is($result, qq(0),
+ "wal_level = $wal_level, UPDATE touches two buffers for one row");
+
+ # Test consistency of COPY with INSERT for table created in the same
+ # transaction.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int);
+ INSERT INTO ins_copy VALUES (DEFAULT, 1);
+ COPY ins_copy FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_copy;");
+ is($result, qq(4), "wal_level = $wal_level, INSERT COPY");
+
+ # Test consistency of COPY that inserts more to the same table using
+ # triggers. If the INSERTS from the trigger go to the same block data
+ # is copied to, and the INSERTs are WAL-logged, WAL replay will fail when
+ # it tries to replay the WAL record but the "before" image doesn't match,
+ # because not all changes were WAL-logged.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text);
+ CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ IF new.id2 NOT LIKE 'triggered%' THEN
+ INSERT INTO ins_trig
+ VALUES (DEFAULT, 'triggered row before' || NEW.id2);
+ END IF;
+ RETURN NEW;
+ END; \$\$;
+ CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ IF new.id2 NOT LIKE 'triggered%' THEN
+ INSERT INTO ins_trig
+ VALUES (DEFAULT, 'triggered row after' || NEW.id2);
+ END IF;
+ RETURN NEW;
+ END; \$\$;
+ CREATE TRIGGER ins_trig_before_row_insert
+ BEFORE INSERT ON ins_trig
+ FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig();
+ CREATE TRIGGER ins_trig_after_row_insert
+ AFTER INSERT ON ins_trig
+ FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig();
+ COPY ins_trig FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trig;");
+ is($result, qq(9), "wal_level = $wal_level, COPY with INSERT triggers");
+
+ # Test consistency of INSERT, COPY and TRUNCATE in same transaction block
+ # with TRUNCATE triggers.
+ $node->safe_psql(
+ 'postgres', "
+ BEGIN;
+ CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text);
+ CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
+ RETURN NULL;
+ END; \$\$;
+ CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger
+ LANGUAGE plpgsql as \$\$
+ BEGIN
+ INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat after');
+ RETURN NULL;
+ END; \$\$;
+ CREATE TRIGGER trunc_trig_before_stat_truncate
+ BEFORE TRUNCATE ON trunc_trig
+ FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig();
+ CREATE TRIGGER trunc_trig_after_stat_truncate
+ AFTER TRUNCATE ON trunc_trig
+ FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig();
+ INSERT INTO trunc_trig VALUES (DEFAULT, 1);
+ TRUNCATE trunc_trig;
+ COPY trunc_trig FROM '$copy_file' DELIMITER ',';
+ COMMIT;");
+ $node->stop('immediate');
+ $node->start;
+ $result =
+ $node->safe_psql('postgres', "SELECT count(*) FROM trunc_trig;");
+ is($result, qq(4),
+ "wal_level = $wal_level, TRUNCATE COPY with TRUNCATE triggers");
+
+ # Test redo of temp table creation.
+ $node->safe_psql(
+ 'postgres', "
+ CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);");
+ $node->stop('immediate');
+ $node->start;
+ check_orphan_relfilenodes($node,
+ "wal_level = $wal_level, no orphan relfilenode remains");
+
+ return;
+}
+
+# Run same test suite for multiple wal_level values.
+run_wal_optimize("minimal");
+run_wal_optimize("replica");
diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl
new file mode 100644
index 0000000..d33ca1e
--- /dev/null
+++ b/src/test/recovery/t/019_replslot_limit.pl
@@ -0,0 +1,334 @@
+# Test for replication slot limit
+# Ensure that max_slot_wal_keep_size limits the number of WAL files to
+# be kept by replication slots.
+use strict;
+use warnings;
+
+use TestLib;
+use PostgresNode;
+
+use File::Path qw(rmtree);
+use Test::More tests => 16;
+use Time::HiRes qw(usleep);
+
+$ENV{PGDATABASE} = 'postgres';
+
+# Initialize master node, setting wal-segsize to 1MB
+my $node_master = get_new_node('master');
+$node_master->init(allows_streaming => 1, extra => ['--wal-segsize=1']);
+$node_master->append_conf(
+ 'postgresql.conf', qq(
+min_wal_size = 2MB
+max_wal_size = 4MB
+log_checkpoints = yes
+));
+$node_master->start;
+$node_master->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('rep1')");
+
+# The slot state and remain should be null before the first connection
+my $result = $node_master->safe_psql('postgres',
+ "SELECT restart_lsn IS NULL, wal_status is NULL, safe_wal_size is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "t|t|t", 'check the state of non-reserved slot is "unknown"');
+
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_master->backup($backup_name);
+
+# Create a standby linking to it using the replication slot
+my $node_standby = get_new_node('standby_1');
+$node_standby->init_from_backup($node_master, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'");
+
+$node_standby->start;
+
+# Wait until standby has replayed enough data
+my $start_lsn = $node_master->lsn('write');
+$node_master->wait_for_catchup($node_standby, 'replay', $start_lsn);
+
+# Stop standby
+$node_standby->stop;
+
+# Preparation done, the slot is in the state "reserved" now
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "reserved|t", 'check the catching-up state');
+
+# Advance WAL by one segment (= 1MB) on master
+advance_wal($node_master, 1);
+$node_master->safe_psql('postgres', "CHECKPOINT;");
+
+# The slot is always "safe" when fitting max_wal_size
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "reserved|t",
+ 'check that it is safe if WAL fits in max_wal_size');
+
+advance_wal($node_master, 4);
+$node_master->safe_psql('postgres', "CHECKPOINT;");
+
+# The slot is always "safe" when max_slot_wal_keep_size is not set
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "reserved|t", 'check that slot is working');
+
+# The standby can reconnect to master
+$node_standby->start;
+
+$start_lsn = $node_master->lsn('write');
+$node_master->wait_for_catchup($node_standby, 'replay', $start_lsn);
+
+$node_standby->stop;
+
+# Set max_slot_wal_keep_size on master
+my $max_slot_wal_keep_size_mb = 6;
+$node_master->append_conf(
+ 'postgresql.conf', qq(
+max_slot_wal_keep_size = ${max_slot_wal_keep_size_mb}MB
+));
+$node_master->reload;
+
+# The slot is in safe state.
+
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
+is($result, "reserved", 'check that max_slot_wal_keep_size is working');
+
+# Advance WAL again then checkpoint, reducing remain by 2 MB.
+advance_wal($node_master, 2);
+$node_master->safe_psql('postgres', "CHECKPOINT;");
+
+# The slot is still working
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
+is($result, "reserved",
+ 'check that safe_wal_size gets close to the current LSN');
+
+# The standby can reconnect to master
+$node_standby->start;
+$start_lsn = $node_master->lsn('write');
+$node_master->wait_for_catchup($node_standby, 'replay', $start_lsn);
+$node_standby->stop;
+
+# wal_keep_size overrides max_slot_wal_keep_size
+$result = $node_master->safe_psql('postgres',
+ "ALTER SYSTEM SET wal_keep_size to '8MB'; SELECT pg_reload_conf();");
+# Advance WAL again without checkpoint, reducing remain by 6 MB.
+advance_wal($node_master, 6);
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "extended",
+ 'check that wal_keep_size overrides max_slot_wal_keep_size');
+# restore wal_keep_size
+$result = $node_master->safe_psql('postgres',
+ "ALTER SYSTEM SET wal_keep_size to 0; SELECT pg_reload_conf();");
+
+# The standby can reconnect to master
+$node_standby->start;
+$start_lsn = $node_master->lsn('write');
+$node_master->wait_for_catchup($node_standby, 'replay', $start_lsn);
+$node_standby->stop;
+
+# Advance WAL again without checkpoint, reducing remain by 6 MB.
+advance_wal($node_master, 6);
+
+# Slot gets into 'extended' state
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'");
+is($result, "extended", 'check that the slot state changes to "extended"');
+
+# do checkpoint so that the next checkpoint runs too early
+$node_master->safe_psql('postgres', "CHECKPOINT;");
+
+# Advance WAL again without checkpoint; remain goes to 0.
+advance_wal($node_master, 1);
+
+# Slot gets into 'unreserved' state and safe_wal_size is zero or negative
+$result = $node_master->safe_psql('postgres',
+ "SELECT wal_status, safe_wal_size <= 0 FROM pg_replication_slots WHERE slot_name = 'rep1'"
+);
+is($result, "unreserved|t",
+ 'check that the slot state changes to "unreserved"');
+
+# The standby still can connect to master before a checkpoint
+$node_standby->start;
+
+$start_lsn = $node_master->lsn('write');
+$node_master->wait_for_catchup($node_standby, 'replay', $start_lsn);
+
+$node_standby->stop;
+
+ok( !find_in_log(
+ $node_standby,
+ "requested WAL segment [0-9A-F]+ has already been removed"),
+ 'check that required WAL segments are still available');
+
+# Create one checkpoint, to improve stability of the next steps
+$node_master->safe_psql('postgres', "CHECKPOINT;");
+
+# Prevent other checkpoints from occurring while advancing WAL segments
+$node_master->safe_psql('postgres',
+ "ALTER SYSTEM SET max_wal_size='40MB'; SELECT pg_reload_conf()");
+
+# Advance WAL again. The slot loses the oldest segment by the next checkpoint
+my $logstart = get_log_size($node_master);
+advance_wal($node_master, 7);
+
+# Now create another checkpoint and wait until the WARNING is issued
+$node_master->safe_psql('postgres',
+ 'ALTER SYSTEM RESET max_wal_size; SELECT pg_reload_conf()');
+$node_master->safe_psql('postgres', "CHECKPOINT;");
+my $invalidated = 0;
+for (my $i = 0; $i < 10000; $i++)
+{
+ if (find_in_log(
+ $node_master,
+ "invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size",
+ $logstart))
+ {
+ $invalidated = 1;
+ last;
+ }
+ usleep(100_000);
+}
+ok($invalidated, 'check that slot invalidation has been logged');
+
+$result = $node_master->safe_psql(
+ 'postgres',
+ qq[
+ SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size
+ FROM pg_replication_slots WHERE slot_name = 'rep1']);
+is($result, "rep1|f|t|lost|",
+ 'check that the slot became inactive and the state "lost" persists');
+
+# Wait until current checkpoint ends
+my $checkpoint_ended = 0;
+for (my $i = 0; $i < 10000; $i++)
+{
+ if (find_in_log($node_master, "checkpoint complete: ", $logstart))
+ {
+ $checkpoint_ended = 1;
+ last;
+ }
+ usleep(100_000);
+}
+ok($checkpoint_ended, 'waited for checkpoint to end');
+
+# The invalidated slot shouldn't keep the old-segment horizon back;
+# see bug #17103: https://postgr.es/m/17103-004130e8f27782c9@postgresql.org
+# Test for this by creating a new slot and comparing its restart LSN
+# to the oldest existing file.
+my $redoseg = $node_master->safe_psql('postgres',
+ "SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)"
+);
+my $oldestseg = $node_master->safe_psql('postgres',
+ "SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' ORDER BY 1 LIMIT 1"
+);
+$node_master->safe_psql('postgres',
+ qq[SELECT pg_drop_replication_slot('s2')]);
+is($oldestseg, $redoseg, "check that segments have been removed");
+
+# The standby no longer can connect to the master
+$logstart = get_log_size($node_standby);
+$node_standby->start;
+
+my $failed = 0;
+for (my $i = 0; $i < 10000; $i++)
+{
+ if (find_in_log(
+ $node_standby,
+ "requested WAL segment [0-9A-F]+ has already been removed",
+ $logstart))
+ {
+ $failed = 1;
+ last;
+ }
+ usleep(100_000);
+}
+ok($failed, 'check that replication has been broken');
+
+$node_master->stop('immediate');
+$node_standby->stop('immediate');
+
+my $node_master2 = get_new_node('master2');
+$node_master2->init(allows_streaming => 1);
+$node_master2->append_conf(
+ 'postgresql.conf', qq(
+min_wal_size = 32MB
+max_wal_size = 32MB
+log_checkpoints = yes
+));
+$node_master2->start;
+$node_master2->safe_psql('postgres',
+ "SELECT pg_create_physical_replication_slot('rep1')");
+$backup_name = 'my_backup2';
+$node_master2->backup($backup_name);
+
+$node_master2->stop;
+$node_master2->append_conf(
+ 'postgresql.conf', qq(
+max_slot_wal_keep_size = 0
+));
+$node_master2->start;
+
+$node_standby = get_new_node('standby_2');
+$node_standby->init_from_backup($node_master2, $backup_name,
+ has_streaming => 1);
+$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'");
+$node_standby->start;
+my @result =
+ split(
+ '\n',
+ $node_master2->safe_psql(
+ 'postgres',
+ "CREATE TABLE tt();
+ DROP TABLE tt;
+ SELECT pg_switch_wal();
+ CHECKPOINT;
+ SELECT 'finished';",
+ timeout => '60'));
+is($result[1], 'finished', 'check if checkpoint command is not blocked');
+
+#####################################
+# Advance WAL of $node by $n segments, switching segment after each round.
+sub advance_wal
+{
+    my ($node, $n) = @_;
+
+    # One segment per round; segment size is the node's wal-segsize setting.
+    for my $round (1 .. $n)
+    {
+        $node->safe_psql('postgres',
+            "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();");
+    }
+    return;
+}
+
+# Return the size in bytes of $node's logfile, as reported by stat().
+sub get_log_size
+{
+    my ($node) = @_;
+    my $logpath = $node->logfile;
+    return (stat($logpath))[7];
+}
+
+# Return true if $pat matches the logfile of $node at or after offset $off
+# (default 0).  A log that is no longer than $off never matches.
+sub find_in_log
+{
+    my ($node, $pat, $off) = @_;
+
+    $off = 0 if !defined $off;
+    my $contents = TestLib::slurp_file($node->logfile);
+    return 0 unless length($contents) > $off;
+
+    # Only the part of the log past $off is searched.
+    return substr($contents, $off) =~ m/$pat/;
+}
diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl
new file mode 100644
index 0000000..fb0dff6
--- /dev/null
+++ b/src/test/recovery/t/020_archive_status.pl
@@ -0,0 +1,233 @@
+#
+# Tests related to WAL archiving and recovery.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 16;
+use Config;
+
+my $primary = get_new_node('master');
+$primary->init(
+ has_archiving => 1,
+ allows_streaming => 1);
+$primary->append_conf('postgresql.conf', 'autovacuum = off');
+$primary->start;
+my $primary_data = $primary->data_dir;
+
+# Temporarily use an archive_command value to make the archiver fail,
+# knowing that archiving is enabled. Note that we cannot use a command
+# that does not exist as in this case the archiver process would just exit
+# without reporting the failure to pg_stat_archiver. This also cannot
+# use a plain "false" as that's unportable on Windows. So, instead, as
+# a portable solution, use an archive command based on a command known to
+# work but will fail: copy with an incorrect original path.
+my $incorrect_command =
+ $TestLib::windows_os
+ ? qq{copy "%p_does_not_exist" "%f_does_not_exist"}
+ : qq{cp "%p_does_not_exist" "%f_does_not_exist"};
+$primary->safe_psql(
+ 'postgres', qq{
+ ALTER SYSTEM SET archive_command TO '$incorrect_command';
+ SELECT pg_reload_conf();
+});
+
+# Save the WAL segment currently in use and switch to a new segment.
+# This will be used to track the activity of the archiver.
+my $segment_name_1 = $primary->safe_psql('postgres',
+ q{SELECT pg_walfile_name(pg_current_wal_lsn())});
+my $segment_path_1 = "pg_wal/archive_status/$segment_name_1";
+my $segment_path_1_ready = "$segment_path_1.ready";
+my $segment_path_1_done = "$segment_path_1.done";
+$primary->safe_psql(
+ 'postgres', q{
+ CREATE TABLE mine AS SELECT generate_series(1,10) AS x;
+ SELECT pg_switch_wal();
+ CHECKPOINT;
+});
+
+# Wait for an archive failure.
+$primary->poll_query_until('postgres',
+ q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't')
+ or die "Timed out while waiting for archiving to fail";
+ok( -f "$primary_data/$segment_path_1_ready",
+ ".ready file exists for WAL segment $segment_name_1 waiting to be archived"
+);
+ok( !-f "$primary_data/$segment_path_1_done",
+ ".done file does not exist for WAL segment $segment_name_1 waiting to be archived"
+);
+
+is( $primary->safe_psql(
+ 'postgres', q{
+ SELECT archived_count, last_failed_wal
+ FROM pg_stat_archiver
+ }),
+ "0|$segment_name_1",
+ "pg_stat_archiver failed to archive $segment_name_1");
+
+# Crash the cluster for the next test in charge of checking that non-archived
+# WAL segments are not removed.
+$primary->stop('immediate');
+
+# Recovery tests for the archiving with a standby partially check
+# the recovery behavior when restoring a backup taken using a
+# snapshot with no pg_start/stop_backup. In this situation,
+# the recovered standby should enter first crash recovery then
+# switch to regular archive recovery. Note that the base backup
+# is taken here so as archive_command will fail. This is necessary
+# for the assumptions of the tests done with the standbys below.
+$primary->backup_fs_cold('backup');
+
+$primary->start;
+ok( -f "$primary_data/$segment_path_1_ready",
+ ".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
+);
+
+# Allow WAL archiving again and wait for a success.
+$primary->safe_psql(
+ 'postgres', q{
+ ALTER SYSTEM RESET archive_command;
+ SELECT pg_reload_conf();
+});
+
+$primary->poll_query_until('postgres',
+ q{SELECT archived_count FROM pg_stat_archiver}, '1')
+ or die "Timed out while waiting for archiving to finish";
+
+ok(!-f "$primary_data/$segment_path_1_ready",
+ ".ready file for archived WAL segment $segment_name_1 removed");
+
+ok(-f "$primary_data/$segment_path_1_done",
+ ".done file for archived WAL segment $segment_name_1 exists");
+
+is( $primary->safe_psql(
+ 'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }),
+ $segment_name_1,
+ "archive success reported in pg_stat_archiver for WAL segment $segment_name_1"
+);
+
+# Create some WAL activity and a new checkpoint so as the next standby can
+# create a restartpoint. As this standby starts in crash recovery because
+# of the cold backup taken previously, it needs a clean restartpoint to deal
+# with existing status files.
+my $segment_name_2 = $primary->safe_psql('postgres',
+ q{SELECT pg_walfile_name(pg_current_wal_lsn())});
+my $segment_path_2 = "pg_wal/archive_status/$segment_name_2";
+my $segment_path_2_ready = "$segment_path_2.ready";
+my $segment_path_2_done = "$segment_path_2.done";
+$primary->safe_psql(
+ 'postgres', q{
+ INSERT INTO mine SELECT generate_series(10,20) AS x;
+ CHECKPOINT;
+});
+
+# Switch to a new segment and use the returned LSN to make sure that
+# standbys have caught up to this point.
+my $primary_lsn = $primary->safe_psql(
+ 'postgres', q{
+ SELECT pg_switch_wal();
+});
+
+$primary->poll_query_until('postgres',
+ q{ SELECT last_archived_wal FROM pg_stat_archiver },
+ $segment_name_2)
+ or die "Timed out while waiting for archiving to finish";
+
+# Test standby with archive_mode = on.
+my $standby1 = get_new_node('standby');
+$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby1->append_conf('postgresql.conf', "archive_mode = on");
+my $standby1_data = $standby1->data_dir;
+$standby1->start;
+
+# Wait for the replay of the segment switch done previously, ensuring
+# that all segments needed are restored from the archives.
+$standby1->poll_query_until('postgres',
+ qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 }
+) or die "Timed out while waiting for xlog replay on standby1";
+
+$standby1->safe_psql('postgres', q{CHECKPOINT});
+
+# Recovery with archive_mode=on does not keep .ready signal files inherited
+# from backup. Note that this WAL segment existed in the backup.
+ok( !-f "$standby1_data/$segment_path_1_ready",
+ ".ready file for WAL segment $segment_name_1 present in backup got removed with archive_mode=on on standby"
+);
+
+# Recovery with archive_mode=on should not create .ready files.
+# Note that this segment did not exist in the backup.
+ok( !-f "$standby1_data/$segment_path_2_ready",
+ ".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby"
+);
+
+# Recovery with archive_mode = on creates .done files.
+ok( -f "$standby1_data/$segment_path_2_done",
+ ".done file for WAL segment $segment_name_2 created when archive_mode=on on standby"
+);
+
+# Test recovery with archive_mode = always, which should always keep
+# .ready files if archiving is enabled, though here we want the archive
+# command to fail to persist the .ready files. Note that this node
+# has inherited the archive command of the previous cold backup that
+# will cause archiving failures.
+my $standby2 = get_new_node('standby2');
+$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
+$standby2->append_conf('postgresql.conf', 'archive_mode = always');
+my $standby2_data = $standby2->data_dir;
+$standby2->start;
+
+# Wait for the replay of the segment switch done previously, ensuring
+# that all segments needed are restored from the archives.
+$standby2->poll_query_until('postgres',
+ qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 }
+) or die "Timed out while waiting for xlog replay on standby2";
+
+$standby2->safe_psql('postgres', q{CHECKPOINT});
+
+ok( -f "$standby2_data/$segment_path_1_ready",
+ ".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby"
+);
+
+ok( -f "$standby2_data/$segment_path_2_ready",
+ ".ready file for WAL segment $segment_name_2 created with archive_mode=always on standby"
+);
+
+# Reset statistics of the archiver for the next checks.
+$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});
+
+# Now crash the cluster to check that recovery step does not
+# remove non-archived WAL segments on a standby where archiving
+# is enabled.
+$standby2->stop('immediate');
+$standby2->start;
+
+ok( -f "$standby2_data/$segment_path_1_ready",
+ "WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
+);
+
+# Allow WAL archiving again, and wait for the segments to be archived.
+$standby2->safe_psql(
+ 'postgres', q{
+ ALTER SYSTEM RESET archive_command;
+ SELECT pg_reload_conf();
+});
+$standby2->poll_query_until('postgres',
+ q{SELECT last_archived_wal FROM pg_stat_archiver},
+ $segment_name_2)
+ or die "Timed out while waiting for archiving to finish";
+
+is( $standby2->safe_psql(
+ 'postgres', q{SELECT archived_count FROM pg_stat_archiver}),
+ '2',
+ "correct number of WAL segments archived from standby");
+
+ok( !-f "$standby2_data/$segment_path_1_ready"
+ && !-f "$standby2_data/$segment_path_2_ready",
+ ".ready files removed after archive success with archive_mode=always on standby"
+);
+
+ok( -f "$standby2_data/$segment_path_1_done"
+ && -f "$standby2_data/$segment_path_2_done",
+ ".done files created after archive success with archive_mode=always on standby"
+);
diff --git a/src/test/recovery/t/023_pitr_prepared_xact.pl b/src/test/recovery/t/023_pitr_prepared_xact.pl
new file mode 100644
index 0000000..533cd1a
--- /dev/null
+++ b/src/test/recovery/t/023_pitr_prepared_xact.pl
@@ -0,0 +1,86 @@
+# Test for point-in-time-recovery (PITR) with prepared transactions
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+use File::Compare;
+
+# Initialize and start primary node with WAL archiving
+my $node_primary = get_new_node('primary');
+$node_primary->init(has_archiving => 1, allows_streaming => 1);
+$node_primary->append_conf(
+ 'postgresql.conf', qq{
+max_prepared_transactions = 10});
+$node_primary->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Initialize node for PITR targeting a very specific restore point, just
+# after a PREPARE TRANSACTION is issued so as we finish with a promoted
+# node where this 2PC transaction needs an explicit COMMIT PREPARED.
+my $node_pitr = get_new_node('node_pitr');
+$node_pitr->init_from_backup(
+ $node_primary, $backup_name,
+ standby => 0,
+ has_restoring => 1);
+$node_pitr->append_conf(
+ 'postgresql.conf', qq{
+recovery_target_name = 'rp'
+recovery_target_action = 'promote'});
+
+# Workload with a prepared transaction and the target restore point.
+$node_primary->psql(
+ 'postgres', qq{
+CREATE TABLE foo(i int);
+BEGIN;
+INSERT INTO foo VALUES(1);
+PREPARE TRANSACTION 'fooinsert';
+SELECT pg_create_restore_point('rp');
+INSERT INTO foo VALUES(2);
+});
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_primary->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+my $archive_wait_query =
+ "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
+$node_primary->poll_query_until('postgres', $archive_wait_query)
+ or die "Timed out while waiting for WAL segment to be archived";
+my $last_archived_wal_file = $walfile_to_be_archived;
+
+# Now start the PITR node.
+$node_pitr->start;
+
+# Wait until the PITR node exits recovery.
+$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
+ or die "Timed out while waiting for PITR promotion";
+
+# Commit the prepared transaction in the latest timeline and check its
+# result. There should only be one row in the table, coming from the
+# prepared transaction. The row from the INSERT after the restore point
+# should not show up, since our recovery target was older than the second
+# INSERT done.
+$node_pitr->psql('postgres', qq{COMMIT PREPARED 'fooinsert';});
+my $result = $node_pitr->safe_psql('postgres', "SELECT * FROM foo;");
+is($result, qq{1}, "check table contents after COMMIT PREPARED");
+
+# Insert more data and do a checkpoint. These should be generated on the
+# timeline chosen after the PITR promotion.
+$node_pitr->psql(
+ 'postgres', qq{
+INSERT INTO foo VALUES(3);
+CHECKPOINT;
+});
+
+# Enforce recovery, the checkpoint record generated previously should
+# still be found.
+$node_pitr->stop('immediate');
+$node_pitr->start;
diff --git a/src/test/recovery/t/025_stuck_on_old_timeline.pl b/src/test/recovery/t/025_stuck_on_old_timeline.pl
new file mode 100644
index 0000000..dbaab8e
--- /dev/null
+++ b/src/test/recovery/t/025_stuck_on_old_timeline.pl
@@ -0,0 +1,107 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Testing streaming replication where standby is promoted and a new cascading
+# standby (without WAL) is connected to the promoted standby. Both archiving
+# and streaming are enabled, but only the history file is available from the
+# archive, so the WAL files all have to be streamed. Test that the cascading
+# standby can follow the new primary (promoted standby).
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+
+use File::Basename;
+use FindBin;
+use Test::More tests => 1;
+
+# Initialize primary node
+my $node_primary = get_new_node('primary');
+
+# Set up an archive command that will copy the history file but not the WAL
+# files. No real archive command should behave this way; the point is to
+# simulate a race condition where the new cascading standby starts up after
+# the timeline history file reaches the archive but before any of the WAL files
+# get there.
+$node_primary->init(allows_streaming => 1, has_archiving => 1);
+
+# Note: consistent use of forward slashes here avoids any escaping problems
+# that arise from use of backslashes. That means we need to double-quote all
+# the paths in the archive_command
+my $perlbin = TestLib::perl2host($^X);
+$perlbin =~ s!\\!/!g if $TestLib::windows_os;
+my $archivedir_primary = $node_primary->archive_dir;
+$archivedir_primary =~ s!\\!/!g if $TestLib::windows_os;
+$node_primary->append_conf('postgresql.conf', qq(
+archive_command = '"$perlbin" "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"'
+wal_keep_size=128MB
+));
+# Make sure that Msys perl doesn't complain about difficulty in setting locale
+# when called from the archive_command.
+local $ENV{PERL_BADLANG}=0;
+$node_primary->start;
+
+# Take backup from primary
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby = get_new_node('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+ allows_streaming => 1, has_streaming => 1, has_archiving => 1);
+$node_standby->start;
+
+# Take backup of standby, use -Xnone so that pg_wal is empty.
+$node_standby->backup($backup_name, backup_options => ['-Xnone']);
+
+# Create cascading standby but don't start it yet.
+# Must set up both streaming and archiving.
+my $node_cascade = get_new_node('cascade');
+$node_cascade->init_from_backup($node_standby, $backup_name,
+ has_streaming => 1);
+$node_cascade->enable_restoring($node_primary);
+$node_cascade->append_conf('postgresql.conf', qq(
+recovery_target_timeline='latest'
+));
+
+# Promote the standby.
+$node_standby->promote;
+
+# Wait for promotion to complete
+$node_standby->poll_query_until('postgres',
+ "SELECT NOT pg_is_in_recovery();")
+ or die "Timed out while waiting for promotion";
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
+ "SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+# Since the history file gets created on promotion and is archived before any
+# WAL segment, this is enough to guarantee that the history file was
+# archived.
+my $archive_wait_query =
+ "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver";
+$node_standby->poll_query_until('postgres', $archive_wait_query)
+ or die "Timed out while waiting for WAL segment to be archived";
+my $last_archived_wal_file = $walfile_to_be_archived;
+
+# Start cascade node
+$node_cascade->start;
+
+# Create some content on promoted standby and check its presence on the
+# cascading standby.
+$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a");
+
+# Wait for the replication to catch up
+$node_standby->wait_for_catchup($node_cascade, 'replay',
+ $node_standby->lsn('insert'));
+
+# Check that cascading standby has the new content
+my $result =
+ $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+note "cascade: $result";    # TAP comment instead of a bare print to stdout
+is($result, 1, 'check streamed content on cascade standby');
diff --git a/src/test/recovery/t/cp_history_files b/src/test/recovery/t/cp_history_files
new file mode 100644
index 0000000..66f1b59
--- /dev/null
+++ b/src/test/recovery/t/cp_history_files
@@ -0,0 +1,17 @@
+#!/usr/bin/perl
+# Usage: cp_history_files SOURCE TARGET -- copy SOURCE only if it matches /history/
+use File::Copy;
+use strict;
+use warnings;
+
+die "wrong number of arguments" if @ARGV != 2;
+my ($source, $target) = @ARGV;
+exit if $source !~ /history/;    # any other file: exit without copying it
+if ($^O eq 'msys')
+{
+ # make a windows path look like an msys path if necessary
+ $source =~ s!^([A-Za-z]):!'/' . lc($1)!e;
+ $source =~ s!\\!/!g;
+}
+
+copy($source, $target) or die "couldn't copy $source to $target: $!";