1 files changed, 394 insertions, 0 deletions
diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm
new file mode 100644
index 0000000..422a563
--- /dev/null
+++ b/src/bin/pg_rewind/t/RewindTest.pm
@@ -0,0 +1,394 @@
+package RewindTest;
+
+# Test driver for pg_rewind. Each test consists of a cycle where a new cluster
+# is first created with initdb, and a streaming replication standby is set up
+# to follow the master. Then the master is shut down and the standby is
+# promoted, and finally pg_rewind is used to rewind the old master, using the
+# standby as the source.
+#
+# To run a test, the test script (in t/ subdirectory) calls the functions
+# in this module. These functions should be called in this sequence:
+#
+# 1. setup_cluster - creates a PostgreSQL cluster that runs as the master
+#
+# 2. start_master - starts the master server
+#
+# 3. create_standby - runs pg_basebackup to initialize a standby server, and
+#    sets it up to follow the master.
+#
+# 4. promote_standby - runs "pg_ctl promote" to promote the standby server.
+#    The old master keeps running.
+#
+# 5. run_pg_rewind - stops the old master (if it's still running) and runs
+#    pg_rewind to synchronize it with the now-promoted standby server.
+#
+# 6. clean_rewind_test - stops both servers used in the test, if they're
+#    still running.
+#
+# Helpers: master_psql and standby_psql run psql against the master and the
+# standby respectively; check_query checks a query's output on the master.
+
+use strict;
+use warnings;
+
+use Carp;
+use Config;
+use Exporter 'import';
+use File::Copy;
+use File::Path qw(rmtree);
+use IPC::Run qw(run);
+use PostgresNode;
+use RecursiveCopy;
+use TestLib;
+use Test::More;
+
+# Symbols exported by default to the test scripts that use this module,
+# grouped by purpose.
+our @EXPORT = (
+
+  # node handles
+  '$node_master', '$node_standby',
+
+  # psql helpers
+  'master_psql', 'standby_psql', 'check_query',
+
+  # steps of a test cycle, in calling order
+  'setup_cluster', 'start_master', 'create_standby',
+  'promote_standby', 'run_pg_rewind', 'clean_rewind_test',
+);
+
+# Our nodes: the original master, assigned by setup_cluster, and the
+# standby that later gets promoted, assigned by create_standby.  Both
+# are undefined until then.
+our ($node_master, $node_standby);
+
+# Run $cmd through psql on the master; $dbname defaults to 'postgres'.
+sub master_psql
+{
+	my ($cmd, $dbname) = @_;
+	my $connstr = $node_master->connstr($dbname || 'postgres');
+
+	system_or_bail('psql', '-q', '--no-psqlrc', '-d', $connstr, '-c', "$cmd");
+	return;
+}
+
+# Run $cmd through psql on the standby; $dbname defaults to 'postgres'.
+sub standby_psql
+{
+	my ($cmd, $dbname) = @_;
+	my $connstr = $node_standby->connstr($dbname || 'postgres');
+
+	system_or_bail('psql', '-q', '--no-psqlrc', '-d', $connstr, '-c', "$cmd");
+	return;
+}
+
+# Run a query against the master and report, as a single test, whether psql
+# succeeded, printed nothing on stderr, and produced $expected_stdout.
+sub check_query
+{
+	local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+	my ($query, $expected_stdout, $test_name) = @_;
+	my ($stdout, $stderr) = ('', '');
+
+	# we want just the output, no formatting
+	my @psql = (
+		'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
+		$node_master->connstr('postgres'),
+		'-c', $query);
+	my $result = run \@psql, '>', \$stdout, '2>', \$stderr;
+
+	# We don't use ok() for the exit code and stderr, because we want this
+	# check to be just a single test.
+	if (!$result)
+	{
+		diag $stderr if $stderr ne '';    # surface psql's own error message
+		fail("$test_name: psql exit code");
+	}
+	elsif ($stderr ne '')
+	{
+		diag $stderr;
+		fail("$test_name: psql no stderr");
+	}
+	else
+	{
+		$stdout =~ s/\r\n/\n/g if $Config{osname} eq 'msys';
+		is($stdout, $expected_stdout, "$test_name: query result matches");
+	}
+	return;
+}
+
+# Create (but do not start) the master node.  $extra_name, if given, is
+# appended to the node name; $extra is handed to init() as its 'extra' option.
+sub setup_cluster
+{
+	my ($extra_name, $extra) = @_;
+
+	my $node_name = 'master';
+	$node_name .= "_${extra_name}" if $extra_name;
+	$node_master = get_new_node($node_name);
+
+	# auth_extra sets up pg_hba.conf and pg_ident.conf for rewind_user, the
+	# minimally-privileged role that runs pg_rewind in all the tests.
+	$node_master->init(
+		allows_streaming => 1,
+		extra            => $extra,
+		auth_extra       => [ '--create-role', 'rewind_user' ]);
+
+	# Keep enough WAL around that the checkpoints forced by the tests
+	# cannot recycle segments that are still needed.
+	$node_master->append_conf(
+		'postgresql.conf', qq(
+wal_keep_size = 320MB
+));
+	return;
+}
+
+# Start the master and create rewind_user, the role used to run pg_rewind.
+sub start_master
+{
+	$node_master->start;
+
+	# The role only gets EXECUTE on the file-access functions listed here,
+	# nothing more.
+	my $role_setup_sql = "
+		CREATE ROLE rewind_user LOGIN;
+		GRANT EXECUTE ON function pg_catalog.pg_ls_dir(text, boolean, boolean)
+		  TO rewind_user;
+		GRANT EXECUTE ON function pg_catalog.pg_stat_file(text, boolean)
+		  TO rewind_user;
+		GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text)
+		  TO rewind_user;
+		GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean)
+		  TO rewind_user;";
+	$node_master->safe_psql('postgres', $role_setup_sql);
+
+	# The caller now runs its test-specific initialization of the master,
+	# before the standby is set up.
+	return;
+}
+
+# Initialize a standby from a base backup of the master, configure it to
+# follow the master, and start it.  $extra_name is a node-name suffix.
+sub create_standby
+{
+	my ($extra_name) = @_;
+	my $backup_name = 'my_backup';
+
+	my $node_name = 'standby';
+	$node_name .= "_${extra_name}" if $extra_name;
+	$node_standby = get_new_node($node_name);
+
+	$node_master->backup($backup_name);
+	$node_standby->init_from_backup($node_master, $backup_name);
+
+	my $connstr_master = $node_master->connstr();
+	$node_standby->append_conf(
+		"postgresql.conf", qq(
+primary_conninfo='$connstr_master'
+));
+	$node_standby->set_standby_mode();
+	$node_standby->start;
+
+	# No catch-up wait: no test examines the standby before promotion.
+	return;
+}
+
+# Promote the standby once it has written all of the master's WAL, then
+# force a checkpoint on it.  Test scripts run their standby-specific steps
+# before calling this.
+sub promote_standby
+{
+	# Wait for the standby to receive and write all WAL.
+	my $catchup_mode = 'write';
+	$node_master->wait_for_catchup($node_standby, $catchup_mode);
+
+	# Promote the standby.  The old master keeps running, so whatever the
+	# test inserts there afterwards puts it out-of-sync with the standby.
+	$node_standby->promote();
+
+	# Force a checkpoint after the promotion.  pg_rewind reads the control
+	# file to find out which timeline the server is on, and that is only
+	# updated at the first checkpoint after promotion, not by the promotion
+	# itself.  In remote mode the remaining test steps can finish so fast
+	# that pg_rewind would otherwise run before that checkpoint happened.
+	standby_psql('checkpoint');
+
+	return;
+}
+
+# Rewind the old master so that it can follow the promoted standby.
+# $test_mode selects the pg_rewind source: 'local' (standby's data
+# directory), 'remote' (connection to the running standby) or 'archive'
+# (like 'local', with the target's WAL restored from its archive).  Any
+# other value croaks.  The master is stopped, rewound and started again.
+sub run_pg_rewind
+{
+	my $test_mode       = shift;
+	my $master_pgdata   = $node_master->data_dir;
+	my $standby_pgdata  = $node_standby->data_dir;
+	my $standby_connstr = $node_standby->connstr('postgres');
+	my $tmp_folder      = TestLib::tempdir;
+
+	# Append the rewind-specific role to the connection string.
+	$standby_connstr = "$standby_connstr user=rewind_user";
+
+	if ($test_mode eq 'archive')
+	{
+		# pg_rewind is tested with --restore-target-wal by moving all
+		# WAL files to a secondary location.  Note that this leads to
+		# a failure in ensureCleanShutdown(), forcing to the use of
+		# --no-ensure-shutdown in this mode as the initial set of WAL
+		# files needed to ensure a clean restart is gone.  This could
+		# be improved by keeping around only a minimum set of WAL
+		# segments but that would just make the test more costly,
+		# without improving the coverage.  Hence, instead, stop
+		# gracefully the primary here.
+		$node_master->stop;
+	}
+	else
+	{
+		# Stop the master and be ready to perform the rewind.  The cluster
+		# needs recovery to finish once, and pg_rewind makes sure that it
+		# happens automatically.
+		$node_master->stop('immediate');
+	}
+
+	# The old master is stopped and has diverged from the standby by a few
+	# WAL records; what follows depends on the pg_rewind mode being tested.
+
+	# Keep a temporary postgresql.conf for master node or it would be
+	# overwritten during the rewind.  Bail out if that fails: carrying on
+	# would silently leave the master with the standby's configuration.
+	my $saved_conf = "$tmp_folder/master-postgresql.conf.tmp";
+	copy("$master_pgdata/postgresql.conf", $saved_conf)
+	  or BAIL_OUT("unable to save $master_pgdata/postgresql.conf: $!");
+
+	# Now run pg_rewind
+	if ($test_mode eq "local")
+	{
+		# Do rewind using a local pgdata as source.  Stop the new master
+		# (the promoted standby) and be ready to perform the rewind.
+		$node_standby->stop;
+		command_ok(
+			[
+				'pg_rewind',
+				"--debug",
+				"--source-pgdata=$standby_pgdata",
+				"--target-pgdata=$master_pgdata",
+				"--no-sync"
+			],
+			'pg_rewind local');
+	}
+	elsif ($test_mode eq "remote")
+	{
+		# Do rewind using a remote connection as source, generating
+		# recovery configuration automatically.
+		command_ok(
+			[
+				'pg_rewind',                      "--debug",
+				"--source-server",                $standby_connstr,
+				"--target-pgdata=$master_pgdata", "--no-sync",
+				"--write-recovery-conf"
+			],
+			'pg_rewind remote');
+
+		# Check that standby.signal is here as recovery configuration
+		# was requested.
+		ok( -e "$master_pgdata/standby.signal",
+			'standby.signal created after pg_rewind');
+
+		# Now, when pg_rewind apparently succeeded with minimal permissions,
+		# add REPLICATION privilege.  So we could test that new standby
+		# is able to connect to the new master with generated config.
+		$node_standby->safe_psql('postgres',
+			"ALTER ROLE rewind_user WITH REPLICATION;");
+	}
+	elsif ($test_mode eq "archive")
+	{
+		# Do rewind using a local pgdata as source and specified
+		# directory with target WAL archive.  The old master has
+		# to be stopped at this point.
+
+		# Remove the existing archive directory and move all WAL
+		# segments from the old master to the archives.  These
+		# will be used by pg_rewind.
+		rmtree($node_master->archive_dir);
+		RecursiveCopy::copypath($node_master->data_dir . "/pg_wal",
+			$node_master->archive_dir);
+
+		# Fast way to remove entire directory content
+		rmtree($node_master->data_dir . "/pg_wal");
+		mkdir($node_master->data_dir . "/pg_wal");
+
+		# Make sure that directories have the right umask as this is
+		# required by a follow-up check on permissions, and better
+		# safe than sorry.
+		chmod(0700, $node_master->archive_dir);
+		chmod(0700, $node_master->data_dir . "/pg_wal");
+
+		# Add appropriate restore_command to the target cluster
+		$node_master->enable_restoring($node_master, 0);
+
+		# Stop the new master and be ready to perform the rewind.
+		$node_standby->stop;
+
+		# Note the use of --no-ensure-shutdown here.  WAL files are
+		# gone in this mode and the primary has been stopped
+		# gracefully already.
+		command_ok(
+			[
+				'pg_rewind',
+				"--debug",
+				"--source-pgdata=$standby_pgdata",
+				"--target-pgdata=$master_pgdata",
+				"--no-sync",
+				"--no-ensure-shutdown",
+				"--restore-target-wal"
+			],
+			'pg_rewind archive');
+	}
+	else
+	{
+		# Cannot come here normally
+		croak("Incorrect test mode specified");
+	}
+
+	# Now move back postgresql.conf with old settings
+	move($saved_conf, "$master_pgdata/postgresql.conf")
+	  or BAIL_OUT("unable to restore $master_pgdata/postgresql.conf: $!");
+
+	chmod(
+		$node_master->group_access() ? 0640 : 0600,
+		"$master_pgdata/postgresql.conf")
+	  or BAIL_OUT(
+		"unable to set permissions for $master_pgdata/postgresql.conf");
+
+	# Plug-in rewound node to the now-promoted standby node
+	if ($test_mode ne "remote")
+	{
+		my $port_standby = $node_standby->port;
+		$node_master->append_conf(
+			'postgresql.conf', qq(
+primary_conninfo='port=$port_standby'));
+
+		$node_master->set_standby_mode();
+	}
+
+	# Restart the master to check that rewind went correctly
+	$node_master->start;
+
+	#### Now run the test-specific parts to check the result
+
+	return;
+}
+
+# Clean up after the test: tear down each node that was actually created.
+sub clean_rewind_test
+{
+	my @created = grep { defined } ($node_master, $node_standby);
+	$_->teardown_node foreach @created;
+	return;
+}
+
+1;