summaryrefslogtreecommitdiffstats
path: root/src/bin/pg_rewind/t
diff options
context:
space:
mode:
Diffstat (limited to 'src/bin/pg_rewind/t')
-rw-r--r--src/bin/pg_rewind/t/001_basic.pl176
-rw-r--r--src/bin/pg_rewind/t/002_databases.pl74
-rw-r--r--src/bin/pg_rewind/t/003_extrafiles.pl94
-rw-r--r--src/bin/pg_rewind/t/004_pg_xlog_symlink.pl85
-rw-r--r--src/bin/pg_rewind/t/005_same_timeline.pl20
-rw-r--r--src/bin/pg_rewind/t/006_options.pl40
-rw-r--r--src/bin/pg_rewind/t/008_min_recovery_point.pl171
-rw-r--r--src/bin/pg_rewind/t/RewindTest.pm394
8 files changed, 1054 insertions, 0 deletions
diff --git a/src/bin/pg_rewind/t/001_basic.pl b/src/bin/pg_rewind/t/001_basic.pl
new file mode 100644
index 0000000..d97e437
--- /dev/null
+++ b/src/bin/pg_rewind/t/001_basic.pl
@@ -0,0 +1,176 @@
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 20;
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+sub run_test
+{
+ my $test_mode = shift;
+
+ RewindTest::setup_cluster($test_mode);
+ RewindTest::start_master();
+
+ # Create a test table and insert a row in master.
+ master_psql("CREATE TABLE tbl1 (d text)");
+ master_psql("INSERT INTO tbl1 VALUES ('in master')");
+
+ # This test table will be used to test truncation, i.e. the table
+ # is extended in the old master after promotion
+ master_psql("CREATE TABLE trunc_tbl (d text)");
+ master_psql("INSERT INTO trunc_tbl VALUES ('in master')");
+
+ # This test table will be used to test the "copy-tail" case, i.e. the
+ # table is truncated in the old master after promotion
+ master_psql("CREATE TABLE tail_tbl (id integer, d text)");
+ master_psql("INSERT INTO tail_tbl VALUES (0, 'in master')");
+
+ master_psql("CHECKPOINT");
+
+ RewindTest::create_standby($test_mode);
+
+ # Insert additional data on master that will be replicated to standby
+ master_psql("INSERT INTO tbl1 values ('in master, before promotion')");
+ master_psql(
+ "INSERT INTO trunc_tbl values ('in master, before promotion')");
+ master_psql(
+ "INSERT INTO tail_tbl SELECT g, 'in master, before promotion: ' || g FROM generate_series(1, 10000) g"
+ );
+
+ master_psql('CHECKPOINT');
+
+ RewindTest::promote_standby();
+
+ # Insert a row in the old master. This causes the master and standby
+ # to have "diverged", it's no longer possible to just apply the
+ # standy's logs over master directory - you need to rewind.
+ master_psql("INSERT INTO tbl1 VALUES ('in master, after promotion')");
+
+ # Also insert a new row in the standby, which won't be present in the
+ # old master.
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')");
+
+ # Insert enough rows to trunc_tbl to extend the file. pg_rewind should
+ # truncate it back to the old size.
+ master_psql(
+ "INSERT INTO trunc_tbl SELECT 'in master, after promotion: ' || g FROM generate_series(1, 10000) g"
+ );
+
+ # Truncate tail_tbl. pg_rewind should copy back the truncated part
+ # (We cannot use an actual TRUNCATE command here, as that creates a
+ # whole new relfilenode)
+ master_psql("DELETE FROM tail_tbl WHERE id > 10");
+ master_psql("VACUUM tail_tbl");
+
+ # Before running pg_rewind, do a couple of extra tests with several
+ # option combinations. As the code paths taken by those tests
+ # do not change for the "local" and "remote" modes, just run them
+ # in "local" mode for simplicity's sake.
+ if ($test_mode eq 'local')
+ {
+ my $master_pgdata = $node_master->data_dir;
+ my $standby_pgdata = $node_standby->data_dir;
+
+ # First check that pg_rewind fails if the target cluster is
+ # not stopped as it fails to start up for the forced recovery
+ # step.
+ command_fails(
+ [
+ 'pg_rewind', '--debug',
+ '--source-pgdata', $standby_pgdata,
+ '--target-pgdata', $master_pgdata,
+ '--no-sync'
+ ],
+ 'pg_rewind with running target');
+
+ # Again with --no-ensure-shutdown, which should equally fail.
+ # This time pg_rewind complains without attempting to perform
+ # recovery once.
+ command_fails(
+ [
+ 'pg_rewind', '--debug',
+ '--source-pgdata', $standby_pgdata,
+ '--target-pgdata', $master_pgdata,
+ '--no-sync', '--no-ensure-shutdown'
+ ],
+ 'pg_rewind --no-ensure-shutdown with running target');
+
+ # Stop the target, and attempt to run with a local source
+ # still running. This fails as pg_rewind requires to have
+ # a source cleanly stopped.
+ $node_master->stop;
+ command_fails(
+ [
+ 'pg_rewind', '--debug',
+ '--source-pgdata', $standby_pgdata,
+ '--target-pgdata', $master_pgdata,
+ '--no-sync', '--no-ensure-shutdown'
+ ],
+ 'pg_rewind with unexpected running source');
+
+ # Stop the target cluster cleanly, and run again pg_rewind
+ # with --dry-run mode. If anything gets generated in the data
+ # folder, the follow-up run of pg_rewind will most likely fail,
+ # so keep this test as the last one of this subset.
+ $node_standby->stop;
+ command_ok(
+ [
+ 'pg_rewind', '--debug',
+ '--source-pgdata', $standby_pgdata,
+ '--target-pgdata', $master_pgdata,
+ '--no-sync', '--dry-run'
+ ],
+ 'pg_rewind --dry-run');
+
+ # Both clusters need to be alive moving forward.
+ $node_standby->start;
+ $node_master->start;
+ }
+
+ RewindTest::run_pg_rewind($test_mode);
+
+ check_query(
+ 'SELECT * FROM tbl1',
+ qq(in master
+in master, before promotion
+in standby, after promotion
+),
+ 'table content');
+
+ check_query(
+ 'SELECT * FROM trunc_tbl',
+ qq(in master
+in master, before promotion
+),
+ 'truncation');
+
+ check_query(
+ 'SELECT count(*) FROM tail_tbl',
+ qq(10001
+),
+ 'tail-copy');
+
+ # Permissions on PGDATA should be default
+ SKIP:
+ {
+ skip "unix-style permissions not supported on Windows", 1
+ if ($windows_os);
+
+ ok(check_mode_recursive($node_master->data_dir(), 0700, 0600),
+ 'check PGDATA permissions');
+ }
+
+ RewindTest::clean_rewind_test();
+ return;
+}
+
+# Run the test in both modes
+run_test('local');
+run_test('remote');
+run_test('archive');
+
+exit(0);
diff --git a/src/bin/pg_rewind/t/002_databases.pl b/src/bin/pg_rewind/t/002_databases.pl
new file mode 100644
index 0000000..1db534c
--- /dev/null
+++ b/src/bin/pg_rewind/t/002_databases.pl
@@ -0,0 +1,74 @@
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 7;
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+sub run_test
+{
+ my $test_mode = shift;
+
+ RewindTest::setup_cluster($test_mode, ['-g']);
+ RewindTest::start_master();
+
+ # Create a database in master with a table.
+ master_psql('CREATE DATABASE inmaster');
+ master_psql('CREATE TABLE inmaster_tab (a int)', 'inmaster');
+
+ RewindTest::create_standby($test_mode);
+
+ # Create another database with another table, the creation is
+ # replicated to the standby.
+ master_psql('CREATE DATABASE beforepromotion');
+ master_psql('CREATE TABLE beforepromotion_tab (a int)',
+ 'beforepromotion');
+
+ RewindTest::promote_standby();
+
+ # Create databases in the old master and the new promoted standby.
+ master_psql('CREATE DATABASE master_afterpromotion');
+ master_psql('CREATE TABLE master_promotion_tab (a int)',
+ 'master_afterpromotion');
+ standby_psql('CREATE DATABASE standby_afterpromotion');
+ standby_psql('CREATE TABLE standby_promotion_tab (a int)',
+ 'standby_afterpromotion');
+
+ # The clusters are now diverged.
+
+ RewindTest::run_pg_rewind($test_mode);
+
+ # Check that the correct databases are present after pg_rewind.
+ check_query(
+ 'SELECT datname FROM pg_database ORDER BY 1',
+ qq(beforepromotion
+inmaster
+postgres
+standby_afterpromotion
+template0
+template1
+),
+ 'database names');
+
+ # Permissions on PGDATA should have group permissions
+ SKIP:
+ {
+ skip "unix-style permissions not supported on Windows", 1
+ if ($windows_os);
+
+ ok(check_mode_recursive($node_master->data_dir(), 0750, 0640),
+ 'check PGDATA permissions');
+ }
+
+ RewindTest::clean_rewind_test();
+ return;
+}
+
+# Run the test in both modes.
+run_test('local');
+run_test('remote');
+
+exit(0);
diff --git a/src/bin/pg_rewind/t/003_extrafiles.pl b/src/bin/pg_rewind/t/003_extrafiles.pl
new file mode 100644
index 0000000..f471044
--- /dev/null
+++ b/src/bin/pg_rewind/t/003_extrafiles.pl
@@ -0,0 +1,94 @@
+# Test how pg_rewind reacts to extra files and directories in the data dirs.
+
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 5;
+
+use File::Find;
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+
+sub run_test
+{
+ my $test_mode = shift;
+
+ RewindTest::setup_cluster($test_mode);
+ RewindTest::start_master();
+
+ my $test_master_datadir = $node_master->data_dir;
+
+ # Create a subdir and files that will be present in both
+ mkdir "$test_master_datadir/tst_both_dir";
+ append_to_file "$test_master_datadir/tst_both_dir/both_file1", "in both1";
+ append_to_file "$test_master_datadir/tst_both_dir/both_file2", "in both2";
+ mkdir "$test_master_datadir/tst_both_dir/both_subdir/";
+ append_to_file "$test_master_datadir/tst_both_dir/both_subdir/both_file3",
+ "in both3";
+
+ RewindTest::create_standby($test_mode);
+
+ # Create different subdirs and files in master and standby
+ my $test_standby_datadir = $node_standby->data_dir;
+
+ mkdir "$test_standby_datadir/tst_standby_dir";
+ append_to_file "$test_standby_datadir/tst_standby_dir/standby_file1",
+ "in standby1";
+ append_to_file "$test_standby_datadir/tst_standby_dir/standby_file2",
+ "in standby2";
+ mkdir "$test_standby_datadir/tst_standby_dir/standby_subdir/";
+ append_to_file
+ "$test_standby_datadir/tst_standby_dir/standby_subdir/standby_file3",
+ "in standby3";
+
+ mkdir "$test_master_datadir/tst_master_dir";
+ append_to_file "$test_master_datadir/tst_master_dir/master_file1",
+ "in master1";
+ append_to_file "$test_master_datadir/tst_master_dir/master_file2",
+ "in master2";
+ mkdir "$test_master_datadir/tst_master_dir/master_subdir/";
+ append_to_file
+ "$test_master_datadir/tst_master_dir/master_subdir/master_file3",
+ "in master3";
+
+ RewindTest::promote_standby();
+ RewindTest::run_pg_rewind($test_mode);
+
+ # List files in the data directory after rewind.
+ my @paths;
+ find(
+ sub {
+ push @paths, $File::Find::name
+ if $File::Find::name =~ m/.*tst_.*/;
+ },
+ $test_master_datadir);
+ @paths = sort @paths;
+ is_deeply(
+ \@paths,
+ [
+ "$test_master_datadir/tst_both_dir",
+ "$test_master_datadir/tst_both_dir/both_file1",
+ "$test_master_datadir/tst_both_dir/both_file2",
+ "$test_master_datadir/tst_both_dir/both_subdir",
+ "$test_master_datadir/tst_both_dir/both_subdir/both_file3",
+ "$test_master_datadir/tst_standby_dir",
+ "$test_master_datadir/tst_standby_dir/standby_file1",
+ "$test_master_datadir/tst_standby_dir/standby_file2",
+ "$test_master_datadir/tst_standby_dir/standby_subdir",
+ "$test_master_datadir/tst_standby_dir/standby_subdir/standby_file3"
+ ],
+ "file lists match");
+
+ RewindTest::clean_rewind_test();
+ return;
+}
+
+# Run the test in both modes.
+run_test('local');
+run_test('remote');
+
+exit(0);
diff --git a/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl b/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl
new file mode 100644
index 0000000..639eeb9
--- /dev/null
+++ b/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl
@@ -0,0 +1,85 @@
+#
+# Test pg_rewind when the target's pg_wal directory is a symlink.
+#
+use strict;
+use warnings;
+use File::Copy;
+use File::Path qw(rmtree);
+use TestLib;
+use Test::More;
+if ($windows_os)
+{
+ plan skip_all => 'symlinks not supported on Windows';
+ exit;
+}
+else
+{
+ plan tests => 5;
+}
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+sub run_test
+{
+ my $test_mode = shift;
+
+ my $master_xlogdir = "${TestLib::tmp_check}/xlog_master";
+
+ rmtree($master_xlogdir);
+ RewindTest::setup_cluster($test_mode);
+
+ my $test_master_datadir = $node_master->data_dir;
+
+ # turn pg_wal into a symlink
+ print("moving $test_master_datadir/pg_wal to $master_xlogdir\n");
+ move("$test_master_datadir/pg_wal", $master_xlogdir) or die;
+ symlink($master_xlogdir, "$test_master_datadir/pg_wal") or die;
+
+ RewindTest::start_master();
+
+ # Create a test table and insert a row in master.
+ master_psql("CREATE TABLE tbl1 (d text)");
+ master_psql("INSERT INTO tbl1 VALUES ('in master')");
+
+ master_psql("CHECKPOINT");
+
+ RewindTest::create_standby($test_mode);
+
+ # Insert additional data on master that will be replicated to standby
+ master_psql("INSERT INTO tbl1 values ('in master, before promotion')");
+
+ master_psql('CHECKPOINT');
+
+ RewindTest::promote_standby();
+
+ # Insert a row in the old master. This causes the master and standby
+ # to have "diverged", it's no longer possible to just apply the
+ # standy's logs over master directory - you need to rewind.
+ master_psql("INSERT INTO tbl1 VALUES ('in master, after promotion')");
+
+ # Also insert a new row in the standby, which won't be present in the
+ # old master.
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')");
+
+ RewindTest::run_pg_rewind($test_mode);
+
+ check_query(
+ 'SELECT * FROM tbl1',
+ qq(in master
+in master, before promotion
+in standby, after promotion
+),
+ 'table content');
+
+ RewindTest::clean_rewind_test();
+ return;
+}
+
+# Run the test in both modes
+run_test('local');
+run_test('remote');
+
+exit(0);
diff --git a/src/bin/pg_rewind/t/005_same_timeline.pl b/src/bin/pg_rewind/t/005_same_timeline.pl
new file mode 100644
index 0000000..5464f42
--- /dev/null
+++ b/src/bin/pg_rewind/t/005_same_timeline.pl
@@ -0,0 +1,20 @@
+#
+# Test that running pg_rewind with the source and target clusters
+# on the same timeline runs successfully.
+#
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 1;
+
+use FindBin;
+use lib $FindBin::RealBin;
+
+use RewindTest;
+
+RewindTest::setup_cluster();
+RewindTest::start_master();
+RewindTest::create_standby();
+RewindTest::run_pg_rewind('local');
+RewindTest::clean_rewind_test();
+exit(0);
diff --git a/src/bin/pg_rewind/t/006_options.pl b/src/bin/pg_rewind/t/006_options.pl
new file mode 100644
index 0000000..1515696
--- /dev/null
+++ b/src/bin/pg_rewind/t/006_options.pl
@@ -0,0 +1,40 @@
+#
+# Test checking options of pg_rewind.
+#
+use strict;
+use warnings;
+use TestLib;
+use Test::More tests => 12;
+
+program_help_ok('pg_rewind');
+program_version_ok('pg_rewind');
+program_options_handling_ok('pg_rewind');
+
+my $primary_pgdata = TestLib::tempdir;
+my $standby_pgdata = TestLib::tempdir;
+command_fails(
+ [
+ 'pg_rewind', '--debug',
+ '--target-pgdata', $primary_pgdata,
+ '--source-pgdata', $standby_pgdata,
+ 'extra_arg1'
+ ],
+ 'too many arguments');
+command_fails([ 'pg_rewind', '--target-pgdata', $primary_pgdata ],
+ 'no source specified');
+command_fails(
+ [
+ 'pg_rewind', '--debug',
+ '--target-pgdata', $primary_pgdata,
+ '--source-pgdata', $standby_pgdata,
+ '--source-server', 'incorrect_source'
+ ],
+ 'both remote and local sources specified');
+command_fails(
+ [
+ 'pg_rewind', '--debug',
+ '--target-pgdata', $primary_pgdata,
+ '--source-pgdata', $standby_pgdata,
+ '--write-recovery-conf'
+ ],
+ 'no local source with --write-recovery-conf');
diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl
new file mode 100644
index 0000000..e3d94b3
--- /dev/null
+++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl
@@ -0,0 +1,171 @@
+#
+# Test situation where a target data directory contains
+# WAL records beyond both the last checkpoint and the divergence
+# point:
+#
+# Target WAL (TLI 2):
+#
+# backup ... Checkpoint A ... INSERT 'rewind this'
+# (TLI 1 -> 2)
+#
+# ^ last common ^ minRecoveryPoint
+# checkpoint
+#
+# Source WAL (TLI 3):
+#
+# backup ... Checkpoint A ... Checkpoint B ... INSERT 'keep this'
+# (TLI 1 -> 2) (TLI 2 -> 3)
+#
+#
+# The last common checkpoint is Checkpoint A. But there is WAL on TLI 2
+# after the last common checkpoint that needs to be rewound. We used to
+# have a bug where minRecoveryPoint was ignored, and pg_rewind concluded
+# that the target doesn't need rewinding in this scenario, because the
+# last checkpoint on the target TLI was an ancestor of the source TLI.
+#
+#
+# This test does not make use of RewindTest as it requires three
+# nodes.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 3;
+
+use File::Copy;
+
+my $tmp_folder = TestLib::tempdir;
+
+my $node_1 = get_new_node('node_1');
+$node_1->init(allows_streaming => 1);
+$node_1->append_conf('postgresql.conf', qq(
+wal_keep_size='100 MB'
+));
+
+$node_1->start;
+
+# Create a couple of test tables
+$node_1->safe_psql('postgres', 'CREATE TABLE public.foo (t TEXT)');
+$node_1->safe_psql('postgres', 'CREATE TABLE public.bar (t TEXT)');
+$node_1->safe_psql('postgres', "INSERT INTO public.bar VALUES ('in both')");
+
+#
+# Create node_2 and node_3 as standbys following node_1
+#
+my $backup_name = 'my_backup';
+$node_1->backup($backup_name);
+
+my $node_2 = get_new_node('node_2');
+$node_2->init_from_backup($node_1, $backup_name,
+ has_streaming => 1);
+$node_2->start;
+
+my $node_3 = get_new_node('node_3');
+$node_3->init_from_backup($node_1, $backup_name,
+ has_streaming => 1);
+$node_3->start;
+
+# Wait until node 3 has connected and caught up
+my $lsn = $node_1->lsn('insert');
+$node_1->wait_for_catchup('node_3', 'replay', $lsn);
+
+#
+# Swap the roles of node_1 and node_3, so that node_1 follows node_3.
+#
+$node_1->stop('fast');
+$node_3->promote;
+# Force a checkpoint after the promotion. pg_rewind looks at the control
+# file to determine what timeline the server is on, and that isn't updated
+# immediately at promotion, but only at the next checkpoint. When running
+# pg_rewind in remote mode, it's possible that we complete the test steps
+# after promotion so quickly that when pg_rewind runs, the standby has not
+# performed a checkpoint after promotion yet.
+$node_3->safe_psql('postgres', "checkpoint");
+
+# reconfigure node_1 as a standby following node_3
+my $node_3_connstr = $node_3->connstr;
+$node_1->append_conf('postgresql.conf', qq(
+primary_conninfo='$node_3_connstr'
+));
+$node_1->set_standby_mode();
+$node_1->start();
+
+# also reconfigure node_2 to follow node_3
+$node_2->append_conf('postgresql.conf', qq(
+primary_conninfo='$node_3_connstr'
+));
+$node_2->restart();
+
+#
+# Promote node_1, to create a split-brain scenario.
+#
+
+# make sure node_1 is full caught up with node_3 first
+$lsn = $node_3->lsn('insert');
+$node_3->wait_for_catchup('node_1', 'replay', $lsn);
+
+$node_1->promote;
+# Force a checkpoint after promotion, like earlier.
+$node_1->safe_psql('postgres', "checkpoint");
+
+#
+# We now have a split-brain with two primaries. Insert a row on both to
+# demonstratively create a split brain. After the rewind, we should only
+# see the insert on 1, as the insert on node 3 is rewound away.
+#
+$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('keep this')");
+# 'bar' is unmodified in node 1, so it won't be overwritten by replaying the
+# WAL from node 1.
+$node_3->safe_psql('postgres', "INSERT INTO public.bar (t) VALUES ('rewind this')");
+
+# Insert more rows in node 1, to bump up the XID counter. Otherwise, if
+# rewind doesn't correctly rewind the changes made on the other node,
+# we might fail to notice if the inserts are invisible because the XIDs
+# are not marked as committed.
+$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('and this')");
+$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('and this too')");
+
+# Wait for node 2 to catch up
+$node_2->poll_query_until('postgres',
+ q|SELECT COUNT(*) > 1 FROM public.bar|, 't');
+
+# At this point node_2 will shut down without a shutdown checkpoint,
+# but with WAL entries beyond the preceding shutdown checkpoint.
+$node_2->stop('fast');
+$node_3->stop('fast');
+
+my $node_2_pgdata = $node_2->data_dir;
+my $node_1_connstr = $node_1->connstr;
+
+# Keep a temporary postgresql.conf or it would be overwritten during the rewind.
+copy(
+ "$node_2_pgdata/postgresql.conf",
+ "$tmp_folder/node_2-postgresql.conf.tmp");
+
+command_ok(
+ [
+ 'pg_rewind',
+ "--source-server=$node_1_connstr",
+ "--target-pgdata=$node_2_pgdata",
+ "--debug"
+ ],
+ 'run pg_rewind');
+
+# Now move back postgresql.conf with old settings
+move(
+ "$tmp_folder/node_2-postgresql.conf.tmp",
+ "$node_2_pgdata/postgresql.conf");
+
+$node_2->start;
+
+# Check contents of the test tables after rewind. The rows inserted in node 3
+# before rewind should've been overwritten with the data from node 1.
+my $result;
+$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.foo');
+is($result, qq(keep this
+and this
+and this too), 'table foo after rewind');
+
+$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.bar');
+is($result, qq(in both), 'table bar after rewind');
diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm
new file mode 100644
index 0000000..422a563
--- /dev/null
+++ b/src/bin/pg_rewind/t/RewindTest.pm
@@ -0,0 +1,394 @@
+package RewindTest;
+
+# Test driver for pg_rewind. Each test consists of a cycle where a new cluster
+# is first created with initdb, and a streaming replication standby is set up
+# to follow the master. Then the master is shut down and the standby is
+# promoted, and finally pg_rewind is used to rewind the old master, using the
+# standby as the source.
+#
+# To run a test, the test script (in t/ subdirectory) calls the functions
+# in this module. These functions should be called in this sequence:
+#
+# 1. setup_cluster - creates a PostgreSQL cluster that runs as the master
+#
+# 2. start_master - starts the master server
+#
+# 3. create_standby - runs pg_basebackup to initialize a standby server, and
+# sets it up to follow the master.
+#
+# 4. promote_standby - runs "pg_ctl promote" to promote the standby server.
+# The old master keeps running.
+#
+# 5. run_pg_rewind - stops the old master (if it's still running) and runs
+# pg_rewind to synchronize it with the now-promoted standby server.
+#
+# 6. clean_rewind_test - stops both servers used in the test, if they're
+# still running.
+#
+# The test script can use the helper functions master_psql and standby_psql
+# to run psql against the master and standby servers, respectively.
+
+use strict;
+use warnings;
+
+use Carp;
+use Config;
+use Exporter 'import';
+use File::Copy;
+use File::Path qw(rmtree);
+use IPC::Run qw(run);
+use PostgresNode;
+use RecursiveCopy;
+use TestLib;
+use Test::More;
+
+our @EXPORT = qw(
+ $node_master
+ $node_standby
+
+ master_psql
+ standby_psql
+ check_query
+
+ setup_cluster
+ start_master
+ create_standby
+ promote_standby
+ run_pg_rewind
+ clean_rewind_test
+);
+
+# Our nodes.
+our $node_master;
+our $node_standby;
+
+sub master_psql
+{
+ my $cmd = shift;
+ my $dbname = shift || 'postgres';
+
+ system_or_bail 'psql', '-q', '--no-psqlrc', '-d',
+ $node_master->connstr($dbname), '-c', "$cmd";
+ return;
+}
+
+sub standby_psql
+{
+ my $cmd = shift;
+ my $dbname = shift || 'postgres';
+
+ system_or_bail 'psql', '-q', '--no-psqlrc', '-d',
+ $node_standby->connstr($dbname), '-c', "$cmd";
+ return;
+}
+
+# Run a query against the master, and check that the output matches what's
+# expected
+sub check_query
+{
+ local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+ my ($query, $expected_stdout, $test_name) = @_;
+ my ($stdout, $stderr);
+
+ # we want just the output, no formatting
+ my $result = run [
+ 'psql', '-q', '-A', '-t', '--no-psqlrc', '-d',
+ $node_master->connstr('postgres'),
+ '-c', $query
+ ],
+ '>', \$stdout, '2>', \$stderr;
+
+ # We don't use ok() for the exit code and stderr, because we want this
+ # check to be just a single test.
+ if (!$result)
+ {
+ fail("$test_name: psql exit code");
+ }
+ elsif ($stderr ne '')
+ {
+ diag $stderr;
+ fail("$test_name: psql no stderr");
+ }
+ else
+ {
+ $stdout =~ s/\r\n/\n/g if $Config{osname} eq 'msys';
+ is($stdout, $expected_stdout, "$test_name: query result matches");
+ }
+ return;
+}
+
+sub setup_cluster
+{
+ my $extra_name = shift; # Used to differentiate clusters
+ my $extra = shift; # Extra params for initdb
+
+ # Initialize master, data checksums are mandatory
+ $node_master =
+ get_new_node('master' . ($extra_name ? "_${extra_name}" : ''));
+
+ # Set up pg_hba.conf and pg_ident.conf for the role running
+ # pg_rewind. This role is used for all the tests, and has
+ # minimal permissions enough to rewind from an online source.
+ $node_master->init(
+ allows_streaming => 1,
+ extra => $extra,
+ auth_extra => [ '--create-role', 'rewind_user' ]);
+
+ # Set wal_keep_size to prevent WAL segment recycling after enforced
+ # checkpoints in the tests.
+ $node_master->append_conf(
+ 'postgresql.conf', qq(
+wal_keep_size = 320MB
+));
+ return;
+}
+
+sub start_master
+{
+ $node_master->start;
+
+ # Create custom role which is used to run pg_rewind, and adjust its
+ # permissions to the minimum necessary.
+ $node_master->safe_psql(
+ 'postgres', "
+ CREATE ROLE rewind_user LOGIN;
+ GRANT EXECUTE ON function pg_catalog.pg_ls_dir(text, boolean, boolean)
+ TO rewind_user;
+ GRANT EXECUTE ON function pg_catalog.pg_stat_file(text, boolean)
+ TO rewind_user;
+ GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text)
+ TO rewind_user;
+ GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean)
+ TO rewind_user;");
+
+ #### Now run the test-specific parts to initialize the master before setting
+ # up standby
+
+ return;
+}
+
+sub create_standby
+{
+ my $extra_name = shift;
+
+ $node_standby =
+ get_new_node('standby' . ($extra_name ? "_${extra_name}" : ''));
+ $node_master->backup('my_backup');
+ $node_standby->init_from_backup($node_master, 'my_backup');
+ my $connstr_master = $node_master->connstr();
+
+ $node_standby->append_conf(
+ "postgresql.conf", qq(
+primary_conninfo='$connstr_master'
+));
+
+ $node_standby->set_standby_mode();
+
+ # Start standby
+ $node_standby->start;
+
+ # The standby may have WAL to apply before it matches the primary. That
+ # is fine, because no test examines the standby before promotion.
+
+ return;
+}
+
+sub promote_standby
+{
+ #### Now run the test-specific parts to run after standby has been started
+ # up standby
+
+ # Wait for the standby to receive and write all WAL.
+ $node_master->wait_for_catchup($node_standby, 'write');
+
+ # Now promote standby and insert some new data on master, this will put
+ # the master out-of-sync with the standby.
+ $node_standby->promote;
+
+ # Force a checkpoint after the promotion. pg_rewind looks at the control
+ # file to determine what timeline the server is on, and that isn't updated
+ # immediately at promotion, but only at the next checkpoint. When running
+ # pg_rewind in remote mode, it's possible that we complete the test steps
+ # after promotion so quickly that when pg_rewind runs, the standby has not
+ # performed a checkpoint after promotion yet.
+ standby_psql("checkpoint");
+
+ return;
+}
+
+sub run_pg_rewind
+{
+ my $test_mode = shift;
+ my $master_pgdata = $node_master->data_dir;
+ my $standby_pgdata = $node_standby->data_dir;
+ my $standby_connstr = $node_standby->connstr('postgres');
+ my $tmp_folder = TestLib::tempdir;
+
+ # Append the rewind-specific role to the connection string.
+ $standby_connstr = "$standby_connstr user=rewind_user";
+
+ if ($test_mode eq 'archive')
+ {
+ # pg_rewind is tested with --restore-target-wal by moving all
+ # WAL files to a secondary location. Note that this leads to
+ # a failure in ensureCleanShutdown(), forcing to the use of
+ # --no-ensure-shutdown in this mode as the initial set of WAL
+ # files needed to ensure a clean restart is gone. This could
+ # be improved by keeping around only a minimum set of WAL
+ # segments but that would just make the test more costly,
+ # without improving the coverage. Hence, instead, stop
+ # gracefully the primary here.
+ $node_master->stop;
+ }
+ else
+ {
+ # Stop the master and be ready to perform the rewind. The cluster
+ # needs recovery to finish once, and pg_rewind makes sure that it
+ # happens automatically.
+ $node_master->stop('immediate');
+ }
+
+ # At this point, the rewind processing is ready to run.
+ # We now have a very simple scenario with a few diverged WAL record.
+ # The real testing begins really now with a bifurcation of the possible
+ # scenarios that pg_rewind supports.
+
+ # Keep a temporary postgresql.conf for master node or it would be
+ # overwritten during the rewind.
+ copy(
+ "$master_pgdata/postgresql.conf",
+ "$tmp_folder/master-postgresql.conf.tmp");
+
+ # Now run pg_rewind
+ if ($test_mode eq "local")
+ {
+
+ # Do rewind using a local pgdata as source
+ # Stop the master and be ready to perform the rewind
+ $node_standby->stop;
+ command_ok(
+ [
+ 'pg_rewind',
+ "--debug",
+ "--source-pgdata=$standby_pgdata",
+ "--target-pgdata=$master_pgdata",
+ "--no-sync"
+ ],
+ 'pg_rewind local');
+ }
+ elsif ($test_mode eq "remote")
+ {
+ # Do rewind using a remote connection as source, generating
+ # recovery configuration automatically.
+ command_ok(
+ [
+ 'pg_rewind', "--debug",
+ "--source-server", $standby_connstr,
+ "--target-pgdata=$master_pgdata", "--no-sync",
+ "--write-recovery-conf"
+ ],
+ 'pg_rewind remote');
+
+ # Check that standby.signal is here as recovery configuration
+ # was requested.
+ ok( -e "$master_pgdata/standby.signal",
+ 'standby.signal created after pg_rewind');
+
+ # Now, when pg_rewind apparently succeeded with minimal permissions,
+ # add REPLICATION privilege. So we could test that new standby
+ # is able to connect to the new master with generated config.
+ $node_standby->safe_psql('postgres',
+ "ALTER ROLE rewind_user WITH REPLICATION;");
+ }
+ elsif ($test_mode eq "archive")
+ {
+
+ # Do rewind using a local pgdata as source and specified
+ # directory with target WAL archive. The old master has
+ # to be stopped at this point.
+
+ # Remove the existing archive directory and move all WAL
+ # segments from the old master to the archives. These
+ # will be used by pg_rewind.
+ rmtree($node_master->archive_dir);
+ RecursiveCopy::copypath($node_master->data_dir . "/pg_wal",
+ $node_master->archive_dir);
+
+ # Fast way to remove entire directory content
+ rmtree($node_master->data_dir . "/pg_wal");
+ mkdir($node_master->data_dir . "/pg_wal");
+
+ # Make sure that directories have the right umask as this is
+ # required by a follow-up check on permissions, and better
+ # safe than sorry.
+ chmod(0700, $node_master->archive_dir);
+ chmod(0700, $node_master->data_dir . "/pg_wal");
+
+ # Add appropriate restore_command to the target cluster
+ $node_master->enable_restoring($node_master, 0);
+
+ # Stop the new master and be ready to perform the rewind.
+ $node_standby->stop;
+
+ # Note the use of --no-ensure-shutdown here. WAL files are
+ # gone in this mode and the primary has been stopped
+ # gracefully already.
+ command_ok(
+ [
+ 'pg_rewind',
+ "--debug",
+ "--source-pgdata=$standby_pgdata",
+ "--target-pgdata=$master_pgdata",
+ "--no-sync",
+ "--no-ensure-shutdown",
+ "--restore-target-wal"
+ ],
+ 'pg_rewind archive');
+ }
+ else
+ {
+
+ # Cannot come here normally
+ croak("Incorrect test mode specified");
+ }
+
+ # Now move back postgresql.conf with old settings
+ move(
+ "$tmp_folder/master-postgresql.conf.tmp",
+ "$master_pgdata/postgresql.conf");
+
+ chmod(
+ $node_master->group_access() ? 0640 : 0600,
+ "$master_pgdata/postgresql.conf")
+ or BAIL_OUT(
+ "unable to set permissions for $master_pgdata/postgresql.conf");
+
+ # Plug-in rewound node to the now-promoted standby node
+ if ($test_mode ne "remote")
+ {
+ my $port_standby = $node_standby->port;
+ $node_master->append_conf(
+ 'postgresql.conf', qq(
+primary_conninfo='port=$port_standby'));
+
+ $node_master->set_standby_mode();
+ }
+
+ # Restart the master to check that rewind went correctly
+ $node_master->start;
+
+ #### Now run the test-specific parts to check the result
+
+ return;
+}
+
+# Clean up after the test. Stop both servers, if they're still running.
+sub clean_rewind_test
+{
+ $node_master->teardown_node if defined $node_master;
+ $node_standby->teardown_node if defined $node_standby;
+ return;
+}
+
+1;