diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:19:15 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:19:15 +0000 |
commit | 6eb9c5a5657d1fe77b55cc261450f3538d35a94d (patch) | |
tree | 657d8194422a5daccecfd42d654b8a245ef7b4c8 /src/bin/pg_rewind/t | |
parent | Initial commit. (diff) | |
download | postgresql-13-upstream.tar.xz postgresql-13-upstream.zip |
Adding upstream version 13.4. upstream/13.4 upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/bin/pg_rewind/t')
-rw-r--r-- | src/bin/pg_rewind/t/001_basic.pl | 176 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/002_databases.pl | 74 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/003_extrafiles.pl | 94 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/004_pg_xlog_symlink.pl | 85 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/005_same_timeline.pl | 20 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/006_options.pl | 40 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/008_min_recovery_point.pl | 171 | ||||
-rw-r--r-- | src/bin/pg_rewind/t/RewindTest.pm | 394 |
8 files changed, 1054 insertions, 0 deletions
diff --git a/src/bin/pg_rewind/t/001_basic.pl b/src/bin/pg_rewind/t/001_basic.pl new file mode 100644 index 0000000..d97e437 --- /dev/null +++ b/src/bin/pg_rewind/t/001_basic.pl @@ -0,0 +1,176 @@ +use strict; +use warnings; +use TestLib; +use Test::More tests => 20; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +sub run_test +{ + my $test_mode = shift; + + RewindTest::setup_cluster($test_mode); + RewindTest::start_master(); + + # Create a test table and insert a row in master. + master_psql("CREATE TABLE tbl1 (d text)"); + master_psql("INSERT INTO tbl1 VALUES ('in master')"); + + # This test table will be used to test truncation, i.e. the table + # is extended in the old master after promotion + master_psql("CREATE TABLE trunc_tbl (d text)"); + master_psql("INSERT INTO trunc_tbl VALUES ('in master')"); + + # This test table will be used to test the "copy-tail" case, i.e. the + # table is truncated in the old master after promotion + master_psql("CREATE TABLE tail_tbl (id integer, d text)"); + master_psql("INSERT INTO tail_tbl VALUES (0, 'in master')"); + + master_psql("CHECKPOINT"); + + RewindTest::create_standby($test_mode); + + # Insert additional data on master that will be replicated to standby + master_psql("INSERT INTO tbl1 values ('in master, before promotion')"); + master_psql( + "INSERT INTO trunc_tbl values ('in master, before promotion')"); + master_psql( + "INSERT INTO tail_tbl SELECT g, 'in master, before promotion: ' || g FROM generate_series(1, 10000) g" + ); + + master_psql('CHECKPOINT'); + + RewindTest::promote_standby(); + + # Insert a row in the old master. This causes the master and standby + # to have "diverged", it's no longer possible to just apply the + # standby's logs over master directory - you need to rewind. + master_psql("INSERT INTO tbl1 VALUES ('in master, after promotion')"); + + # Also insert a new row in the standby, which won't be present in the + # old master. 
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')"); + + # Insert enough rows to trunc_tbl to extend the file. pg_rewind should + # truncate it back to the old size. + master_psql( + "INSERT INTO trunc_tbl SELECT 'in master, after promotion: ' || g FROM generate_series(1, 10000) g" + ); + + # Truncate tail_tbl. pg_rewind should copy back the truncated part + # (We cannot use an actual TRUNCATE command here, as that creates a + # whole new relfilenode) + master_psql("DELETE FROM tail_tbl WHERE id > 10"); + master_psql("VACUUM tail_tbl"); + + # Before running pg_rewind, do a couple of extra tests with several + # option combinations. As the code paths taken by those tests + # do not change for the "local" and "remote" modes, just run them + # in "local" mode for simplicity's sake. + if ($test_mode eq 'local') + { + my $master_pgdata = $node_master->data_dir; + my $standby_pgdata = $node_standby->data_dir; + + # First check that pg_rewind fails if the target cluster is + # not stopped as it fails to start up for the forced recovery + # step. + command_fails( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $master_pgdata, + '--no-sync' + ], + 'pg_rewind with running target'); + + # Again with --no-ensure-shutdown, which should equally fail. + # This time pg_rewind complains without attempting to perform + # recovery once. + command_fails( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $master_pgdata, + '--no-sync', '--no-ensure-shutdown' + ], + 'pg_rewind --no-ensure-shutdown with running target'); + + # Stop the target, and attempt to run with a local source + # still running. This fails as pg_rewind requires to have + # a source cleanly stopped. 
+ $node_master->stop; + command_fails( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $master_pgdata, + '--no-sync', '--no-ensure-shutdown' + ], + 'pg_rewind with unexpected running source'); + + # Stop the source cluster cleanly, and run pg_rewind again + # with --dry-run mode. If anything gets generated in the data + # folder, the follow-up run of pg_rewind will most likely fail, + # so keep this test as the last one of this subset. + $node_standby->stop; + command_ok( + [ + 'pg_rewind', '--debug', + '--source-pgdata', $standby_pgdata, + '--target-pgdata', $master_pgdata, + '--no-sync', '--dry-run' + ], + 'pg_rewind --dry-run'); + + # Both clusters need to be alive moving forward. + $node_standby->start; + $node_master->start; + } + + RewindTest::run_pg_rewind($test_mode); + + check_query( + 'SELECT * FROM tbl1', + qq(in master +in master, before promotion +in standby, after promotion +), + 'table content'); + + check_query( + 'SELECT * FROM trunc_tbl', + qq(in master +in master, before promotion +), + 'truncation'); + + check_query( + 'SELECT count(*) FROM tail_tbl', + qq(10001 +), + 'tail-copy'); + + # Permissions on PGDATA should be default + SKIP: + { + skip "unix-style permissions not supported on Windows", 1 + if ($windows_os); + + ok(check_mode_recursive($node_master->data_dir(), 0700, 0600), + 'check PGDATA permissions'); + } + + RewindTest::clean_rewind_test(); + return; +} + +# Run the test in all three modes +run_test('local'); +run_test('remote'); +run_test('archive'); + +exit(0); diff --git a/src/bin/pg_rewind/t/002_databases.pl b/src/bin/pg_rewind/t/002_databases.pl new file mode 100644 index 0000000..1db534c --- /dev/null +++ b/src/bin/pg_rewind/t/002_databases.pl @@ -0,0 +1,74 @@ +use strict; +use warnings; +use TestLib; +use Test::More tests => 7; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +sub run_test +{ + my $test_mode = shift; + + RewindTest::setup_cluster($test_mode, ['-g']); + 
RewindTest::start_master(); + + # Create a database in master with a table. + master_psql('CREATE DATABASE inmaster'); + master_psql('CREATE TABLE inmaster_tab (a int)', 'inmaster'); + + RewindTest::create_standby($test_mode); + + # Create another database with another table, the creation is + # replicated to the standby. + master_psql('CREATE DATABASE beforepromotion'); + master_psql('CREATE TABLE beforepromotion_tab (a int)', + 'beforepromotion'); + + RewindTest::promote_standby(); + + # Create databases in the old master and the new promoted standby. + master_psql('CREATE DATABASE master_afterpromotion'); + master_psql('CREATE TABLE master_promotion_tab (a int)', + 'master_afterpromotion'); + standby_psql('CREATE DATABASE standby_afterpromotion'); + standby_psql('CREATE TABLE standby_promotion_tab (a int)', + 'standby_afterpromotion'); + + # The clusters are now diverged. + + RewindTest::run_pg_rewind($test_mode); + + # Check that the correct databases are present after pg_rewind. + check_query( + 'SELECT datname FROM pg_database ORDER BY 1', + qq(beforepromotion +inmaster +postgres +standby_afterpromotion +template0 +template1 +), + 'database names'); + + # Permissions on PGDATA should have group permissions + SKIP: + { + skip "unix-style permissions not supported on Windows", 1 + if ($windows_os); + + ok(check_mode_recursive($node_master->data_dir(), 0750, 0640), + 'check PGDATA permissions'); + } + + RewindTest::clean_rewind_test(); + return; +} + +# Run the test in both modes. +run_test('local'); +run_test('remote'); + +exit(0); diff --git a/src/bin/pg_rewind/t/003_extrafiles.pl b/src/bin/pg_rewind/t/003_extrafiles.pl new file mode 100644 index 0000000..f471044 --- /dev/null +++ b/src/bin/pg_rewind/t/003_extrafiles.pl @@ -0,0 +1,94 @@ +# Test how pg_rewind reacts to extra files and directories in the data dirs. 
+ +use strict; +use warnings; +use TestLib; +use Test::More tests => 5; + +use File::Find; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + + +sub run_test +{ + my $test_mode = shift; + + RewindTest::setup_cluster($test_mode); + RewindTest::start_master(); + + my $test_master_datadir = $node_master->data_dir; + + # Create a subdir and files that will be present in both + mkdir "$test_master_datadir/tst_both_dir"; + append_to_file "$test_master_datadir/tst_both_dir/both_file1", "in both1"; + append_to_file "$test_master_datadir/tst_both_dir/both_file2", "in both2"; + mkdir "$test_master_datadir/tst_both_dir/both_subdir/"; + append_to_file "$test_master_datadir/tst_both_dir/both_subdir/both_file3", + "in both3"; + + RewindTest::create_standby($test_mode); + + # Create different subdirs and files in master and standby + my $test_standby_datadir = $node_standby->data_dir; + + mkdir "$test_standby_datadir/tst_standby_dir"; + append_to_file "$test_standby_datadir/tst_standby_dir/standby_file1", + "in standby1"; + append_to_file "$test_standby_datadir/tst_standby_dir/standby_file2", + "in standby2"; + mkdir "$test_standby_datadir/tst_standby_dir/standby_subdir/"; + append_to_file + "$test_standby_datadir/tst_standby_dir/standby_subdir/standby_file3", + "in standby3"; + + mkdir "$test_master_datadir/tst_master_dir"; + append_to_file "$test_master_datadir/tst_master_dir/master_file1", + "in master1"; + append_to_file "$test_master_datadir/tst_master_dir/master_file2", + "in master2"; + mkdir "$test_master_datadir/tst_master_dir/master_subdir/"; + append_to_file + "$test_master_datadir/tst_master_dir/master_subdir/master_file3", + "in master3"; + + RewindTest::promote_standby(); + RewindTest::run_pg_rewind($test_mode); + + # List files in the data directory after rewind. 
+ my @paths; + find( + sub { + push @paths, $File::Find::name + if $File::Find::name =~ m/.*tst_.*/; + }, + $test_master_datadir); + @paths = sort @paths; + is_deeply( + \@paths, + [ + "$test_master_datadir/tst_both_dir", + "$test_master_datadir/tst_both_dir/both_file1", + "$test_master_datadir/tst_both_dir/both_file2", + "$test_master_datadir/tst_both_dir/both_subdir", + "$test_master_datadir/tst_both_dir/both_subdir/both_file3", + "$test_master_datadir/tst_standby_dir", + "$test_master_datadir/tst_standby_dir/standby_file1", + "$test_master_datadir/tst_standby_dir/standby_file2", + "$test_master_datadir/tst_standby_dir/standby_subdir", + "$test_master_datadir/tst_standby_dir/standby_subdir/standby_file3" + ], + "file lists match"); + + RewindTest::clean_rewind_test(); + return; +} + +# Run the test in both modes. +run_test('local'); +run_test('remote'); + +exit(0); diff --git a/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl b/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl new file mode 100644 index 0000000..639eeb9 --- /dev/null +++ b/src/bin/pg_rewind/t/004_pg_xlog_symlink.pl @@ -0,0 +1,85 @@ +# +# Test pg_rewind when the target's pg_wal directory is a symlink. 
+# +use strict; +use warnings; +use File::Copy; +use File::Path qw(rmtree); +use TestLib; +use Test::More; +if ($windows_os) +{ + plan skip_all => 'symlinks not supported on Windows'; + exit; +} +else +{ + plan tests => 5; +} + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +sub run_test +{ + my $test_mode = shift; + + my $master_xlogdir = "${TestLib::tmp_check}/xlog_master"; + + rmtree($master_xlogdir); + RewindTest::setup_cluster($test_mode); + + my $test_master_datadir = $node_master->data_dir; + + # turn pg_wal into a symlink + print("moving $test_master_datadir/pg_wal to $master_xlogdir\n"); + move("$test_master_datadir/pg_wal", $master_xlogdir) or die; + symlink($master_xlogdir, "$test_master_datadir/pg_wal") or die; + + RewindTest::start_master(); + + # Create a test table and insert a row in master. + master_psql("CREATE TABLE tbl1 (d text)"); + master_psql("INSERT INTO tbl1 VALUES ('in master')"); + + master_psql("CHECKPOINT"); + + RewindTest::create_standby($test_mode); + + # Insert additional data on master that will be replicated to standby + master_psql("INSERT INTO tbl1 values ('in master, before promotion')"); + + master_psql('CHECKPOINT'); + + RewindTest::promote_standby(); + + # Insert a row in the old master. This causes the master and standby + # to have "diverged", it's no longer possible to just apply the + # standby's logs over master directory - you need to rewind. + master_psql("INSERT INTO tbl1 VALUES ('in master, after promotion')"); + + # Also insert a new row in the standby, which won't be present in the + # old master. 
+ standby_psql("INSERT INTO tbl1 VALUES ('in standby, after promotion')"); + + RewindTest::run_pg_rewind($test_mode); + + check_query( + 'SELECT * FROM tbl1', + qq(in master +in master, before promotion +in standby, after promotion +), + 'table content'); + + RewindTest::clean_rewind_test(); + return; +} + +# Run the test in both modes +run_test('local'); +run_test('remote'); + +exit(0); diff --git a/src/bin/pg_rewind/t/005_same_timeline.pl b/src/bin/pg_rewind/t/005_same_timeline.pl new file mode 100644 index 0000000..5464f42 --- /dev/null +++ b/src/bin/pg_rewind/t/005_same_timeline.pl @@ -0,0 +1,20 @@ +# +# Test that running pg_rewind with the source and target clusters +# on the same timeline runs successfully. +# +use strict; +use warnings; +use TestLib; +use Test::More tests => 1; + +use FindBin; +use lib $FindBin::RealBin; + +use RewindTest; + +RewindTest::setup_cluster(); +RewindTest::start_master(); +RewindTest::create_standby(); +RewindTest::run_pg_rewind('local'); +RewindTest::clean_rewind_test(); +exit(0); diff --git a/src/bin/pg_rewind/t/006_options.pl b/src/bin/pg_rewind/t/006_options.pl new file mode 100644 index 0000000..1515696 --- /dev/null +++ b/src/bin/pg_rewind/t/006_options.pl @@ -0,0 +1,40 @@ +# +# Test checking options of pg_rewind. 
+# +use strict; +use warnings; +use TestLib; +use Test::More tests => 12; + +program_help_ok('pg_rewind'); +program_version_ok('pg_rewind'); +program_options_handling_ok('pg_rewind'); + +my $primary_pgdata = TestLib::tempdir; +my $standby_pgdata = TestLib::tempdir; +command_fails( + [ + 'pg_rewind', '--debug', + '--target-pgdata', $primary_pgdata, + '--source-pgdata', $standby_pgdata, + 'extra_arg1' + ], + 'too many arguments'); +command_fails([ 'pg_rewind', '--target-pgdata', $primary_pgdata ], + 'no source specified'); +command_fails( + [ + 'pg_rewind', '--debug', + '--target-pgdata', $primary_pgdata, + '--source-pgdata', $standby_pgdata, + '--source-server', 'incorrect_source' + ], + 'both remote and local sources specified'); +command_fails( + [ + 'pg_rewind', '--debug', + '--target-pgdata', $primary_pgdata, + '--source-pgdata', $standby_pgdata, + '--write-recovery-conf' + ], + 'no local source with --write-recovery-conf'); diff --git a/src/bin/pg_rewind/t/008_min_recovery_point.pl b/src/bin/pg_rewind/t/008_min_recovery_point.pl new file mode 100644 index 0000000..e3d94b3 --- /dev/null +++ b/src/bin/pg_rewind/t/008_min_recovery_point.pl @@ -0,0 +1,171 @@ +# +# Test situation where a target data directory contains +# WAL records beyond both the last checkpoint and the divergence +# point: +# +# Target WAL (TLI 2): +# +# backup ... Checkpoint A ... INSERT 'rewind this' +# (TLI 1 -> 2) +# +# ^ last common ^ minRecoveryPoint +# checkpoint +# +# Source WAL (TLI 3): +# +# backup ... Checkpoint A ... Checkpoint B ... INSERT 'keep this' +# (TLI 1 -> 2) (TLI 2 -> 3) +# +# +# The last common checkpoint is Checkpoint A. But there is WAL on TLI 2 +# after the last common checkpoint that needs to be rewound. We used to +# have a bug where minRecoveryPoint was ignored, and pg_rewind concluded +# that the target doesn't need rewinding in this scenario, because the +# last checkpoint on the target TLI was an ancestor of the source TLI. 
+# +# +# This test does not make use of RewindTest as it requires three +# nodes. + +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 3; + +use File::Copy; + +my $tmp_folder = TestLib::tempdir; + +my $node_1 = get_new_node('node_1'); +$node_1->init(allows_streaming => 1); +$node_1->append_conf('postgresql.conf', qq( +wal_keep_size='100 MB' +)); + +$node_1->start; + +# Create a couple of test tables +$node_1->safe_psql('postgres', 'CREATE TABLE public.foo (t TEXT)'); +$node_1->safe_psql('postgres', 'CREATE TABLE public.bar (t TEXT)'); +$node_1->safe_psql('postgres', "INSERT INTO public.bar VALUES ('in both')"); + +# +# Create node_2 and node_3 as standbys following node_1 +# +my $backup_name = 'my_backup'; +$node_1->backup($backup_name); + +my $node_2 = get_new_node('node_2'); +$node_2->init_from_backup($node_1, $backup_name, + has_streaming => 1); +$node_2->start; + +my $node_3 = get_new_node('node_3'); +$node_3->init_from_backup($node_1, $backup_name, + has_streaming => 1); +$node_3->start; + +# Wait until node 3 has connected and caught up +my $lsn = $node_1->lsn('insert'); +$node_1->wait_for_catchup('node_3', 'replay', $lsn); + +# +# Swap the roles of node_1 and node_3, so that node_1 follows node_3. +# +$node_1->stop('fast'); +$node_3->promote; +# Force a checkpoint after the promotion. pg_rewind looks at the control +# file to determine what timeline the server is on, and that isn't updated +# immediately at promotion, but only at the next checkpoint. When running +# pg_rewind in remote mode, it's possible that we complete the test steps +# after promotion so quickly that when pg_rewind runs, the standby has not +# performed a checkpoint after promotion yet. 
+$node_3->safe_psql('postgres', "checkpoint"); + +# reconfigure node_1 as a standby following node_3 +my $node_3_connstr = $node_3->connstr; +$node_1->append_conf('postgresql.conf', qq( +primary_conninfo='$node_3_connstr' +)); +$node_1->set_standby_mode(); +$node_1->start(); + +# also reconfigure node_2 to follow node_3 +$node_2->append_conf('postgresql.conf', qq( +primary_conninfo='$node_3_connstr' +)); +$node_2->restart(); + +# +# Promote node_1, to create a split-brain scenario. +# + +# make sure node_1 is fully caught up with node_3 first +$lsn = $node_3->lsn('insert'); +$node_3->wait_for_catchup('node_1', 'replay', $lsn); + +$node_1->promote; +# Force a checkpoint after promotion, like earlier. +$node_1->safe_psql('postgres', "checkpoint"); + +# +# We now have a split-brain with two primaries. Insert a row on both to +# demonstratively create a split brain. After the rewind, we should only +# see the insert on node 1, as the insert on node 3 is rewound away. +# +$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('keep this')"); +# 'bar' is unmodified in node 1, so it won't be overwritten by replaying the +# WAL from node 1. +$node_3->safe_psql('postgres', "INSERT INTO public.bar (t) VALUES ('rewind this')"); + +# Insert more rows in node 1, to bump up the XID counter. Otherwise, if +# rewind doesn't correctly rewind the changes made on the other node, +# we might fail to notice if the inserts are invisible because the XIDs +# are not marked as committed. +$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('and this')"); +$node_1->safe_psql('postgres', "INSERT INTO public.foo (t) VALUES ('and this too')"); + +# Wait for node 2 to catch up +$node_2->poll_query_until('postgres', + q|SELECT COUNT(*) > 1 FROM public.bar|, 't'); + +# At this point node_2 will shut down without a shutdown checkpoint, +# but with WAL entries beyond the preceding shutdown checkpoint. 
+$node_2->stop('fast'); +$node_3->stop('fast'); + +my $node_2_pgdata = $node_2->data_dir; +my $node_1_connstr = $node_1->connstr; + +# Keep a temporary postgresql.conf or it would be overwritten during the rewind. +copy( + "$node_2_pgdata/postgresql.conf", + "$tmp_folder/node_2-postgresql.conf.tmp"); + +command_ok( + [ + 'pg_rewind', + "--source-server=$node_1_connstr", + "--target-pgdata=$node_2_pgdata", + "--debug" + ], + 'run pg_rewind'); + +# Now move back postgresql.conf with old settings +move( + "$tmp_folder/node_2-postgresql.conf.tmp", + "$node_2_pgdata/postgresql.conf"); + +$node_2->start; + +# Check contents of the test tables after rewind. The rows inserted in node 3 +# before rewind should've been overwritten with the data from node 1. +my $result; +$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.foo'); +is($result, qq(keep this +and this +and this too), 'table foo after rewind'); + +$result = $node_2->safe_psql('postgres', 'SELECT * FROM public.bar'); +is($result, qq(in both), 'table bar after rewind'); diff --git a/src/bin/pg_rewind/t/RewindTest.pm b/src/bin/pg_rewind/t/RewindTest.pm new file mode 100644 index 0000000..422a563 --- /dev/null +++ b/src/bin/pg_rewind/t/RewindTest.pm @@ -0,0 +1,394 @@ +package RewindTest; + +# Test driver for pg_rewind. Each test consists of a cycle where a new cluster +# is first created with initdb, and a streaming replication standby is set up +# to follow the master. Then the master is shut down and the standby is +# promoted, and finally pg_rewind is used to rewind the old master, using the +# standby as the source. +# +# To run a test, the test script (in t/ subdirectory) calls the functions +# in this module. These functions should be called in this sequence: +# +# 1. setup_cluster - creates a PostgreSQL cluster that runs as the master +# +# 2. start_master - starts the master server +# +# 3. create_standby - runs pg_basebackup to initialize a standby server, and +# sets it up to follow the master. 
+# +# 4. promote_standby - runs "pg_ctl promote" to promote the standby server. +# The old master keeps running. +# +# 5. run_pg_rewind - stops the old master (if it's still running) and runs +# pg_rewind to synchronize it with the now-promoted standby server. +# +# 6. clean_rewind_test - stops both servers used in the test, if they're +# still running. +# +# The test script can use the helper functions master_psql and standby_psql +# to run psql against the master and standby servers, respectively. + +use strict; +use warnings; + +use Carp; +use Config; +use Exporter 'import'; +use File::Copy; +use File::Path qw(rmtree); +use IPC::Run qw(run); +use PostgresNode; +use RecursiveCopy; +use TestLib; +use Test::More; + +our @EXPORT = qw( + $node_master + $node_standby + + master_psql + standby_psql + check_query + + setup_cluster + start_master + create_standby + promote_standby + run_pg_rewind + clean_rewind_test +); + +# Our nodes. +our $node_master; +our $node_standby; + +sub master_psql +{ + my $cmd = shift; + my $dbname = shift || 'postgres'; + + system_or_bail 'psql', '-q', '--no-psqlrc', '-d', + $node_master->connstr($dbname), '-c', "$cmd"; + return; +} + +sub standby_psql +{ + my $cmd = shift; + my $dbname = shift || 'postgres'; + + system_or_bail 'psql', '-q', '--no-psqlrc', '-d', + $node_standby->connstr($dbname), '-c', "$cmd"; + return; +} + +# Run a query against the master, and check that the output matches what's +# expected +sub check_query +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($query, $expected_stdout, $test_name) = @_; + my ($stdout, $stderr); + + # we want just the output, no formatting + my $result = run [ + 'psql', '-q', '-A', '-t', '--no-psqlrc', '-d', + $node_master->connstr('postgres'), + '-c', $query + ], + '>', \$stdout, '2>', \$stderr; + + # We don't use ok() for the exit code and stderr, because we want this + # check to be just a single test. 
+ if (!$result) + { + fail("$test_name: psql exit code"); + } + elsif ($stderr ne '') + { + diag $stderr; + fail("$test_name: psql no stderr"); + } + else + { + $stdout =~ s/\r\n/\n/g if $Config{osname} eq 'msys'; + is($stdout, $expected_stdout, "$test_name: query result matches"); + } + return; +} + +sub setup_cluster +{ + my $extra_name = shift; # Used to differentiate clusters + my $extra = shift; # Extra params for initdb + + # Initialize master, data checksums are mandatory + $node_master = + get_new_node('master' . ($extra_name ? "_${extra_name}" : '')); + + # Set up pg_hba.conf and pg_ident.conf for the role running + # pg_rewind. This role is used for all the tests, and has + # minimal permissions enough to rewind from an online source. + $node_master->init( + allows_streaming => 1, + extra => $extra, + auth_extra => [ '--create-role', 'rewind_user' ]); + + # Set wal_keep_size to prevent WAL segment recycling after enforced + # checkpoints in the tests. + $node_master->append_conf( + 'postgresql.conf', qq( +wal_keep_size = 320MB +)); + return; +} + +sub start_master +{ + $node_master->start; + + # Create custom role which is used to run pg_rewind, and adjust its + # permissions to the minimum necessary. + $node_master->safe_psql( + 'postgres', " + CREATE ROLE rewind_user LOGIN; + GRANT EXECUTE ON function pg_catalog.pg_ls_dir(text, boolean, boolean) + TO rewind_user; + GRANT EXECUTE ON function pg_catalog.pg_stat_file(text, boolean) + TO rewind_user; + GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text) + TO rewind_user; + GRANT EXECUTE ON function pg_catalog.pg_read_binary_file(text, bigint, bigint, boolean) + TO rewind_user;"); + + #### Now run the test-specific parts to initialize the master before setting + # up standby + + return; +} + +sub create_standby +{ + my $extra_name = shift; + + $node_standby = + get_new_node('standby' . ($extra_name ? 
"_${extra_name}" : '')); + $node_master->backup('my_backup'); + $node_standby->init_from_backup($node_master, 'my_backup'); + my $connstr_master = $node_master->connstr(); + + $node_standby->append_conf( + "postgresql.conf", qq( +primary_conninfo='$connstr_master' +)); + + $node_standby->set_standby_mode(); + + # Start standby + $node_standby->start; + + # The standby may have WAL to apply before it matches the primary. That + # is fine, because no test examines the standby before promotion. + + return; +} + +sub promote_standby +{ + #### Now run the test-specific parts to run after standby has been started + # up standby + + # Wait for the standby to receive and write all WAL. + $node_master->wait_for_catchup($node_standby, 'write'); + + # Now promote standby and insert some new data on master, this will put + # the master out-of-sync with the standby. + $node_standby->promote; + + # Force a checkpoint after the promotion. pg_rewind looks at the control + # file to determine what timeline the server is on, and that isn't updated + # immediately at promotion, but only at the next checkpoint. When running + # pg_rewind in remote mode, it's possible that we complete the test steps + # after promotion so quickly that when pg_rewind runs, the standby has not + # performed a checkpoint after promotion yet. + standby_psql("checkpoint"); + + return; +} + +sub run_pg_rewind +{ + my $test_mode = shift; + my $master_pgdata = $node_master->data_dir; + my $standby_pgdata = $node_standby->data_dir; + my $standby_connstr = $node_standby->connstr('postgres'); + my $tmp_folder = TestLib::tempdir; + + # Append the rewind-specific role to the connection string. + $standby_connstr = "$standby_connstr user=rewind_user"; + + if ($test_mode eq 'archive') + { + # pg_rewind is tested with --restore-target-wal by moving all + # WAL files to a secondary location. 
Note that this leads to + # a failure in ensureCleanShutdown(), forcing the use of + # --no-ensure-shutdown in this mode as the initial set of WAL + # files needed to ensure a clean restart is gone. This could + # be improved by keeping around only a minimum set of WAL + # segments but that would just make the test more costly, + # without improving the coverage. Hence, instead, stop + # gracefully the primary here. + $node_master->stop; + } + else + { + # Stop the master and be ready to perform the rewind. The cluster + # needs recovery to finish once, and pg_rewind makes sure that it + # happens automatically. + $node_master->stop('immediate'); + } + + # At this point, the rewind processing is ready to run. + # We now have a very simple scenario with a few diverged WAL records. + # The real testing begins really now with a bifurcation of the possible + # scenarios that pg_rewind supports. + + # Keep a temporary postgresql.conf for master node or it would be + # overwritten during the rewind. + copy( + "$master_pgdata/postgresql.conf", + "$tmp_folder/master-postgresql.conf.tmp"); + + # Now run pg_rewind + if ($test_mode eq "local") + { + + # Do rewind using a local pgdata as source + # Stop the standby (the rewind source) before running pg_rewind + $node_standby->stop; + command_ok( + [ + 'pg_rewind', + "--debug", + "--source-pgdata=$standby_pgdata", + "--target-pgdata=$master_pgdata", + "--no-sync" + ], + 'pg_rewind local'); + } + elsif ($test_mode eq "remote") + { + # Do rewind using a remote connection as source, generating + # recovery configuration automatically. + command_ok( + [ + 'pg_rewind', "--debug", + "--source-server", $standby_connstr, + "--target-pgdata=$master_pgdata", "--no-sync", + "--write-recovery-conf" + ], + 'pg_rewind remote'); + + # Check that standby.signal is here as recovery configuration + # was requested. 
+ ok( -e "$master_pgdata/standby.signal", + 'standby.signal created after pg_rewind'); + + # Now, when pg_rewind apparently succeeded with minimal permissions, + # add REPLICATION privilege. So we could test that new standby + # is able to connect to the new master with generated config. + $node_standby->safe_psql('postgres', + "ALTER ROLE rewind_user WITH REPLICATION;"); + } + elsif ($test_mode eq "archive") + { + + # Do rewind using a local pgdata as source and specified + # directory with target WAL archive. The old master has + # to be stopped at this point. + + # Remove the existing archive directory and move all WAL + # segments from the old master to the archives. These + # will be used by pg_rewind. + rmtree($node_master->archive_dir); + RecursiveCopy::copypath($node_master->data_dir . "/pg_wal", + $node_master->archive_dir); + + # Fast way to remove entire directory content + rmtree($node_master->data_dir . "/pg_wal"); + mkdir($node_master->data_dir . "/pg_wal"); + + # Make sure that directories have the right umask as this is + # required by a follow-up check on permissions, and better + # safe than sorry. + chmod(0700, $node_master->archive_dir); + chmod(0700, $node_master->data_dir . "/pg_wal"); + + # Add appropriate restore_command to the target cluster + $node_master->enable_restoring($node_master, 0); + + # Stop the new master and be ready to perform the rewind. + $node_standby->stop; + + # Note the use of --no-ensure-shutdown here. WAL files are + # gone in this mode and the primary has been stopped + # gracefully already. 
+ command_ok( + [ + 'pg_rewind', + "--debug", + "--source-pgdata=$standby_pgdata", + "--target-pgdata=$master_pgdata", + "--no-sync", + "--no-ensure-shutdown", + "--restore-target-wal" + ], + 'pg_rewind archive'); + } + else + { + + # Cannot come here normally + croak("Incorrect test mode specified"); + } + + # Now move back postgresql.conf with old settings + move( + "$tmp_folder/master-postgresql.conf.tmp", + "$master_pgdata/postgresql.conf"); + + chmod( + $node_master->group_access() ? 0640 : 0600, + "$master_pgdata/postgresql.conf") + or BAIL_OUT( + "unable to set permissions for $master_pgdata/postgresql.conf"); + + # Plug-in rewound node to the now-promoted standby node + if ($test_mode ne "remote") + { + my $port_standby = $node_standby->port; + $node_master->append_conf( + 'postgresql.conf', qq( +primary_conninfo='port=$port_standby')); + + $node_master->set_standby_mode(); + } + + # Restart the master to check that rewind went correctly + $node_master->start; + + #### Now run the test-specific parts to check the result + + return; +} + +# Clean up after the test. Stop both servers, if they're still running. +sub clean_rewind_test +{ + $node_master->teardown_node if defined $node_master; + $node_standby->teardown_node if defined $node_standby; + return; +} + +1; |