Diffstat (limited to 'src/test/recovery')
32 files changed, 5795 insertions, 0 deletions
diff --git a/src/test/recovery/.gitignore b/src/test/recovery/.gitignore new file mode 100644 index 0000000..871e943 --- /dev/null +++ b/src/test/recovery/.gitignore @@ -0,0 +1,2 @@ +# Generated by test suite +/tmp_check/ diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile new file mode 100644 index 0000000..288c04b --- /dev/null +++ b/src/test/recovery/Makefile @@ -0,0 +1,29 @@ +#------------------------------------------------------------------------- +# +# Makefile for src/test/recovery +# +# Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group +# Portions Copyright (c) 1994, Regents of the University of California +# +# src/test/recovery/Makefile +# +#------------------------------------------------------------------------- + +EXTRA_INSTALL=contrib/test_decoding + +subdir = src/test/recovery +top_builddir = ../../.. +include $(top_builddir)/src/Makefile.global + +# required for 017_shm.pl +REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX) +export REGRESS_SHLIB + +check: + $(prove_check) + +installcheck: + $(prove_installcheck) + +clean distclean maintainer-clean: + rm -rf tmp_check diff --git a/src/test/recovery/README b/src/test/recovery/README new file mode 100644 index 0000000..632e720 --- /dev/null +++ b/src/test/recovery/README @@ -0,0 +1,25 @@ +src/test/recovery/README + +Regression tests for recovery and replication +============================================= + +This directory contains a test suite for recovery and replication. + +Running the tests +================= + +NOTE: You must have given the --enable-tap-tests argument to configure. +Also, to use "make installcheck", you must have built and installed +contrib/test_decoding in addition to the core code. + +Run + make check +or + make installcheck +You can use "make installcheck" if you previously did "make install". +In that case, the code in the installation tree is tested. With +"make check", a temporary installation tree is built from the current +sources and then tested. + +Either way, this test initializes, starts, and stops several test Postgres +clusters. diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl new file mode 100644 index 0000000..2a1bc81 --- /dev/null +++ b/src/test/recovery/t/001_stream_rep.pl @@ -0,0 +1,486 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Minimal test testing streaming replication +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 49; + +# Initialize primary node +my $node_primary = get_new_node('primary'); +# A specific role is created to perform some tests related to replication, +# and it needs proper authentication configuration. +$node_primary->init( + allows_streaming => 1, + auth_extra => [ '--create-role', 'repl_role' ]); +$node_primary->start; +my $backup_name = 'my_backup'; + +# Take backup +$node_primary->backup($backup_name); + +# Create streaming standby linking to primary +my $node_standby_1 = get_new_node('standby_1'); +$node_standby_1->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_1->start; + +# Take backup of standby 1 (not mandatory, but useful to check if +# pg_basebackup works on a standby). +$node_standby_1->backup($backup_name); + +# Take a second backup of the standby while the primary is offline. 
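# For context (a hedged aside, not part of the patch): pg_basebackup connects
# to the node it is backing up, not to that node's upstream, so a standby can
# be backed up even while its primary is down.  Roughly equivalent, with a
# hypothetical port, to:
#   pg_basebackup -D /tmp/standby1_backup -d "port=5433" -X stream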
+$node_primary->stop; +$node_standby_1->backup('my_backup_2'); +$node_primary->start; + +# Create second standby node linking to standby 1 +my $node_standby_2 = get_new_node('standby_2'); +$node_standby_2->init_from_backup($node_standby_1, $backup_name, + has_streaming => 1); +$node_standby_2->start; + +# Create some content on primary and check its presence in standby 1 +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a"); + +# Wait for standbys to catch up +$node_primary->wait_for_catchup($node_standby_1, 'replay', + $node_primary->lsn('insert')); +$node_standby_1->wait_for_catchup($node_standby_2, 'replay', + $node_standby_1->lsn('replay')); + +my $result = + $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +print "standby 1: $result\n"; +is($result, qq(1002), 'check streamed content on standby 1'); + +$result = + $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +print "standby 2: $result\n"; +is($result, qq(1002), 'check streamed content on standby 2'); + +# Check that only READ-only queries can run on standbys +is($node_standby_1->psql('postgres', 'INSERT INTO tab_int VALUES (1)'), + 3, 'read-only queries on standby 1'); +is($node_standby_2->psql('postgres', 'INSERT INTO tab_int VALUES (1)'), + 3, 'read-only queries on standby 2'); + +# Tests for connection parameter target_session_attrs +note "testing connection parameter \"target_session_attrs\""; + +# Attempt to connect to $node1, then $node2, using target_session_attrs=$mode. +# Expect to connect to $target_node (undef for failure) with given $status. +sub test_target_session_attrs +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $node1 = shift; + my $node2 = shift; + my $target_node = shift; + my $mode = shift; + my $status = shift; + + my $node1_host = $node1->host; + my $node1_port = $node1->port; + my $node1_name = $node1->name; + my $node2_host = $node2->host; + my $node2_port = $node2->port; + my $node2_name = $node2->name; + my $target_port = undef; + $target_port = $target_node->port if (defined $target_node); + my $target_name = undef; + $target_name = $target_node->name if (defined $target_node); + + # Build connection string for connection attempt. + my $connstr = "host=$node1_host,$node2_host "; + $connstr .= "port=$node1_port,$node2_port "; + $connstr .= "target_session_attrs=$mode"; + + # Attempt to connect, and if successful, get the server port number + # we connected to. Note we must pass the SQL command via the command + # line not stdin, else Perl may spit up trying to write to stdin of + # an already-failed psql process. + my ($ret, $stdout, $stderr) = + $node1->psql('postgres', undef, + extra_params => [ '-d', $connstr, '-c', 'SHOW port;' ]); + if ($status == 0) + { + is( $status == $ret && $stdout eq $target_port, + 1, + "connect to node $target_name with mode \"$mode\" and $node1_name,$node2_name listed" + ); + } + else + { + print "status = $status\n"; + print "ret = $ret\n"; + print "stdout = $stdout\n"; + print "stderr = $stderr\n"; + + is( $status == $ret && !defined $target_node, + 1, + "fail to connect with mode \"$mode\" and $node1_name,$node2_name listed" + ); + } + + return; +} + +# Connect to primary in "read-write" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_primary, + "read-write", 0); + +# Connect to primary in "read-write" mode with standby1,primary list. 
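# For illustration (hypothetical ports, not part of the patch): the helper
# above ends up invoking psql with a multi-host connection string roughly like
#   host=127.0.0.1,127.0.0.1 port=5434,5433 target_session_attrs=read-write
# and libpq tries each host in order until one satisfies the requested mode.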
+test_target_session_attrs($node_standby_1, $node_primary, $node_primary, + "read-write", 0); + +# Connect to primary in "any" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_primary, + "any", 0); + +# Connect to standby1 in "any" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "any", 0); + +# Connect to primary in "primary" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_primary, + "primary", 0); + +# Connect to primary in "primary" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_primary, + "primary", 0); + +# Connect to standby1 in "read-only" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1, + "read-only", 0); + +# Connect to standby1 in "read-only" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "read-only", 0); + +# Connect to primary in "prefer-standby" mode with primary,primary list. +test_target_session_attrs($node_primary, $node_primary, $node_primary, + "prefer-standby", 0); + +# Connect to standby1 in "prefer-standby" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1, + "prefer-standby", 0); + +# Connect to standby1 in "prefer-standby" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "prefer-standby", 0); + +# Connect to standby1 in "standby" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1, + "standby", 0); + +# Connect to standby1 in "standby" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "standby", 0); + +# Fail to connect in "read-write" mode with standby1,standby2 list. +test_target_session_attrs($node_standby_1, $node_standby_2, undef, + "read-write", 2); + +# Fail to connect in "primary" mode with standby1,standby2 list. +test_target_session_attrs($node_standby_1, $node_standby_2, undef, + "primary", 2); + +# Fail to connect in "read-only" mode with primary,primary list. +test_target_session_attrs($node_primary, $node_primary, undef, + "read-only", 2); + +# Fail to connect in "standby" mode with primary,primary list. +test_target_session_attrs($node_primary, $node_primary, undef, "standby", 2); + +# Test for SHOW commands using a WAL sender connection with a replication +# role. 
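# For context: walsender connections accept only a restricted command set, of
# which SHOW is a part.  The pg_read_all_settings grant made below is what
# lets the non-superuser repl_role read superuser-only settings such as
# primary_conninfo.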
+note "testing SHOW commands for replication connection"; + +$node_primary->psql( + 'postgres', " +CREATE ROLE repl_role REPLICATION LOGIN; +GRANT pg_read_all_settings TO repl_role;"); +my $primary_host = $node_primary->host; +my $primary_port = $node_primary->port; +my $connstr_common = "host=$primary_host port=$primary_port user=repl_role"; +my $connstr_rep = "$connstr_common replication=1"; +my $connstr_db = "$connstr_common replication=database dbname=postgres"; + +# Test SHOW ALL +my ($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW ALL;', + on_error_die => 1, + extra_params => [ '-d', $connstr_rep ]); +ok($ret == 0, "SHOW ALL with replication role and physical replication"); +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW ALL;', + on_error_die => 1, + extra_params => [ '-d', $connstr_db ]); +ok($ret == 0, "SHOW ALL with replication role and logical replication"); + +# Test SHOW with a user-settable parameter +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW work_mem;', + on_error_die => 1, + extra_params => [ '-d', $connstr_rep ]); +ok( $ret == 0, + "SHOW with user-settable parameter, replication role and physical replication" +); +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW work_mem;', + on_error_die => 1, + extra_params => [ '-d', $connstr_db ]); +ok( $ret == 0, + "SHOW with user-settable parameter, replication role and logical replication" +); + +# Test SHOW with a superuser-settable parameter +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW primary_conninfo;', + on_error_die => 1, + extra_params => [ '-d', $connstr_rep ]); +ok( $ret == 0, + "SHOW with superuser-settable parameter, replication role and physical replication" +); +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW primary_conninfo;', + on_error_die => 1, + extra_params => [ '-d', $connstr_db ]); +ok( $ret == 0, + "SHOW with superuser-settable parameter, replication role and logical replication" +); + +note "switching to physical replication slot"; + +# Switch to using a physical replication slot. We can do this without a new +# backup since physical slots can go backwards if needed. Do so on both +# standbys. Since we're going to be testing things that affect the slot state, +# also increase the standby feedback interval to ensure timely updates. 
+my ($slotname_1, $slotname_2) = ('standby_1', 'standby_2');
+$node_primary->append_conf('postgresql.conf', "max_replication_slots = 4");
+$node_primary->restart;
+is( $node_primary->psql(
+        'postgres',
+        qq[SELECT pg_create_physical_replication_slot('$slotname_1');]),
+    0,
+    'physical slot created on primary');
+$node_standby_1->append_conf('postgresql.conf',
+    "primary_slot_name = $slotname_1");
+$node_standby_1->append_conf('postgresql.conf',
+    "wal_receiver_status_interval = 1");
+$node_standby_1->append_conf('postgresql.conf', "max_replication_slots = 4");
+$node_standby_1->restart;
+is( $node_standby_1->psql(
+        'postgres',
+        qq[SELECT pg_create_physical_replication_slot('$slotname_2');]),
+    0,
+    'physical slot created on intermediate replica');
+$node_standby_2->append_conf('postgresql.conf',
+    "primary_slot_name = $slotname_2");
+$node_standby_2->append_conf('postgresql.conf',
+    "wal_receiver_status_interval = 1");
+# We should be able to change primary_slot_name without a restart; the
+# effect is waited for by get_slot_xmins below.
+$node_standby_2->reload;
+
+# Fetch xmin columns from slot's pg_replication_slots row, after waiting for
+# given boolean condition to be true to ensure we've reached a quiescent state
+sub get_slot_xmins
+{
+    my ($node, $slotname, $check_expr) = @_;
+
+    $node->poll_query_until(
+        'postgres', qq[
+        SELECT $check_expr
+        FROM pg_catalog.pg_replication_slots
+        WHERE slot_name = '$slotname';
+    ]) or die "Timed out waiting for slot xmins to advance";
+
+    my $slotinfo = $node->slot($slotname);
+    return ($slotinfo->{'xmin'}, $slotinfo->{'catalog_xmin'});
+}
+
+# There's no hot standby feedback and there are no logical slots on either peer
+# so xmin and catalog_xmin should be null on both slots.
+my ($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1,
+    "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of non-cascaded slot null with no hs_feedback');
+is($catalog_xmin, '',
+    'catalog xmin of non-cascaded slot null with no hs_feedback');
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2,
+    "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of cascaded slot null with no hs_feedback');
+is($catalog_xmin, '',
+    'catalog xmin of cascaded slot null with no hs_feedback');
+
+# Replication still works?
+$node_primary->safe_psql('postgres', 'CREATE TABLE replayed(val integer);');
+
+sub replay_check
+{
+    my $newval = $node_primary->safe_psql('postgres',
+        'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+    );
+    $node_primary->wait_for_catchup($node_standby_1, 'replay',
+        $node_primary->lsn('insert'));
+    $node_standby_1->wait_for_catchup($node_standby_2, 'replay',
+        $node_standby_1->lsn('replay'));
+    $node_standby_1->safe_psql('postgres',
+        qq[SELECT 1 FROM replayed WHERE val = $newval])
+      or die "standby_1 didn't replay primary value $newval";
+    $node_standby_2->safe_psql('postgres',
+        qq[SELECT 1 FROM replayed WHERE val = $newval])
+      or die "standby_2 didn't replay standby_1 value $newval";
+    return;
+}
+
+replay_check();
+
+note "enabling hot_standby_feedback";
+
+# Enable hs_feedback. The slot should gain an xmin. We set the status interval
+# so we'll see the results promptly.
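# For context: in a cascade, standby_2 reports its feedback to standby_1, and
# standby_1 folds that into the feedback it sends to the primary, so both
# slots checked below should gain an xmin.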
+$node_standby_1->safe_psql('postgres', + 'ALTER SYSTEM SET hot_standby_feedback = on;'); +$node_standby_1->reload; +$node_standby_2->safe_psql('postgres', + 'ALTER SYSTEM SET hot_standby_feedback = on;'); +$node_standby_2->reload; +replay_check(); + +($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1, + "xmin IS NOT NULL AND catalog_xmin IS NULL"); +isnt($xmin, '', 'xmin of non-cascaded slot non-null with hs feedback'); +is($catalog_xmin, '', + 'catalog xmin of non-cascaded slot still null with hs_feedback'); + +my ($xmin1, $catalog_xmin1) = get_slot_xmins($node_standby_1, $slotname_2, + "xmin IS NOT NULL AND catalog_xmin IS NULL"); +isnt($xmin1, '', 'xmin of cascaded slot non-null with hs feedback'); +is($catalog_xmin1, '', + 'catalog xmin of cascaded slot still null with hs_feedback'); + +note "doing some work to advance xmin"; +$node_primary->safe_psql( + 'postgres', q{ +do $$ +begin + for i in 10000..11000 loop + -- use an exception block so that each iteration eats an XID + begin + insert into tab_int values (i); + exception + when division_by_zero then null; + end; + end loop; +end$$; +}); + +$node_primary->safe_psql('postgres', 'VACUUM;'); +$node_primary->safe_psql('postgres', 'CHECKPOINT;'); + +my ($xmin2, $catalog_xmin2) = + get_slot_xmins($node_primary, $slotname_1, "xmin <> '$xmin'"); +note "primary slot's new xmin $xmin2, old xmin $xmin"; +isnt($xmin2, $xmin, 'xmin of non-cascaded slot with hs feedback has changed'); +is($catalog_xmin2, '', + 'catalog xmin of non-cascaded slot still null with hs_feedback unchanged' +); + +($xmin2, $catalog_xmin2) = + get_slot_xmins($node_standby_1, $slotname_2, "xmin <> '$xmin1'"); +note "standby_1 slot's new xmin $xmin2, old xmin $xmin1"; +isnt($xmin2, $xmin1, 'xmin of cascaded slot with hs feedback has changed'); +is($catalog_xmin2, '', + 'catalog xmin of cascaded slot still null with hs_feedback unchanged'); + +note "disabling hot_standby_feedback"; + +# Disable hs_feedback. Xmin should be cleared. 
+$node_standby_1->safe_psql('postgres',
+    'ALTER SYSTEM SET hot_standby_feedback = off;');
+$node_standby_1->reload;
+$node_standby_2->safe_psql('postgres',
+    'ALTER SYSTEM SET hot_standby_feedback = off;');
+$node_standby_2->reload;
+replay_check();
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1,
+    "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of non-cascaded slot null with hs feedback reset');
+is($catalog_xmin, '',
+    'catalog xmin of non-cascaded slot still null with hs_feedback reset');
+
+($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2,
+    "xmin IS NULL AND catalog_xmin IS NULL");
+is($xmin, '', 'xmin of cascaded slot null with hs feedback reset');
+is($catalog_xmin, '',
+    'catalog xmin of cascaded slot still null with hs_feedback reset');
+
+note "checking change of primary_conninfo without restart";
+$node_standby_2->append_conf('postgresql.conf', "primary_slot_name = ''");
+$node_standby_2->enable_streaming($node_primary);
+$node_standby_2->reload;
+
+# Make sure we are no longer streaming through the cascading standby_1.
+$node_standby_1->stop;
+
+my $newval = $node_primary->safe_psql('postgres',
+    'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val'
+);
+$node_primary->wait_for_catchup($node_standby_2, 'replay',
+    $node_primary->lsn('insert'));
+my $is_replayed = $node_standby_2->safe_psql('postgres',
+    qq[SELECT 1 FROM replayed WHERE val = $newval]);
+is($is_replayed, qq(1), "standby_2 replayed primary value $newval");
+
+# Drop any existing slots on the primary, for the follow-up tests.
+$node_primary->safe_psql('postgres',
+    "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;");
+
+# Test physical slot advancing and its durability. Create a new slot on
+# the primary, not used by any of the standbys. This reserves WAL at creation.
+my $phys_slot = 'phys_slot';
+$node_primary->safe_psql('postgres',
+    "SELECT pg_create_physical_replication_slot('$phys_slot', true);");
+# Generate some WAL, and switch to a new segment, used to check that
+# the previous segment is correctly getting recycled as the slot advancing
+# would recompute the minimum LSN calculated across all slots.
+my $segment_removed = $node_primary->safe_psql('postgres',
+    'SELECT pg_walfile_name(pg_current_wal_lsn())');
+chomp($segment_removed);
+$node_primary->psql(
+    'postgres', "
+    CREATE TABLE tab_phys_slot (a int);
+    INSERT INTO tab_phys_slot VALUES (generate_series(1,10));
+    SELECT pg_switch_wal();");
+my $current_lsn =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+chomp($current_lsn);
+my $psql_rc = $node_primary->psql('postgres',
+    "SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);"
+);
+is($psql_rc, '0', 'slot advancing with physical slot');
+my $phys_restart_lsn_pre = $node_primary->safe_psql('postgres',
+    "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"
+);
+chomp($phys_restart_lsn_pre);
+# Slot advance should persist across clean restarts.
+$node_primary->restart;
+my $phys_restart_lsn_post = $node_primary->safe_psql('postgres',
+    "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';"
+);
+chomp($phys_restart_lsn_post);
+ok( ($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0,
+    "physical slot advance persists across restarts");
+
+# Check if the previous segment gets correctly recycled after the
+# server stopped cleanly, causing a shutdown checkpoint to be generated.
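# For context (hedged): "recycled" means the old segment is renamed for reuse
# as a future segment at checkpoint time, so its original file name should
# disappear from pg_wal once the advanced slot no longer holds it back.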
+my $primary_data = $node_primary->data_dir; +ok(!-f "$primary_data/pg_wal/$segment_removed", + "WAL segment $segment_removed recycled after physical slot advancing"); diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl new file mode 100644 index 0000000..c8498c7 --- /dev/null +++ b/src/test/recovery/t/002_archiving.pl @@ -0,0 +1,99 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# test for archiving with hot standby +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 4; +use File::Copy; + +# Initialize primary node, doing archives +my $node_primary = get_new_node('primary'); +$node_primary->init( + has_archiving => 1, + allows_streaming => 1); +my $backup_name = 'my_backup'; + +# Start it +$node_primary->start; + +# Take backup for standby +$node_primary->backup($backup_name); + +# Initialize standby node from backup, fetching WAL from archives +my $node_standby = get_new_node('standby'); +# Note that this makes the standby store its contents on the archives +# of the primary. +$node_standby->init_from_backup($node_primary, $backup_name, + has_restoring => 1); +$node_standby->append_conf('postgresql.conf', + "wal_retrieve_retry_interval = '100ms'"); +$node_standby->start; + +# Create some content on primary +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); +my $current_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); + +# Force archiving of WAL file to make it present on primary +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); + +# Add some more content, it should not be present on standby +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); + +# Wait until necessary replay has been done on standby +my $caughtup_query = + "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +my $result = + $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(1000), 'check content from archives'); + +# Check the presence of temporary files specifically generated during +# archive recovery. To ensure the presence of the temporary history +# file, switch to a timeline large enough to allow a standby to recover +# a history file from an archive. As this requires at least two timeline +# switches, promote the existing standby first. Then create a second +# standby based on the primary, using its archives. Finally, the second +# standby is promoted. +$node_standby->promote; + +# Wait until the history file has been stored on the archives of the +# primary once the promotion of the standby completes. This ensures that +# the second standby created below will be able to restore this file, +# creating a RECOVERYHISTORY. 
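# For context: RECOVERYHISTORY and RECOVERYXLOG are the temporary names under
# which restore_command output is staged in pg_wal before being renamed into
# place; the checks below verify that no leftovers survive the end of
# recovery.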
+my $primary_archive = $node_primary->archive_dir;
+$caughtup_query =
+  "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history')";
+$node_primary->poll_query_until('postgres', $caughtup_query)
+  or die "Timed out while waiting for archiving of 00000002.history";
+
+my $node_standby2 = get_new_node('standby2');
+$node_standby2->init_from_backup($node_primary, $backup_name,
+    has_restoring => 1);
+$node_standby2->start;
+
+my $log_location = -s $node_standby2->logfile;
+
+# Now promote standby2, and check that temporary files specifically
+# generated during archive recovery are removed by the end of recovery.
+$node_standby2->promote;
+
+# Check the logs of the standby to see that the history file was restored
+# from the archives.
+my $log_contents = slurp_file($node_standby2->logfile, $log_location);
+my $node_standby2_data = $node_standby2->data_dir;
+
+like(
+    $log_contents,
+    qr/restored log file "00000002.history" from archive/s,
+    "00000002.history retrieved from the archives");
+ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY",
+    "RECOVERYHISTORY removed after promotion");
+ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG",
+    "RECOVERYXLOG removed after promotion");
diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl
new file mode 100644
index 0000000..0815e6e
--- /dev/null
+++ b/src/test/recovery/t/003_recovery_targets.pl
@@ -0,0 +1,184 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test for recovery targets: name, timestamp, XID
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 9;
+use Time::HiRes qw(usleep);
+
+# Create and test a standby from given backup, with a certain recovery target.
+# Choose $until_lsn later than the transaction commit that causes the row
+# count to reach $num_rows, yet not later than the recovery target.
+sub test_recovery_standby
+{
+    local $Test::Builder::Level = $Test::Builder::Level + 1;
+
+    my $test_name = shift;
+    my $node_name = shift;
+    my $node_primary = shift;
+    my $recovery_params = shift;
+    my $num_rows = shift;
+    my $until_lsn = shift;
+
+    my $node_standby = get_new_node($node_name);
+    $node_standby->init_from_backup($node_primary, 'my_backup',
+        has_restoring => 1);
+
+    foreach my $param_item (@$recovery_params)
+    {
+        $node_standby->append_conf('postgresql.conf', qq($param_item));
+    }
+
+    $node_standby->start;
+
+    # Wait until standby has replayed enough data
+    my $caughtup_query =
+      "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+    $node_standby->poll_query_until('postgres', $caughtup_query)
+      or die "Timed out while waiting for standby to catch up";
+
+    # Check the presence of the expected data on the standby
+    my $result =
+      $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+    is($result, qq($num_rows), "check standby content for $test_name");
+
+    # Stop standby node
+    $node_standby->teardown_node;
+
+    return;
+}
+
+# Initialize primary node
+my $node_primary = get_new_node('primary');
+$node_primary->init(has_archiving => 1, allows_streaming => 1);
+
+# Bump the transaction ID epoch. This is useful to stress the portability
+# of recovery_target_xid parsing.
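# For context (hedged): with the epoch bumped to 1, pg_current_xact_id() used
# below returns 64-bit xid8 values above 2^32 (epoch << 32 | xid), so the
# recovery_target_xid handed to the standby exercises parsing of wide XID
# values.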
+system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir);
+
+# Start it
+$node_primary->start;
+
+# Create data before taking the backup, aimed at testing
+# recovery_target = 'immediate'
+$node_primary->safe_psql('postgres',
+    "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");
+my $lsn1 =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+
+# Take backup from which all operations will be run
+$node_primary->backup('my_backup');
+
+# Insert some data to be used as a replay reference, with a recovery
+# target TXID.
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(1001,2000))");
+my $ret = $node_primary->safe_psql('postgres',
+    "SELECT pg_current_wal_lsn(), pg_current_xact_id();");
+my ($lsn2, $recovery_txid) = split /\|/, $ret;
+
+# More data, with recovery target timestamp
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(2001,3000))");
+my $lsn3 =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+my $recovery_time = $node_primary->safe_psql('postgres', "SELECT now()");
+
+# Even more data, this time with a recovery target name
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(3001,4000))");
+my $recovery_name = "my_target";
+my $lsn4 =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");
+$node_primary->safe_psql('postgres',
+    "SELECT pg_create_restore_point('$recovery_name');");
+
+# And now for a recovery target LSN
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(4001,5000))");
+my $lsn5 = my $recovery_lsn =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(5001,6000))");
+
+# Force archiving of WAL file
+$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");
+
+# Test recovery targets
+my @recovery_params = ("recovery_target = 'immediate'");
+test_recovery_standby('immediate target',
+    'standby_1', $node_primary, \@recovery_params, "1000", $lsn1);
+@recovery_params = ("recovery_target_xid = '$recovery_txid'");
+test_recovery_standby('XID', 'standby_2', $node_primary, \@recovery_params,
+    "2000", $lsn2);
+@recovery_params = ("recovery_target_time = '$recovery_time'");
+test_recovery_standby('time', 'standby_3', $node_primary, \@recovery_params,
+    "3000", $lsn3);
+@recovery_params = ("recovery_target_name = '$recovery_name'");
+test_recovery_standby('name', 'standby_4', $node_primary, \@recovery_params,
+    "4000", $lsn4);
+@recovery_params = ("recovery_target_lsn = '$recovery_lsn'");
+test_recovery_standby('LSN', 'standby_5', $node_primary, \@recovery_params,
+    "5000", $lsn5);
+
+# Multiple targets
+#
+# Multiple conflicting settings are not allowed, but setting the same
+# parameter multiple times or unsetting a parameter and setting a
+# different one is allowed.
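# For context: entries appended later in postgresql.conf override earlier
# ones, so resetting recovery_target_name to '' below leaves only
# recovery_target_time in effect and no "multiple recovery targets" error is
# raised.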
+ +@recovery_params = ( + "recovery_target_name = '$recovery_name'", + "recovery_target_name = ''", + "recovery_target_time = '$recovery_time'"); +test_recovery_standby('multiple overriding settings', + 'standby_6', $node_primary, \@recovery_params, "3000", $lsn3); + +my $node_standby = get_new_node('standby_7'); +$node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); +$node_standby->append_conf( + 'postgresql.conf', "recovery_target_name = '$recovery_name' +recovery_target_time = '$recovery_time'"); + +my $res = run_log( + [ + 'pg_ctl', '-D', $node_standby->data_dir, '-l', + $node_standby->logfile, 'start' + ]); +ok(!$res, 'invalid recovery startup fails'); + +my $logfile = slurp_file($node_standby->logfile()); +ok($logfile =~ qr/multiple recovery targets specified/, + 'multiple conflicting settings'); + +# Check behavior when recovery ends before target is reached + +$node_standby = get_new_node('standby_8'); +$node_standby->init_from_backup( + $node_primary, 'my_backup', + has_restoring => 1, + standby => 0); +$node_standby->append_conf('postgresql.conf', + "recovery_target_name = 'does_not_exist'"); + +run_log( + [ + 'pg_ctl', '-D', $node_standby->data_dir, '-l', + $node_standby->logfile, 'start' + ]); + +# wait for postgres to terminate +foreach my $i (0 .. 10 * $TestLib::timeout_default) +{ + last if !-f $node_standby->data_dir . '/postmaster.pid'; + usleep(100_000); +} +$logfile = slurp_file($node_standby->logfile()); +ok( $logfile =~ + qr/FATAL: .* recovery ended before configured recovery target was reached/, + 'recovery end before target reached is a fatal error'); diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl new file mode 100644 index 0000000..c101980 --- /dev/null +++ b/src/test/recovery/t/004_timeline_switch.pl @@ -0,0 +1,111 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test for timeline switch +use strict; +use warnings; +use File::Path qw(rmtree); +use PostgresNode; +use TestLib; +use Test::More tests => 3; + +$ENV{PGDATABASE} = 'postgres'; + +# Ensure that a cascading standby is able to follow a newly-promoted standby +# on a new timeline. 
+ +# Initialize primary node +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create two standbys linking to it +my $node_standby_1 = get_new_node('standby_1'); +$node_standby_1->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_1->start; +my $node_standby_2 = get_new_node('standby_2'); +$node_standby_2->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_2->start; + +# Create some content on primary +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); + +# Wait until standby has replayed enough data on standby 1 +$node_primary->wait_for_catchup($node_standby_1, 'replay', + $node_primary->lsn('write')); + +# Stop and remove primary +$node_primary->teardown_node; + +# promote standby 1 using "pg_promote", switching it to a new timeline +my $psql_out = ''; +$node_standby_1->psql( + 'postgres', + "SELECT pg_promote(wait_seconds => 300)", + stdout => \$psql_out); +is($psql_out, 't', "promotion of standby with pg_promote"); + +# Switch standby 2 to replay from standby 1 +my $connstr_1 = $node_standby_1->connstr; +$node_standby_2->append_conf( + 'postgresql.conf', qq( +primary_conninfo='$connstr_1' +)); +$node_standby_2->restart; + +# Insert some data in standby 1 and check its presence in standby 2 +# to ensure that the timeline switch has been done. +$node_standby_1->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); +$node_standby_1->wait_for_catchup($node_standby_2, 'replay', + $node_standby_1->lsn('write')); + +my $result = + $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(2000), 'check content of standby 2'); + + +# Ensure that a standby is able to follow a primary on a newer timeline +# when WAL archiving is enabled. + +# Initialize primary node +my $node_primary_2 = get_new_node('primary_2'); +$node_primary_2->init(allows_streaming => 1, has_archiving => 1); +$node_primary_2->append_conf( + 'postgresql.conf', qq( +wal_keep_size = 512MB +)); +$node_primary_2->start; + +# Take backup +$node_primary_2->backup($backup_name); + +# Create standby node +my $node_standby_3 = get_new_node('standby_3'); +$node_standby_3->init_from_backup($node_primary_2, $backup_name, + has_streaming => 1); + +# Restart primary node in standby mode and promote it, switching it +# to a new timeline. +$node_primary_2->set_standby_mode; +$node_primary_2->restart; +$node_primary_2->promote; + +# Start standby node, create some content on primary and check its presence +# in standby, to ensure that the timeline switch has been done. 
+$node_standby_3->start;
+$node_primary_2->safe_psql('postgres',
+    "CREATE TABLE tab_int AS SELECT 1 AS a");
+$node_primary_2->wait_for_catchup($node_standby_3, 'replay',
+    $node_primary_2->lsn('write'));
+
+my $result_2 =
+  $node_standby_3->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+is($result_2, qq(1), 'check content of standby 3');
diff --git a/src/test/recovery/t/005_replay_delay.pl b/src/test/recovery/t/005_replay_delay.pl
new file mode 100644
index 0000000..bd7ed4e
--- /dev/null
+++ b/src/test/recovery/t/005_replay_delay.pl
@@ -0,0 +1,112 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Checks for recovery_min_apply_delay and recovery pause
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 3;
+
+# Initialize primary node
+my $node_primary = get_new_node('primary');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+# And some content
+$node_primary->safe_psql('postgres',
+    "CREATE TABLE tab_int AS SELECT generate_series(1, 10) AS a");
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Create streaming standby from backup
+my $node_standby = get_new_node('standby');
+my $delay = 3;
+$node_standby->init_from_backup($node_primary, $backup_name,
+    has_streaming => 1);
+$node_standby->append_conf(
+    'postgresql.conf', qq(
+recovery_min_apply_delay = '${delay}s'
+));
+$node_standby->start;
+
+# Make new content on primary and check its presence in standby depending
+# on the delay applied above. Before doing the insertion, get the
+# current timestamp that will be used as a comparison base. Even on slow
+# machines, this allows the test to have predictable behavior when
+# comparing the delay between the insertion moment on the primary and the
+# replay time on the standby.
+my $primary_insert_time = time();
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(11, 20))");
+
+# Now wait for replay to complete on standby. We're done waiting when the
+# standby has replayed up to the previously saved primary LSN.
+my $until_lsn =
+  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+
+$node_standby->poll_query_until('postgres',
+    "SELECT (pg_last_wal_replay_lsn() - '$until_lsn'::pg_lsn) >= 0")
+  or die "standby never caught up";
+
+# This test is successful if and only if the LSN has been applied with at least
+# the configured apply delay.
+ok(time() - $primary_insert_time >= $delay,
+    "standby applies WAL only after replication delay");
+
+
+# Check that recovery can be paused and resumed as expected.
+my $node_standby2 = get_new_node('standby2');
+$node_standby2->init_from_backup($node_primary, $backup_name,
+    has_streaming => 1);
+$node_standby2->start;
+
+# Recovery is not yet paused.
+is( $node_standby2->safe_psql(
+        'postgres', "SELECT pg_get_wal_replay_pause_state()"),
+    'not paused',
+    'pg_get_wal_replay_pause_state() reports not paused');
+
+# Request to pause recovery and wait until it's actually paused.
+$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_pause()");
+$node_primary->safe_psql('postgres',
+    "INSERT INTO tab_int VALUES (generate_series(21,30))");
+$node_standby2->poll_query_until('postgres',
+    "SELECT pg_get_wal_replay_pause_state() = 'paused'")
+  or die "Timed out while waiting for recovery to be paused";
+
+# Even if new WAL records are streamed from the primary,
+# recovery in the paused state doesn't replay them.
+my $receive_lsn = + $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_receive_lsn()"); +my $replay_lsn = + $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_replay_lsn()"); +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(31,40))"); +$node_standby2->poll_query_until('postgres', + "SELECT '$receive_lsn'::pg_lsn < pg_last_wal_receive_lsn()") + or die "Timed out while waiting for new WAL to be streamed"; +is( $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_replay_lsn()"), + qq($replay_lsn), + 'no WAL is replayed in the paused state'); + +# Request to resume recovery and wait until it's actually resumed. +$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_resume()"); +$node_standby2->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'not paused' AND pg_last_wal_replay_lsn() > '$replay_lsn'::pg_lsn" +) or die "Timed out while waiting for recovery to be resumed"; + +# Check that the paused state ends and promotion continues if a promotion +# is triggered while recovery is paused. +$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_pause()"); +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(41,50))"); +$node_standby2->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out while waiting for recovery to be paused"; + +$node_standby2->promote; +$node_standby2->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") + or die "Timed out while waiting for promotion to finish"; diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl new file mode 100644 index 0000000..ae13c4f --- /dev/null +++ b/src/test/recovery/t/006_logical_decoding.pl @@ -0,0 +1,195 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Testing of logical decoding using SQL interface and/or pg_recvlogical +# +# Most logical decoding tests are in contrib/test_decoding. This module +# is for work that doesn't fit well there, like where server restarts +# are required. +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 14; +use Config; + +# Initialize primary node +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', qq( +wal_level = logical +)); +$node_primary->start; +my $backup_name = 'primary_backup'; + +$node_primary->safe_psql('postgres', + qq[CREATE TABLE decoding_test(x integer, y text);]); + +$node_primary->safe_psql('postgres', + qq[SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');] +); + +# Cover walsender error shutdown code +my ($result, $stdout, $stderr) = $node_primary->psql( + 'template1', + qq[START_REPLICATION SLOT test_slot LOGICAL 0/0], + replication => 'database'); +ok( $stderr =~ + m/replication slot "test_slot" was not created in this database/, + "Logical decoding correctly fails to start"); + +# Check case of walsender not using a database connection. Logical +# decoding should not be allowed. 
+($result, $stdout, $stderr) = $node_primary->psql( + 'template1', + qq[START_REPLICATION SLOT s1 LOGICAL 0/1], + replication => 'true'); +ok($stderr =~ /ERROR: logical decoding requires a database connection/, + "Logical decoding fails on non-database connection"); + +$node_primary->safe_psql('postgres', + qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,10) s;] +); + +# Basic decoding works +$result = $node_primary->safe_psql('postgres', + qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]); +is(scalar(my @foobar = split /^/m, $result), + 12, 'Decoding produced 12 rows inc BEGIN/COMMIT'); + +# If we immediately crash the server we might lose the progress we just made +# and replay the same changes again. But a clean shutdown should never repeat +# the same changes when we use the SQL decoding interface. +$node_primary->restart('fast'); + +# There are no new writes, so the result should be empty. +$result = $node_primary->safe_psql('postgres', + qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]); +chomp($result); +is($result, '', 'Decoding after fast restart repeats no rows'); + +# Insert some rows and verify that we get the same results from pg_recvlogical +# and the SQL interface. +$node_primary->safe_psql('postgres', + qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,4) s;] +); + +my $expected = q{BEGIN +table public.decoding_test: INSERT: x[integer]:1 y[text]:'1' +table public.decoding_test: INSERT: x[integer]:2 y[text]:'2' +table public.decoding_test: INSERT: x[integer]:3 y[text]:'3' +table public.decoding_test: INSERT: x[integer]:4 y[text]:'4' +COMMIT}; + +my $stdout_sql = $node_primary->safe_psql('postgres', + qq[SELECT data FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');] +); +is($stdout_sql, $expected, 'got expected output from SQL decoding session'); + +my $endpos = $node_primary->safe_psql('postgres', + "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;" +); +print "waiting to replay $endpos\n"; + +# Insert some rows after $endpos, which we won't read. 
+$node_primary->safe_psql('postgres', + qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(5,50) s;] +); + +my $stdout_recv = $node_primary->pg_recvlogical_upto( + 'postgres', 'test_slot', $endpos, + $TestLib::timeout_default, + 'include-xids' => '0', + 'skip-empty-xacts' => '1'); +chomp($stdout_recv); +is($stdout_recv, $expected, + 'got same expected output from pg_recvlogical decoding session'); + +$node_primary->poll_query_until('postgres', + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'test_slot' AND active_pid IS NULL)" +) or die "slot never became inactive"; + +$stdout_recv = $node_primary->pg_recvlogical_upto( + 'postgres', 'test_slot', $endpos, + $TestLib::timeout_default, + 'include-xids' => '0', + 'skip-empty-xacts' => '1'); +chomp($stdout_recv); +is($stdout_recv, '', 'pg_recvlogical acknowledged changes'); + +$node_primary->safe_psql('postgres', 'CREATE DATABASE otherdb'); + +is( $node_primary->psql( + 'otherdb', + "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;" + ), + 3, + 'replaying logical slot from another database fails'); + +$node_primary->safe_psql('otherdb', + qq[SELECT pg_create_logical_replication_slot('otherdb_slot', 'test_decoding');] +); + +# make sure you can't drop a slot while active +SKIP: +{ + + # some Windows Perls at least don't like IPC::Run's start/kill_kill regime. + skip "Test fails on Windows perl", 2 if $Config{osname} eq 'MSWin32'; + + my $pg_recvlogical = IPC::Run::start( + [ + 'pg_recvlogical', '-d', $node_primary->connstr('otherdb'), + '-S', 'otherdb_slot', '-f', '-', '--start' + ]); + $node_primary->poll_query_until('otherdb', + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NOT NULL)" + ) or die "slot never became active"; + is($node_primary->psql('postgres', 'DROP DATABASE otherdb'), + 3, 'dropping a DB with active logical slots fails'); + $pg_recvlogical->kill_kill; + is($node_primary->slot('otherdb_slot')->{'slot_name'}, + undef, 'logical slot still exists'); +} + +$node_primary->poll_query_until('otherdb', + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NULL)" +) or die "slot never became inactive"; + +is($node_primary->psql('postgres', 'DROP DATABASE otherdb'), + 0, 'dropping a DB with inactive logical slots succeeds'); +is($node_primary->slot('otherdb_slot')->{'slot_name'}, + undef, 'logical slot was actually dropped with DB'); + +# Test logical slot advancing and its durability. +my $logical_slot = 'logical_slot'; +$node_primary->safe_psql('postgres', + "SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);" +); +$node_primary->psql( + 'postgres', " + CREATE TABLE tab_logical_slot (a int); + INSERT INTO tab_logical_slot VALUES (generate_series(1,10));"); +my $current_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); +chomp($current_lsn); +my $psql_rc = $node_primary->psql('postgres', + "SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);" +); +is($psql_rc, '0', 'slot advancing with logical slot'); +my $logical_restart_lsn_pre = $node_primary->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';" +); +chomp($logical_restart_lsn_pre); +# Slot advance should persist across clean restarts. 
+$node_primary->restart; +my $logical_restart_lsn_post = $node_primary->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';" +); +chomp($logical_restart_lsn_post); +ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0, + "logical slot advance persists across restarts"); + +# done with the node +$node_primary->stop; diff --git a/src/test/recovery/t/007_sync_rep.pl b/src/test/recovery/t/007_sync_rep.pl new file mode 100644 index 0000000..0ff9b5d --- /dev/null +++ b/src/test/recovery/t/007_sync_rep.pl @@ -0,0 +1,219 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Minimal test testing synchronous replication sync_state transition +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 11; + +# Query checking sync_priority and sync_state of each standby +my $check_sql = + "SELECT application_name, sync_priority, sync_state FROM pg_stat_replication ORDER BY application_name;"; + +# Check that sync_state of each standby is expected (waiting till it is). +# If $setting is given, synchronous_standby_names is set to it and +# the configuration file is reloaded before the test. +sub test_sync_state +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($self, $expected, $msg, $setting) = @_; + + if (defined($setting)) + { + $self->safe_psql('postgres', + "ALTER SYSTEM SET synchronous_standby_names = '$setting';"); + $self->reload; + } + + ok($self->poll_query_until('postgres', $check_sql, $expected), $msg); + return; +} + +# Start a standby and check that it is registered within the WAL sender +# array of the given primary. This polls the primary's pg_stat_replication +# until the standby is confirmed as registered. +sub start_standby_and_wait +{ + my ($primary, $standby) = @_; + my $primary_name = $primary->name; + my $standby_name = $standby->name; + my $query = + "SELECT count(1) = 1 FROM pg_stat_replication WHERE application_name = '$standby_name'"; + + $standby->start; + + print("### Waiting for standby \"$standby_name\" on \"$primary_name\"\n"); + $primary->poll_query_until('postgres', $query); + return; +} + +# Initialize primary node +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; +my $backup_name = 'primary_backup'; + +# Take backup +$node_primary->backup($backup_name); + +# Create all the standbys. Their status on the primary is checked to ensure +# the ordering of each one of them in the WAL sender array of the primary. + +# Create standby1 linking to primary +my $node_standby_1 = get_new_node('standby1'); +$node_standby_1->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +start_standby_and_wait($node_primary, $node_standby_1); + +# Create standby2 linking to primary +my $node_standby_2 = get_new_node('standby2'); +$node_standby_2->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +start_standby_and_wait($node_primary, $node_standby_2); + +# Create standby3 linking to primary +my $node_standby_3 = get_new_node('standby3'); +$node_standby_3->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +start_standby_and_wait($node_primary, $node_standby_3); + +# Check that sync_state is determined correctly when +# synchronous_standby_names is specified in old syntax. 
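# For context: the old comma-separated syntax 'standby1,standby2' should be
# equivalent to FIRST 1 (standby1, standby2): one sync standby picked by
# priority, with the next listed standby reported as "potential".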
+test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|potential +standby3|0|async), + 'old syntax of synchronous_standby_names', + 'standby1,standby2'); + +# Check that all the standbys are considered as either sync or +# potential when * is specified in synchronous_standby_names. +# Note that standby1 is chosen as sync standby because +# it's stored in the head of WalSnd array which manages +# all the standbys though they have the same priority. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|1|potential +standby3|1|potential), + 'asterisk in synchronous_standby_names', + '*'); + +# Stop and start standbys to rearrange the order of standbys +# in WalSnd array. Now, if standbys have the same priority, +# standby2 is selected preferentially and standby3 is next. +$node_standby_1->stop; +$node_standby_2->stop; +$node_standby_3->stop; + +# Make sure that each standby reports back to the primary in the wanted +# order. +start_standby_and_wait($node_primary, $node_standby_2); +start_standby_and_wait($node_primary, $node_standby_3); + +# Specify 2 as the number of sync standbys. +# Check that two standbys are in 'sync' state. +test_sync_state( + $node_primary, qq(standby2|2|sync +standby3|3|sync), + '2 synchronous standbys', + '2(standby1,standby2,standby3)'); + +# Start standby1 +start_standby_and_wait($node_primary, $node_standby_1); + +# Create standby4 linking to primary +my $node_standby_4 = get_new_node('standby4'); +$node_standby_4->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_4->start; + +# Check that standby1 and standby2 whose names appear earlier in +# synchronous_standby_names are considered as sync. Also check that +# standby3 appearing later represents potential, and standby4 is +# in 'async' state because it's not in the list. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|sync +standby3|3|potential +standby4|0|async), + '2 sync, 1 potential, and 1 async'); + +# Check that sync_state of each standby is determined correctly +# when num_sync exceeds the number of names of potential sync standbys +# specified in synchronous_standby_names. +test_sync_state( + $node_primary, qq(standby1|0|async +standby2|4|sync +standby3|3|sync +standby4|1|sync), + 'num_sync exceeds the num of potential sync standbys', + '6(standby4,standby0,standby3,standby2)'); + +# The setting that * comes before another standby name is acceptable +# but does not make sense in most cases. Check that sync_state is +# chosen properly even in case of that setting. standby1 is selected +# as synchronous as it has the highest priority, and is followed by a +# second standby listed first in the WAL sender array, which is +# standby2 in this case. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|sync +standby3|2|potential +standby4|2|potential), + 'asterisk before another standby name', + '2(standby1,*,standby2)'); + +# Check that the setting of '2(*)' chooses standby2 and standby3 that are stored +# earlier in WalSnd array as sync standbys. +test_sync_state( + $node_primary, qq(standby1|1|potential +standby2|1|sync +standby3|1|sync +standby4|1|potential), + 'multiple standbys having the same priority are chosen as sync', + '2(*)'); + +# Stop Standby3 which is considered in 'sync' state. +$node_standby_3->stop; + +# Check that the state of standby1 stored earlier in WalSnd array than +# standby4 is transited from potential to sync. 
+test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|1|sync +standby4|1|potential), + 'potential standby found earlier in array is promoted to sync'); + +# Check that standby1 and standby2 are chosen as sync standbys +# based on their priorities. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|sync +standby4|0|async), + 'priority-based sync replication specified by FIRST keyword', + 'FIRST 2(standby1, standby2)'); + +# Check that all the listed standbys are considered as candidates +# for sync standbys in a quorum-based sync replication. +test_sync_state( + $node_primary, qq(standby1|1|quorum +standby2|1|quorum +standby4|0|async), + '2 quorum and 1 async', + 'ANY 2(standby1, standby2)'); + +# Start Standby3 which will be considered in 'quorum' state. +$node_standby_3->start; + +# Check that the setting of 'ANY 2(*)' chooses all standbys as +# candidates for quorum sync standbys. +test_sync_state( + $node_primary, qq(standby1|1|quorum +standby2|1|quorum +standby3|1|quorum +standby4|1|quorum), + 'all standbys are considered as candidates for quorum sync standbys', + 'ANY 2(*)'); diff --git a/src/test/recovery/t/008_fsm_truncation.pl b/src/test/recovery/t/008_fsm_truncation.pl new file mode 100644 index 0000000..14b4b97 --- /dev/null +++ b/src/test/recovery/t/008_fsm_truncation.pl @@ -0,0 +1,99 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test WAL replay of FSM changes. +# +# FSM changes don't normally need to be WAL-logged, except for truncation. +# The FSM mustn't return a page that doesn't exist (anymore). +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 1; + +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); + +$node_primary->append_conf( + 'postgresql.conf', qq{ +fsync = on +wal_log_hints = on +max_prepared_transactions = 5 +autovacuum = off +}); + +# Create a primary node and its standby, initializing both with some data +# at the same time. 
+$node_primary->start; + +$node_primary->backup('primary_backup'); +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_primary, 'primary_backup', + has_streaming => 1); +$node_standby->start; + +$node_primary->psql( + 'postgres', qq{ +create table testtab (a int, b char(100)); +insert into testtab select generate_series(1,1000), 'foo'; +insert into testtab select generate_series(1,1000), 'foo'; +delete from testtab where ctid > '(8,0)'; +}); + +# Take a lock on the table to prevent following vacuum from truncating it +$node_primary->psql( + 'postgres', qq{ +begin; +lock table testtab in row share mode; +prepare transaction 'p1'; +}); + +# Vacuum, update FSM without truncation +$node_primary->psql('postgres', 'vacuum verbose testtab'); + +# Force a checkpoint +$node_primary->psql('postgres', 'checkpoint'); + +# Now do some more insert/deletes, another vacuum to ensure full-page writes +# are done +$node_primary->psql( + 'postgres', qq{ +insert into testtab select generate_series(1,1000), 'foo'; +delete from testtab where ctid > '(8,0)'; +vacuum verbose testtab; +}); + +# Ensure all buffers are now clean on the standby +$node_standby->psql('postgres', 'checkpoint'); + +# Release the lock, vacuum again which should lead to truncation +$node_primary->psql( + 'postgres', qq{ +rollback prepared 'p1'; +vacuum verbose testtab; +}); + +$node_primary->psql('postgres', 'checkpoint'); +my $until_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); + +# Wait long enough for standby to receive and apply all WAL +my $caughtup_query = + "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +# Promote the standby +$node_standby->promote; +$node_standby->psql('postgres', 'checkpoint'); + +# Restart to discard in-memory copy of FSM +$node_standby->restart; + +# Insert should work on standby +is( $node_standby->psql( + 'postgres', + qq{insert into testtab select generate_series(1,1000), 'foo';}), + 0, + 'INSERT succeeds with truncated relation FSM'); diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl new file mode 100644 index 0000000..900d181 --- /dev/null +++ b/src/test/recovery/t/009_twophase.pl @@ -0,0 +1,482 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Tests dedicated to two-phase commit in recovery +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 24; + +my $psql_out = ''; +my $psql_rc = ''; + +sub configure_and_reload +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($node, $parameter) = @_; + my $name = $node->name; + + $node->append_conf( + 'postgresql.conf', qq( + $parameter + )); + $node->psql('postgres', "SELECT pg_reload_conf()", stdout => \$psql_out); + is($psql_out, 't', "reload node $name with $parameter"); + return; +} + +# Set up two nodes, which will alternately be primary and replication standby. 
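# For context: max_prepared_transactions defaults to 0, which disables
# PREPARE TRANSACTION entirely; it must be nonzero here, and a standby needs
# it at least as large as its primary's to replay 2PC records.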
+ +# Setup london node +my $node_london = get_new_node("london"); +$node_london->init(allows_streaming => 1); +$node_london->append_conf( + 'postgresql.conf', qq( + max_prepared_transactions = 10 + log_checkpoints = true +)); +$node_london->start; +$node_london->backup('london_backup'); + +# Setup paris node +my $node_paris = get_new_node('paris'); +$node_paris->init_from_backup($node_london, 'london_backup', + has_streaming => 1); +$node_paris->start; + +# Switch to synchronous replication in both directions +configure_and_reload($node_london, "synchronous_standby_names = 'paris'"); +configure_and_reload($node_paris, "synchronous_standby_names = 'london'"); + +# Set up nonce names for current primary and standby nodes +note "Initially, london is primary and paris is standby"; +my ($cur_primary, $cur_standby) = ($node_london, $node_paris); +my $cur_primary_name = $cur_primary->name; + +# Create table we'll use in the test transactions +$cur_primary->psql('postgres', "CREATE TABLE t_009_tbl (id int, msg text)"); + +############################################################################### +# Check that we can commit and abort transaction after soft restart. +# Here checkpoint happens before shutdown and no WAL replay will occur at next +# startup. In this case postgres re-creates shared-memory state from twophase +# files. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (1, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (2, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_1'; + BEGIN; + INSERT INTO t_009_tbl VALUES (3, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (4, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_2';"); +$cur_primary->stop; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_1'"); +is($psql_rc, '0', 'Commit prepared transaction after restart'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_2'"); +is($psql_rc, '0', 'Rollback prepared transaction after restart'); + +############################################################################### +# Check that we can commit and abort after a hard restart. +# At next startup, WAL replay will re-create shared memory state for prepared +# transaction using dedicated WAL records. +############################################################################### + +$cur_primary->psql( + 'postgres', " + CHECKPOINT; + BEGIN; + INSERT INTO t_009_tbl VALUES (5, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (6, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_3'; + BEGIN; + INSERT INTO t_009_tbl VALUES (7, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (8, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_4';"); +$cur_primary->teardown_node; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_3'"); +is($psql_rc, '0', 'Commit prepared transaction after teardown'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_4'"); +is($psql_rc, '0', 'Rollback prepared transaction after teardown'); + +############################################################################### +# Check that WAL replay can handle several transactions with same GID name. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + CHECKPOINT; + BEGIN; + INSERT INTO t_009_tbl VALUES (9, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (10, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_5'; + COMMIT PREPARED 'xact_009_5'; + BEGIN; + INSERT INTO t_009_tbl VALUES (11, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (12, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_5';"); +$cur_primary->teardown_node; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_5'"); +is($psql_rc, '0', 'Replay several transactions with same GID'); + +############################################################################### +# Check that WAL replay cleans up its shared memory state and releases locks +# while replaying transaction commits. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (13, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (14, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_6'; + COMMIT PREPARED 'xact_009_6';"); +$cur_primary->teardown_node; +$cur_primary->start; +$psql_rc = $cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (15, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (16, 'issued to ${cur_primary_name}'); + -- This prepare can fail due to conflicting GID or locks conflicts if + -- replay did not fully cleanup its state on previous commit. + PREPARE TRANSACTION 'xact_009_7';"); +is($psql_rc, '0', "Cleanup of shared memory state for 2PC commit"); + +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_7'"); + +############################################################################### +# Check that WAL replay will cleanup its shared memory state on running standby. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (17, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (18, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_8'; + COMMIT PREPARED 'xact_009_8';"); +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', + "Cleanup of shared memory state on running standby without checkpoint"); + +############################################################################### +# Same as in previous case, but let's force checkpoint on standby between +# prepare and commit to use on-disk twophase files. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (19, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (20, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_9';"); +$cur_standby->psql('postgres', "CHECKPOINT"); +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_9'"); +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', + "Cleanup of shared memory state on running standby after checkpoint"); + +############################################################################### +# Check that prepared transactions can be committed on promoted standby. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (21, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_10';"); +$cur_primary->teardown_node; +$cur_standby->promote; + +# change roles +note "Now paris is primary and london is standby"; +($cur_primary, $cur_standby) = ($node_paris, $node_london); +$cur_primary_name = $cur_primary->name; + +# because london is not running at this point, we can't use syncrep commit +# on this command +$psql_rc = $cur_primary->psql('postgres', + "SET synchronous_commit = off; COMMIT PREPARED 'xact_009_10'"); +is($psql_rc, '0', "Restore of prepared transaction on promoted standby"); + +# restart old primary as new standby +$cur_standby->enable_streaming($cur_primary); +$cur_standby->start; + +############################################################################### +# Check that prepared transactions are replayed after soft restart of standby +# while primary is down. Since standby knows that primary is down it uses a +# different code path on startup to ensure that the status of transactions is +# consistent. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (23, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (24, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_11';"); +$cur_primary->stop; +$cur_standby->restart; +$cur_standby->promote; + +# change roles +note "Now london is primary and paris is standby"; +($cur_primary, $cur_standby) = ($node_london, $node_paris); +$cur_primary_name = $cur_primary->name; + +$cur_primary->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '1', + "Restore prepared transactions from files with primary down"); + +# restart old primary as new standby +$cur_standby->enable_streaming($cur_primary); +$cur_standby->start; + +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_11'"); + +############################################################################### +# Check that prepared transactions are correctly replayed after standby hard +# restart while primary is down. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (25, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (26, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_12'; + "); +$cur_primary->stop; +$cur_standby->teardown_node; +$cur_standby->start; +$cur_standby->promote; + +# change roles +note "Now paris is primary and london is standby"; +($cur_primary, $cur_standby) = ($node_paris, $node_london); +$cur_primary_name = $cur_primary->name; + +$cur_primary->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '1', + "Restore prepared transactions from records with primary down"); + +# restart old primary as new standby +$cur_standby->enable_streaming($cur_primary); +$cur_standby->start; + +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'"); + +############################################################################### +# Check for a lock conflict between prepared transaction with DDL inside and +# replay of XLOG_STANDBY_LOCK wal record. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + CREATE TABLE t_009_tbl2 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl2 VALUES (27, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_13'; + -- checkpoint will issue XLOG_STANDBY_LOCK that can conflict with lock + -- held by 'create table' statement + CHECKPOINT; + COMMIT PREPARED 'xact_009_13';"); + +# Ensure that last transaction is replayed on standby. +my $cur_primary_lsn = + $cur_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); +my $caughtup_query = + "SELECT '$cur_primary_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$cur_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM t_009_tbl2", + stdout => \$psql_out); +is($psql_out, '1', "Replay prepared transaction with DDL"); + +############################################################################### +# Check recovery of prepared transaction with DDL inside after a hard restart +# of the primary. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + CREATE TABLE t_009_tbl3 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl3 VALUES (28, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_14'; + BEGIN; + CREATE TABLE t_009_tbl4 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl4 VALUES (29, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_15';"); + +$cur_primary->teardown_node; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_14'"); +is($psql_rc, '0', 'Commit prepared transaction after teardown'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_15'"); +is($psql_rc, '0', 'Rollback prepared transaction after teardown'); + +############################################################################### +# Check recovery of prepared transaction with DDL inside after a soft restart +# of the primary. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + CREATE TABLE t_009_tbl5 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl5 VALUES (30, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_16'; + BEGIN; + CREATE TABLE t_009_tbl6 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl6 VALUES (31, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_17';"); + +$cur_primary->stop; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_16'"); +is($psql_rc, '0', 'Commit prepared transaction after restart'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_17'"); +is($psql_rc, '0', 'Rollback prepared transaction after restart'); + +############################################################################### +# Verify expected data appears on both servers. +############################################################################### + +$cur_primary->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', "No uncommitted prepared transactions on primary"); + +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl ORDER BY id", + stdout => \$psql_out); +is( $psql_out, qq{1|issued to london +2|issued to london +5|issued to london +6|issued to london +9|issued to london +10|issued to london +11|issued to london +12|issued to london +13|issued to london +14|issued to london +15|issued to london +16|issued to london +17|issued to london +18|issued to london +19|issued to london +20|issued to london +21|issued to london +22|issued to london +23|issued to paris +24|issued to paris +25|issued to london +26|issued to london}, + "Check expected t_009_tbl data on primary"); + +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl2", + stdout => \$psql_out); +is( $psql_out, + qq{27|issued to paris}, + "Check expected t_009_tbl2 data on primary"); + +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', "No uncommitted prepared transactions on standby"); + +$cur_standby->psql( + 'postgres', + "SELECT * FROM t_009_tbl ORDER BY id", + stdout => \$psql_out); +is( $psql_out, qq{1|issued to london +2|issued to london +5|issued to london +6|issued to london +9|issued to london +10|issued to london +11|issued to london +12|issued to london +13|issued to london +14|issued to london +15|issued to london +16|issued to london +17|issued to london +18|issued to london +19|issued to london +20|issued to london +21|issued to london +22|issued to london +23|issued to paris +24|issued to paris +25|issued to london +26|issued to london}, + "Check expected t_009_tbl data on standby"); + +$cur_standby->psql( + 'postgres', + "SELECT * FROM t_009_tbl2", + stdout => \$psql_out); +is( $psql_out, + qq{27|issued to paris}, + "Check expected t_009_tbl2 data on standby"); diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl new file mode 100644 index 0000000..973dcb5 --- /dev/null +++ b/src/test/recovery/t/010_logical_decoding_timelines.pl @@ -0,0 +1,199 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Demonstrate that logical can follow timeline switches. 
+# +# Logical replication slots can follow timeline switches but it's +# normally not possible to have a logical slot on a replica where +# promotion and a timeline switch can occur. The only ways +# we can create that circumstance are: +# +# * By doing a filesystem-level copy of the DB, since pg_basebackup +# excludes pg_replslot but we can copy it directly; or +# +# * by creating a slot directly at the C level on the replica and +# advancing it as we go using the low level APIs. It can't be done +# from SQL since logical decoding isn't allowed on replicas. +# +# This module uses the first approach to show that timeline following +# on a logical slot works. +# +# (For convenience, it also tests some recovery-related operations +# on logical slots). +# +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 13; +use File::Copy; +use IPC::Run (); +use Scalar::Util qw(blessed); + +my ($stdout, $stderr, $ret); + +# Initialize primary node +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1, has_archiving => 1); +$node_primary->append_conf( + 'postgresql.conf', q[ +wal_level = 'logical' +max_replication_slots = 3 +max_wal_senders = 2 +log_min_messages = 'debug2' +hot_standby_feedback = on +wal_receiver_status_interval = 1 +]); +$node_primary->dump_info; +$node_primary->start; + +note "testing logical timeline following with a filesystem-level copy"; + +$node_primary->safe_psql('postgres', + "SELECT pg_create_logical_replication_slot('before_basebackup', 'test_decoding');" +); +$node_primary->safe_psql('postgres', "CREATE TABLE decoding(blah text);"); +$node_primary->safe_psql('postgres', + "INSERT INTO decoding(blah) VALUES ('beforebb');"); + +# We also want to verify that DROP DATABASE on a standby with a logical +# slot works. This isn't strictly related to timeline following, but +# the only way to get a logical slot on a standby right now is to use +# the same physical copy trick, so: +$node_primary->safe_psql('postgres', 'CREATE DATABASE dropme;'); +$node_primary->safe_psql('dropme', + "SELECT pg_create_logical_replication_slot('dropme_slot', 'test_decoding');" +); + +$node_primary->safe_psql('postgres', 'CHECKPOINT;'); + +my $backup_name = 'b1'; +$node_primary->backup_fs_hot($backup_name); + +$node_primary->safe_psql('postgres', + q[SELECT pg_create_physical_replication_slot('phys_slot');]); + +my $node_replica = get_new_node('replica'); +$node_replica->init_from_backup( + $node_primary, $backup_name, + has_streaming => 1, + has_restoring => 1); +$node_replica->append_conf('postgresql.conf', + q[primary_slot_name = 'phys_slot']); + +$node_replica->start; + +# If we drop 'dropme' on the primary, the standby should drop the +# db and associated slot. +is($node_primary->psql('postgres', 'DROP DATABASE dropme'), + 0, 'dropped DB with logical slot OK on primary'); +$node_primary->wait_for_catchup($node_replica, 'replay', + $node_primary->lsn('insert')); +is( $node_replica->safe_psql( + 'postgres', q[SELECT 1 FROM pg_database WHERE datname = 'dropme']), + '', + 'dropped DB dropme on standby'); +is($node_primary->slot('dropme_slot')->{'slot_name'}, + undef, 'logical slot was actually dropped on standby'); + +# Back to testing failover... 
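+# (The assertions below lean on the difference between the two decoding
+# functions: pg_logical_slot_peek_changes() reads a slot's changes without
+# consuming them, while pg_logical_slot_get_changes() advances the slot.
+# A sketch of the call shape used repeatedly later:
+#
+#   SELECT data FROM pg_logical_slot_peek_changes('before_basebackup',
+#       NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');
+# )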
+$node_primary->safe_psql('postgres', + "SELECT pg_create_logical_replication_slot('after_basebackup', 'test_decoding');" +); +$node_primary->safe_psql('postgres', + "INSERT INTO decoding(blah) VALUES ('afterbb');"); +$node_primary->safe_psql('postgres', 'CHECKPOINT;'); + +# Verify that only the before base_backup slot is on the replica +$stdout = $node_replica->safe_psql('postgres', + 'SELECT slot_name FROM pg_replication_slots ORDER BY slot_name'); +is($stdout, 'before_basebackup', + 'Expected to find only slot before_basebackup on replica'); + +# Examine the physical slot the replica uses to stream changes +# from the primary to make sure its hot_standby_feedback +# has locked in a catalog_xmin on the physical slot, and that +# any xmin is >= the catalog_xmin +$node_primary->poll_query_until( + 'postgres', q[ + SELECT catalog_xmin IS NOT NULL + FROM pg_replication_slots + WHERE slot_name = 'phys_slot' + ]) or die "slot's catalog_xmin never became set"; + +my $phys_slot = $node_primary->slot('phys_slot'); +isnt($phys_slot->{'xmin'}, '', 'xmin assigned on physical slot of primary'); +isnt($phys_slot->{'catalog_xmin'}, + '', 'catalog_xmin assigned on physical slot of primary'); + +# Ignore wrap-around here, we're on a new cluster: +cmp_ok( + $phys_slot->{'xmin'}, '>=', + $phys_slot->{'catalog_xmin'}, + 'xmin on physical slot must not be lower than catalog_xmin'); + +$node_primary->safe_psql('postgres', 'CHECKPOINT'); +$node_primary->wait_for_catchup($node_replica, 'write'); + +# Boom, crash +$node_primary->stop('immediate'); + +$node_replica->promote; + +$node_replica->safe_psql('postgres', + "INSERT INTO decoding(blah) VALUES ('after failover');"); + +# Shouldn't be able to read from slot created after base backup +($ret, $stdout, $stderr) = $node_replica->psql('postgres', + "SELECT data FROM pg_logical_slot_peek_changes('after_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');" +); +is($ret, 3, 'replaying from after_basebackup slot fails'); +like( + $stderr, + qr/replication slot "after_basebackup" does not exist/, + 'after_basebackup slot missing'); + +# Should be able to read from slot created before base backup +($ret, $stdout, $stderr) = $node_replica->psql( + 'postgres', + "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", + timeout => $TestLib::timeout_default); +is($ret, 0, 'replay from slot before_basebackup succeeds'); + +my $final_expected_output_bb = q(BEGIN +table public.decoding: INSERT: blah[text]:'beforebb' +COMMIT +BEGIN +table public.decoding: INSERT: blah[text]:'afterbb' +COMMIT +BEGIN +table public.decoding: INSERT: blah[text]:'after failover' +COMMIT); +is($stdout, $final_expected_output_bb, + 'decoded expected data from slot before_basebackup'); +is($stderr, '', 'replay from slot before_basebackup produces no stderr'); + +# So far we've peeked the slots, so when we fetch the same info over +# pg_recvlogical we should get complete results. First, find out the commit lsn +# of the last transaction. There's no max(pg_lsn), so: + +my $endpos = $node_replica->safe_psql('postgres', + "SELECT lsn FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL) ORDER BY lsn DESC LIMIT 1;" +); + +# now use the walsender protocol to peek the slot changes and make sure we see +# the same results. 
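+# (pg_recvlogical_upto() below wraps the pg_recvlogical client.  Roughly the
+# same thing by hand, as a sketch -- the flags are standard pg_recvlogical
+# options, and a libpq connection string doubles as the -d argument:
+#
+#   TestLib::system_log('pg_recvlogical',
+#       '-d', $node_replica->connstr('postgres'),
+#       '-S', 'before_basebackup', '--start', '--endpos', $endpos,
+#       '-o', 'include-xids=0', '-o', 'skip-empty-xacts=1', '-f', '-');
+# )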
+ +$stdout = $node_replica->pg_recvlogical_upto( + 'postgres', 'before_basebackup', + $endpos, $TestLib::timeout_default, + 'include-xids' => '0', + 'skip-empty-xacts' => '1'); + +# walsender likes to add a newline +chomp($stdout); +is($stdout, $final_expected_output_bb, + 'got same output from walsender via pg_recvlogical on before_basebackup'); + +$node_replica->teardown_node(); diff --git a/src/test/recovery/t/011_crash_recovery.pl b/src/test/recovery/t/011_crash_recovery.pl new file mode 100644 index 0000000..a26e995 --- /dev/null +++ b/src/test/recovery/t/011_crash_recovery.pl @@ -0,0 +1,64 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# +# Tests relating to PostgreSQL crash recovery and redo +# +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; +use Config; + +plan tests => 3; + +my $node = get_new_node('primary'); +$node->init(allows_streaming => 1); +$node->start; + +my ($stdin, $stdout, $stderr) = ('', '', ''); + +# Ensure that pg_xact_status reports 'aborted' for xacts +# that were in-progress during crash. To do that, we need +# an xact to be in-progress when we crash and we need to know +# its xid. +my $tx = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$stdin, + '>', + \$stdout, + '2>', + \$stderr); +$stdin .= q[ +BEGIN; +CREATE TABLE mine(x integer); +SELECT pg_current_xact_id(); +]; +$tx->pump until $stdout =~ /[[:digit:]]+[\r\n]$/; + +# Status should be in-progress +my $xid = $stdout; +chomp($xid); + +is($node->safe_psql('postgres', qq[SELECT pg_xact_status('$xid');]), + 'in progress', 'own xid is in-progress'); + +# Crash and restart the postmaster +$node->stop('immediate'); +$node->start; + +# Make sure we really got a new xid +cmp_ok($node->safe_psql('postgres', 'SELECT pg_current_xact_id()'), + '>', $xid, 'new xid after restart is greater'); + +# and make sure we show the in-progress xact as aborted +is($node->safe_psql('postgres', qq[SELECT pg_xact_status('$xid');]), + 'aborted', 'xid is aborted after crash'); + +$stdin .= "\\q\n"; +$tx->finish; # wait for psql to quit gracefully diff --git a/src/test/recovery/t/012_subtransactions.pl b/src/test/recovery/t/012_subtransactions.pl new file mode 100644 index 0000000..aa84073 --- /dev/null +++ b/src/test/recovery/t/012_subtransactions.pl @@ -0,0 +1,219 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Tests dedicated to subtransactions in recovery +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 12; + +# Setup primary node +my $node_primary = get_new_node("primary"); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', qq( + max_prepared_transactions = 10 + log_checkpoints = true +)); +$node_primary->start; +$node_primary->backup('primary_backup'); +$node_primary->psql('postgres', "CREATE TABLE t_012_tbl (id int)"); + +# Setup standby node +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_primary, 'primary_backup', + has_streaming => 1); +$node_standby->start; + +# Switch to synchronous replication +$node_primary->append_conf( + 'postgresql.conf', qq( + synchronous_standby_names = '*' +)); +$node_primary->psql('postgres', "SELECT pg_reload_conf()"); + +my $psql_out = ''; +my $psql_rc = ''; + +############################################################################### +# Check that replay will correctly set SUBTRANS and properly advance nextXid +# so that it 
won't conflict with savepoint xids. +############################################################################### + +$node_primary->psql( + 'postgres', " + BEGIN; + DELETE FROM t_012_tbl; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s1; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s2; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s3; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s4; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s5; + INSERT INTO t_012_tbl VALUES (43); + PREPARE TRANSACTION 'xact_012_1'; + CHECKPOINT;"); + +$node_primary->stop; +$node_primary->start; +$node_primary->psql( + 'postgres', " + -- here we can get xid of previous savepoint if nextXid + -- wasn't properly advanced + BEGIN; + INSERT INTO t_012_tbl VALUES (142); + ROLLBACK; + COMMIT PREPARED 'xact_012_1';"); + +$node_primary->psql( + 'postgres', + "SELECT count(*) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '6', "Check nextXid handling for prepared subtransactions"); + +############################################################################### +# Check that replay will correctly set 2PC with more than +# PGPROC_MAX_CACHED_SUBXIDS subtransactions and also show data properly +# on promotion +############################################################################### +$node_primary->psql('postgres', "DELETE FROM t_012_tbl"); + +# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql +$node_primary->psql( + 'postgres', " + CREATE OR REPLACE FUNCTION hs_subxids (n integer) + RETURNS void + LANGUAGE plpgsql + AS \$\$ + BEGIN + IF n <= 0 THEN RETURN; END IF; + INSERT INTO t_012_tbl VALUES (n); + PERFORM hs_subxids(n - 1); + RETURN; + EXCEPTION WHEN raise_exception THEN NULL; END; + \$\$;"); +$node_primary->psql( + 'postgres', " + BEGIN; + SELECT hs_subxids(127); + COMMIT;"); +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); +$node_primary->stop; +$node_standby->promote; + +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); + +# restore state +($node_primary, $node_standby) = ($node_standby, $node_primary); +$node_standby->enable_streaming($node_primary); +$node_standby->start; +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); + +$node_primary->psql('postgres', "DELETE FROM t_012_tbl"); + +# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql +$node_primary->psql( + 'postgres', " + CREATE OR REPLACE FUNCTION hs_subxids (n integer) + RETURNS void + LANGUAGE plpgsql + AS \$\$ + BEGIN + IF n <= 0 THEN RETURN; END IF; + INSERT INTO t_012_tbl VALUES (n); + PERFORM hs_subxids(n - 1); + RETURN; + EXCEPTION WHEN raise_exception THEN NULL; END; + \$\$;"); +$node_primary->psql( + 'postgres', " + BEGIN; + SELECT hs_subxids(127); + PREPARE TRANSACTION 'xact_012_1';"); +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); +$node_primary->stop; +$node_standby->promote; + +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); + +# 
restore state
+($node_primary, $node_standby) = ($node_standby, $node_primary);
+$node_standby->enable_streaming($node_primary);
+$node_standby->start;
+$psql_rc = $node_primary->psql('postgres', "COMMIT PREPARED 'xact_012_1'");
+is($psql_rc, '0',
+	"Restore of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby"
+);
+
+$node_primary->psql(
+	'postgres',
+	"SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+	stdout => \$psql_out);
+is($psql_out, '8128', "Visible");
+
+$node_primary->psql('postgres', "DELETE FROM t_012_tbl");
+$node_primary->psql(
+	'postgres', "
+	BEGIN;
+	SELECT hs_subxids(201);
+	PREPARE TRANSACTION 'xact_012_1';");
+$node_primary->wait_for_catchup($node_standby, 'replay',
+	$node_primary->lsn('insert'));
+$node_standby->psql(
+	'postgres',
+	"SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+	stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+$node_primary->stop;
+$node_standby->promote;
+
+$node_standby->psql(
+	'postgres',
+	"SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+	stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
+
+# restore state
+($node_primary, $node_standby) = ($node_standby, $node_primary);
+$node_standby->enable_streaming($node_primary);
+$node_standby->start;
+$psql_rc = $node_primary->psql('postgres', "ROLLBACK PREPARED 'xact_012_1'");
+is($psql_rc, '0',
+	"Rollback of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby"
+);
+
+$node_primary->psql(
+	'postgres',
+	"SELECT coalesce(sum(id),-1) FROM t_012_tbl",
+	stdout => \$psql_out);
+is($psql_out, '-1', "Not visible");
diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl
new file mode 100644
index 0000000..0ed1551
--- /dev/null
+++ b/src/test/recovery/t/013_crash_restart.pl
@@ -0,0 +1,248 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+#
+# Tests restarts of postgres due to crashes of a subprocess.
+#
+# Two longer-running psql subprocesses are used: one to kill a
+# backend, triggering a crash-restart cycle, and one to detect when
+# the postmaster has noticed the death.  The second session is
+# necessary because it's otherwise hard to tell whether the postmaster
+# is still accepting new sessions because it hasn't yet noticed that
+# the backend died, or because it has already restarted.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+use Config;
+
+plan tests => 18;
+
+
+# To avoid hanging while expecting some specific input from a psql
+# instance being driven by us, add a timeout high enough that it
+# should never trigger even on very slow machines, unless something
+# is really wrong.
+my $psql_timeout = IPC::Run::timer($TestLib::timeout_default);
+
+my $node = get_new_node('primary');
+$node->init(allows_streaming => 1);
+$node->start();
+
+# By default PostgresNode doesn't restart after a crash, so enable that here.
+$node->safe_psql(
+	'postgres',
+	q[ALTER SYSTEM SET restart_after_crash = 1;
+	  ALTER SYSTEM SET log_connections = 1;
+	  SELECT pg_reload_conf();]);
+
+# Run psql, keeping the session alive, so we have a live backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+	[
+		'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+		$node->connstr('postgres')
+	],
+	'<',
+	\$killme_stdin,
+	'>',
+	\$killme_stdout,
+	'2>',
+	\$killme_stderr,
+	$psql_timeout);
+
+# Need a second psql to check if crash-restart happened.
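+# (pump_until() -- provided by TestLib in this vintage of the tree, since it
+# is not defined in this script -- drives an IPC::Run harness until the
+# referenced stream matches a regex or the timer expires.  The recurring
+# pattern in this test, as a minimal sketch:
+#
+#   $killme_stdin .= "SELECT 'ping';\n";
+#   pump_until($killme, $psql_timeout, \$killme_stdout, qr/ping/m)
+#     or die "psql did not respond";
+# )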
+my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+my $monitor = IPC::Run::start(
+	[
+		'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+		$node->connstr('postgres')
+	],
+	'<',
+	\$monitor_stdin,
+	'>',
+	\$monitor_stdout,
+	'2>',
+	\$monitor_stderr,
+	$psql_timeout);
+
+# Create a table and insert a row that should survive.
+$killme_stdin .= q[
+CREATE TABLE alive(status text);
+INSERT INTO alive VALUES($$committed-before-sigquit$$);
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+	'acquired pid for SIGQUIT');
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Insert a row that should *not* survive, due to an in-progress xact.
+$killme_stdin .= q[
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status;
+];
+ok(pump_until($killme, $psql_timeout, \$killme_stdout, qr/in-progress-before-sigquit/m),
+	'inserted in-progress-before-sigquit');
+$killme_stdout = '';
+$killme_stderr = '';
+
+
+# Start a long-running query in the second session; its failure will signal
+# that a crash-restart has occurred.  The initial wait for the trivial
+# select is to be sure that psql successfully connected to the backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok(pump_until($monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m),
+	'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+# Kill once with QUIT - we expect psql to exit, while emitting an error
+# message first.
+my $ret = TestLib::system_log('pg_ctl', 'kill', 'QUIT', $pid);
+
+# Exactly one process should have been alive to be killed.
+is($ret, 0, "killed process with SIGQUIT");
+
+# Check that psql sees the killed backend as having been terminated.
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+		$killme,
+		$psql_timeout,
+		\$killme_stderr,
+		qr/WARNING: terminating connection because of unexpected SIGQUIT signal|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+	),
+	"psql query died successfully after SIGQUIT");
+$killme_stderr = '';
+$killme_stdout = '';
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
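+# (Either outcome is possible: the WARNING if the backend got far enough to
+# send it, or a bare libpq connection-loss message if not; the pattern below
+# accepts both.)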
+ok( pump_until( + $monitor, + $psql_timeout, + \$monitor_stderr, + qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql monitor died successfully after SIGQUIT"); +$monitor->finish; + +# Wait till server restarts +is($node->poll_query_until('postgres', undef, ''), + "1", "reconnected after SIGQUIT"); + + +# restart psql processes, now that the crash cycle finished +($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +$killme->run(); +($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', ''); +$monitor->run(); + + +# Acquire pid of new backend +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok(pump_until($killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + "acquired pid for SIGKILL"); +$pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Insert test rows +$killme_stdin .= q[ +INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status; +BEGIN; +INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status; +]; +ok(pump_until($killme, $psql_timeout, \$killme_stdout, qr/in-progress-before-sigkill/m), + 'inserted in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Re-start longrunning query in second session; its failure will signal that +# crash-restart has occurred. The initial wait for the trivial select is to +# be sure that psql successfully connected to backend. +$monitor_stdin .= q[ +SELECT $$psql-connected$$; +SELECT pg_sleep(3600); +]; +ok(pump_until($monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m), + 'monitor connected'); +$monitor_stdout = ''; +$monitor_stderr = ''; + + +# kill with SIGKILL this time - we expect the backend to exit, without +# being able to emit an error message +$ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, "killed process with KILL"); + +# Check that psql sees the server as being terminated. No WARNING, +# because signal handlers aren't being run on SIGKILL. +$killme_stdin .= q[ +SELECT 1; +]; +ok( pump_until( + $killme, + $psql_timeout, + \$killme_stderr, + qr/server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql query died successfully after SIGKILL"); +$killme->finish; + +# Wait till server restarts - we should get the WARNING here, but +# sometimes the server is unable to send that, if interrupted while +# sending. 
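+# (Completion of the restart is then detected by polling for a working
+# connection; the same idea as this minimal sketch:
+#
+#   $node->poll_query_until('postgres', 'SELECT 1', '1')
+#     or die "server did not come back up";
+# )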
+ok( pump_until( + $monitor, + $psql_timeout, + \$monitor_stderr, + qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql monitor died successfully after SIGKILL"); +$monitor->finish; + +# Wait till server restarts +is($node->poll_query_until('postgres', undef, ''), + "1", "reconnected after SIGKILL"); + +# Make sure the committed rows survived, in-progress ones not +is( $node->safe_psql('postgres', 'SELECT * FROM alive'), + "committed-before-sigquit\ncommitted-before-sigkill", + 'data survived'); + +is( $node->safe_psql( + 'postgres', + 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status' + ), + 'before-orderly-restart', + 'can still write after crash restart'); + +# Just to be sure, check that an orderly restart now still works +$node->restart(); + +is( $node->safe_psql('postgres', 'SELECT * FROM alive'), + "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart", + 'data survived'); + +is( $node->safe_psql( + 'postgres', + 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status' + ), + 'after-orderly-restart', + 'can still write after orderly restart'); + +$node->stop(); diff --git a/src/test/recovery/t/014_unlogged_reinit.pl b/src/test/recovery/t/014_unlogged_reinit.pl new file mode 100644 index 0000000..6d40535 --- /dev/null +++ b/src/test/recovery/t/014_unlogged_reinit.pl @@ -0,0 +1,82 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Tests that unlogged tables are properly reinitialized after a crash. +# +# The behavior should be the same when restoring from a backup, but +# that is not tested here. + +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 12; + +my $node = get_new_node('main'); + +$node->init; +$node->start; +my $pgdata = $node->data_dir; + +# Create an unlogged table to test that forks other than init are not +# copied. +$node->safe_psql('postgres', 'CREATE UNLOGGED TABLE base_unlogged (id int)'); + +my $baseUnloggedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('base_unlogged')}); + +# Test that main and init forks exist. +ok(-f "$pgdata/${baseUnloggedPath}_init", 'init fork in base exists'); +ok(-f "$pgdata/$baseUnloggedPath", 'main fork in base exists'); + +# Create an unlogged table in a tablespace. + +my $tablespaceDir = TestLib::tempdir; + +$node->safe_psql('postgres', "CREATE TABLESPACE ts1 LOCATION '$tablespaceDir'"); +$node->safe_psql('postgres', + 'CREATE UNLOGGED TABLE ts1_unlogged (id int) TABLESPACE ts1'); + +my $ts1UnloggedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('ts1_unlogged')}); + +# Test that main and init forks exist. +ok(-f "$pgdata/${ts1UnloggedPath}_init", 'init fork in tablespace exists'); +ok(-f "$pgdata/$ts1UnloggedPath", 'main fork in tablespace exists'); + +# Crash the postmaster. +$node->stop('immediate'); + +# Write fake forks to test that they are removed during recovery. +append_to_file("$pgdata/${baseUnloggedPath}_vm", 'TEST_VM'); +append_to_file("$pgdata/${baseUnloggedPath}_fsm", 'TEST_FSM'); + +# Remove main fork to test that it is recopied from init. 
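+# (Relation forks share the main fork's file name plus a suffix: the bare
+# <relfilenode> for the main fork, then "_init", "_fsm" and "_vm".  A
+# throwaway debugging sketch for listing whatever forks currently exist:
+#
+#   note "forks on disk: " . join(' ', glob("$pgdata/${baseUnloggedPath}*"));
+#
+# After the unlink below, only the init fork and the two fake forks remain.)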
+unlink("$pgdata/${baseUnloggedPath}")
+  or BAIL_OUT("could not remove \"${baseUnloggedPath}\": $!");
+
+# Do the same for the tablespace
+append_to_file("$pgdata/${ts1UnloggedPath}_vm", 'TEST_VM');
+append_to_file("$pgdata/${ts1UnloggedPath}_fsm", 'TEST_FSM');
+unlink("$pgdata/${ts1UnloggedPath}")
+  or BAIL_OUT("could not remove \"${ts1UnloggedPath}\": $!");
+
+$node->start;
+
+# check unlogged table in base
+ok(-f "$pgdata/${baseUnloggedPath}_init", 'init fork in base still exists');
+ok(-f "$pgdata/$baseUnloggedPath", 'main fork in base recreated at startup');
+ok(!-f "$pgdata/${baseUnloggedPath}_vm",
+	'vm fork in base removed at startup');
+ok( !-f "$pgdata/${baseUnloggedPath}_fsm",
+	'fsm fork in base removed at startup');
+
+# check unlogged table in tablespace
+ok( -f "$pgdata/${ts1UnloggedPath}_init",
+	'init fork still exists in tablespace');
+ok(-f "$pgdata/$ts1UnloggedPath",
+	'main fork in tablespace recreated at startup');
+ok( !-f "$pgdata/${ts1UnloggedPath}_vm",
+	'vm fork in tablespace removed at startup');
+ok( !-f "$pgdata/${ts1UnloggedPath}_fsm",
+	'fsm fork in tablespace removed at startup');
diff --git a/src/test/recovery/t/015_promotion_pages.pl b/src/test/recovery/t/015_promotion_pages.pl
new file mode 100644
index 0000000..2b72dc2
--- /dev/null
+++ b/src/test/recovery/t/015_promotion_pages.pl
@@ -0,0 +1,87 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test for promotion handling with WAL records generated post-promotion
+# before the first checkpoint is generated.  This test case checks for
+# invalid page references at replay based on the minimum consistent
+# recovery point defined.
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+# Initialize primary node
+my $alpha = get_new_node('alpha');
+$alpha->init(allows_streaming => 1);
+# Setting wal_log_hints to off is important to get invalid page
+# references.
+$alpha->append_conf("postgresql.conf", <<EOF);
+wal_log_hints = off
+EOF
+
+# Start the primary
+$alpha->start;
+
+# Set up and start a standby
+$alpha->backup('bkp');
+my $bravo = get_new_node('bravo');
+$bravo->init_from_backup($alpha, 'bkp', has_streaming => 1);
+$bravo->append_conf('postgresql.conf', <<EOF);
+checkpoint_timeout=1h
+EOF
+$bravo->start;
+
+# Dummy table for the upcoming tests.
+$alpha->safe_psql('postgres', 'create table test1 (a int)');
+$alpha->safe_psql('postgres',
+	'insert into test1 select generate_series(1, 10000)');
+
+# Take a checkpoint
+$alpha->safe_psql('postgres', 'checkpoint');
+
+# The following vacuum will set visibility map bits and create
+# problematic WAL records.
+$alpha->safe_psql('postgres', 'vacuum verbose test1');
+# Wait for the last record to have been replayed on the standby.
+$alpha->wait_for_catchup($bravo, 'replay', $alpha->lsn('insert'));
+
+# Now force a checkpoint on the standby.  This seems unnecessary, but without
+# it the primary's previous checkpoint is not reflected on the standby, so
+# redo could otherwise start from a much older point that includes even the
+# CREATE TABLE and the initial page additions.
+$bravo->safe_psql('postgres', 'checkpoint');
+
+# Now just use a dummy table and run some operations to move minRecoveryPoint
+# beyond the previous vacuum.
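+# (minRecoveryPoint itself is only visible in the control file; when
+# debugging, it can be eyeballed with pg_controldata, roughly like this
+# sketch:
+#
+#   my ($out, $err) = run_command([ 'pg_controldata', $bravo->data_dir ]);
+#   note $1 if $out =~ /^Minimum recovery ending location:\s*(.*)$/m;
+# )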
+$alpha->safe_psql('postgres', 'create table test2 (a int, b text)');
+$alpha->safe_psql('postgres',
+	'insert into test2 select generate_series(1,10000), md5(random()::text)');
+$alpha->safe_psql('postgres', 'truncate test2');
+
+# Wait again for all records to be replayed.
+$alpha->wait_for_catchup($bravo, 'replay', $alpha->lsn('insert'));
+
+# Do the promotion, which reinitializes minRecoveryPoint in the control
+# file so that WAL is replayed up to the end.
+$bravo->promote;
+
+# Truncate the table on the promoted standby, then vacuum and extend it
+# again to create new page references.  The first post-recovery checkpoint
+# has not happened yet.
+$bravo->safe_psql('postgres', 'truncate test1');
+$bravo->safe_psql('postgres', 'vacuum verbose test1');
+$bravo->safe_psql('postgres',
+	'insert into test1 select generate_series(1,1000)');
+
+# Now crash-stop the promoted standby and restart it.  This makes sure that
+# replay does not see invalid page references because of an invalid
+# minimum consistent recovery point.
+$bravo->stop('immediate');
+$bravo->start;
+
+# Check the state of the table after full crash recovery.  All its data
+# should be here.
+my $psql_out;
+$bravo->psql('postgres', "SELECT count(*) FROM test1", stdout => \$psql_out);
+is($psql_out, '1000', "Check that table state is correct");
diff --git a/src/test/recovery/t/016_min_consistency.pl b/src/test/recovery/t/016_min_consistency.pl
new file mode 100644
index 0000000..60ecd3d
--- /dev/null
+++ b/src/test/recovery/t/016_min_consistency.pl
@@ -0,0 +1,141 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test for checking consistency of on-disk pages for a cluster with
+# the minimum recovery LSN, ensuring that the updates happen across
+# all processes.  In this test, the updates from the startup process
+# and the checkpointer (which triggers non-startup code paths) are
+# both checked.
+
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+
+# Find the largest LSN in the set of pages that make up the given relation
+# file.  This is used for offline checks of page consistency.  The LSN has
+# historically been stored as two 4-byte numbers located at the beginning
+# of each page.
+sub find_largest_lsn
+{
+	my $blocksize = int(shift);
+	my $filename = shift;
+	my ($max_hi, $max_lo) = (0, 0);
+	open(my $fh, "<:raw", $filename)
+	  or die "failed to open $filename: $!";
+	my ($buf, $len);
+	while ($len = read($fh, $buf, $blocksize))
+	{
+		$len == $blocksize
+		  or die "read only $len of $blocksize bytes from $filename";
+		my ($hi, $lo) = unpack("LL", $buf);
+
+		if ($hi > $max_hi or ($hi == $max_hi and $lo > $max_lo))
+		{
+			($max_hi, $max_lo) = ($hi, $lo);
+		}
+	}
+	defined($len) or die "read error on $filename: $!";
+	close($fh);
+
+	return sprintf("%X/%X", $max_hi, $max_lo);
+}
+
+# Initialize primary node
+my $primary = get_new_node('primary');
+$primary->init(allows_streaming => 1);
+
+# Set shared_buffers to a very low value to force discards and flushes of
+# PostgreSQL buffers on the standby, so that processes other than the
+# startup process also update the minimum recovery LSN in the control
+# file.  Autovacuum is disabled so that there is no risk of processes
+# other than the checkpointer doing page flushes.
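+# (Worked example for find_largest_lsn() above: a page stamped with LSN
+# 0/1695E60 carries xlogid = 0 and xrecoff = 0x01695E60 in its first eight
+# bytes, and sprintf("%X/%X", 0, 0x01695E60) renders that back as
+# "0/1695E60".  The "LL" unpack template reads both halves in native byte
+# order, matching how the server writes them.)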
+$primary->append_conf("postgresql.conf", <<EOF);
+shared_buffers = 128kB
+autovacuum = off
+EOF
+
+# Start the primary
+$primary->start;
+
+# Set up and start a standby
+$primary->backup('bkp');
+my $standby = get_new_node('standby');
+$standby->init_from_backup($primary, 'bkp', has_streaming => 1);
+$standby->start;
+
+# Create the base table whose data consistency is checked.
+$primary->safe_psql(
+	'postgres', "
+CREATE TABLE test1 (a int) WITH (fillfactor = 10);
+INSERT INTO test1 SELECT generate_series(1, 10000);");
+
+# Take a checkpoint and force post-checkpoint full-page writes, which
+# make the startup process replay those pages, updating minRecoveryPoint.
+$primary->safe_psql('postgres', 'CHECKPOINT;');
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Wait for the last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby, 'replay', $primary->lsn('insert'));
+
+# Fill the standby's shared buffers with the data inserted previously.
+$standby->safe_psql('postgres', 'SELECT count(*) FROM test1;');
+
+# Update the table again.  This does not generate full-page writes, so
+# the standby will replay the records associated with it, but the startup
+# process will not flush those pages.
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;');
+
+# Extract from the relation the last block created and its relation
+# file; these are used at the end of the test for sanity checks.
+my $blocksize = $primary->safe_psql('postgres',
+	"SELECT setting::int FROM pg_settings WHERE name = 'block_size';");
+my $last_block = $primary->safe_psql('postgres',
+	"SELECT pg_relation_size('test1')::int / $blocksize - 1;");
+my $relfilenode = $primary->safe_psql('postgres',
+	"SELECT pg_relation_filepath('test1'::regclass);");
+
+# Wait for the last record to have been replayed on the standby.
+$primary->wait_for_catchup($standby, 'replay', $primary->lsn('insert'));
+
+# Issue a restart point on the standby now, which makes the checkpointer
+# update minRecoveryPoint.
+$standby->safe_psql('postgres', 'CHECKPOINT;');
+
+# Now shut down the primary violently so that the standby does not
+# receive the shutdown checkpoint, making sure that the startup
+# process does not flush any pages on its side.  The standby is
+# cleanly stopped, which makes the checkpointer update minRecoveryPoint
+# with the restart point created at shutdown.
+$primary->stop('immediate');
+$standby->stop('fast');
+
+# Check the data consistency of the instance while offline.  This is
+# done by directly scanning the on-disk relation blocks and comparing
+# them with what pg_controldata reports.
+my $standby_data = $standby->data_dir;
+my $offline_max_lsn =
+  find_largest_lsn($blocksize, "$standby_data/$relfilenode");
+
+# Fetch minRecoveryPoint from the control file itself
+my ($stdout, $stderr) = run_command([ 'pg_controldata', $standby_data ]);
+my @control_data = split("\n", $stdout);
+my $offline_recovery_lsn = undef;
+foreach (@control_data)
+{
+	if ($_ =~ /^Minimum recovery ending location:\s*(.*)$/mg)
+	{
+		$offline_recovery_lsn = $1;
+		last;
+	}
+}
+die "No minRecoveryPoint in control file found\n"
+  unless defined($offline_recovery_lsn);
+
+# minRecoveryPoint should never be older than the maximum LSN for all
+# the pages on disk.
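+# (Note the comparison below is textual: "%X/%X"-formatted LSNs compare
+# correctly with "ge" only while both halves have the same number of hex
+# digits, which holds on a freshly created cluster like this one.  A
+# digit-count-proof variant, as a sketch:
+#
+#   sub lsn_le
+#   {
+#       my @a = map { hex } split(m{/}, $_[0]);
+#       my @b = map { hex } split(m{/}, $_[1]);
+#       return ($a[0] < $b[0]) || ($a[0] == $b[0] && $a[1] <= $b[1]);
+#   }
+# )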
+ok($offline_recovery_lsn ge $offline_max_lsn, + "Check offline that table data is consistent with minRecoveryPoint"); diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl new file mode 100644 index 0000000..48eb465 --- /dev/null +++ b/src/test/recovery/t/017_shm.pl @@ -0,0 +1,221 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# +# Tests of pg_shmem.h functions +# +use strict; +use warnings; +use Config; +use File::stat qw(stat); +use IPC::Run 'run'; +use PostgresNode; +use Test::More; +use TestLib; +use Time::HiRes qw(usleep); + +# If we don't have shmem support, skip the whole thing +eval { + require IPC::SharedMem; + IPC::SharedMem->import; + require IPC::SysV; + IPC::SysV->import(qw(IPC_CREAT IPC_EXCL S_IRUSR S_IWUSR)); +}; +if ($@ || $windows_os) +{ + plan skip_all => 'SysV shared memory not supported by this platform'; +} +else +{ + plan tests => 4; +} + +my $tempdir = TestLib::tempdir; + +# Log "ipcs" diffs on a best-effort basis, swallowing any error. +my $ipcs_before = "$tempdir/ipcs_before"; +eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; }; + +sub log_ipcs +{ + eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] }; + return; +} + +# Node setup. +my $gnat = PostgresNode->get_new_node('gnat'); +$gnat->init; + +# Create a shmem segment that will conflict with gnat's first choice +# of shmem key. (If we fail to create it because something else is +# already using that key, that's perfectly fine, though the test will +# exercise a different scenario than it usually does.) +my $gnat_dir_stat = stat($gnat->data_dir); +defined($gnat_dir_stat) or die('unable to stat ' . $gnat->data_dir); +my $gnat_inode = $gnat_dir_stat->ino; +note "gnat's datadir inode = $gnat_inode"; + +# Note: must reference IPC::SysV's constants as functions, or this file +# fails to compile when that module is not available. +my $gnat_conflict_shm = + IPC::SharedMem->new($gnat_inode, 1024, + IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR()); +note "could not create conflicting shmem" if !defined($gnat_conflict_shm); +log_ipcs(); + +$gnat->start; +log_ipcs(); + +$gnat->restart; # should keep same shmem key +log_ipcs(); + +# Upon postmaster death, postmaster children exit automatically. +$gnat->kill9; +log_ipcs(); +poll_start($gnat); # gnat recycles its former shm key. +log_ipcs(); + +note "removing the conflicting shmem ..."; +$gnat_conflict_shm->remove if $gnat_conflict_shm; +log_ipcs(); + +# Upon postmaster death, postmaster children exit automatically. +$gnat->kill9; +log_ipcs(); + +# In this start, gnat will use its normal shmem key, and fail to remove +# the higher-keyed segment that the previous postmaster was using. +# That's not great, but key collisions should be rare enough to not +# make this a big problem. +poll_start($gnat); +log_ipcs(); +$gnat->stop; +log_ipcs(); + +# Re-create the conflicting segment, and start/stop normally, just so +# this test script doesn't leak the higher-keyed segment. +note "re-creating conflicting shmem ..."; +$gnat_conflict_shm = + IPC::SharedMem->new($gnat_inode, 1024, + IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR()); +note "could not create conflicting shmem" if !defined($gnat_conflict_shm); +log_ipcs(); + +$gnat->start; +log_ipcs(); +$gnat->stop; +log_ipcs(); + +note "removing the conflicting shmem ..."; +$gnat_conflict_shm->remove if $gnat_conflict_shm; +log_ipcs(); + +# Scenarios involving no postmaster.pid, dead postmaster, and a live backend. 
+# Use a regress.c function to emulate the responsiveness of a backend working +# through a CPU-intensive task. +$gnat->start; +log_ipcs(); + +my $regress_shlib = $ENV{REGRESS_SHLIB}; +$gnat->safe_psql('postgres', <<EOSQL); +CREATE FUNCTION wait_pid(int) + RETURNS void + AS '$regress_shlib' + LANGUAGE C STRICT; +EOSQL +my $slow_query = 'SELECT wait_pid(pg_backend_pid())'; +my ($stdout, $stderr); +my $slow_client = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'), + '-c', $slow_query + ], + '<', + \undef, + '>', + \$stdout, + '2>', + \$stderr, + IPC::Run::timeout(5 * $TestLib::timeout_default)); +ok( $gnat->poll_query_until( + 'postgres', + "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'), + 'slow query started'); +my $slow_pid = $gnat->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'"); +$gnat->kill9; +unlink($gnat->data_dir . '/postmaster.pid'); +$gnat->rotate_logfile; # on Windows, can't open old log for writing +log_ipcs(); +# Reject ordinary startup. Retry for the same reasons poll_start() does, +# every 0.1s for at least $TestLib::timeout_default seconds. +my $pre_existing_msg = qr/pre-existing shared memory block/; +{ + my $max_attempts = 10 * $TestLib::timeout_default; + my $attempts = 0; + while ($attempts < $max_attempts) + { + last + if $gnat->start(fail_ok => 1) + || slurp_file($gnat->logfile) =~ $pre_existing_msg; + usleep(100_000); + $attempts++; + } +} +like(slurp_file($gnat->logfile), + $pre_existing_msg, 'detected live backend via shared memory'); +# Reject single-user startup. +my $single_stderr; +ok( !run_log( + [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ], + '<', \undef, '2>', \$single_stderr), + 'live query blocks --single'); +print STDERR $single_stderr; +like($single_stderr, $pre_existing_msg, + 'single-user mode detected live backend via shared memory'); +log_ipcs(); + +# cleanup slow backend +TestLib::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid); +$slow_client->finish; # client has detected backend termination +log_ipcs(); + +# now startup should work +poll_start($gnat); +log_ipcs(); + +# finish testing +$gnat->stop; +log_ipcs(); + + +# We may need retries to start a new postmaster. Causes: +# - kernel is slow to deliver SIGKILL +# - postmaster parent is slow to waitpid() +# - postmaster child is slow to exit in response to SIGQUIT +# - postmaster child is slow to exit after postmaster death +sub poll_start +{ + my ($node) = @_; + + my $max_attempts = 10 * $TestLib::timeout_default; + my $attempts = 0; + + while ($attempts < $max_attempts) + { + $node->start(fail_ok => 1) && return 1; + + # Wait 0.1 second before retrying. + usleep(100_000); + + # Clean up in case the start attempt just timed out or some such. + $node->stop('fast', fail_ok => 1); + + $attempts++; + } + + # Try one last time without fail_ok, which will BAIL_OUT unless it + # succeeds. + $node->start && return 1; + return 0; +} diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl new file mode 100644 index 0000000..69e1462 --- /dev/null +++ b/src/test/recovery/t/018_wal_optimize.pl @@ -0,0 +1,399 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test WAL replay when some operation has skipped WAL. +# +# These tests exercise code that once violated the mandate described in +# src/backend/access/transam/README section "Skipping WAL for New +# RelFileNode". 
The tests work by committing some transactions, initiating an +# immediate shutdown, and confirming that the expected data survives recovery. +# For many years, individual commands made the decision to skip WAL, hence the +# frequent appearance of COPY in these tests. +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 38; + +sub check_orphan_relfilenodes +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($node, $test_name) = @_; + + my $db_oid = $node->safe_psql('postgres', + "SELECT oid FROM pg_database WHERE datname = 'postgres'"); + my $prefix = "base/$db_oid/"; + my $filepaths_referenced = $node->safe_psql( + 'postgres', " + SELECT pg_relation_filepath(oid) FROM pg_class + WHERE reltablespace = 0 AND relpersistence <> 't' AND + pg_relation_filepath(oid) IS NOT NULL;"); + is_deeply( + [ + sort(map { "$prefix$_" } + grep(/^[0-9]+$/, slurp_dir($node->data_dir . "/$prefix"))) + ], + [ sort split /\n/, $filepaths_referenced ], + $test_name); + return; +} + +# We run this same test suite for both wal_level=minimal and replica. +sub run_wal_optimize +{ + my $wal_level = shift; + + my $node = get_new_node("node_$wal_level"); + $node->init; + $node->append_conf( + 'postgresql.conf', qq( +wal_level = $wal_level +max_prepared_transactions = 1 +wal_log_hints = on +wal_skip_threshold = 0 +#wal_debug = on +)); + $node->start; + + # Setup + my $tablespace_dir = $node->basedir . '/tablespace_other'; + mkdir($tablespace_dir); + my $result; + + # Test redo of CREATE TABLESPACE. + $node->safe_psql( + 'postgres', " + CREATE TABLE moved (id int); + INSERT INTO moved VALUES (1); + CREATE TABLESPACE other LOCATION '$tablespace_dir'; + BEGIN; + ALTER TABLE moved SET TABLESPACE other; + CREATE TABLE originated (id int); + INSERT INTO originated VALUES (1); + CREATE UNIQUE INDEX ON originated(id) TABLESPACE other; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM moved;"); + is($result, qq(1), "wal_level = $wal_level, CREATE+SET TABLESPACE"); + $result = $node->safe_psql( + 'postgres', " + INSERT INTO originated VALUES (1) ON CONFLICT (id) + DO UPDATE set id = originated.id + 1 + RETURNING id;"); + is($result, qq(2), + "wal_level = $wal_level, CREATE TABLESPACE, CREATE INDEX"); + + # Test direct truncation optimization. No tuples. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE trunc (id serial PRIMARY KEY); + TRUNCATE trunc; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;"); + is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table"); + + # Test truncation with inserted tuples within the same transaction. + # Tuples inserted after the truncation should be seen. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE trunc_ins (id serial PRIMARY KEY); + INSERT INTO trunc_ins VALUES (DEFAULT); + TRUNCATE trunc_ins; + INSERT INTO trunc_ins VALUES (DEFAULT); + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', + "SELECT count(*), min(id) FROM trunc_ins;"); + is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT"); + + # Same for prepared transaction. + # Tuples inserted after the truncation should be seen. 
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE twophase (id serial PRIMARY KEY);
+		INSERT INTO twophase VALUES (DEFAULT);
+		TRUNCATE twophase;
+		INSERT INTO twophase VALUES (DEFAULT);
+		PREPARE TRANSACTION 't';
+		COMMIT PREPARED 't';");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres',
+		"SELECT count(*), min(id) FROM twophase;");
+	is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE");
+
+	# Writing WAL at end of xact, instead of syncing.
+	$node->safe_psql(
+		'postgres', "
+		SET wal_skip_threshold = '1GB';
+		BEGIN;
+		CREATE TABLE noskip (id serial PRIMARY KEY);
+		INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a);
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM noskip;");
+	is($result, qq(20000), "wal_level = $wal_level, end-of-xact WAL");
+
+	# Data file for COPY query in subsequent tests
+	my $basedir   = $node->basedir;
+	my $copy_file = "$basedir/copy_data.txt";
+	TestLib::append_to_file(
+		$copy_file, qq(20000,30000
+20001,30001
+20002,30002));
+
+	# Test truncation with inserted tuples using both INSERT and COPY. Tuples
+	# inserted after the truncation should be seen.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int);
+		INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000));
+		TRUNCATE ins_trunc;
+		INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
+		COPY ins_trunc FROM '$copy_file' DELIMITER ',';
+		INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000);
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trunc;");
+	is($result, qq(5), "wal_level = $wal_level, TRUNCATE COPY INSERT");
+
+	# Test truncation with inserted tuples using COPY. Tuples copied after
+	# the truncation should be seen.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int);
+		INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000));
+		TRUNCATE trunc_copy;
+		COPY trunc_copy FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result =
+	  $node->safe_psql('postgres', "SELECT count(*) FROM trunc_copy;");
+	is($result, qq(3), "wal_level = $wal_level, TRUNCATE COPY");
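+
+	# (A hedged aside, not exercised by the suite: one way to observe the
+	# optimization these tests rely on is to measure WAL volume.  Under
+	# wal_level = minimal with wal_skip_threshold = 0, a COPY into a table
+	# created in the same transaction should advance the insert LSN far
+	# less than the same workload under wal_level = replica.
+	#
+	#   my $lsn0 = $node->safe_psql('postgres',
+	#       "SELECT pg_current_wal_insert_lsn()");
+	#   # ... run the transaction under test ...
+	#   my $bytes = $node->safe_psql('postgres',
+	#       "SELECT pg_wal_lsn_diff(pg_current_wal_insert_lsn(), '$lsn0')");
+	#   note "emitted $bytes bytes of WAL";
+	# )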
+
+	# Like previous test, but roll back the SET TABLESPACE in a
+	# subtransaction.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int);
+		INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000));
+		TRUNCATE spc_abort;
+		SAVEPOINT s;
+		ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s;
+		COPY spc_abort FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_abort;");
+	is($result, qq(3),
+		"wal_level = $wal_level, SET TABLESPACE abort subtransaction");
+
+	# Same, but with the SET TABLESPACE committed or rolled back in
+	# different subtransaction patterns.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int);
+		INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000));
+		TRUNCATE spc_commit;
+		SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s;
+		COPY spc_commit FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result =
+	  $node->safe_psql('postgres', "SELECT count(*) FROM spc_commit;");
+	is($result, qq(3),
+		"wal_level = $wal_level, SET TABLESPACE commit subtransaction");
+
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int);
+		INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000));
+		TRUNCATE spc_nest;
+		SAVEPOINT s;
+		ALTER TABLE spc_nest SET TABLESPACE other;
+		SAVEPOINT s2;
+		ALTER TABLE spc_nest SET TABLESPACE pg_default;
+		ROLLBACK TO s2;
+		SAVEPOINT s2;
+		ALTER TABLE spc_nest SET TABLESPACE pg_default;
+		RELEASE s2;
+		ROLLBACK TO s;
+		COPY spc_nest FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_nest;");
+	is($result, qq(3),
+		"wal_level = $wal_level, SET TABLESPACE nested subtransaction");
+
+	$node->safe_psql(
+		'postgres', "
+		CREATE TABLE spc_hint (id int);
+		INSERT INTO spc_hint VALUES (1);
+		BEGIN;
+		ALTER TABLE spc_hint SET TABLESPACE other;
+		CHECKPOINT;
+		SELECT * FROM spc_hint; -- set hint bit
+		INSERT INTO spc_hint VALUES (2);
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_hint;");
+	is($result, qq(2), "wal_level = $wal_level, SET TABLESPACE, hint bit");
+
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE idx_hint (c int PRIMARY KEY);
+		SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q;
+		CHECKPOINT;
+		INSERT INTO idx_hint VALUES (1); -- set index hint bit
+		INSERT INTO idx_hint VALUES (2);
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	my ($ret, $stdout, $stderr) =
+	  $node->psql('postgres', "INSERT INTO idx_hint VALUES (2);");
+	is($ret, qq(3), "wal_level = $wal_level, unique index LP_DEAD");
+	like(
+		$stderr,
+		qr/violates unique/,
+		"wal_level = $wal_level, unique index LP_DEAD message");
+
+	# UPDATE touches two buffers for one row.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE upd (id serial PRIMARY KEY, id2 int);
+		INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000));
+		COPY upd FROM '$copy_file' DELIMITER ',';
+		UPDATE upd SET id2 = id2 + 1;
+		DELETE FROM upd;
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM upd;");
+	is($result, qq(0),
+		"wal_level = $wal_level, UPDATE touches two buffers for one row");
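+
+	# (Aside, a minimal sketch: the "two buffers" are the pages holding
+	# the old and the new tuple version, which need not be the same page.
+	# ctid exposes a row's (page,offset), so one can watch an UPDATE move
+	# a row on any scratch table, e.g.:
+	#
+	#   SELECT ctid FROM some_table WHERE id = 1;  -- before and after
+	# )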
+
+	# Test consistency of COPY with INSERT for table created in the same
+	# transaction.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int);
+		INSERT INTO ins_copy VALUES (DEFAULT, 1);
+		COPY ins_copy FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_copy;");
+	is($result, qq(4), "wal_level = $wal_level, INSERT COPY");
+
+	# Test consistency of COPY that inserts more rows into the same table
+	# via triggers.  If the INSERTs from the trigger go to the same block
+	# the data is copied to, and the INSERTs are WAL-logged, WAL replay
+	# will fail when it tries to replay the WAL record: the "before" image
+	# doesn't match, because not all changes were WAL-logged.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text);
+		CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger
+		  LANGUAGE plpgsql as \$\$
+		  BEGIN
+			IF new.id2 NOT LIKE 'triggered%' THEN
+			  INSERT INTO ins_trig
+				VALUES (DEFAULT, 'triggered row before' || NEW.id2);
+			END IF;
+			RETURN NEW;
+		  END; \$\$;
+		CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger
+		  LANGUAGE plpgsql as \$\$
+		  BEGIN
+			IF new.id2 NOT LIKE 'triggered%' THEN
+			  INSERT INTO ins_trig
+				VALUES (DEFAULT, 'triggered row after' || NEW.id2);
+			END IF;
+			RETURN NEW;
+		  END; \$\$;
+		CREATE TRIGGER ins_trig_before_row_insert
+		  BEFORE INSERT ON ins_trig
+		  FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig();
+		CREATE TRIGGER ins_trig_after_row_insert
+		  AFTER INSERT ON ins_trig
+		  FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig();
+		COPY ins_trig FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trig;");
+	is($result, qq(9), "wal_level = $wal_level, COPY with INSERT triggers");
+
+	# Test consistency of INSERT, COPY and TRUNCATE in same transaction block
+	# with TRUNCATE triggers.
+	$node->safe_psql(
+		'postgres', "
+		BEGIN;
+		CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text);
+		CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger
+		  LANGUAGE plpgsql as \$\$
+		  BEGIN
+			INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before');
+			RETURN NULL;
+		  END; \$\$;
+		CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger
+		  LANGUAGE plpgsql as \$\$
+		  BEGIN
+			INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat after');
+			RETURN NULL;
+		  END; \$\$;
+		CREATE TRIGGER trunc_trig_before_stat_truncate
+		  BEFORE TRUNCATE ON trunc_trig
+		  FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig();
+		CREATE TRIGGER trunc_trig_after_stat_truncate
+		  AFTER TRUNCATE ON trunc_trig
+		  FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig();
+		INSERT INTO trunc_trig VALUES (DEFAULT, 1);
+		TRUNCATE trunc_trig;
+		COPY trunc_trig FROM '$copy_file' DELIMITER ',';
+		COMMIT;");
+	$node->stop('immediate');
+	$node->start;
+	$result =
+	  $node->safe_psql('postgres', "SELECT count(*) FROM trunc_trig;");
+	is($result, qq(4),
+		"wal_level = $wal_level, TRUNCATE COPY with TRUNCATE triggers");
+
+	# Test redo of temp table creation.
+	$node->safe_psql(
+		'postgres', "
+		CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);");
+	$node->stop('immediate');
+	$node->start;
+	check_orphan_relfilenodes($node,
+		"wal_level = $wal_level, no orphan relfilenode remains");
+
+	return;
+}
+
+# Run the same test suite for multiple wal_level values.
+run_wal_optimize("minimal"); +run_wal_optimize("replica"); diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl new file mode 100644 index 0000000..f6dcf0a --- /dev/null +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -0,0 +1,428 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test for replication slot limit +# Ensure that max_slot_wal_keep_size limits the number of WAL files to +# be kept by replication slots. +use strict; +use warnings; + +use TestLib; +use PostgresNode; + +use File::Path qw(rmtree); +use Test::More tests => $TestLib::windows_os ? 16 : 20; +use Time::HiRes qw(usleep); + +$ENV{PGDATABASE} = 'postgres'; + +# Initialize primary node, setting wal-segsize to 1MB +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1, extra => ['--wal-segsize=1']); +$node_primary->append_conf( + 'postgresql.conf', qq( +min_wal_size = 2MB +max_wal_size = 4MB +log_checkpoints = yes +)); +$node_primary->start; +$node_primary->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('rep1')"); + +# The slot state and remain should be null before the first connection +my $result = $node_primary->safe_psql('postgres', + "SELECT restart_lsn IS NULL, wal_status is NULL, safe_wal_size is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "t|t|t", 'check the state of non-reserved slot is "unknown"'); + + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create a standby linking to it using the replication slot +my $node_standby = get_new_node('standby_1'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'"); + +$node_standby->start; + +# Wait until standby has replayed enough data +my $start_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby, 'replay', $start_lsn); + +# Stop standby +$node_standby->stop; + +# Preparation done, the slot is the state "reserved" now +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "reserved|t", 'check the catching-up state'); + +# Advance WAL by five segments (= 5MB) on primary +advance_wal($node_primary, 1); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# The slot is always "safe" when fitting max_wal_size +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "reserved|t", + 'check that it is safe if WAL fits in max_wal_size'); + +advance_wal($node_primary, 4); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# The slot is always "safe" when max_slot_wal_keep_size is not set +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "reserved|t", 'check that slot is working'); + +# The standby can reconnect to primary +$node_standby->start; + +$start_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby, 'replay', $start_lsn); + +$node_standby->stop; + +# Set max_slot_wal_keep_size on primary +my $max_slot_wal_keep_size_mb = 6; +$node_primary->append_conf( + 'postgresql.conf', qq( +max_slot_wal_keep_size = ${max_slot_wal_keep_size_mb}MB +)); +$node_primary->reload; + +# The slot is in safe state. 
+ +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); +is($result, "reserved", 'check that max_slot_wal_keep_size is working'); + +# Advance WAL again then checkpoint, reducing remain by 2 MB. +advance_wal($node_primary, 2); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# The slot is still working +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); +is($result, "reserved", + 'check that safe_wal_size gets close to the current LSN'); + +# The standby can reconnect to primary +$node_standby->start; +$start_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby, 'replay', $start_lsn); +$node_standby->stop; + +# wal_keep_size overrides max_slot_wal_keep_size +$result = $node_primary->safe_psql('postgres', + "ALTER SYSTEM SET wal_keep_size to '8MB'; SELECT pg_reload_conf();"); +# Advance WAL again then checkpoint, reducing remain by 6 MB. +advance_wal($node_primary, 6); +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "extended", + 'check that wal_keep_size overrides max_slot_wal_keep_size'); +# restore wal_keep_size +$result = $node_primary->safe_psql('postgres', + "ALTER SYSTEM SET wal_keep_size to 0; SELECT pg_reload_conf();"); + +# The standby can reconnect to primary +$node_standby->start; +$start_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby, 'replay', $start_lsn); +$node_standby->stop; + +# Advance WAL again without checkpoint, reducing remain by 6 MB. +advance_wal($node_primary, 6); + +# Slot gets into 'reserved' state +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); +is($result, "extended", 'check that the slot state changes to "extended"'); + +# do checkpoint so that the next checkpoint runs too early +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# Advance WAL again without checkpoint; remain goes to 0. +advance_wal($node_primary, 1); + +# Slot gets into 'unreserved' state and safe_wal_size is negative +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size <= 0 FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "unreserved|t", + 'check that the slot state changes to "unreserved"'); + +# The standby still can connect to primary before a checkpoint +$node_standby->start; + +$start_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby, 'replay', $start_lsn); + +$node_standby->stop; + +ok( !find_in_log( + $node_standby, + "requested WAL segment [0-9A-F]+ has already been removed"), + 'check that required WAL segments are still available'); + +# Create one checkpoint, to improve stability of the next steps +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# Prevent other checkpoints from occurring while advancing WAL segments +$node_primary->safe_psql('postgres', + "ALTER SYSTEM SET max_wal_size='40MB'; SELECT pg_reload_conf()"); + +# Advance WAL again. 
The slot loses the oldest segment by the next checkpoint +my $logstart = get_log_size($node_primary); +advance_wal($node_primary, 7); + +# Now create another checkpoint and wait until the WARNING is issued +$node_primary->safe_psql('postgres', + 'ALTER SYSTEM RESET max_wal_size; SELECT pg_reload_conf()'); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); +my $invalidated = 0; +for (my $i = 0; $i < 10000; $i++) +{ + if (find_in_log( + $node_primary, + "invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size", + $logstart)) + { + $invalidated = 1; + last; + } + usleep(100_000); +} +ok($invalidated, 'check that slot invalidation has been logged'); + +$result = $node_primary->safe_psql( + 'postgres', + qq[ + SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size + FROM pg_replication_slots WHERE slot_name = 'rep1']); +is($result, "rep1|f|t|lost|", + 'check that the slot became inactive and the state "lost" persists'); + +# Wait until current checkpoint ends +my $checkpoint_ended = 0; +for (my $i = 0; $i < 10000; $i++) +{ + if (find_in_log($node_primary, "checkpoint complete: ", $logstart)) + { + $checkpoint_ended = 1; + last; + } + usleep(100_000); +} +ok($checkpoint_ended, 'waited for checkpoint to end'); + +# The invalidated slot shouldn't keep the old-segment horizon back; +# see bug #17103: https://postgr.es/m/17103-004130e8f27782c9@postgresql.org +# Test for this by creating a new slot and comparing its restart LSN +# to the oldest existing file. +my $redoseg = $node_primary->safe_psql('postgres', + "SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)" +); +my $oldestseg = $node_primary->safe_psql('postgres', + "SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' ORDER BY 1 LIMIT 1" +); +$node_primary->safe_psql('postgres', + qq[SELECT pg_drop_replication_slot('s2')]); +is($oldestseg, $redoseg, "check that segments have been removed"); + +# The standby no longer can connect to the primary +$logstart = get_log_size($node_standby); +$node_standby->start; + +my $failed = 0; +for (my $i = 0; $i < 10000; $i++) +{ + if (find_in_log( + $node_standby, + "requested WAL segment [0-9A-F]+ has already been removed", + $logstart)) + { + $failed = 1; + last; + } + usleep(100_000); +} +ok($failed, 'check that replication has been broken'); + +$node_primary->stop; +$node_standby->stop; + +my $node_primary2 = get_new_node('primary2'); +$node_primary2->init(allows_streaming => 1); +$node_primary2->append_conf( + 'postgresql.conf', qq( +min_wal_size = 32MB +max_wal_size = 32MB +log_checkpoints = yes +)); +$node_primary2->start; +$node_primary2->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('rep1')"); +$backup_name = 'my_backup2'; +$node_primary2->backup($backup_name); + +$node_primary2->stop; +$node_primary2->append_conf( + 'postgresql.conf', qq( +max_slot_wal_keep_size = 0 +)); +$node_primary2->start; + +$node_standby = get_new_node('standby_2'); +$node_standby->init_from_backup($node_primary2, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'"); +$node_standby->start; +my @result = + split( + '\n', + $node_primary2->safe_psql( + 'postgres', + "CREATE TABLE tt(); + DROP TABLE tt; + SELECT pg_switch_wal(); + CHECKPOINT; + SELECT 'finished';", + timeout => $TestLib::timeout_default)); +is($result[1], 'finished', 'check if checkpoint command is not blocked'); + +$node_primary2->stop; +$node_standby->stop; + +# 
The next test depends on Perl's `kill`, which apparently is not +# portable to Windows. (It would be nice to use Test::More's `subtest`, +# but that's not in the ancient version we require.) +if ($TestLib::windows_os) +{ + done_testing(); + exit; +} + +# Get a slot terminated while the walsender is active +# We do this by sending SIGSTOP to the walsender. Skip this on Windows. +my $node_primary3 = get_new_node('primary3'); +$node_primary3->init(allows_streaming => 1, extra => ['--wal-segsize=1']); +$node_primary3->append_conf( + 'postgresql.conf', qq( + min_wal_size = 2MB + max_wal_size = 2MB + log_checkpoints = yes + max_slot_wal_keep_size = 1MB + )); +$node_primary3->start; +$node_primary3->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('rep3')"); +# Take backup +$backup_name = 'my_backup'; +$node_primary3->backup($backup_name); +# Create standby +my $node_standby3 = get_new_node('standby_3'); +$node_standby3->init_from_backup($node_primary3, $backup_name, + has_streaming => 1); +$node_standby3->append_conf('postgresql.conf', "primary_slot_name = 'rep3'"); +$node_standby3->start; +$node_primary3->wait_for_catchup($node_standby3->name, 'replay'); +my $senderpid = $node_primary3->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'"); +like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid"); +my $receiverpid = $node_standby3->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walreceiver'"); +like($receiverpid, qr/^[0-9]+$/, "have walreceiver pid $receiverpid"); + +$logstart = get_log_size($node_primary3); +# freeze walsender and walreceiver. Slot will still be active, but walreceiver +# won't get anything anymore. +kill 'STOP', $senderpid, $receiverpid; +advance_wal($node_primary3, 2); + +my $max_attempts = $TestLib::timeout_default; +while ($max_attempts-- >= 0) +{ + if (find_in_log( + $node_primary3, + "terminating process $senderpid to release replication slot \"rep3\"", + $logstart)) + { + ok(1, "walsender termination logged"); + last; + } + sleep 1; +} + +# Now let the walsender continue; slot should be killed now. 
+# (Must not let walreceiver run yet; otherwise the standby could start another +# one before the slot can be killed) +kill 'CONT', $senderpid; +$node_primary3->poll_query_until('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep3'", + "lost") + or die "timed out waiting for slot to be lost"; + +$max_attempts = $TestLib::timeout_default; +while ($max_attempts-- >= 0) +{ + if (find_in_log( + $node_primary3, + 'invalidating slot "rep3" because its restart_lsn', $logstart)) + { + ok(1, "slot invalidation logged"); + last; + } + sleep 1; +} + +# Now let the walreceiver continue, so that the node can be stopped cleanly +kill 'CONT', $receiverpid; + +$node_primary3->stop; +$node_standby3->stop; + +##################################### +# Advance WAL of $node by $n segments +sub advance_wal +{ + my ($node, $n) = @_; + + # Advance by $n segments (= (16 * $n) MB) on primary + for (my $i = 0; $i < $n; $i++) + { + $node->safe_psql('postgres', + "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();"); + } + return; +} + +# return the size of logfile of $node in bytes +sub get_log_size +{ + my ($node) = @_; + + return (stat $node->logfile)[7]; +} + +# find $pat in logfile of $node after $off-th byte +sub find_in_log +{ + my ($node, $pat, $off) = @_; + + $off = 0 unless defined $off; + my $log = TestLib::slurp_file($node->logfile); + return 0 if (length($log) <= $off); + + $log = substr($log, $off); + + return $log =~ m/$pat/; +} diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl new file mode 100644 index 0000000..777bf05 --- /dev/null +++ b/src/test/recovery/t/020_archive_status.pl @@ -0,0 +1,236 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# +# Tests related to WAL archiving and recovery. +# +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 16; +use Config; + +my $primary = get_new_node('primary'); +$primary->init( + has_archiving => 1, + allows_streaming => 1); +$primary->append_conf('postgresql.conf', 'autovacuum = off'); +$primary->start; +my $primary_data = $primary->data_dir; + +# Temporarily use an archive_command value to make the archiver fail, +# knowing that archiving is enabled. Note that we cannot use a command +# that does not exist as in this case the archiver process would just exit +# without reporting the failure to pg_stat_archiver. This also cannot +# use a plain "false" as that's unportable on Windows. So, instead, as +# a portable solution, use an archive command based on a command known to +# work but will fail: copy with an incorrect original path. +my $incorrect_command = + $TestLib::windows_os + ? qq{copy "%p_does_not_exist" "%f_does_not_exist"} + : qq{cp "%p_does_not_exist" "%f_does_not_exist"}; +$primary->safe_psql( + 'postgres', qq{ + ALTER SYSTEM SET archive_command TO '$incorrect_command'; + SELECT pg_reload_conf(); +}); + +# Save the WAL segment currently in use and switch to a new segment. +# This will be used to track the activity of the archiver. +my $segment_name_1 = $primary->safe_psql('postgres', + q{SELECT pg_walfile_name(pg_current_wal_lsn())}); +my $segment_path_1 = "pg_wal/archive_status/$segment_name_1"; +my $segment_path_1_ready = "$segment_path_1.ready"; +my $segment_path_1_done = "$segment_path_1.done"; +$primary->safe_psql( + 'postgres', q{ + CREATE TABLE mine AS SELECT generate_series(1,10) AS x; + SELECT pg_switch_wal(); + CHECKPOINT; +}); + +# Wait for an archive failure. 
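+# (For reference, a minimal sketch of what is being polled: the archiver
+# statistics view carries a single row with counters for both outcomes,
+#
+#   SELECT archived_count, last_archived_wal,
+#          failed_count,   last_failed_wal
+#     FROM pg_stat_archiver;
+#
+# and failed_count only advances once archive_command has actually
+# failed.)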
+$primary->poll_query_until('postgres', + q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't') + or die "Timed out while waiting for archiving to fail"; +ok( -f "$primary_data/$segment_path_1_ready", + ".ready file exists for WAL segment $segment_name_1 waiting to be archived" +); +ok( !-f "$primary_data/$segment_path_1_done", + ".done file does not exist for WAL segment $segment_name_1 waiting to be archived" +); + +is( $primary->safe_psql( + 'postgres', q{ + SELECT archived_count, last_failed_wal + FROM pg_stat_archiver + }), + "0|$segment_name_1", + "pg_stat_archiver failed to archive $segment_name_1"); + +# Crash the cluster for the next test in charge of checking that non-archived +# WAL segments are not removed. +$primary->stop('immediate'); + +# Recovery tests for the archiving with a standby partially check +# the recovery behavior when restoring a backup taken using a +# snapshot with no pg_start/stop_backup. In this situation, +# the recovered standby should enter first crash recovery then +# switch to regular archive recovery. Note that the base backup +# is taken here so as archive_command will fail. This is necessary +# for the assumptions of the tests done with the standbys below. +$primary->backup_fs_cold('backup'); + +$primary->start; +ok( -f "$primary_data/$segment_path_1_ready", + ".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary" +); + +# Allow WAL archiving again and wait for a success. +$primary->safe_psql( + 'postgres', q{ + ALTER SYSTEM RESET archive_command; + SELECT pg_reload_conf(); +}); + +$primary->poll_query_until('postgres', + q{SELECT archived_count FROM pg_stat_archiver}, '1') + or die "Timed out while waiting for archiving to finish"; + +ok(!-f "$primary_data/$segment_path_1_ready", + ".ready file for archived WAL segment $segment_name_1 removed"); + +ok(-f "$primary_data/$segment_path_1_done", + ".done file for archived WAL segment $segment_name_1 exists"); + +is( $primary->safe_psql( + 'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }), + $segment_name_1, + "archive success reported in pg_stat_archiver for WAL segment $segment_name_1" +); + +# Create some WAL activity and a new checkpoint so as the next standby can +# create a restartpoint. As this standby starts in crash recovery because +# of the cold backup taken previously, it needs a clean restartpoint to deal +# with existing status files. +my $segment_name_2 = $primary->safe_psql('postgres', + q{SELECT pg_walfile_name(pg_current_wal_lsn())}); +my $segment_path_2 = "pg_wal/archive_status/$segment_name_2"; +my $segment_path_2_ready = "$segment_path_2.ready"; +my $segment_path_2_done = "$segment_path_2.done"; +$primary->safe_psql( + 'postgres', q{ + INSERT INTO mine SELECT generate_series(10,20) AS x; + CHECKPOINT; +}); + +# Switch to a new segment and use the returned LSN to make sure that +# standbys have caught up to this point. +my $primary_lsn = $primary->safe_psql( + 'postgres', q{ + SELECT pg_switch_wal(); +}); + +$primary->poll_query_until('postgres', + q{ SELECT last_archived_wal FROM pg_stat_archiver }, + $segment_name_2) + or die "Timed out while waiting for archiving to finish"; + +# Test standby with archive_mode = on. 
+my $standby1 = get_new_node('standby'); +$standby1->init_from_backup($primary, 'backup', has_restoring => 1); +$standby1->append_conf('postgresql.conf', "archive_mode = on"); +my $standby1_data = $standby1->data_dir; +$standby1->start; + +# Wait for the replay of the segment switch done previously, ensuring +# that all segments needed are restored from the archives. +$standby1->poll_query_until('postgres', + qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 } +) or die "Timed out while waiting for xlog replay on standby1"; + +$standby1->safe_psql('postgres', q{CHECKPOINT}); + +# Recovery with archive_mode=on does not keep .ready signal files inherited +# from backup. Note that this WAL segment existed in the backup. +ok( !-f "$standby1_data/$segment_path_1_ready", + ".ready file for WAL segment $segment_name_1 present in backup got removed with archive_mode=on on standby" +); + +# Recovery with archive_mode=on should not create .ready files. +# Note that this segment did not exist in the backup. +ok( !-f "$standby1_data/$segment_path_2_ready", + ".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby" +); + +# Recovery with archive_mode = on creates .done files. +ok( -f "$standby1_data/$segment_path_2_done", + ".done file for WAL segment $segment_name_2 created when archive_mode=on on standby" +); + +# Test recovery with archive_mode = always, which should always keep +# .ready files if archiving is enabled, though here we want the archive +# command to fail to persist the .ready files. Note that this node +# has inherited the archive command of the previous cold backup that +# will cause archiving failures. +my $standby2 = get_new_node('standby2'); +$standby2->init_from_backup($primary, 'backup', has_restoring => 1); +$standby2->append_conf('postgresql.conf', 'archive_mode = always'); +my $standby2_data = $standby2->data_dir; +$standby2->start; + +# Wait for the replay of the segment switch done previously, ensuring +# that all segments needed are restored from the archives. +$standby2->poll_query_until('postgres', + qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 } +) or die "Timed out while waiting for xlog replay on standby2"; + +$standby2->safe_psql('postgres', q{CHECKPOINT}); + +ok( -f "$standby2_data/$segment_path_1_ready", + ".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby" +); + +ok( -f "$standby2_data/$segment_path_2_ready", + ".ready file for WAL segment $segment_name_2 created with archive_mode=always on standby" +); + +# Reset statistics of the archiver for the next checks. +$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')}); + +# Now crash the cluster to check that recovery step does not +# remove non-archived WAL segments on a standby where archiving +# is enabled. +$standby2->stop('immediate'); +$standby2->start; + +ok( -f "$standby2_data/$segment_path_1_ready", + "WAL segment still ready to archive after crash recovery on standby with archive_mode=always" +); + +# Allow WAL archiving again, and wait for the segments to be archived. 
+$standby2->safe_psql( + 'postgres', q{ + ALTER SYSTEM RESET archive_command; + SELECT pg_reload_conf(); +}); +$standby2->poll_query_until('postgres', + q{SELECT last_archived_wal FROM pg_stat_archiver}, + $segment_name_2) + or die "Timed out while waiting for archiving to finish"; + +is( $standby2->safe_psql( + 'postgres', q{SELECT archived_count FROM pg_stat_archiver}), + '2', + "correct number of WAL segments archived from standby"); + +ok( !-f "$standby2_data/$segment_path_1_ready" + && !-f "$standby2_data/$segment_path_2_ready", + ".ready files removed after archive success with archive_mode=always on standby" +); + +ok( -f "$standby2_data/$segment_path_1_done" + && -f "$standby2_data/$segment_path_2_done", + ".done files created after archive success with archive_mode=always on standby" +); diff --git a/src/test/recovery/t/021_row_visibility.pl b/src/test/recovery/t/021_row_visibility.pl new file mode 100644 index 0000000..8a554c5 --- /dev/null +++ b/src/test/recovery/t/021_row_visibility.pl @@ -0,0 +1,209 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Checks that snapshots on standbys behave in a minimally reasonable +# way. +use strict; +use warnings; + +use PostgresNode; +use TestLib; +use Test::More tests => 10; +use Config; + +# Initialize primary node +my $node_primary = get_new_node('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf('postgresql.conf', 'max_prepared_transactions=10'); +$node_primary->start; + +# Initialize with empty test table +$node_primary->safe_psql('postgres', + 'CREATE TABLE public.test_visibility (data text not null)'); + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create streaming standby from backup +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', 'max_prepared_transactions=10'); +$node_standby->start; + +my $psql_timeout = + IPC::Run::timer(2 * $TestLib::timeout_default); + +# One psql to primary and standby each, for all queries. That allows +# to check uncommitted changes being replicated and such. +my %psql_primary = (stdin => '', stdout => '', stderr => ''); +$psql_primary{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ], + '<', + \$psql_primary{stdin}, + '>', + \$psql_primary{stdout}, + '2>', + \$psql_primary{stderr}, + $psql_timeout); + +my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => ''); +$psql_standby{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ], + '<', + \$psql_standby{stdin}, + '>', + \$psql_standby{stdout}, + '2>', + \$psql_standby{stderr}, + $psql_timeout); + +# +# 1. Check initial data is the same +# +ok( send_query_and_wait( + \%psql_standby, + q/SELECT * FROM test_visibility ORDER BY data;/, + qr/^\(0 rows\)$/m), + 'data not visible'); + +# +# 2. Check if an INSERT is replayed and visible +# +$node_primary->psql('postgres', + "INSERT INTO test_visibility VALUES ('first insert')"); +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/first insert.*\n\(1 row\)/m), + 'insert visible'); + +# +# 3. Verify that uncommitted changes aren't visible. 
+# +ok( send_query_and_wait( + \%psql_primary, + q[ +BEGIN; +UPDATE test_visibility SET data = 'first update' RETURNING data; + ], + qr/^UPDATE 1$/m), + 'UPDATE'); + +$node_primary->psql('postgres', "SELECT txid_current();"); # ensure WAL flush +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/first insert.*\n\(1 row\)/m), + 'uncommitted update invisible'); + +# +# 4. That a commit turns 3. visible +# +ok(send_query_and_wait(\%psql_primary, q[COMMIT;], qr/^COMMIT$/m), 'COMMIT'); + +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/first update\n\(1 row\)$/m), + 'committed update visible'); + +# +# 5. Check that changes in prepared xacts is invisible +# +ok( send_query_and_wait( + \%psql_primary, q[ +DELETE from test_visibility; -- delete old data, so we start with clean slate +BEGIN; +INSERT INTO test_visibility VALUES('inserted in prepared will_commit'); +PREPARE TRANSACTION 'will_commit';], + qr/^PREPARE TRANSACTION$/m), + 'prepared will_commit'); + +ok( send_query_and_wait( + \%psql_primary, q[ +BEGIN; +INSERT INTO test_visibility VALUES('inserted in prepared will_abort'); +PREPARE TRANSACTION 'will_abort'; + ], + qr/^PREPARE TRANSACTION$/m), + 'prepared will_abort'); + +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/^\(0 rows\)$/m), + 'uncommitted prepared invisible'); + +# For some variation, finish prepared xacts via separate connections +$node_primary->safe_psql('postgres', "COMMIT PREPARED 'will_commit';"); +$node_primary->safe_psql('postgres', "ROLLBACK PREPARED 'will_abort';"); +$node_primary->wait_for_catchup($node_standby, 'replay', + $node_primary->lsn('insert')); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/will_commit.*\n\(1 row\)$/m), + 'finished prepared visible'); + +# explicitly shut down psql instances gracefully - to avoid hangs +# or worse on windows +$psql_primary{stdin} .= "\\q\n"; +$psql_primary{run}->finish; +$psql_standby{stdin} .= "\\q\n"; +$psql_standby{run}->finish; + +$node_primary->stop; +$node_standby->stop; + +# Send query, wait until string matches +sub send_query_and_wait +{ + my ($psql, $query, $untl) = @_; + my $ret; + + # send query + $$psql{stdin} .= $query; + $$psql{stdin} .= "\n"; + + # wait for query results + $$psql{run}->pump_nb(); + while (1) + { + last if $$psql{stdout} =~ /$untl/; + + if ($psql_timeout->is_expired) + { + BAIL_OUT("aborting wait: program timed out\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + if (not $$psql{run}->pumpable()) + { + BAIL_OUT("aborting wait: program died\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + $$psql{run}->pump(); + } + + $$psql{stdout} = ''; + + return 1; +} diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl new file mode 100644 index 0000000..b0f5ba0 --- /dev/null +++ b/src/test/recovery/t/022_crash_temp_files.pl @@ -0,0 +1,268 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test remove of temporary files after a crash. 
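+#
+# (Background, a minimal sketch: temporary files live under
+# base/pgsql_tmp inside the data directory and are named
+# pgsql_tmp<backend PID>.<N>.  Besides the raw pg_ls_dir() counts used
+# below, they can be inspected with:
+#
+#   SELECT name, size FROM pg_ls_tmpdir() ORDER BY name;
+# )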
+use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More; +use Config; + +if ($Config{osname} eq 'MSWin32') +{ + plan skip_all => 'tests hang on Windows'; + exit; +} +else +{ + plan tests => 11; +} + + +my $psql_timeout = IPC::Run::timer($TestLib::timeout_default); + +my $node = get_new_node('node_crash'); +$node->init(); +$node->start(); + +# By default, PostgresNode doesn't restart after crash +# Reduce work_mem to generate temporary file with a few number of rows +$node->safe_psql( + 'postgres', + q[ALTER SYSTEM SET remove_temp_files_after_crash = on; + ALTER SYSTEM SET log_connections = 1; + ALTER SYSTEM SET work_mem = '64kB'; + ALTER SYSTEM SET restart_after_crash = on; + SELECT pg_reload_conf();]); + +# create table, insert rows +$node->safe_psql('postgres', q[CREATE TABLE tab_crash (a integer UNIQUE);]); + +# Run psql, keeping session alive, so we have an alive backend to kill. +my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +my $killme = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$killme_stdin, + '>', + \$killme_stdout, + '2>', + \$killme_stderr, + $psql_timeout); + +# Get backend pid +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok(pump_until($killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + 'acquired pid for SIGKILL'); +my $pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Open a 2nd session that will block the 1st one, using the UNIQUE constraint. +# This will prevent removal of the temporary file created by the 1st session. +my ($killme_stdin2, $killme_stdout2, $killme_stderr2) = ('', '', ''); +my $killme2 = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$killme_stdin2, + '>', + \$killme_stdout2, + '2>', + \$killme_stderr2, + $psql_timeout); + +# Insert one tuple and leave the transaction open +$killme_stdin2 .= q[ +BEGIN; +INSERT INTO tab_crash (a) VALUES(1); +SELECT $$insert-tuple-to-lock-next-insert$$; +]; +pump_until($killme2, $psql_timeout, \$killme_stdout2, qr/insert-tuple-to-lock-next-insert/m); +$killme_stdout2 = ''; +$killme_stderr2 = ''; + +# Run the query that generates a temporary file and that will be killed before +# it finishes. Since the query that generates the temporary file does not +# return before the connection is killed, use a SELECT before to trigger +# pump_until. +$killme_stdin .= q[ +BEGIN; +SELECT $$in-progress-before-sigkill$$; +INSERT INTO tab_crash (a) SELECT i FROM generate_series(1, 5000) s(i); +]; +ok(pump_until($killme, $psql_timeout, \$killme_stdout, qr/in-progress-before-sigkill/m), + 'insert in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Wait until the batch insert gets stuck on the lock. +$killme_stdin2 .= q[ +DO $c$ +DECLARE + c INT; +BEGIN + LOOP + SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = ] . $pid + . 
q[ AND NOT granted;
+    IF c > 0 THEN
+      EXIT;
+    END IF;
+  END LOOP;
+END; $c$;
+SELECT $$insert-tuple-lock-waiting$$;
+];
+
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+	qr/insert-tuple-lock-waiting/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Kill with SIGKILL
+my $ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+
+# Close that psql session
+$killme->finish;
+
+# Wait till the other session reports failure, ensuring that the postmaster
+# has noticed its dead child and begun a restart cycle.
+$killme_stdin2 .= qq[
+SELECT pg_sleep($TestLib::timeout_default);
+];
+ok( pump_until(
+		$killme2, $psql_timeout,
+		\$killme_stderr2,
+		qr/WARNING:  terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+	),
+	"second psql session died successfully after SIGKILL");
+$killme2->finish;
+
+# Wait till server finishes restarting
+$node->poll_query_until('postgres', undef, '');
+
+# Check for temporary files
+is( $node->safe_psql(
+		'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+	qq(0),
+	'no temporary files');
+
+#
+# Test old behavior (don't remove temporary files after crash)
+#
+$node->safe_psql(
+	'postgres',
+	q[ALTER SYSTEM SET remove_temp_files_after_crash = off;
+	  SELECT pg_reload_conf();]);
+
+# Restart psql session
+($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+$killme->run();
+
+# Get backend pid
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok( pump_until(
+		$killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+	'acquired pid for SIGKILL');
+$pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Restart the 2nd psql session
+($killme_stdin2, $killme_stdout2, $killme_stderr2) = ('', '', '');
+$killme2->run();
+
+# Insert one tuple and leave the transaction open
+$killme_stdin2 .= q[
+BEGIN;
+INSERT INTO tab_crash (a) VALUES(1);
+SELECT $$insert-tuple-to-lock-next-insert$$;
+];
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+	qr/insert-tuple-to-lock-next-insert/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Run the query that generates a temporary file and that will be killed before
+# it finishes. Since the query that generates the temporary file does not
+# return before the connection is killed, use a SELECT before to trigger
+# pump_until.
+$killme_stdin .= q[
+BEGIN;
+SELECT $$in-progress-before-sigkill$$;
+INSERT INTO tab_crash (a) SELECT i FROM generate_series(1, 5000) s(i);
+];
+ok( pump_until(
+		$killme, $psql_timeout, \$killme_stdout,
+		qr/in-progress-before-sigkill/m),
+	'insert in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Wait until the batch insert gets stuck on the lock.
+$killme_stdin2 .= q[
+DO $c$
+DECLARE
+  c INT;
+BEGIN
+  LOOP
+    SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = ] . $pid
+  . q[ AND NOT granted;
+    IF c > 0 THEN
+      EXIT;
+    END IF;
+  END LOOP;
+END; $c$;
+SELECT $$insert-tuple-lock-waiting$$;
+];
+
+pump_until($killme2, $psql_timeout, \$killme_stdout2,
+	qr/insert-tuple-lock-waiting/m);
+$killme_stdout2 = '';
+$killme_stderr2 = '';
+
+# Kill with SIGKILL
+$ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, 'killed process with KILL');
+
+# Close that psql session
+$killme->finish;
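+
+# (For reference, a hedged sketch of the helper used throughout: pump
+# the IPC::Run harness until the rolling output buffer matches a
+# pattern, giving up on timeout or process exit.  The real TestLib
+# version may differ in detail.
+#
+#   sub pump_until_sketch
+#   {
+#       my ($proc, $timeout, $stream, $until) = @_;
+#       while ($$stream !~ /$until/)
+#       {
+#           return 0 if $timeout->is_expired or not $proc->pumpable;
+#           $proc->pump;
+#       }
+#       return 1;
+#   }
+# )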
+
+# Wait till the other session reports failure, ensuring that the postmaster
+# has noticed its dead child and begun a restart cycle.
+$killme_stdin2 .= qq[
+SELECT pg_sleep($TestLib::timeout_default);
+];
+ok( pump_until(
+		$killme2, $psql_timeout,
+		\$killme_stderr2,
+		qr/WARNING:  terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m
+	),
+	"second psql session died successfully after SIGKILL");
+$killme2->finish;
+
+# Wait till server finishes restarting
+$node->poll_query_until('postgres', undef, '');
+
+# Check for temporary files -- should be there
+is( $node->safe_psql(
+		'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+	qq(1),
+	'one temporary file');
+
+# Restart should remove the temporary files
+$node->restart();
+
+# Check the temporary files -- should be gone
+is( $node->safe_psql(
+		'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'),
+	qq(0),
+	'temporary file was removed');
+
+$node->stop(); diff --git a/src/test/recovery/t/023_pitr_prepared_xact.pl b/src/test/recovery/t/023_pitr_prepared_xact.pl new file mode 100644 index 0000000..9190a38 --- /dev/null +++ b/src/test/recovery/t/023_pitr_prepared_xact.pl @@ -0,0 +1,89 @@ +
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Test for point-in-time recovery (PITR) with prepared transactions
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 1;
+use File::Compare;
+
+# Initialize and start primary node with WAL archiving
+my $node_primary = get_new_node('primary');
+$node_primary->init(has_archiving => 1, allows_streaming => 1);
+$node_primary->append_conf(
+	'postgresql.conf', qq{
+max_prepared_transactions = 10});
+$node_primary->start;
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+
+# Initialize node for PITR targeting a very specific restore point, just
+# after a PREPARE TRANSACTION is issued, so that we finish with a promoted
+# node where this 2PC transaction needs an explicit COMMIT PREPARED.
+my $node_pitr = get_new_node('node_pitr');
+$node_pitr->init_from_backup(
+	$node_primary, $backup_name,
+	standby       => 0,
+	has_restoring => 1);
+$node_pitr->append_conf(
+	'postgresql.conf', qq{
+recovery_target_name = 'rp'
+recovery_target_action = 'promote'});
+
+# Workload with a prepared transaction and the target restore point.
+$node_primary->psql(
+	'postgres', qq{
+CREATE TABLE foo(i int);
+BEGIN;
+INSERT INTO foo VALUES(1);
+PREPARE TRANSACTION 'fooinsert';
+SELECT pg_create_restore_point('rp');
+INSERT INTO foo VALUES(2);
+});
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_primary->safe_psql('postgres',
+	"SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+my $archive_wait_query =
+  "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;";
+$node_primary->poll_query_until('postgres', $archive_wait_query)
+  or die "Timed out while waiting for WAL segment to be archived";
+my $last_archived_wal_file = $walfile_to_be_archived;
+
+# Now start the PITR node.
+$node_pitr->start;
+
+# Wait until the PITR node exits recovery.
+$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
+  or die "Timed out while waiting for PITR promotion";
+
+# Commit the prepared transaction in the latest timeline and check its
+# result.  There should only be one row in the table, coming from the
+# prepared transaction. 
The row from the INSERT after the restore point +# should not show up, since our recovery target was older than the second +# INSERT done. +$node_pitr->psql('postgres', qq{COMMIT PREPARED 'fooinsert';}); +my $result = $node_pitr->safe_psql('postgres', "SELECT * FROM foo;"); +is($result, qq{1}, "check table contents after COMMIT PREPARED"); + +# Insert more data and do a checkpoint. These should be generated on the +# timeline chosen after the PITR promotion. +$node_pitr->psql( + 'postgres', qq{ +INSERT INTO foo VALUES(3); +CHECKPOINT; +}); + +# Enforce recovery, the checkpoint record generated previously should +# still be found. +$node_pitr->stop('immediate'); +$node_pitr->start; diff --git a/src/test/recovery/t/024_archive_recovery.pl b/src/test/recovery/t/024_archive_recovery.pl new file mode 100644 index 0000000..1befa82 --- /dev/null +++ b/src/test/recovery/t/024_archive_recovery.pl @@ -0,0 +1,103 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Test for archive recovery of WAL generated with wal_level=minimal +use strict; +use warnings; +use PostgresNode; +use TestLib; +use Test::More tests => 2; +use Time::HiRes qw(usleep); + +# Initialize and start node with wal_level = replica and WAL archiving +# enabled. +my $node = get_new_node('orig'); +$node->init(has_archiving => 1, allows_streaming => 1); +my $replica_config = q[ +wal_level = replica +archive_mode = on +max_wal_senders = 10 +hot_standby = off +]; +$node->append_conf('postgresql.conf', $replica_config); +$node->start; + +# Take backup +my $backup_name = 'my_backup'; +$node->backup($backup_name); + +# Restart node with wal_level = minimal and WAL archiving disabled +# to generate WAL with that setting. Note that such WAL has not been +# archived yet at this moment because WAL archiving is not enabled. +$node->append_conf( + 'postgresql.conf', q[ +wal_level = minimal +archive_mode = off +max_wal_senders = 0 +]); +$node->restart; + +# Restart node with wal_level = replica and WAL archiving enabled +# to archive WAL previously generated with wal_level = minimal. +# We ensure the WAL file containing the record indicating the change +# of wal_level to minimal is archived by checking pg_stat_archiver. +$node->append_conf('postgresql.conf', $replica_config); +$node->restart; + +# Find next WAL segment to be archived +my $walfile_to_be_archived = $node->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn());"); + +# Make WAL segment eligible for archival +$node->safe_psql('postgres', 'SELECT pg_switch_wal()'); +my $archive_wait_query = + "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;"; + +# Wait until the WAL segment has been archived. +$node->poll_query_until('postgres', $archive_wait_query) + or die "Timed out while waiting for WAL segment to be archived"; + +$node->stop; + +# Initialize new node from backup, and start archive recovery. Check that +# archive recovery fails with an error when it detects the WAL record +# indicating the change of wal_level to minimal and node stops. +sub test_recovery_wal_level_minimal +{ + my ($node_name, $node_text, $standby_setting) = @_; + + my $recovery_node = get_new_node($node_name); + $recovery_node->init_from_backup( + $node, $backup_name, + has_restoring => 1, + standby => $standby_setting); + + # Use run_log instead of recovery_node->start because this test expects + # that the server ends with an error during recovery. 
+ run_log( + [ + 'pg_ctl', '-D', + $recovery_node->data_dir, '-l', + $recovery_node->logfile, 'start' + ]); + + # wait for postgres to terminate + foreach my $i (0 .. 10 * $TestLib::timeout_default) + { + last if !-f $recovery_node->data_dir . '/postmaster.pid'; + usleep(100_000); + } + + # Confirm that the archive recovery fails with an expected error + my $logfile = slurp_file($recovery_node->logfile()); + ok( $logfile =~ + qr/FATAL: .* WAL was generated with wal_level=minimal, cannot continue recovering/, + "$node_text ends with an error because it finds WAL generated with wal_level=minimal" + ); +} + +# Test for archive recovery +test_recovery_wal_level_minimal('archive_recovery', 'archive recovery', 0); + +# Test for standby server +test_recovery_wal_level_minimal('standby', 'standby', 1); diff --git a/src/test/recovery/t/025_stuck_on_old_timeline.pl b/src/test/recovery/t/025_stuck_on_old_timeline.pl new file mode 100644 index 0000000..023cab4 --- /dev/null +++ b/src/test/recovery/t/025_stuck_on_old_timeline.pl @@ -0,0 +1,111 @@ + +# Copyright (c) 2021, PostgreSQL Global Development Group + +# Testing streaming replication where standby is promoted and a new cascading +# standby (without WAL) is connected to the promoted standby. Both archiving +# and streaming are enabled, but only the history file is available from the +# archive, so the WAL files all have to be streamed. Test that the cascading +# standby can follow the new primary (promoted standby). +use strict; +use warnings; +use PostgresNode; +use TestLib; + +use File::Basename; +use FindBin; +use Test::More tests => 1; + +# Initialize primary node +my $node_primary = get_new_node('primary'); + +# Set up an archive command that will copy the history file but not the WAL +# files. No real archive command should behave this way; the point is to +# simulate a race condition where the new cascading standby starts up after +# the timeline history file reaches the archive but before any of the WAL files +# get there. +$node_primary->init(allows_streaming => 1, has_archiving => 1); + +# Note: consistent use of forward slashes here avoids any escaping problems +# that arise from use of backslashes. That means we need to double-quote all +# the paths in the archive_command +my $perlbin = $^X; +$perlbin =~ s!\\!/!g if $TestLib::windows_os; +my $archivedir_primary = $node_primary->archive_dir; +$archivedir_primary =~ s!\\!/!g if $TestLib::windows_os; +$node_primary->append_conf( + 'postgresql.conf', qq( +archive_command = '"$perlbin" "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"' +wal_keep_size=128MB +)); +# Make sure that Msys perl doesn't complain about difficulty in setting locale +# when called from the archive_command. +local $ENV{PERL_BADLANG} = 0; +$node_primary->start; + +# Take backup from primary +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create streaming standby linking to primary +my $node_standby = get_new_node('standby'); +$node_standby->init_from_backup( + $node_primary, $backup_name, + allows_streaming => 1, + has_streaming => 1, + has_archiving => 1); +$node_standby->start; + +# Take backup of standby, use -Xnone so that pg_wal is empty. +$node_standby->backup($backup_name, backup_options => ['-Xnone']); + +# Create cascading standby but don't start it yet. +# Must set up both streaming and archiving. 
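+# (Background: a timeline history file is a tiny text file named like
+# 00000002.history; each line records a parent timeline and the LSN at
+# which the switch happened, roughly:
+#
+#   1	0/3000000	no recovery target specified
+#
+# It is archived at promotion, before any WAL of the new timeline, which
+# is the race this test exploits.)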
+
+# Create cascading standby but don't start it yet.
+# Must set up both streaming and archiving.
+my $node_cascade = get_new_node('cascade');
+$node_cascade->init_from_backup($node_standby, $backup_name,
+	has_streaming => 1);
+$node_cascade->enable_restoring($node_primary);
+$node_cascade->append_conf(
+	'postgresql.conf', qq(
+recovery_target_timeline='latest'
+));
+
+# Promote the standby.
+$node_standby->promote;
+
+# Wait for promotion to complete
+$node_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery();")
+  or die "Timed out while waiting for promotion";
+
+# Find next WAL segment to be archived
+my $walfile_to_be_archived = $node_standby->safe_psql('postgres',
+	"SELECT pg_walfile_name(pg_current_wal_lsn());");
+
+# Make WAL segment eligible for archival
+$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()');
+
+# Wait until the WAL segment has been archived.
+# Since the history file gets created on promotion and is archived before
+# any WAL segment, this is enough to guarantee that the history file was
+# archived.
+my $archive_wait_query =
+  "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver";
+$node_standby->poll_query_until('postgres', $archive_wait_query)
+  or die "Timed out while waiting for WAL segment to be archived";
+my $last_archived_wal_file = $walfile_to_be_archived;
+
+# Start cascade node
+$node_cascade->start;
+
+# Create some content on the promoted standby and check its presence on the
+# cascading standby.
+$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a");
+
+# Wait for replication to catch up
+$node_standby->wait_for_catchup($node_cascade, 'replay',
+	$node_standby->lsn('insert'));
+
+# Check that the cascading standby has the new content
+my $result =
+  $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int");
+print "cascade: $result\n";
+is($result, 1, 'check streamed content on cascade standby');
diff --git a/src/test/recovery/t/026_overwrite_contrecord.pl b/src/test/recovery/t/026_overwrite_contrecord.pl
new file mode 100644
index 0000000..d451fe6
--- /dev/null
+++ b/src/test/recovery/t/026_overwrite_contrecord.pl
@@ -0,0 +1,109 @@
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# Tests for already-propagated WAL segments ending in incomplete WAL records.
+
+use strict;
+use warnings;
+
+use FindBin;
+use PostgresNode;
+use TestLib;
+use Test::More;
+
+plan tests => 3;
+
+# Test: Create a physical replica that's missing the last WAL file, then
+# restart the primary to create a divergent WAL file, and observe that the
+# replica replays the "overwrite contrecord" from that new file and that
+# the standby promotes successfully.
+
+my $node = PostgresNode->get_new_node('primary');
+$node->init(allows_streaming => 1);
+# We need these settings for stability of WAL behavior.
+$node->append_conf(
+	'postgresql.conf', qq(
+autovacuum = off
+wal_keep_size = 1GB
+));
+$node->start;
+
+$node->safe_psql('postgres', 'create table filler (a int, b text)');
+
+# Now consume all remaining room in the current WAL segment, leaving
+# space enough only for the start of a largish record.
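+# (With the default 16MB wal_segment_size and 8kB block_size, the loop
+# below stops once fewer than two blocks, i.e. 16kB, remain before the
+# segment boundary; the numbers adapt automatically if the cluster was
+# built with different sizes.)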
+$node->safe_psql(
+	'postgres', q{
+DO $$
+DECLARE
+    wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size';
+    remain int;
+    iters int := 0;
+BEGIN
+    LOOP
+        INSERT into filler
+        select g, repeat(md5(g::text), (random() * 60 + 1)::int)
+        from generate_series(1, 10) g;
+
+        remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize;
+        IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN
+            RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain;
+            EXIT;
+        END IF;
+        iters := iters + 1;
+    END LOOP;
+END
+$$;
+});
+
+my $initfile = $node->safe_psql('postgres',
+	'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
+$node->safe_psql('postgres',
+	qq{SELECT pg_logical_emit_message(true, 'test 026', repeat('xyzxz', 123456))}
+);
+#$node->safe_psql('postgres', qq{create table foo ()});
+my $endfile = $node->safe_psql('postgres',
+	'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
+ok($initfile ne $endfile, "$initfile differs from $endfile");
+
+# Now stop abruptly, to avoid a stop checkpoint.  We can remove the tail
+# file afterwards, and on startup the large message should be overwritten
+# with new contents.
+$node->stop('immediate');
+
+unlink $node->basedir . "/pgdata/pg_wal/$endfile"
+  or die "could not unlink " . $node->basedir . "/pgdata/pg_wal/$endfile: $!";
+
+# OK, create a standby at this spot.
+$node->backup_fs_cold('backup');
+my $node_standby = PostgresNode->get_new_node('standby');
+$node_standby->init_from_backup($node, 'backup', has_streaming => 1);
+
+$node_standby->start;
+$node->start;
+
+$node->safe_psql('postgres',
+	qq{create table foo (a text); insert into foo values ('hello')});
+$node->safe_psql('postgres',
+	qq{SELECT pg_logical_emit_message(true, 'test 026', 'AABBCC')});
+
+my $until_lsn = $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()");
+my $caughtup_query =
+  "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
+$node_standby->poll_query_until('postgres', $caughtup_query)
+  or die "Timed out while waiting for standby to catch up";
+
+ok($node_standby->safe_psql('postgres', 'select * from foo') eq 'hello',
+	'standby replays past overwritten contrecord');
+
+# Verify the message appears in the standby's log
+my $log = slurp_file($node_standby->logfile);
+like(
+	$log,
+	qr[successfully skipped missing contrecord at],
+	"found log line in standby");
+
+# Verify promotion is successful
+$node_standby->promote;
+
+$node->stop;
+$node_standby->stop;
diff --git a/src/test/recovery/t/031_recovery_conflict.pl b/src/test/recovery/t/031_recovery_conflict.pl
new file mode 100644
index 0000000..72e0fa9
--- /dev/null
+++ b/src/test/recovery/t/031_recovery_conflict.pl
@@ -0,0 +1,377 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test that connections to a hot standby are correctly canceled when a
+# recovery conflict is detected.  Also, test that statistics in
+# pg_stat_database_conflicts are populated correctly.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+plan skip_all => "disabled until after minor releases, due to instability";
+
+# Set up nodes
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+$node_primary->init(allows_streaming => 1);
+
+my $tablespace1 = "test_recovery_conflict_tblspc";
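+
+# (allow_in_place_tablespaces, added in v15, lets CREATE TABLESPACE with an
+# empty LOCATION create the tablespace directory inside pg_tblspc itself,
+# which is convenient for tests; it is referenced but commented out below
+# because this branch predates it.)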
+
+$node_primary->append_conf(
+	'postgresql.conf', qq[
+
+# Doesn't currently exist pre 15, but might be backpatched later
+#allow_in_place_tablespaces = on
+#temp_tablespaces = $tablespace1
+
+log_temp_files = 0
+
+# for deadlock test
+max_prepared_transactions = 10
+
+# wait some to test the wait paths as well, but not long for obvious reasons
+max_standby_streaming_delay = 50ms
+
+# Some of the recovery conflict logging code only gets exercised after
+# deadlock_timeout.  The test doesn't rely on that additional output, but
+# it's nice to get some minimal coverage of that code.
+log_recovery_conflict_waits = on
+deadlock_timeout = 10ms
+]);
+$node_primary->start;
+
+my $backup_name = 'my_backup';
+
+# See allow_in_place_tablespaces comment above
+#$node_primary->safe_psql('postgres',
+#	qq[CREATE TABLESPACE $tablespace1 LOCATION '']);
+
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+
+$node_standby->start;
+
+my $test_db = "test_db";
+
+# Use a new database, to trigger a database recovery conflict later on
+$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db");
+
+# test schema / data
+my $table1 = "test_recovery_conflict_table1";
+my $table2 = "test_recovery_conflict_table2";
+$node_primary->safe_psql(
+	$test_db, qq[
+CREATE TABLE ${table1}(a int, b int);
+INSERT INTO $table1 SELECT i % 3, 0 FROM generate_series(1,20) i;
+CREATE TABLE ${table2}(a int, b int);
+]);
+my $primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+
+# a long-running psql that we can use to trigger conflicts
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
+my %psql_standby = ('stdin' => '', 'stdout' => '');
+$psql_standby{run} =
+  $node_standby->background_psql($test_db, \$psql_standby{stdin},
+	\$psql_standby{stdout},
+	$psql_timeout);
+$psql_standby{stdout} = '';
+
+my $expected_conflicts = 0;
+
+
+## RECOVERY CONFLICT 1: Buffer pin conflict
+my $sect = "buffer pin conflict";
+$expected_conflicts++;
+
+# Aborted INSERT on primary that will be cleaned up by vacuum.  Has to be
+# old enough so that there's not a snapshot conflict before the buffer pin
+# conflict.
+
+$node_primary->safe_psql(
+	$test_db,
+	qq[
+	BEGIN;
+	INSERT INTO $table1 VALUES (1,0);
+	ROLLBACK;
+	-- ensure flush, rollback doesn't do so
+	BEGIN; LOCK $table1; COMMIT;
+	]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+my $cursor1 = "test_recovery_conflict_cursor";
+
+# DECLARE and use a cursor on the standby, causing the buffer with the only
+# block of the relation to be pinned on the standby
+$psql_standby{stdin} .= qq[
+	BEGIN;
+	DECLARE $cursor1 CURSOR FOR SELECT b FROM $table1;
+	FETCH FORWARD FROM $cursor1;
+	];
+# FETCH FORWARD should have returned a 0, since all values of b in the
+# table are 0
+ok(pump_until_standby(qr/^0$/m),
+	"$sect: cursor with conflicting pin established");
+
+# Check the log from this point onward for recovery conflict messages
+my $log_location = -s $node_standby->logfile;
+
+# VACUUM on the primary
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
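+
+# (Replaying VACUUM's cleanup of the aborted row requires a cleanup lock on
+# the page, which the cursor's buffer pin blocks; after
+# max_standby_streaming_delay the startup process terminates the pinning
+# backend, producing the conflict checked below.)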
+
+# Wait for catchup.  The existing connection will be terminated before
+# replay is finished, so waiting for catchup ensures that there is no race
+# between encountering the recovery conflict which causes the disconnect
+# and checking the logfile for the terminated connection.
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User was holding shared buffer pin for too long");
+reconnect_and_clear();
+check_conflict_stat("bufferpin");
+
+
+## RECOVERY CONFLICT 2: Snapshot conflict
+$sect = "snapshot conflict";
+$expected_conflicts++;
+
+$node_primary->safe_psql($test_db,
+	qq[INSERT INTO $table1 SELECT i, 0 FROM generate_series(1,20) i]);
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+# DECLARE and FETCH from a cursor on the standby
+$psql_standby{stdin} .= qq[
+	BEGIN;
+	DECLARE $cursor1 CURSOR FOR SELECT b FROM $table1;
+	FETCH FORWARD FROM $cursor1;
+	];
+ok( pump_until(
+		$psql_standby{run}, $psql_timeout,
+		\$psql_standby{stdout}, qr/^0$/m,),
+	"$sect: cursor with conflicting snapshot established");
+
+# Do some HOT updates
+$node_primary->safe_psql($test_db,
+	qq[UPDATE $table1 SET a = a + 1 WHERE a > 2;]);
+
+# VACUUM, pruning those dead tuples
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
+
+# Wait for attempted replay of PRUNE records
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log(
+	"User query might have needed to see row versions that must be removed");
+reconnect_and_clear();
+check_conflict_stat("snapshot");
+
+
+## RECOVERY CONFLICT 3: Lock conflict
+$sect = "lock conflict";
+$expected_conflicts++;
+
+# Acquire a lock to conflict with
+$psql_standby{stdin} .= qq[
+	BEGIN;
+	LOCK TABLE $table1 IN ACCESS SHARE MODE;
+	SELECT 1;
+	];
+ok(pump_until_standby(qr/^1$/m), "$sect: conflicting lock acquired");
+
+# DROP TABLE containing the block which the standby has in a pinned buffer
+$node_primary->safe_psql($test_db, qq[DROP TABLE $table1;]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User was holding a relation lock for too long");
+reconnect_and_clear();
+check_conflict_stat("lock");
+
+
+# See allow_in_place_tablespaces comment above
+### RECOVERY CONFLICT 4: Tablespace conflict
+#$sect = "tablespace conflict";
+#$expected_conflicts++;
+#
+## DECLARE a cursor for a query which, with sufficiently low work_mem, will
+## spill tuples into temp files in the temporary tablespace created during
+## setup.
+#$psql_standby{stdin} .= qq[
+#	BEGIN;
+#	SET work_mem = '64kB';
+#	DECLARE $cursor1 CURSOR FOR
+#		SELECT count(*) FROM generate_series(1,6000);
+#	FETCH FORWARD FROM $cursor1;
+#	];
+#ok(pump_until_standby(qr/^6000$/m),
+#	"$sect: cursor with conflicting temp file established");
+#
+## Drop the tablespace currently containing spill files for the query on
+## the standby
+#$node_primary->safe_psql($test_db, qq[DROP TABLESPACE $tablespace1;]);
+#
+#$primary_lsn = $node_primary->lsn('flush');
+#$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+#
+#check_conflict_log(
+#	"User was or might have been using tablespace that must be dropped");
+#reconnect_and_clear();
+#check_conflict_stat("tablespace");
+
+
+## RECOVERY CONFLICT 5: Deadlock
+SKIP:
+{
+	skip "disabled until after minor releases, due to instability";
+
+$sect = "startup deadlock";
+$expected_conflicts++;
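+
+# (The lock is taken in a prepared transaction so that it stays held with
+# no live session behind it; on the standby it is therefore held by the
+# startup process itself, which is what makes a deadlock with replay
+# possible.  This is also why max_prepared_transactions is set in the
+# configuration above.)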
+
+# Generate a few dead rows, to later be cleaned up by vacuum.  Then acquire
+# a lock on another relation in a prepared xact, so it's held continuously
+# by the startup process.  The standby psql will block acquiring that lock
+# while holding a pin that vacuum needs, triggering the deadlock.
+$node_primary->safe_psql(
+	$test_db,
+	qq[
+CREATE TABLE $table1(a int, b int);
+INSERT INTO $table1 VALUES (1);
+BEGIN;
+INSERT INTO $table1(a) SELECT generate_series(1, 100) i;
+ROLLBACK;
+BEGIN;
+LOCK TABLE $table2;
+PREPARE TRANSACTION 'lock';
+INSERT INTO $table1(a) VALUES (170);
+SELECT txid_current();
+]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+$psql_standby{stdin} .= qq[
+	BEGIN;
+	-- hold pin
+	DECLARE $cursor1 CURSOR FOR SELECT a FROM $table1;
+	FETCH FORWARD FROM $cursor1;
+	-- wait for lock held by prepared transaction
+	SELECT * FROM $table2;
+	];
+ok( pump_until(
+		$psql_standby{run}, $psql_timeout,
+		\$psql_standby{stdout}, qr/^1$/m,),
+	"$sect: cursor holding conflicting pin, also waiting for lock, established"
+);
+
+# Just to make sure we're already waiting for the lock
+ok( $node_standby->poll_query_until(
+		'postgres', qq[
+SELECT 'waiting' FROM pg_locks WHERE locktype = 'relation' AND NOT granted;
+], 'waiting'),
+	"$sect: lock acquisition is waiting");
+
+# VACUUM will prune away rows, causing a buffer pin conflict, while the
+# standby psql is waiting on the lock
+$node_primary->safe_psql($test_db, qq[VACUUM $table1;]);
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User transaction caused buffer deadlock with recovery.");
+reconnect_and_clear();
+check_conflict_stat("deadlock");
+
+# clean up for next tests
+$node_primary->safe_psql($test_db, qq[ROLLBACK PREPARED 'lock';]);
+}
+
+
+# Check that the expected number of conflicts shows up in pg_stat_database.
+# Needs to be tested before the database is dropped, for obvious reasons.
+is( $node_standby->safe_psql(
+		$test_db,
+		qq[SELECT conflicts FROM pg_stat_database WHERE datname='$test_db';]),
+	$expected_conflicts,
+	qq[$expected_conflicts recovery conflicts shown in pg_stat_database]);
+
+
+## RECOVERY CONFLICT 6: Database conflict
+$sect = "database conflict";
+
+$node_primary->safe_psql('postgres', qq[DROP DATABASE $test_db;]);
+
+$primary_lsn = $node_primary->lsn('flush');
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn);
+
+check_conflict_log("User was connected to a database that must be dropped");
+
+
+# Explicitly shut down psql instances gracefully, to avoid hangs or worse
+# on Windows
+$psql_standby{stdin} .= "\\q\n";
+$psql_standby{run}->finish;
+
+$node_standby->stop();
+$node_primary->stop();
+
+
+done_testing();
+
+
+sub pump_until_standby
+{
+	my $match = shift;
+
+	return pump_until($psql_standby{run}, $psql_timeout,
+		\$psql_standby{stdout}, $match);
+}
+
+sub reconnect_and_clear
+{
+	# If psql isn't dead already, tell it to quit; issuing \q when it is
+	# already dead causes IPC::Run to unhelpfully error out with
+	# "ack Broken pipe:".
+	$psql_standby{run}->pump_nb();
+	if ($psql_standby{run}->pumpable())
+	{
+		$psql_standby{stdin} .= "\\q\n";
+	}
+	$psql_standby{run}->finish;
+
+	# restart
+	$psql_standby{run}->run();
+	$psql_standby{stdin}  = '';
+	$psql_standby{stdout} = '';
+
+	# Run a query to ensure the connection has finished re-establishing
+	$psql_standby{stdin} .= qq[SELECT 1;\n];
+	die unless pump_until_standby(qr/^1$/m);
+	$psql_standby{stdout} = '';
}
+
+sub check_conflict_log
+{
+	my $message          = shift;
+	my $old_log_location = $log_location;
+
+	$log_location = $node_standby->wait_for_log(qr/$message/, $log_location);
+
+	cmp_ok($log_location, '>', $old_log_location,
+		"$sect: logfile contains terminated connection due to recovery conflict"
+	);
+}
+
+sub check_conflict_stat
+{
+	# Stats can't easily be checked before 15; it requires waiting for the
+	# stats to be reported to the stats collector, and then those messages
+	# need to be processed.  Dealt with here to reduce intra-branch
+	# differences in the tests.
+}
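+
+# (For reference: on branches where the statistics can be checked, the
+# per-type counters live in pg_stat_database_conflicts, e.g. for the
+# buffer pin case something along the lines of
+#   SELECT confl_bufferpin FROM pg_stat_database_conflicts
+#   WHERE datname = '$test_db';
+# could be polled until it reaches the expected count.)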
diff --git a/src/test/recovery/t/033_replay_tsp_drops.pl b/src/test/recovery/t/033_replay_tsp_drops.pl
new file mode 100644
index 0000000..140d94d
--- /dev/null
+++ b/src/test/recovery/t/033_replay_tsp_drops.pl
@@ -0,0 +1,152 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Test replay of tablespace/database creation/drop
+
+use strict;
+use warnings;
+
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use Time::HiRes qw(usleep);
+
+sub test_tablespace
+{
+	my $node_primary = PostgreSQL::Test::Cluster->new("primary1");
+	$node_primary->init(allows_streaming => 1);
+	$node_primary->start;
+	$node_primary->psql(
+		'postgres',
+		qq[
+			SET allow_in_place_tablespaces=on;
+			CREATE TABLESPACE dropme_ts1 LOCATION '';
+			CREATE TABLESPACE dropme_ts2 LOCATION '';
+			CREATE TABLESPACE source_ts LOCATION '';
+			CREATE TABLESPACE target_ts LOCATION '';
+			CREATE DATABASE template_db IS_TEMPLATE = true;
+			SELECT pg_create_physical_replication_slot('slot', true);
+		]);
+	my $backup_name = 'my_backup';
+	$node_primary->backup($backup_name);
+
+	my $node_standby = PostgreSQL::Test::Cluster->new("standby2");
+	$node_standby->init_from_backup($node_primary, $backup_name,
+		has_streaming => 1);
+	$node_standby->append_conf('postgresql.conf',
+		"allow_in_place_tablespaces = on");
+	$node_standby->append_conf('postgresql.conf',
+		"primary_slot_name = slot");
+	$node_standby->start;
+
+	# Make sure the connection is made
+	$node_primary->wait_for_catchup($node_standby, 'write',
+		$node_primary->lsn('write'));
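+
+	# (The physical replication slot created above, with immediate
+	# reservation, keeps the primary from recycling WAL the standby still
+	# needs, so the whole DDL sequence below remains available for replay
+	# after the immediate shutdown.)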
+
+	# Do immediate shutdown just after a sequence of CREATE DATABASE / DROP
+	# DATABASE / DROP TABLESPACE.  This causes CREATE DATABASE WAL records
+	# to be applied to already-removed directories.
+	my $query = q[
+		CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts1;
+		CREATE TABLE t (a int) TABLESPACE dropme_ts2;
+		CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts2;
+		CREATE DATABASE moveme_db TABLESPACE source_ts;
+		ALTER DATABASE moveme_db SET TABLESPACE target_ts;
+		CREATE DATABASE newdb TEMPLATE template_db;
+		ALTER DATABASE template_db IS_TEMPLATE = false;
+		DROP DATABASE dropme_db1;
+		DROP TABLE t;
+		DROP DATABASE dropme_db2; DROP TABLESPACE dropme_ts2;
+		DROP TABLESPACE source_ts;
+		DROP DATABASE template_db;
+	];
+
+	$node_primary->safe_psql('postgres', $query);
+	$node_primary->wait_for_catchup($node_standby, 'write',
+		$node_primary->lsn('write'));
+
+	# Show the "create missing directory" log message
+	$node_standby->safe_psql('postgres',
+		"ALTER SYSTEM SET log_min_messages TO debug1;");
+	$node_standby->stop('immediate');
+	# Should restart, ignoring the directory creation error.
+	is($node_standby->start(fail_ok => 1), 1, "standby node started");
+	$node_standby->stop('immediate');
+}
+
+test_tablespace();
+
+# Ensure that a missing tablespace directory during create database
+# replay immediately causes panic if the standby has already reached
+# consistent state (archive recovery is in progress).
+
+my $node_primary = PostgreSQL::Test::Cluster->new('primary2');
+$node_primary->init(allows_streaming => 1);
+$node_primary->start;
+
+# Create tablespace
+$node_primary->safe_psql(
+	'postgres', q[
+	SET allow_in_place_tablespaces=on;
+	CREATE TABLESPACE ts1 LOCATION ''
+	]);
+$node_primary->safe_psql('postgres',
+	"CREATE DATABASE db1 WITH TABLESPACE ts1");
+
+# Take backup
+my $backup_name = 'my_backup';
+$node_primary->backup($backup_name);
+my $node_standby = PostgreSQL::Test::Cluster->new('standby3');
+$node_standby->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby->append_conf('postgresql.conf',
+	"allow_in_place_tablespaces = on");
+$node_standby->start;
+
+# Make sure the standby has reached consistency and accepts connections
+$node_standby->poll_query_until('postgres', 'SELECT 1', '1');
+
+# Remove the standby tablespace directory so it will be missing when
+# replay resumes.
+my $tspoid = $node_standby->safe_psql('postgres',
+	"SELECT oid FROM pg_tablespace WHERE spcname = 'ts1';");
+my $tspdir = $node_standby->data_dir . "/pg_tblspc/$tspoid";
+File::Path::rmtree($tspdir);
+
+my $logstart = -s $node_standby->logfile;
+
+# Create a database in the tablespace and a table in the default tablespace
+$node_primary->safe_psql(
+	'postgres',
+	q[
+		CREATE TABLE should_not_replay_insertion(a int);
+		CREATE DATABASE db2 WITH TABLESPACE ts1;
+		INSERT INTO should_not_replay_insertion VALUES (1);
+	]);
+
+# The standby should fail and should not silently skip replaying the WAL.
+# In this test, the PANIC is turned into a WARNING by
+# allow_in_place_tablespaces, so check the log messages instead of
+# confirming standby failure.
+my $max_attempts = $PostgreSQL::Test::Utils::timeout_default * 10;
+while ($max_attempts-- >= 0)
+{
+	last
+	  if (
+		find_in_log(
+			$node_standby,
+			qr!WARNING: ( [A-Z0-9]+:)? creating missing directory: pg_tblspc/!,
+			$logstart));
+	usleep(100_000);
+}
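+
+# (timeout_default is in seconds; polling every 100ms therefore gives
+# 10 * timeout_default attempts before the test gives up.)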
+ok($max_attempts > 0, "invalid directory creation is detected");
+
+done_testing();
+
+# find $pat in logfile of $node after $off-th byte
+sub find_in_log
+{
+	my ($node, $pat, $off) = @_;
+
+	my $log = PostgreSQL::Test::Utils::slurp_file($node->logfile, $off);
+
+	return $log =~ m/$pat/;
+}
diff --git a/src/test/recovery/t/cp_history_files b/src/test/recovery/t/cp_history_files
new file mode 100644
index 0000000..cfeea41
--- /dev/null
+++ b/src/test/recovery/t/cp_history_files
@@ -0,0 +1,10 @@
+#!/usr/bin/perl
+
+use File::Copy;
+use strict;
+use warnings;
+
+die "wrong number of arguments" if @ARGV != 2;
+my ($source, $target) = @ARGV;
+exit if $source !~ /history/;
+copy($source, $target) or die "couldn't copy $source to $target: $!";
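+
+# (This script serves as the archive_command of 025_stuck_on_old_timeline.pl:
+# it archives timeline history files but deliberately drops WAL segments,
+# simulating an archive that has the history file and nothing else.)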