Diffstat (limited to 'src/test/recovery'): 39 files changed, 7785 insertions, 0 deletions
diff --git a/src/test/recovery/.gitignore b/src/test/recovery/.gitignore
new file mode 100644
index 0000000..871e943
--- /dev/null
+++ b/src/test/recovery/.gitignore
@@ -0,0 +1,2 @@
+# Generated by test suite
+/tmp_check/
diff --git a/src/test/recovery/Makefile b/src/test/recovery/Makefile
new file mode 100644
index 0000000..c47eee2
--- /dev/null
+++ b/src/test/recovery/Makefile
@@ -0,0 +1,29 @@
+#-------------------------------------------------------------------------
+#
+# Makefile for src/test/recovery
+#
+# Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group
+# Portions Copyright (c) 1994, Regents of the University of California
+#
+# src/test/recovery/Makefile
+#
+#-------------------------------------------------------------------------
+
+EXTRA_INSTALL=contrib/test_decoding contrib/pg_prewarm
+
+subdir = src/test/recovery
+top_builddir = ../../..
+include $(top_builddir)/src/Makefile.global
+
+# required for 017_shm.pl and 027_stream_regress.pl
+REGRESS_SHLIB=$(abs_top_builddir)/src/test/regress/regress$(DLSUFFIX)
+export REGRESS_SHLIB
+
+check:
+	$(prove_check)
+
+installcheck:
+	$(prove_installcheck)
+
+clean distclean maintainer-clean:
+	rm -rf tmp_check
diff --git a/src/test/recovery/README b/src/test/recovery/README
new file mode 100644
index 0000000..da11679
--- /dev/null
+++ b/src/test/recovery/README
@@ -0,0 +1,27 @@
+src/test/recovery/README
+
+Regression tests for recovery and replication
+=============================================
+
+This directory contains a test suite for recovery and replication.
+
+Running the tests
+=================
+
+NOTE: You must have given the --enable-tap-tests argument to configure.
+Also, to use "make installcheck", you must have built and installed
+contrib/pg_prewarm and contrib/test_decoding in addition to the core code.
+
+Run
+    make check
+or
+    make installcheck
+You can use "make installcheck" if you previously did "make install".
+In that case, the code in the installation tree is tested. With
+"make check", a temporary installation tree is built from the current
+sources and then tested.
+
+Either way, this test initializes, starts, and stops several test Postgres
+clusters.
+
+See src/test/perl/README for more info about running these tests.
diff --git a/src/test/recovery/t/001_stream_rep.pl b/src/test/recovery/t/001_stream_rep.pl
new file mode 100644
index 0000000..8686409
--- /dev/null
+++ b/src/test/recovery/t/001_stream_rep.pl
@@ -0,0 +1,534 @@
+
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Minimal test testing streaming replication
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+
+# Initialize primary node
+my $node_primary = PostgreSQL::Test::Cluster->new('primary');
+# A specific role is created to perform some tests related to replication,
+# and it needs proper authentication configuration.
+$node_primary->init(
+	allows_streaming => 1,
+	auth_extra       => [ '--create-role', 'repl_role' ]);
+$node_primary->start;
+my $backup_name = 'my_backup';
+
+# Take backup
+$node_primary->backup($backup_name);
+
+# Create streaming standby linking to primary
+my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1');
+$node_standby_1->init_from_backup($node_primary, $backup_name,
+	has_streaming => 1);
+$node_standby_1->start;
+
+# Take backup of standby 1 (not mandatory, but useful to check if
+# pg_basebackup works on a standby).
+$node_standby_1->backup($backup_name); + +# Take a second backup of the standby while the primary is offline. +$node_primary->stop; +$node_standby_1->backup('my_backup_2'); +$node_primary->start; + +# Create second standby node linking to standby 1 +my $node_standby_2 = PostgreSQL::Test::Cluster->new('standby_2'); +$node_standby_2->init_from_backup($node_standby_1, $backup_name, + has_streaming => 1); +$node_standby_2->start; + +# Create some content on primary and check its presence in standby nodes +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1002) AS a"); + +# Wait for standbys to catch up +my $primary_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn); +$node_standby_1->wait_for_catchup($node_standby_2, 'replay', $primary_lsn); + +my $result = + $node_standby_1->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +print "standby 1: $result\n"; +is($result, qq(1002), 'check streamed content on standby 1'); + +$result = + $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +print "standby 2: $result\n"; +is($result, qq(1002), 'check streamed content on standby 2'); + +# Likewise, but for a sequence +$node_primary->safe_psql('postgres', + "CREATE SEQUENCE seq1; SELECT nextval('seq1')"); + +# Wait for standbys to catch up +$primary_lsn = $node_primary->lsn('write'); +$node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn); +$node_standby_1->wait_for_catchup($node_standby_2, 'replay', $primary_lsn); + +$result = $node_standby_1->safe_psql('postgres', "SELECT * FROM seq1"); +print "standby 1: $result\n"; +is($result, qq(33|0|t), 'check streamed sequence content on standby 1'); + +$result = $node_standby_2->safe_psql('postgres', "SELECT * FROM seq1"); +print "standby 2: $result\n"; +is($result, qq(33|0|t), 'check streamed sequence content on standby 2'); + +# Check that only READ-only queries can run on standbys +is($node_standby_1->psql('postgres', 'INSERT INTO tab_int VALUES (1)'), + 3, 'read-only queries on standby 1'); +is($node_standby_2->psql('postgres', 'INSERT INTO tab_int VALUES (1)'), + 3, 'read-only queries on standby 2'); + +# Tests for connection parameter target_session_attrs +note "testing connection parameter \"target_session_attrs\""; + +# Attempt to connect to $node1, then $node2, using target_session_attrs=$mode. +# Expect to connect to $target_node (undef for failure) with given $status. +sub test_target_session_attrs +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $node1 = shift; + my $node2 = shift; + my $target_node = shift; + my $mode = shift; + my $status = shift; + + my $node1_host = $node1->host; + my $node1_port = $node1->port; + my $node1_name = $node1->name; + my $node2_host = $node2->host; + my $node2_port = $node2->port; + my $node2_name = $node2->name; + my $target_port = undef; + $target_port = $target_node->port if (defined $target_node); + my $target_name = undef; + $target_name = $target_node->name if (defined $target_node); + + # Build connection string for connection attempt. + my $connstr = "host=$node1_host,$node2_host "; + $connstr .= "port=$node1_port,$node2_port "; + $connstr .= "target_session_attrs=$mode"; + + # Attempt to connect, and if successful, get the server port number + # we connected to. Note we must pass the SQL command via the command + # line not stdin, else Perl may spit up trying to write to stdin of + # an already-failed psql process. 
+ my ($ret, $stdout, $stderr) = + $node1->psql('postgres', undef, + extra_params => [ '-d', $connstr, '-c', 'SHOW port;' ]); + if ($status == 0) + { + is( $status == $ret && $stdout eq $target_port, + 1, + "connect to node $target_name with mode \"$mode\" and $node1_name,$node2_name listed" + ); + } + else + { + print "status = $status\n"; + print "ret = $ret\n"; + print "stdout = $stdout\n"; + print "stderr = $stderr\n"; + + is( $status == $ret && !defined $target_node, + 1, + "fail to connect with mode \"$mode\" and $node1_name,$node2_name listed" + ); + } + + return; +} + +# Connect to primary in "read-write" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_primary, + "read-write", 0); + +# Connect to primary in "read-write" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_primary, + "read-write", 0); + +# Connect to primary in "any" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_primary, + "any", 0); + +# Connect to standby1 in "any" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "any", 0); + +# Connect to primary in "primary" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_primary, + "primary", 0); + +# Connect to primary in "primary" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_primary, + "primary", 0); + +# Connect to standby1 in "read-only" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1, + "read-only", 0); + +# Connect to standby1 in "read-only" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "read-only", 0); + +# Connect to primary in "prefer-standby" mode with primary,primary list. +test_target_session_attrs($node_primary, $node_primary, $node_primary, + "prefer-standby", 0); + +# Connect to standby1 in "prefer-standby" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1, + "prefer-standby", 0); + +# Connect to standby1 in "prefer-standby" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "prefer-standby", 0); + +# Connect to standby1 in "standby" mode with primary,standby1 list. +test_target_session_attrs($node_primary, $node_standby_1, $node_standby_1, + "standby", 0); + +# Connect to standby1 in "standby" mode with standby1,primary list. +test_target_session_attrs($node_standby_1, $node_primary, $node_standby_1, + "standby", 0); + +# Fail to connect in "read-write" mode with standby1,standby2 list. +test_target_session_attrs($node_standby_1, $node_standby_2, undef, + "read-write", 2); + +# Fail to connect in "primary" mode with standby1,standby2 list. +test_target_session_attrs($node_standby_1, $node_standby_2, undef, + "primary", 2); + +# Fail to connect in "read-only" mode with primary,primary list. +test_target_session_attrs($node_primary, $node_primary, undef, + "read-only", 2); + +# Fail to connect in "standby" mode with primary,primary list. +test_target_session_attrs($node_primary, $node_primary, undef, "standby", 2); + +# Test for SHOW commands using a WAL sender connection with a replication +# role. 
+note "testing SHOW commands for replication connection"; + +$node_primary->psql( + 'postgres', " +CREATE ROLE repl_role REPLICATION LOGIN; +GRANT pg_read_all_settings TO repl_role;"); +my $primary_host = $node_primary->host; +my $primary_port = $node_primary->port; +my $connstr_common = "host=$primary_host port=$primary_port user=repl_role"; +my $connstr_rep = "$connstr_common replication=1"; +my $connstr_db = "$connstr_common replication=database dbname=postgres"; + +# Test SHOW ALL +my ($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW ALL;', + on_error_die => 1, + extra_params => [ '-d', $connstr_rep ]); +ok($ret == 0, "SHOW ALL with replication role and physical replication"); +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW ALL;', + on_error_die => 1, + extra_params => [ '-d', $connstr_db ]); +ok($ret == 0, "SHOW ALL with replication role and logical replication"); + +# Test SHOW with a user-settable parameter +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW work_mem;', + on_error_die => 1, + extra_params => [ '-d', $connstr_rep ]); +ok( $ret == 0, + "SHOW with user-settable parameter, replication role and physical replication" +); +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW work_mem;', + on_error_die => 1, + extra_params => [ '-d', $connstr_db ]); +ok( $ret == 0, + "SHOW with user-settable parameter, replication role and logical replication" +); + +# Test SHOW with a superuser-settable parameter +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW primary_conninfo;', + on_error_die => 1, + extra_params => [ '-d', $connstr_rep ]); +ok( $ret == 0, + "SHOW with superuser-settable parameter, replication role and physical replication" +); +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', 'SHOW primary_conninfo;', + on_error_die => 1, + extra_params => [ '-d', $connstr_db ]); +ok( $ret == 0, + "SHOW with superuser-settable parameter, replication role and logical replication" +); + +note "testing READ_REPLICATION_SLOT command for replication connection"; + +my $slotname = 'test_read_replication_slot_physical'; + +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', + 'READ_REPLICATION_SLOT non_existent_slot;', + extra_params => [ '-d', $connstr_rep ]); +ok($ret == 0, "READ_REPLICATION_SLOT exit code 0 on success"); +like($stdout, qr/^\|\|$/, + "READ_REPLICATION_SLOT returns NULL values if slot does not exist"); + +$node_primary->psql( + 'postgres', + "CREATE_REPLICATION_SLOT $slotname PHYSICAL RESERVE_WAL;", + extra_params => [ '-d', $connstr_rep ]); + +($ret, $stdout, $stderr) = $node_primary->psql( + 'postgres', + "READ_REPLICATION_SLOT $slotname;", + extra_params => [ '-d', $connstr_rep ]); +ok($ret == 0, "READ_REPLICATION_SLOT success with existing slot"); +like($stdout, qr/^physical\|[^|]*\|1$/, + "READ_REPLICATION_SLOT returns tuple with slot information"); + +$node_primary->psql( + 'postgres', + "DROP_REPLICATION_SLOT $slotname;", + extra_params => [ '-d', $connstr_rep ]); + +note "switching to physical replication slot"; + +# Switch to using a physical replication slot. We can do this without a new +# backup since physical slots can go backwards if needed. Do so on both +# standbys. Since we're going to be testing things that affect the slot state, +# also increase the standby feedback interval to ensure timely updates. 
+my ($slotname_1, $slotname_2) = ('standby_1', 'standby_2'); +$node_primary->append_conf('postgresql.conf', "max_replication_slots = 4"); +$node_primary->restart; +is( $node_primary->psql( + 'postgres', + qq[SELECT pg_create_physical_replication_slot('$slotname_1');]), + 0, + 'physical slot created on primary'); +$node_standby_1->append_conf('postgresql.conf', + "primary_slot_name = $slotname_1"); +$node_standby_1->append_conf('postgresql.conf', + "wal_receiver_status_interval = 1"); +$node_standby_1->append_conf('postgresql.conf', "max_replication_slots = 4"); +$node_standby_1->restart; +is( $node_standby_1->psql( + 'postgres', + qq[SELECT pg_create_physical_replication_slot('$slotname_2');]), + 0, + 'physical slot created on intermediate replica'); +$node_standby_2->append_conf('postgresql.conf', + "primary_slot_name = $slotname_2"); +$node_standby_2->append_conf('postgresql.conf', + "wal_receiver_status_interval = 1"); +# should be able change primary_slot_name without restart +# will wait effect in get_slot_xmins above +$node_standby_2->reload; + +# Fetch xmin columns from slot's pg_replication_slots row, after waiting for +# given boolean condition to be true to ensure we've reached a quiescent state +sub get_slot_xmins +{ + my ($node, $slotname, $check_expr) = @_; + + $node->poll_query_until( + 'postgres', qq[ + SELECT $check_expr + FROM pg_catalog.pg_replication_slots + WHERE slot_name = '$slotname'; + ]) or die "Timed out waiting for slot xmins to advance"; + + my $slotinfo = $node->slot($slotname); + return ($slotinfo->{'xmin'}, $slotinfo->{'catalog_xmin'}); +} + +# There's no hot standby feedback and there are no logical slots on either peer +# so xmin and catalog_xmin should be null on both slots. +my ($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1, + "xmin IS NULL AND catalog_xmin IS NULL"); +is($xmin, '', 'xmin of non-cascaded slot null with no hs_feedback'); +is($catalog_xmin, '', + 'catalog xmin of non-cascaded slot null with no hs_feedback'); + +($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2, + "xmin IS NULL AND catalog_xmin IS NULL"); +is($xmin, '', 'xmin of cascaded slot null with no hs_feedback'); +is($catalog_xmin, '', + 'catalog xmin of cascaded slot null with no hs_feedback'); + +# Replication still works? +$node_primary->safe_psql('postgres', 'CREATE TABLE replayed(val integer);'); + +sub replay_check +{ + my $newval = $node_primary->safe_psql('postgres', + 'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val' + ); + my $primary_lsn = $node_primary->lsn('write'); + $node_primary->wait_for_catchup($node_standby_1, 'replay', $primary_lsn); + $node_standby_1->wait_for_catchup($node_standby_2, 'replay', + $primary_lsn); + + $node_standby_1->safe_psql('postgres', + qq[SELECT 1 FROM replayed WHERE val = $newval]) + or die "standby_1 didn't replay primary value $newval"; + $node_standby_2->safe_psql('postgres', + qq[SELECT 1 FROM replayed WHERE val = $newval]) + or die "standby_2 didn't replay standby_1 value $newval"; + return; +} + +replay_check(); + +note "enabling hot_standby_feedback"; + +# Enable hs_feedback. The slot should gain an xmin. We set the status interval +# so we'll see the results promptly. 
+$node_standby_1->safe_psql('postgres', + 'ALTER SYSTEM SET hot_standby_feedback = on;'); +$node_standby_1->reload; +$node_standby_2->safe_psql('postgres', + 'ALTER SYSTEM SET hot_standby_feedback = on;'); +$node_standby_2->reload; +replay_check(); + +($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1, + "xmin IS NOT NULL AND catalog_xmin IS NULL"); +isnt($xmin, '', 'xmin of non-cascaded slot non-null with hs feedback'); +is($catalog_xmin, '', + 'catalog xmin of non-cascaded slot still null with hs_feedback'); + +my ($xmin1, $catalog_xmin1) = get_slot_xmins($node_standby_1, $slotname_2, + "xmin IS NOT NULL AND catalog_xmin IS NULL"); +isnt($xmin1, '', 'xmin of cascaded slot non-null with hs feedback'); +is($catalog_xmin1, '', + 'catalog xmin of cascaded slot still null with hs_feedback'); + +note "doing some work to advance xmin"; +$node_primary->safe_psql( + 'postgres', q{ +do $$ +begin + for i in 10000..11000 loop + -- use an exception block so that each iteration eats an XID + begin + insert into tab_int values (i); + exception + when division_by_zero then null; + end; + end loop; +end$$; +}); + +$node_primary->safe_psql('postgres', 'VACUUM;'); +$node_primary->safe_psql('postgres', 'CHECKPOINT;'); + +my ($xmin2, $catalog_xmin2) = + get_slot_xmins($node_primary, $slotname_1, "xmin <> '$xmin'"); +note "primary slot's new xmin $xmin2, old xmin $xmin"; +isnt($xmin2, $xmin, 'xmin of non-cascaded slot with hs feedback has changed'); +is($catalog_xmin2, '', + 'catalog xmin of non-cascaded slot still null with hs_feedback unchanged' +); + +($xmin2, $catalog_xmin2) = + get_slot_xmins($node_standby_1, $slotname_2, "xmin <> '$xmin1'"); +note "standby_1 slot's new xmin $xmin2, old xmin $xmin1"; +isnt($xmin2, $xmin1, 'xmin of cascaded slot with hs feedback has changed'); +is($catalog_xmin2, '', + 'catalog xmin of cascaded slot still null with hs_feedback unchanged'); + +note "disabling hot_standby_feedback"; + +# Disable hs_feedback. Xmin should be cleared. +$node_standby_1->safe_psql('postgres', + 'ALTER SYSTEM SET hot_standby_feedback = off;'); +$node_standby_1->reload; +$node_standby_2->safe_psql('postgres', + 'ALTER SYSTEM SET hot_standby_feedback = off;'); +$node_standby_2->reload; +replay_check(); + +($xmin, $catalog_xmin) = get_slot_xmins($node_primary, $slotname_1, + "xmin IS NULL AND catalog_xmin IS NULL"); +is($xmin, '', 'xmin of non-cascaded slot null with hs feedback reset'); +is($catalog_xmin, '', + 'catalog xmin of non-cascaded slot still null with hs_feedback reset'); + +($xmin, $catalog_xmin) = get_slot_xmins($node_standby_1, $slotname_2, + "xmin IS NULL AND catalog_xmin IS NULL"); +is($xmin, '', 'xmin of cascaded slot null with hs feedback reset'); +is($catalog_xmin, '', + 'catalog xmin of cascaded slot still null with hs_feedback reset'); + +note "check change primary_conninfo without restart"; +$node_standby_2->append_conf('postgresql.conf', "primary_slot_name = ''"); +$node_standby_2->enable_streaming($node_primary); +$node_standby_2->reload; + +# be sure do not streaming from cascade +$node_standby_1->stop; + +my $newval = $node_primary->safe_psql('postgres', + 'INSERT INTO replayed(val) SELECT coalesce(max(val),0) + 1 AS newval FROM replayed RETURNING val' +); +$node_primary->wait_for_catchup($node_standby_2); +my $is_replayed = $node_standby_2->safe_psql('postgres', + qq[SELECT 1 FROM replayed WHERE val = $newval]); +is($is_replayed, qq(1), "standby_2 didn't replay primary value $newval"); + +# Drop any existing slots on the primary, for the follow-up tests. 
+$node_primary->safe_psql('postgres', + "SELECT pg_drop_replication_slot(slot_name) FROM pg_replication_slots;"); + +# Test physical slot advancing and its durability. Create a new slot on +# the primary, not used by any of the standbys. This reserves WAL at creation. +my $phys_slot = 'phys_slot'; +$node_primary->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('$phys_slot', true);"); +# Generate some WAL, and switch to a new segment, used to check that +# the previous segment is correctly getting recycled as the slot advancing +# would recompute the minimum LSN calculated across all slots. +my $segment_removed = $node_primary->safe_psql('postgres', + 'SELECT pg_walfile_name(pg_current_wal_lsn())'); +chomp($segment_removed); +$node_primary->psql( + 'postgres', " + CREATE TABLE tab_phys_slot (a int); + INSERT INTO tab_phys_slot VALUES (generate_series(1,10)); + SELECT pg_switch_wal();"); +my $current_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); +chomp($current_lsn); +my $psql_rc = $node_primary->psql('postgres', + "SELECT pg_replication_slot_advance('$phys_slot', '$current_lsn'::pg_lsn);" +); +is($psql_rc, '0', 'slot advancing with physical slot'); +my $phys_restart_lsn_pre = $node_primary->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';" +); +chomp($phys_restart_lsn_pre); +# Slot advance should persist across clean restarts. +$node_primary->restart; +my $phys_restart_lsn_post = $node_primary->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$phys_slot';" +); +chomp($phys_restart_lsn_post); +ok( ($phys_restart_lsn_pre cmp $phys_restart_lsn_post) == 0, + "physical slot advance persists across restarts"); + +# Check if the previous segment gets correctly recycled after the +# server stopped cleanly, causing a shutdown checkpoint to be generated. +my $primary_data = $node_primary->data_dir; +ok(!-f "$primary_data/pg_wal/$segment_removed", + "WAL segment $segment_removed recycled after physical slot advancing"); + +done_testing(); diff --git a/src/test/recovery/t/002_archiving.pl b/src/test/recovery/t/002_archiving.pl new file mode 100644 index 0000000..d69da4e --- /dev/null +++ b/src/test/recovery/t/002_archiving.pl @@ -0,0 +1,144 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# test for archiving with hot standby +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Copy; + +# Initialize primary node, doing archives +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init( + has_archiving => 1, + allows_streaming => 1); +my $backup_name = 'my_backup'; + +# Start it +$node_primary->start; + +# Take backup for standby +$node_primary->backup($backup_name); + +# Initialize standby node from backup, fetching WAL from archives +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +# Note that this makes the standby store its contents on the archives +# of the primary. +$node_standby->init_from_backup($node_primary, $backup_name, + has_restoring => 1); +$node_standby->append_conf('postgresql.conf', + "wal_retrieve_retry_interval = '100ms'"); + +# Set archive_cleanup_command and recovery_end_command, checking their +# execution by the backend with dummy commands. 
+my $data_dir = $node_standby->data_dir; +my $archive_cleanup_command_file = "archive_cleanup_command.done"; +my $recovery_end_command_file = "recovery_end_command.done"; +$node_standby->append_conf( + 'postgresql.conf', qq( +archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file' +recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file' +)); +$node_standby->start; + +# Create some content on primary +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); + +# Note the presence of this checkpoint for the archive_cleanup_command +# check done below, before switching to a new segment. +$node_primary->safe_psql('postgres', "CHECKPOINT"); + +# Done after the checkpoint to ensure that it is replayed on the standby, +# for archive_cleanup_command. +my $current_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); + +# Force archiving of WAL file to make it present on primary +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); + +# Add some more content, it should not be present on standby +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); + +# Wait until necessary replay has been done on standby +my $caughtup_query = + "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +my $result = + $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(1000), 'check content from archives'); + +# archive_cleanup_command is executed after generating a restart point, +# with a checkpoint. +$node_standby->safe_psql('postgres', q{CHECKPOINT}); +ok( -f "$data_dir/$archive_cleanup_command_file", + 'archive_cleanup_command executed on checkpoint'); +ok( !-f "$data_dir/$recovery_end_command_file", + 'recovery_end_command not executed yet'); + +# Check the presence of temporary files specifically generated during +# archive recovery. To ensure the presence of the temporary history +# file, switch to a timeline large enough to allow a standby to recover +# a history file from an archive. As this requires at least two timeline +# switches, promote the existing standby first. Then create a second +# standby based on the primary, using its archives. Finally, the second +# standby is promoted. +$node_standby->promote; + +# Wait until the history file has been stored on the archives of the +# primary once the promotion of the standby completes. This ensures that +# the second standby created below will be able to restore this file, +# creating a RECOVERYHISTORY. +my $primary_archive = $node_primary->archive_dir; +$caughtup_query = + "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history')"; +$node_primary->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for archiving of 00000002.history"; + +# recovery_end_command should have been triggered on promotion. +ok( -f "$data_dir/$recovery_end_command_file", + 'recovery_end_command executed after promotion'); + +my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2'); +$node_standby2->init_from_backup($node_primary, $backup_name, + has_restoring => 1); + +# Make execution of recovery_end_command fail. This should not affect +# promotion, and its failure should be logged. 
+$node_standby2->append_conf( + 'postgresql.conf', qq( +recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file' +)); + +$node_standby2->start; + +# Save the log location, to see the failure of recovery_end_command. +my $log_location = -s $node_standby2->logfile; + +# Now promote standby2, and check that temporary files specifically +# generated during archive recovery are removed by the end of recovery. +$node_standby2->promote; + +# Check the logs of the standby to see that the commands have failed. +my $log_contents = slurp_file($node_standby2->logfile, $log_location); +my $node_standby2_data = $node_standby2->data_dir; + +like( + $log_contents, + qr/restored log file "00000002.history" from archive/s, + "00000002.history retrieved from the archives"); +ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY", + "RECOVERYHISTORY removed after promotion"); +ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG", + "RECOVERYXLOG removed after promotion"); +like( + $log_contents, + qr/WARNING:.*recovery_end_command/s, + "recovery_end_command failure detected in logs after promotion"); + +done_testing(); diff --git a/src/test/recovery/t/003_recovery_targets.pl b/src/test/recovery/t/003_recovery_targets.pl new file mode 100644 index 0000000..e8e1a42 --- /dev/null +++ b/src/test/recovery/t/003_recovery_targets.pl @@ -0,0 +1,186 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test for recovery targets: name, timestamp, XID +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Time::HiRes qw(usleep); + +# Create and test a standby from given backup, with a certain recovery target. +# Choose $until_lsn later than the transaction commit that causes the row +# count to reach $num_rows, yet not later than the recovery target. +sub test_recovery_standby +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my $test_name = shift; + my $node_name = shift; + my $node_primary = shift; + my $recovery_params = shift; + my $num_rows = shift; + my $until_lsn = shift; + + my $node_standby = PostgreSQL::Test::Cluster->new($node_name); + $node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); + + foreach my $param_item (@$recovery_params) + { + $node_standby->append_conf('postgresql.conf', qq($param_item)); + } + + $node_standby->start; + + # Wait until standby has replayed enough data + my $caughtup_query = + "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; + $node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + + # Create some content on primary and check its presence in standby + my $result = + $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int"); + is($result, qq($num_rows), "check standby content for $test_name"); + + # Stop standby node + $node_standby->teardown_node; + + return; +} + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(has_archiving => 1, allows_streaming => 1); + +# Bump the transaction ID epoch. This is useful to stress the portability +# of recovery_target_xid parsing. 
+system_or_bail('pg_resetwal', '--epoch', '1', $node_primary->data_dir); + +# Start it +$node_primary->start; + +# Create data before taking the backup, aimed at testing +# recovery_target = 'immediate' +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); +my $lsn1 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); + +# Take backup from which all operations will be run +$node_primary->backup('my_backup'); + +# Insert some data with used as a replay reference, with a recovery +# target TXID. +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); +my $ret = $node_primary->safe_psql('postgres', + "SELECT pg_current_wal_lsn(), pg_current_xact_id();"); +my ($lsn2, $recovery_txid) = split /\|/, $ret; + +# More data, with recovery target timestamp +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(2001,3000))"); +my $lsn3 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); +my $recovery_time = $node_primary->safe_psql('postgres', "SELECT now()"); + +# Even more data, this time with a recovery target name +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(3001,4000))"); +my $recovery_name = "my_target"; +my $lsn4 = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); +$node_primary->safe_psql('postgres', + "SELECT pg_create_restore_point('$recovery_name');"); + +# And now for a recovery target LSN +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(4001,5000))"); +my $lsn5 = my $recovery_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(5001,6000))"); + +# Force archiving of WAL file +$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()"); + +# Test recovery targets +my @recovery_params = ("recovery_target = 'immediate'"); +test_recovery_standby('immediate target', + 'standby_1', $node_primary, \@recovery_params, "1000", $lsn1); +@recovery_params = ("recovery_target_xid = '$recovery_txid'"); +test_recovery_standby('XID', 'standby_2', $node_primary, \@recovery_params, + "2000", $lsn2); +@recovery_params = ("recovery_target_time = '$recovery_time'"); +test_recovery_standby('time', 'standby_3', $node_primary, \@recovery_params, + "3000", $lsn3); +@recovery_params = ("recovery_target_name = '$recovery_name'"); +test_recovery_standby('name', 'standby_4', $node_primary, \@recovery_params, + "4000", $lsn4); +@recovery_params = ("recovery_target_lsn = '$recovery_lsn'"); +test_recovery_standby('LSN', 'standby_5', $node_primary, \@recovery_params, + "5000", $lsn5); + +# Multiple targets +# +# Multiple conflicting settings are not allowed, but setting the same +# parameter multiple times or unsetting a parameter and setting a +# different one is allowed. 
+ +@recovery_params = ( + "recovery_target_name = '$recovery_name'", + "recovery_target_name = ''", + "recovery_target_time = '$recovery_time'"); +test_recovery_standby('multiple overriding settings', + 'standby_6', $node_primary, \@recovery_params, "3000", $lsn3); + +my $node_standby = PostgreSQL::Test::Cluster->new('standby_7'); +$node_standby->init_from_backup($node_primary, 'my_backup', + has_restoring => 1); +$node_standby->append_conf( + 'postgresql.conf', "recovery_target_name = '$recovery_name' +recovery_target_time = '$recovery_time'"); + +my $res = run_log( + [ + 'pg_ctl', '-D', $node_standby->data_dir, '-l', + $node_standby->logfile, 'start' + ]); +ok(!$res, 'invalid recovery startup fails'); + +my $logfile = slurp_file($node_standby->logfile()); +ok($logfile =~ qr/multiple recovery targets specified/, + 'multiple conflicting settings'); + +# Check behavior when recovery ends before target is reached + +$node_standby = PostgreSQL::Test::Cluster->new('standby_8'); +$node_standby->init_from_backup( + $node_primary, 'my_backup', + has_restoring => 1, + standby => 0); +$node_standby->append_conf('postgresql.conf', + "recovery_target_name = 'does_not_exist'"); + +run_log( + [ + 'pg_ctl', '-D', $node_standby->data_dir, '-l', + $node_standby->logfile, 'start' + ]); + +# wait for postgres to terminate +foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default) +{ + last if !-f $node_standby->data_dir . '/postmaster.pid'; + usleep(100_000); +} +$logfile = slurp_file($node_standby->logfile()); +ok( $logfile =~ + qr/FATAL: .* recovery ended before configured recovery target was reached/, + 'recovery end before target reached is a fatal error'); + +done_testing(); diff --git a/src/test/recovery/t/004_timeline_switch.pl b/src/test/recovery/t/004_timeline_switch.pl new file mode 100644 index 0000000..3203d93 --- /dev/null +++ b/src/test/recovery/t/004_timeline_switch.pl @@ -0,0 +1,110 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test for timeline switch +use strict; +use warnings; +use File::Path qw(rmtree); +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +$ENV{PGDATABASE} = 'postgres'; + +# Ensure that a cascading standby is able to follow a newly-promoted standby +# on a new timeline. 
+ +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create two standbys linking to it +my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1'); +$node_standby_1->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_1->start; +my $node_standby_2 = PostgreSQL::Test::Cluster->new('standby_2'); +$node_standby_2->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_2->start; + +# Create some content on primary +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a"); + +# Wait until standby has replayed enough data on standby 1 +$node_primary->wait_for_catchup($node_standby_1); + +# Stop and remove primary +$node_primary->teardown_node; + +# promote standby 1 using "pg_promote", switching it to a new timeline +my $psql_out = ''; +$node_standby_1->psql( + 'postgres', + "SELECT pg_promote(wait_seconds => 300)", + stdout => \$psql_out); +is($psql_out, 't', "promotion of standby with pg_promote"); + +# Switch standby 2 to replay from standby 1 +my $connstr_1 = $node_standby_1->connstr; +$node_standby_2->append_conf( + 'postgresql.conf', qq( +primary_conninfo='$connstr_1' +)); +$node_standby_2->restart; + +# Insert some data in standby 1 and check its presence in standby 2 +# to ensure that the timeline switch has been done. +$node_standby_1->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(1001,2000))"); +$node_standby_1->wait_for_catchup($node_standby_2); + +my $result = + $node_standby_2->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +is($result, qq(2000), 'check content of standby 2'); + + +# Ensure that a standby is able to follow a primary on a newer timeline +# when WAL archiving is enabled. + +# Initialize primary node +my $node_primary_2 = PostgreSQL::Test::Cluster->new('primary_2'); +$node_primary_2->init(allows_streaming => 1, has_archiving => 1); +$node_primary_2->append_conf( + 'postgresql.conf', qq( +wal_keep_size = 512MB +)); +$node_primary_2->start; + +# Take backup +$node_primary_2->backup($backup_name); + +# Create standby node +my $node_standby_3 = PostgreSQL::Test::Cluster->new('standby_3'); +$node_standby_3->init_from_backup($node_primary_2, $backup_name, + has_streaming => 1); + +# Restart primary node in standby mode and promote it, switching it +# to a new timeline. +$node_primary_2->set_standby_mode; +$node_primary_2->restart; +$node_primary_2->promote; + +# Start standby node, create some content on primary and check its presence +# in standby, to ensure that the timeline switch has been done. 
+$node_standby_3->start; +$node_primary_2->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT 1 AS a"); +$node_primary_2->wait_for_catchup($node_standby_3); + +my $result_2 = + $node_standby_3->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +is($result_2, qq(1), 'check content of standby 3'); + +done_testing(); diff --git a/src/test/recovery/t/005_replay_delay.pl b/src/test/recovery/t/005_replay_delay.pl new file mode 100644 index 0000000..370fc9e --- /dev/null +++ b/src/test/recovery/t/005_replay_delay.pl @@ -0,0 +1,114 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Checks for recovery_min_apply_delay and recovery pause +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; + +# And some content +$node_primary->safe_psql('postgres', + "CREATE TABLE tab_int AS SELECT generate_series(1, 10) AS a"); + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create streaming standby from backup +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +my $delay = 3; +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf( + 'postgresql.conf', qq( +recovery_min_apply_delay = '${delay}s' +)); +$node_standby->start; + +# Make new content on primary and check its presence in standby depending +# on the delay applied above. Before doing the insertion, get the +# current timestamp that will be used as a comparison base. Even on slow +# machines, this allows to have a predictable behavior when comparing the +# delay between data insertion moment on primary and replay time on standby. +my $primary_insert_time = time(); +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(11, 20))"); + +# Now wait for replay to complete on standby. We're done waiting when the +# standby has replayed up to the previously saved primary LSN. +my $until_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); + +$node_standby->poll_query_until('postgres', + "SELECT (pg_last_wal_replay_lsn() - '$until_lsn'::pg_lsn) >= 0") + or die "standby never caught up"; + +# This test is successful if and only if the LSN has been applied with at least +# the configured apply delay. +ok(time() - $primary_insert_time >= $delay, + "standby applies WAL only after replication delay"); + + +# Check that recovery can be paused or resumed expectedly. +my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2'); +$node_standby2->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby2->start; + +# Recovery is not yet paused. +is( $node_standby2->safe_psql( + 'postgres', "SELECT pg_get_wal_replay_pause_state()"), + 'not paused', + 'pg_get_wal_replay_pause_state() reports not paused'); + +# Request to pause recovery and wait until it's actually paused. +$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_pause()"); +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(21,30))"); +$node_standby2->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out while waiting for recovery to be paused"; + +# Even if new WAL records are streamed from the primary, +# recovery in the paused state doesn't replay them. 
+my $receive_lsn = + $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_receive_lsn()"); +my $replay_lsn = + $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_replay_lsn()"); +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(31,40))"); +$node_standby2->poll_query_until('postgres', + "SELECT '$receive_lsn'::pg_lsn < pg_last_wal_receive_lsn()") + or die "Timed out while waiting for new WAL to be streamed"; +is( $node_standby2->safe_psql('postgres', "SELECT pg_last_wal_replay_lsn()"), + qq($replay_lsn), + 'no WAL is replayed in the paused state'); + +# Request to resume recovery and wait until it's actually resumed. +$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_resume()"); +$node_standby2->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'not paused' AND pg_last_wal_replay_lsn() > '$replay_lsn'::pg_lsn" +) or die "Timed out while waiting for recovery to be resumed"; + +# Check that the paused state ends and promotion continues if a promotion +# is triggered while recovery is paused. +$node_standby2->safe_psql('postgres', "SELECT pg_wal_replay_pause()"); +$node_primary->safe_psql('postgres', + "INSERT INTO tab_int VALUES (generate_series(41,50))"); +$node_standby2->poll_query_until('postgres', + "SELECT pg_get_wal_replay_pause_state() = 'paused'") + or die "Timed out while waiting for recovery to be paused"; + +$node_standby2->promote; +$node_standby2->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery()") + or die "Timed out while waiting for promotion to finish"; + +done_testing(); diff --git a/src/test/recovery/t/006_logical_decoding.pl b/src/test/recovery/t/006_logical_decoding.pl new file mode 100644 index 0000000..a9edd8c --- /dev/null +++ b/src/test/recovery/t/006_logical_decoding.pl @@ -0,0 +1,275 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Testing of logical decoding using SQL interface and/or pg_recvlogical +# +# Most logical decoding tests are in contrib/test_decoding. This module +# is for work that doesn't fit well there, like where server restarts +# are required. +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Config; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', qq( +wal_level = logical +)); +$node_primary->start; +my $backup_name = 'primary_backup'; + +$node_primary->safe_psql('postgres', + qq[CREATE TABLE decoding_test(x integer, y text);]); + +$node_primary->safe_psql('postgres', + qq[SELECT pg_create_logical_replication_slot('test_slot', 'test_decoding');] +); + +# Cover walsender error shutdown code +my ($result, $stdout, $stderr) = $node_primary->psql( + 'template1', + qq[START_REPLICATION SLOT test_slot LOGICAL 0/0], + replication => 'database'); +ok( $stderr =~ + m/replication slot "test_slot" was not created in this database/, + "Logical decoding correctly fails to start"); + +($result, $stdout, $stderr) = $node_primary->psql( + 'template1', + qq[READ_REPLICATION_SLOT test_slot;], + replication => 'database'); +like( + $stderr, + qr/cannot use READ_REPLICATION_SLOT with a logical replication slot/, + 'READ_REPLICATION_SLOT not supported for logical slots'); + +# Check case of walsender not using a database connection. Logical +# decoding should not be allowed. 
+($result, $stdout, $stderr) = $node_primary->psql( + 'template1', + qq[START_REPLICATION SLOT s1 LOGICAL 0/1], + replication => 'true'); +ok($stderr =~ /ERROR: logical decoding requires a database connection/, + "Logical decoding fails on non-database connection"); + +$node_primary->safe_psql('postgres', + qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,10) s;] +); + +# Basic decoding works +$result = $node_primary->safe_psql('postgres', + qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]); +is(scalar(my @foobar = split /^/m, $result), + 12, 'Decoding produced 12 rows inc BEGIN/COMMIT'); + +# If we immediately crash the server we might lose the progress we just made +# and replay the same changes again. But a clean shutdown should never repeat +# the same changes when we use the SQL decoding interface. +$node_primary->restart('fast'); + +# There are no new writes, so the result should be empty. +$result = $node_primary->safe_psql('postgres', + qq[SELECT pg_logical_slot_get_changes('test_slot', NULL, NULL);]); +chomp($result); +is($result, '', 'Decoding after fast restart repeats no rows'); + +# Insert some rows and verify that we get the same results from pg_recvlogical +# and the SQL interface. +$node_primary->safe_psql('postgres', + qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(1,4) s;] +); + +my $expected = q{BEGIN +table public.decoding_test: INSERT: x[integer]:1 y[text]:'1' +table public.decoding_test: INSERT: x[integer]:2 y[text]:'2' +table public.decoding_test: INSERT: x[integer]:3 y[text]:'3' +table public.decoding_test: INSERT: x[integer]:4 y[text]:'4' +COMMIT}; + +my $stdout_sql = $node_primary->safe_psql('postgres', + qq[SELECT data FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');] +); +is($stdout_sql, $expected, 'got expected output from SQL decoding session'); + +my $endpos = $node_primary->safe_psql('postgres', + "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;" +); +print "waiting to replay $endpos\n"; + +# Insert some rows after $endpos, which we won't read. 
+$node_primary->safe_psql('postgres', + qq[INSERT INTO decoding_test(x,y) SELECT s, s::text FROM generate_series(5,50) s;] +); + +my $stdout_recv = $node_primary->pg_recvlogical_upto( + 'postgres', 'test_slot', $endpos, + $PostgreSQL::Test::Utils::timeout_default, + 'include-xids' => '0', + 'skip-empty-xacts' => '1'); +chomp($stdout_recv); +is($stdout_recv, $expected, + 'got same expected output from pg_recvlogical decoding session'); + +$node_primary->poll_query_until('postgres', + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'test_slot' AND active_pid IS NULL)" +) or die "slot never became inactive"; + +$stdout_recv = $node_primary->pg_recvlogical_upto( + 'postgres', 'test_slot', $endpos, + $PostgreSQL::Test::Utils::timeout_default, + 'include-xids' => '0', + 'skip-empty-xacts' => '1'); +chomp($stdout_recv); +is($stdout_recv, '', 'pg_recvlogical acknowledged changes'); + +$node_primary->safe_psql('postgres', 'CREATE DATABASE otherdb'); + +is( $node_primary->psql( + 'otherdb', + "SELECT lsn FROM pg_logical_slot_peek_changes('test_slot', NULL, NULL) ORDER BY lsn DESC LIMIT 1;" + ), + 3, + 'replaying logical slot from another database fails'); + +$node_primary->safe_psql('otherdb', + qq[SELECT pg_create_logical_replication_slot('otherdb_slot', 'test_decoding');] +); + +# make sure you can't drop a slot while active +SKIP: +{ + + # some Windows Perls at least don't like IPC::Run's start/kill_kill regime. + skip "Test fails on Windows perl", 2 if $Config{osname} eq 'MSWin32'; + + my $pg_recvlogical = IPC::Run::start( + [ + 'pg_recvlogical', '-d', $node_primary->connstr('otherdb'), + '-S', 'otherdb_slot', '-f', '-', '--start' + ]); + $node_primary->poll_query_until('otherdb', + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NOT NULL)" + ) or die "slot never became active"; + is($node_primary->psql('postgres', 'DROP DATABASE otherdb'), + 3, 'dropping a DB with active logical slots fails'); + $pg_recvlogical->kill_kill; + is($node_primary->slot('otherdb_slot')->{'slot_name'}, + undef, 'logical slot still exists'); +} + +$node_primary->poll_query_until('otherdb', + "SELECT EXISTS (SELECT 1 FROM pg_replication_slots WHERE slot_name = 'otherdb_slot' AND active_pid IS NULL)" +) or die "slot never became inactive"; + +is($node_primary->psql('postgres', 'DROP DATABASE otherdb'), + 0, 'dropping a DB with inactive logical slots succeeds'); +is($node_primary->slot('otherdb_slot')->{'slot_name'}, + undef, 'logical slot was actually dropped with DB'); + +# Test logical slot advancing and its durability. +my $logical_slot = 'logical_slot'; +$node_primary->safe_psql('postgres', + "SELECT pg_create_logical_replication_slot('$logical_slot', 'test_decoding', false);" +); +$node_primary->psql( + 'postgres', " + CREATE TABLE tab_logical_slot (a int); + INSERT INTO tab_logical_slot VALUES (generate_series(1,10));"); +my $current_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); +chomp($current_lsn); +my $psql_rc = $node_primary->psql('postgres', + "SELECT pg_replication_slot_advance('$logical_slot', '$current_lsn'::pg_lsn);" +); +is($psql_rc, '0', 'slot advancing with logical slot'); +my $logical_restart_lsn_pre = $node_primary->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';" +); +chomp($logical_restart_lsn_pre); +# Slot advance should persist across clean restarts. 
+$node_primary->restart; +my $logical_restart_lsn_post = $node_primary->safe_psql('postgres', + "SELECT restart_lsn from pg_replication_slots WHERE slot_name = '$logical_slot';" +); +chomp($logical_restart_lsn_post); +ok(($logical_restart_lsn_pre cmp $logical_restart_lsn_post) == 0, + "logical slot advance persists across restarts"); + +my $stats_test_slot1 = 'test_slot'; +my $stats_test_slot2 = 'logical_slot'; + +# Test that reset works for pg_stat_replication_slots + +# Stats exist for stats test slot 1 +is( $node_primary->safe_psql( + 'postgres', + qq(SELECT total_bytes > 0, stats_reset IS NULL FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1') + ), + qq(t|t), + qq(Total bytes is > 0 and stats_reset is NULL for slot '$stats_test_slot1'.) +); + +# Do reset of stats for stats test slot 1 +$node_primary->safe_psql('postgres', + qq(SELECT pg_stat_reset_replication_slot('$stats_test_slot1'))); + +# Get reset value after reset +my $reset1 = $node_primary->safe_psql('postgres', + qq(SELECT stats_reset FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1') +); + +# Do reset again +$node_primary->safe_psql('postgres', + qq(SELECT pg_stat_reset_replication_slot('$stats_test_slot1'))); + +is( $node_primary->safe_psql( + 'postgres', + qq(SELECT stats_reset > '$reset1'::timestamptz, total_bytes = 0 FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1') + ), + qq(t|t), + qq(Check that reset timestamp is later after the second reset of stats for slot '$stats_test_slot1' and confirm total_bytes was set to 0.) +); + +# Check that test slot 2 has NULL in reset timestamp +is( $node_primary->safe_psql( + 'postgres', + qq(SELECT stats_reset IS NULL FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot2') + ), + qq(t), + qq(Stats_reset is NULL for slot '$stats_test_slot2' before reset.)); + +# Get reset value again for test slot 1 +$reset1 = $node_primary->safe_psql('postgres', + qq(SELECT stats_reset FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1') +); + +# Reset stats for all replication slots +$node_primary->safe_psql('postgres', + qq(SELECT pg_stat_reset_replication_slot(NULL))); + +# Check that test slot 2 reset timestamp is no longer NULL after reset +is( $node_primary->safe_psql( + 'postgres', + qq(SELECT stats_reset IS NOT NULL FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot2') + ), + qq(t), + qq(Stats_reset is not NULL for slot '$stats_test_slot2' after reset all.) +); + +is( $node_primary->safe_psql( + 'postgres', + qq(SELECT stats_reset > '$reset1'::timestamptz FROM pg_stat_replication_slots WHERE slot_name = '$stats_test_slot1') + ), + qq(t), + qq(Check that reset timestamp is later after resetting stats for slot '$stats_test_slot1' again.) 
+); + +# done with the node +$node_primary->stop; + +done_testing(); diff --git a/src/test/recovery/t/007_sync_rep.pl b/src/test/recovery/t/007_sync_rep.pl new file mode 100644 index 0000000..86f89c6 --- /dev/null +++ b/src/test/recovery/t/007_sync_rep.pl @@ -0,0 +1,221 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Minimal test testing synchronous replication sync_state transition +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Query checking sync_priority and sync_state of each standby +my $check_sql = + "SELECT application_name, sync_priority, sync_state FROM pg_stat_replication ORDER BY application_name;"; + +# Check that sync_state of each standby is expected (waiting till it is). +# If $setting is given, synchronous_standby_names is set to it and +# the configuration file is reloaded before the test. +sub test_sync_state +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($self, $expected, $msg, $setting) = @_; + + if (defined($setting)) + { + $self->safe_psql('postgres', + "ALTER SYSTEM SET synchronous_standby_names = '$setting';"); + $self->reload; + } + + ok($self->poll_query_until('postgres', $check_sql, $expected), $msg); + return; +} + +# Start a standby and check that it is registered within the WAL sender +# array of the given primary. This polls the primary's pg_stat_replication +# until the standby is confirmed as registered. +sub start_standby_and_wait +{ + my ($primary, $standby) = @_; + my $primary_name = $primary->name; + my $standby_name = $standby->name; + my $query = + "SELECT count(1) = 1 FROM pg_stat_replication WHERE application_name = '$standby_name'"; + + $standby->start; + + print("### Waiting for standby \"$standby_name\" on \"$primary_name\"\n"); + $primary->poll_query_until('postgres', $query); + return; +} + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; +my $backup_name = 'primary_backup'; + +# Take backup +$node_primary->backup($backup_name); + +# Create all the standbys. Their status on the primary is checked to ensure +# the ordering of each one of them in the WAL sender array of the primary. + +# Create standby1 linking to primary +my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby1'); +$node_standby_1->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +start_standby_and_wait($node_primary, $node_standby_1); + +# Create standby2 linking to primary +my $node_standby_2 = PostgreSQL::Test::Cluster->new('standby2'); +$node_standby_2->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +start_standby_and_wait($node_primary, $node_standby_2); + +# Create standby3 linking to primary +my $node_standby_3 = PostgreSQL::Test::Cluster->new('standby3'); +$node_standby_3->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +start_standby_and_wait($node_primary, $node_standby_3); + +# Check that sync_state is determined correctly when +# synchronous_standby_names is specified in old syntax. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|potential +standby3|0|async), + 'old syntax of synchronous_standby_names', + 'standby1,standby2'); + +# Check that all the standbys are considered as either sync or +# potential when * is specified in synchronous_standby_names. 
+# Note that standby1 is chosen as sync standby because +# it's stored in the head of WalSnd array which manages +# all the standbys though they have the same priority. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|1|potential +standby3|1|potential), + 'asterisk in synchronous_standby_names', + '*'); + +# Stop and start standbys to rearrange the order of standbys +# in WalSnd array. Now, if standbys have the same priority, +# standby2 is selected preferentially and standby3 is next. +$node_standby_1->stop; +$node_standby_2->stop; +$node_standby_3->stop; + +# Make sure that each standby reports back to the primary in the wanted +# order. +start_standby_and_wait($node_primary, $node_standby_2); +start_standby_and_wait($node_primary, $node_standby_3); + +# Specify 2 as the number of sync standbys. +# Check that two standbys are in 'sync' state. +test_sync_state( + $node_primary, qq(standby2|2|sync +standby3|3|sync), + '2 synchronous standbys', + '2(standby1,standby2,standby3)'); + +# Start standby1 +start_standby_and_wait($node_primary, $node_standby_1); + +# Create standby4 linking to primary +my $node_standby_4 = PostgreSQL::Test::Cluster->new('standby4'); +$node_standby_4->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_4->start; + +# Check that standby1 and standby2 whose names appear earlier in +# synchronous_standby_names are considered as sync. Also check that +# standby3 appearing later represents potential, and standby4 is +# in 'async' state because it's not in the list. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|sync +standby3|3|potential +standby4|0|async), + '2 sync, 1 potential, and 1 async'); + +# Check that sync_state of each standby is determined correctly +# when num_sync exceeds the number of names of potential sync standbys +# specified in synchronous_standby_names. +test_sync_state( + $node_primary, qq(standby1|0|async +standby2|4|sync +standby3|3|sync +standby4|1|sync), + 'num_sync exceeds the num of potential sync standbys', + '6(standby4,standby0,standby3,standby2)'); + +# The setting that * comes before another standby name is acceptable +# but does not make sense in most cases. Check that sync_state is +# chosen properly even in case of that setting. standby1 is selected +# as synchronous as it has the highest priority, and is followed by a +# second standby listed first in the WAL sender array, which is +# standby2 in this case. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|sync +standby3|2|potential +standby4|2|potential), + 'asterisk before another standby name', + '2(standby1,*,standby2)'); + +# Check that the setting of '2(*)' chooses standby2 and standby3 that are stored +# earlier in WalSnd array as sync standbys. +test_sync_state( + $node_primary, qq(standby1|1|potential +standby2|1|sync +standby3|1|sync +standby4|1|potential), + 'multiple standbys having the same priority are chosen as sync', + '2(*)'); + +# Stop Standby3 which is considered in 'sync' state. +$node_standby_3->stop; + +# Check that the state of standby1 stored earlier in WalSnd array than +# standby4 is transited from potential to sync. +test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|1|sync +standby4|1|potential), + 'potential standby found earlier in array is promoted to sync'); + +# Check that standby1 and standby2 are chosen as sync standbys +# based on their priorities. 
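The remaining scenarios in this file exercise the two modern spellings of synchronous_standby_names: "FIRST num (list)" (equivalent to the bare "num (list)" form used above) picks sync standbys by priority, while "ANY num (list)" asks for quorum confirmation from any num members of the list. Outside the test_sync_state() helper, switching between the two looks roughly like this (sketch; the standby names are the ones created above):

    # Priority-based: the two listed standbys with the highest priority are sync.
    $node_primary->safe_psql('postgres',
        "ALTER SYSTEM SET synchronous_standby_names = 'FIRST 2 (standby1, standby2, standby3)'");
    $node_primary->reload;

    # Quorum-based: any two of the listed standbys must confirm each commit.
    $node_primary->safe_psql('postgres',
        "ALTER SYSTEM SET synchronous_standby_names = 'ANY 2 (standby1, standby2, standby3)'");
    $node_primary->reload;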
+test_sync_state( + $node_primary, qq(standby1|1|sync +standby2|2|sync +standby4|0|async), + 'priority-based sync replication specified by FIRST keyword', + 'FIRST 2(standby1, standby2)'); + +# Check that all the listed standbys are considered as candidates +# for sync standbys in a quorum-based sync replication. +test_sync_state( + $node_primary, qq(standby1|1|quorum +standby2|1|quorum +standby4|0|async), + '2 quorum and 1 async', + 'ANY 2(standby1, standby2)'); + +# Start Standby3 which will be considered in 'quorum' state. +$node_standby_3->start; + +# Check that the setting of 'ANY 2(*)' chooses all standbys as +# candidates for quorum sync standbys. +test_sync_state( + $node_primary, qq(standby1|1|quorum +standby2|1|quorum +standby3|1|quorum +standby4|1|quorum), + 'all standbys are considered as candidates for quorum sync standbys', + 'ANY 2(*)'); + +done_testing(); diff --git a/src/test/recovery/t/008_fsm_truncation.pl b/src/test/recovery/t/008_fsm_truncation.pl new file mode 100644 index 0000000..5be2153 --- /dev/null +++ b/src/test/recovery/t/008_fsm_truncation.pl @@ -0,0 +1,100 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test WAL replay of FSM changes. +# +# FSM changes don't normally need to be WAL-logged, except for truncation. +# The FSM mustn't return a page that doesn't exist (anymore). +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); + +$node_primary->append_conf( + 'postgresql.conf', qq{ +wal_log_hints = on +max_prepared_transactions = 5 +autovacuum = off +}); + +# Create a primary node and its standby, initializing both with some data +# at the same time. 
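Background for this test: the free space map (FSM) is a separate fork of the relation that is normally maintained without WAL logging, which is why truncation is the one FSM change that must be replayed correctly. If the contrib extension pg_freespacemap happens to be available (it is not a requirement of this suite), the map can be inspected directly, roughly like so (illustrative sketch only):

    # Illustrative only: dump per-block free space for the test table.
    # Assumes "CREATE EXTENSION pg_freespacemap" has been run in this database.
    my $fsm_dump = $node_primary->safe_psql('postgres',
        "SELECT blkno, avail FROM pg_freespace('testtab'::regclass) ORDER BY blkno");
    note "FSM contents:\n$fsm_dump";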
+$node_primary->start; + +$node_primary->backup('primary_backup'); +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, 'primary_backup', + has_streaming => 1); +$node_standby->start; + +$node_primary->psql( + 'postgres', qq{ +create table testtab (a int, b char(100)); +insert into testtab select generate_series(1,1000), 'foo'; +insert into testtab select generate_series(1,1000), 'foo'; +delete from testtab where ctid > '(8,0)'; +}); + +# Take a lock on the table to prevent following vacuum from truncating it +$node_primary->psql( + 'postgres', qq{ +begin; +lock table testtab in row share mode; +prepare transaction 'p1'; +}); + +# Vacuum, update FSM without truncation +$node_primary->psql('postgres', 'vacuum verbose testtab'); + +# Force a checkpoint +$node_primary->psql('postgres', 'checkpoint'); + +# Now do some more insert/deletes, another vacuum to ensure full-page writes +# are done +$node_primary->psql( + 'postgres', qq{ +insert into testtab select generate_series(1,1000), 'foo'; +delete from testtab where ctid > '(8,0)'; +vacuum verbose testtab; +}); + +# Ensure all buffers are now clean on the standby +$node_standby->psql('postgres', 'checkpoint'); + +# Release the lock, vacuum again which should lead to truncation +$node_primary->psql( + 'postgres', qq{ +rollback prepared 'p1'; +vacuum verbose testtab; +}); + +$node_primary->psql('postgres', 'checkpoint'); +my $until_lsn = + $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();"); + +# Wait long enough for standby to receive and apply all WAL +my $caughtup_query = + "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +# Promote the standby +$node_standby->promote; +$node_standby->psql('postgres', 'checkpoint'); + +# Restart to discard in-memory copy of FSM +$node_standby->restart; + +# Insert should work on standby +is( $node_standby->psql( + 'postgres', + qq{insert into testtab select generate_series(1,1000), 'foo';}), + 0, + 'INSERT succeeds with truncated relation FSM'); + +done_testing(); diff --git a/src/test/recovery/t/009_twophase.pl b/src/test/recovery/t/009_twophase.pl new file mode 100644 index 0000000..3e25b8c --- /dev/null +++ b/src/test/recovery/t/009_twophase.pl @@ -0,0 +1,484 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Tests dedicated to two-phase commit in recovery +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $psql_out = ''; +my $psql_rc = ''; + +sub configure_and_reload +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($node, $parameter) = @_; + my $name = $node->name; + + $node->append_conf( + 'postgresql.conf', qq( + $parameter + )); + $node->psql('postgres', "SELECT pg_reload_conf()", stdout => \$psql_out); + is($psql_out, 't', "reload node $name with $parameter"); + return; +} + +# Set up two nodes, which will alternately be primary and replication standby. 
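Every scenario below follows the same outline: prepare a transaction, disturb the cluster (clean restart, hard kill, or promotion of the standby), then verify that COMMIT PREPARED / ROLLBACK PREPARED still succeed and that pg_prepared_xacts shows the expected entries. Reduced to its essentials, the outline is (sketch only, reusing this file's table and GID naming; $node stands for whichever node is primary at the time):

    $node->psql('postgres', q{
        BEGIN;
        INSERT INTO t_009_tbl VALUES (0, 'sketch row');
        PREPARE TRANSACTION 'sketch_gid';  -- survives the session that created it
    });
    $node->stop;        # or ->teardown_node for an immediate kill
    $node->start;
    my $rc = $node->psql('postgres', "COMMIT PREPARED 'sketch_gid'");
    is($rc, 0, 'prepared transaction still commits after the restart');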
+ +# Setup london node +my $node_london = PostgreSQL::Test::Cluster->new("london"); +$node_london->init(allows_streaming => 1); +$node_london->append_conf( + 'postgresql.conf', qq( + max_prepared_transactions = 10 + log_checkpoints = true +)); +$node_london->start; +$node_london->backup('london_backup'); + +# Setup paris node +my $node_paris = PostgreSQL::Test::Cluster->new('paris'); +$node_paris->init_from_backup($node_london, 'london_backup', + has_streaming => 1); +$node_paris->start; + +# Switch to synchronous replication in both directions +configure_and_reload($node_london, "synchronous_standby_names = 'paris'"); +configure_and_reload($node_paris, "synchronous_standby_names = 'london'"); + +# Set up nonce names for current primary and standby nodes +note "Initially, london is primary and paris is standby"; +my ($cur_primary, $cur_standby) = ($node_london, $node_paris); +my $cur_primary_name = $cur_primary->name; + +# Create table we'll use in the test transactions +$cur_primary->psql('postgres', "CREATE TABLE t_009_tbl (id int, msg text)"); + +############################################################################### +# Check that we can commit and abort transaction after soft restart. +# Here checkpoint happens before shutdown and no WAL replay will occur at next +# startup. In this case postgres re-creates shared-memory state from twophase +# files. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (1, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (2, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_1'; + BEGIN; + INSERT INTO t_009_tbl VALUES (3, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (4, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_2';"); +$cur_primary->stop; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_1'"); +is($psql_rc, '0', 'Commit prepared transaction after restart'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_2'"); +is($psql_rc, '0', 'Rollback prepared transaction after restart'); + +############################################################################### +# Check that we can commit and abort after a hard restart. +# At next startup, WAL replay will re-create shared memory state for prepared +# transaction using dedicated WAL records. +############################################################################### + +$cur_primary->psql( + 'postgres', " + CHECKPOINT; + BEGIN; + INSERT INTO t_009_tbl VALUES (5, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (6, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_3'; + BEGIN; + INSERT INTO t_009_tbl VALUES (7, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (8, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_4';"); +$cur_primary->teardown_node; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_3'"); +is($psql_rc, '0', 'Commit prepared transaction after teardown'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_4'"); +is($psql_rc, '0', 'Rollback prepared transaction after teardown'); + +############################################################################### +# Check that WAL replay can handle several transactions with same GID name. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + CHECKPOINT; + BEGIN; + INSERT INTO t_009_tbl VALUES (9, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (10, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_5'; + COMMIT PREPARED 'xact_009_5'; + BEGIN; + INSERT INTO t_009_tbl VALUES (11, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (12, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_5';"); +$cur_primary->teardown_node; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_5'"); +is($psql_rc, '0', 'Replay several transactions with same GID'); + +############################################################################### +# Check that WAL replay cleans up its shared memory state and releases locks +# while replaying transaction commits. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (13, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (14, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_6'; + COMMIT PREPARED 'xact_009_6';"); +$cur_primary->teardown_node; +$cur_primary->start; +$psql_rc = $cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (15, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (16, 'issued to ${cur_primary_name}'); + -- This prepare can fail due to conflicting GID or locks conflicts if + -- replay did not fully cleanup its state on previous commit. + PREPARE TRANSACTION 'xact_009_7';"); +is($psql_rc, '0', "Cleanup of shared memory state for 2PC commit"); + +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_7'"); + +############################################################################### +# Check that WAL replay will cleanup its shared memory state on running standby. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (17, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (18, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_8'; + COMMIT PREPARED 'xact_009_8';"); +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', + "Cleanup of shared memory state on running standby without checkpoint"); + +############################################################################### +# Same as in previous case, but let's force checkpoint on standby between +# prepare and commit to use on-disk twophase files. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (19, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (20, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_9';"); +$cur_standby->psql('postgres', "CHECKPOINT"); +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_9'"); +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', + "Cleanup of shared memory state on running standby after checkpoint"); + +############################################################################### +# Check that prepared transactions can be committed on promoted standby. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (21, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (22, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_10';"); +$cur_primary->teardown_node; +$cur_standby->promote; + +# change roles +note "Now paris is primary and london is standby"; +($cur_primary, $cur_standby) = ($node_paris, $node_london); +$cur_primary_name = $cur_primary->name; + +# because london is not running at this point, we can't use syncrep commit +# on this command +$psql_rc = $cur_primary->psql('postgres', + "SET synchronous_commit = off; COMMIT PREPARED 'xact_009_10'"); +is($psql_rc, '0', "Restore of prepared transaction on promoted standby"); + +# restart old primary as new standby +$cur_standby->enable_streaming($cur_primary); +$cur_standby->start; + +############################################################################### +# Check that prepared transactions are replayed after soft restart of standby +# while primary is down. Since standby knows that primary is down it uses a +# different code path on startup to ensure that the status of transactions is +# consistent. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (23, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (24, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_11';"); +$cur_primary->stop; +$cur_standby->restart; +$cur_standby->promote; + +# change roles +note "Now london is primary and paris is standby"; +($cur_primary, $cur_standby) = ($node_london, $node_paris); +$cur_primary_name = $cur_primary->name; + +$cur_primary->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '1', + "Restore prepared transactions from files with primary down"); + +# restart old primary as new standby +$cur_standby->enable_streaming($cur_primary); +$cur_standby->start; + +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_11'"); + +############################################################################### +# Check that prepared transactions are correctly replayed after standby hard +# restart while primary is down. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + INSERT INTO t_009_tbl VALUES (25, 'issued to ${cur_primary_name}'); + SAVEPOINT s1; + INSERT INTO t_009_tbl VALUES (26, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_12'; + "); +$cur_primary->stop; +$cur_standby->teardown_node; +$cur_standby->start; +$cur_standby->promote; + +# change roles +note "Now paris is primary and london is standby"; +($cur_primary, $cur_standby) = ($node_paris, $node_london); +$cur_primary_name = $cur_primary->name; + +$cur_primary->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '1', + "Restore prepared transactions from records with primary down"); + +# restart old primary as new standby +$cur_standby->enable_streaming($cur_primary); +$cur_standby->start; + +$cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_12'"); + +############################################################################### +# Check for a lock conflict between prepared transaction with DDL inside and +# replay of XLOG_STANDBY_LOCK wal record. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + CREATE TABLE t_009_tbl2 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl2 VALUES (27, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_13'; + -- checkpoint will issue XLOG_STANDBY_LOCK that can conflict with lock + -- held by 'create table' statement + CHECKPOINT; + COMMIT PREPARED 'xact_009_13';"); + +# Ensure that last transaction is replayed on standby. +my $cur_primary_lsn = + $cur_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); +my $caughtup_query = + "SELECT '$cur_primary_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$cur_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM t_009_tbl2", + stdout => \$psql_out); +is($psql_out, '1', "Replay prepared transaction with DDL"); + +############################################################################### +# Check recovery of prepared transaction with DDL inside after a hard restart +# of the primary. +############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + CREATE TABLE t_009_tbl3 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl3 VALUES (28, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_14'; + BEGIN; + CREATE TABLE t_009_tbl4 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl4 VALUES (29, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_15';"); + +$cur_primary->teardown_node; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_14'"); +is($psql_rc, '0', 'Commit prepared transaction after teardown'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_15'"); +is($psql_rc, '0', 'Rollback prepared transaction after teardown'); + +############################################################################### +# Check recovery of prepared transaction with DDL inside after a soft restart +# of the primary. 
+############################################################################### + +$cur_primary->psql( + 'postgres', " + BEGIN; + CREATE TABLE t_009_tbl5 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl5 VALUES (30, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_16'; + BEGIN; + CREATE TABLE t_009_tbl6 (id int, msg text); + SAVEPOINT s1; + INSERT INTO t_009_tbl6 VALUES (31, 'issued to ${cur_primary_name}'); + PREPARE TRANSACTION 'xact_009_17';"); + +$cur_primary->stop; +$cur_primary->start; + +$psql_rc = $cur_primary->psql('postgres', "COMMIT PREPARED 'xact_009_16'"); +is($psql_rc, '0', 'Commit prepared transaction after restart'); + +$psql_rc = $cur_primary->psql('postgres', "ROLLBACK PREPARED 'xact_009_17'"); +is($psql_rc, '0', 'Rollback prepared transaction after restart'); + +############################################################################### +# Verify expected data appears on both servers. +############################################################################### + +$cur_primary->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', "No uncommitted prepared transactions on primary"); + +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl ORDER BY id", + stdout => \$psql_out); +is( $psql_out, qq{1|issued to london +2|issued to london +5|issued to london +6|issued to london +9|issued to london +10|issued to london +11|issued to london +12|issued to london +13|issued to london +14|issued to london +15|issued to london +16|issued to london +17|issued to london +18|issued to london +19|issued to london +20|issued to london +21|issued to london +22|issued to london +23|issued to paris +24|issued to paris +25|issued to london +26|issued to london}, + "Check expected t_009_tbl data on primary"); + +$cur_primary->psql( + 'postgres', + "SELECT * FROM t_009_tbl2", + stdout => \$psql_out); +is( $psql_out, + qq{27|issued to paris}, + "Check expected t_009_tbl2 data on primary"); + +$cur_standby->psql( + 'postgres', + "SELECT count(*) FROM pg_prepared_xacts", + stdout => \$psql_out); +is($psql_out, '0', "No uncommitted prepared transactions on standby"); + +$cur_standby->psql( + 'postgres', + "SELECT * FROM t_009_tbl ORDER BY id", + stdout => \$psql_out); +is( $psql_out, qq{1|issued to london +2|issued to london +5|issued to london +6|issued to london +9|issued to london +10|issued to london +11|issued to london +12|issued to london +13|issued to london +14|issued to london +15|issued to london +16|issued to london +17|issued to london +18|issued to london +19|issued to london +20|issued to london +21|issued to london +22|issued to london +23|issued to paris +24|issued to paris +25|issued to london +26|issued to london}, + "Check expected t_009_tbl data on standby"); + +$cur_standby->psql( + 'postgres', + "SELECT * FROM t_009_tbl2", + stdout => \$psql_out); +is( $psql_out, + qq{27|issued to paris}, + "Check expected t_009_tbl2 data on standby"); + +done_testing(); diff --git a/src/test/recovery/t/010_logical_decoding_timelines.pl b/src/test/recovery/t/010_logical_decoding_timelines.pl new file mode 100644 index 0000000..135fb1a --- /dev/null +++ b/src/test/recovery/t/010_logical_decoding_timelines.pl @@ -0,0 +1,202 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Demonstrate that logical can follow timeline switches. 
+# +# Logical replication slots can follow timeline switches but it's +# normally not possible to have a logical slot on a replica where +# promotion and a timeline switch can occur. The only ways +# we can create that circumstance are: +# +# * By doing a filesystem-level copy of the DB, since pg_basebackup +# excludes pg_replslot but we can copy it directly; or +# +# * by creating a slot directly at the C level on the replica and +# advancing it as we go using the low level APIs. It can't be done +# from SQL since logical decoding isn't allowed on replicas. +# +# This module uses the first approach to show that timeline following +# on a logical slot works. +# +# (For convenience, it also tests some recovery-related operations +# on logical slots). +# +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Copy; +use IPC::Run (); +use Scalar::Util qw(blessed); + +my ($stdout, $stderr, $ret); + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1, has_archiving => 1); +$node_primary->append_conf( + 'postgresql.conf', q[ +wal_level = 'logical' +max_replication_slots = 3 +max_wal_senders = 2 +log_min_messages = 'debug2' +hot_standby_feedback = on +wal_receiver_status_interval = 1 +]); +$node_primary->dump_info; +$node_primary->start; + +note "testing logical timeline following with a filesystem-level copy"; + +$node_primary->safe_psql('postgres', + "SELECT pg_create_logical_replication_slot('before_basebackup', 'test_decoding');" +); +$node_primary->safe_psql('postgres', "CREATE TABLE decoding(blah text);"); +$node_primary->safe_psql('postgres', + "INSERT INTO decoding(blah) VALUES ('beforebb');"); + +# We also want to verify that DROP DATABASE on a standby with a logical +# slot works. This isn't strictly related to timeline following, but +# the only way to get a logical slot on a standby right now is to use +# the same physical copy trick, so: +$node_primary->safe_psql('postgres', 'CREATE DATABASE dropme;'); +$node_primary->safe_psql('dropme', + "SELECT pg_create_logical_replication_slot('dropme_slot', 'test_decoding');" +); + +$node_primary->safe_psql('postgres', 'CHECKPOINT;'); + +my $backup_name = 'b1'; +$node_primary->stop(); +$node_primary->backup_fs_cold($backup_name); +$node_primary->start(); + +$node_primary->safe_psql('postgres', + q[SELECT pg_create_physical_replication_slot('phys_slot');]); + +my $node_replica = PostgreSQL::Test::Cluster->new('replica'); +$node_replica->init_from_backup( + $node_primary, $backup_name, + has_streaming => 1, + has_restoring => 1); +$node_replica->append_conf('postgresql.conf', + q[primary_slot_name = 'phys_slot']); + +$node_replica->start; + +# If we drop 'dropme' on the primary, the standby should drop the +# db and associated slot. +is($node_primary->psql('postgres', 'DROP DATABASE dropme'), + 0, 'dropped DB with logical slot OK on primary'); +$node_primary->wait_for_catchup($node_replica); +is( $node_replica->safe_psql( + 'postgres', q[SELECT 1 FROM pg_database WHERE datname = 'dropme']), + '', + 'dropped DB dropme on standby'); +is($node_primary->slot('dropme_slot')->{'slot_name'}, + undef, 'logical slot was actually dropped on standby'); + +# Back to testing failover... 
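Concretely, "following the timeline switch" means that after the replica is promoted, the before_basebackup slot can still be decoded and yields rows written on both timelines. The decoding calls used below reduce to SQL like the following (sketch; note the peek/get distinction, which the test relies on later when re-reading the same changes over the walsender protocol):

    # pg_logical_slot_peek_changes() leaves the slot's position untouched;
    # pg_logical_slot_get_changes() would consume (advance past) the changes.
    my $peeked = $node_replica->safe_psql('postgres',
        "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, "
          . "'include-xids', '0', 'skip-empty-xacts', '1')");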
+$node_primary->safe_psql('postgres', + "SELECT pg_create_logical_replication_slot('after_basebackup', 'test_decoding');" +); +$node_primary->safe_psql('postgres', + "INSERT INTO decoding(blah) VALUES ('afterbb');"); +$node_primary->safe_psql('postgres', 'CHECKPOINT;'); + +# Verify that only the before base_backup slot is on the replica +$stdout = $node_replica->safe_psql('postgres', + 'SELECT slot_name FROM pg_replication_slots ORDER BY slot_name'); +is($stdout, 'before_basebackup', + 'Expected to find only slot before_basebackup on replica'); + +# Examine the physical slot the replica uses to stream changes +# from the primary to make sure its hot_standby_feedback +# has locked in a catalog_xmin on the physical slot, and that +# any xmin is >= the catalog_xmin +$node_primary->poll_query_until( + 'postgres', q[ + SELECT catalog_xmin IS NOT NULL + FROM pg_replication_slots + WHERE slot_name = 'phys_slot' + ]) or die "slot's catalog_xmin never became set"; + +my $phys_slot = $node_primary->slot('phys_slot'); +isnt($phys_slot->{'xmin'}, '', 'xmin assigned on physical slot of primary'); +isnt($phys_slot->{'catalog_xmin'}, + '', 'catalog_xmin assigned on physical slot of primary'); + +# Ignore wrap-around here, we're on a new cluster: +cmp_ok( + $phys_slot->{'xmin'}, '>=', + $phys_slot->{'catalog_xmin'}, + 'xmin on physical slot must not be lower than catalog_xmin'); + +$node_primary->safe_psql('postgres', 'CHECKPOINT'); +$node_primary->wait_for_catchup($node_replica, 'write'); + +# Boom, crash +$node_primary->stop('immediate'); + +$node_replica->promote; + +$node_replica->safe_psql('postgres', + "INSERT INTO decoding(blah) VALUES ('after failover');"); + +# Shouldn't be able to read from slot created after base backup +($ret, $stdout, $stderr) = $node_replica->psql('postgres', + "SELECT data FROM pg_logical_slot_peek_changes('after_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');" +); +is($ret, 3, 'replaying from after_basebackup slot fails'); +like( + $stderr, + qr/replication slot "after_basebackup" does not exist/, + 'after_basebackup slot missing'); + +# Should be able to read from slot created before base backup +($ret, $stdout, $stderr) = $node_replica->psql( + 'postgres', + "SELECT data FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL, 'include-xids', '0', 'skip-empty-xacts', '1');", + timeout => $PostgreSQL::Test::Utils::timeout_default); +is($ret, 0, 'replay from slot before_basebackup succeeds'); + +my $final_expected_output_bb = q(BEGIN +table public.decoding: INSERT: blah[text]:'beforebb' +COMMIT +BEGIN +table public.decoding: INSERT: blah[text]:'afterbb' +COMMIT +BEGIN +table public.decoding: INSERT: blah[text]:'after failover' +COMMIT); +is($stdout, $final_expected_output_bb, + 'decoded expected data from slot before_basebackup'); +is($stderr, '', 'replay from slot before_basebackup produces no stderr'); + +# So far we've peeked the slots, so when we fetch the same info over +# pg_recvlogical we should get complete results. First, find out the commit lsn +# of the last transaction. There's no max(pg_lsn), so: + +my $endpos = $node_replica->safe_psql('postgres', + "SELECT lsn FROM pg_logical_slot_peek_changes('before_basebackup', NULL, NULL) ORDER BY lsn DESC LIMIT 1;" +); + +# now use the walsender protocol to peek the slot changes and make sure we see +# the same results. 
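pg_recvlogical_upto() below is a PostgreSQL::Test::Cluster helper that drives the pg_recvlogical client until the requested end LSN is reached. Run by hand, an equivalent invocation would look roughly like this (sketch; the switches are standard pg_recvlogical options, but the helper remains the authoritative interface for the test):

    # Approximate command-line equivalent of the helper call below.
    my ($out, $err);
    IPC::Run::run(
        [
            'pg_recvlogical', '-d', $node_replica->connstr('postgres'),
            '--slot', 'before_basebackup', '--start',
            '--endpos', $endpos, '--no-loop', '-f', '-',
            '-o', 'include-xids=0', '-o', 'skip-empty-xacts=1'
        ],
        '>', \$out, '2>', \$err);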
+ +$stdout = $node_replica->pg_recvlogical_upto( + 'postgres', 'before_basebackup', + $endpos, $PostgreSQL::Test::Utils::timeout_default, + 'include-xids' => '0', + 'skip-empty-xacts' => '1'); + +# walsender likes to add a newline +chomp($stdout); +is($stdout, $final_expected_output_bb, + 'got same output from walsender via pg_recvlogical on before_basebackup'); + +$node_replica->teardown_node(); + +done_testing(); diff --git a/src/test/recovery/t/012_subtransactions.pl b/src/test/recovery/t/012_subtransactions.pl new file mode 100644 index 0000000..f807509 --- /dev/null +++ b/src/test/recovery/t/012_subtransactions.pl @@ -0,0 +1,218 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Tests dedicated to subtransactions in recovery +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Setup primary node +my $node_primary = PostgreSQL::Test::Cluster->new("primary"); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', qq( + max_prepared_transactions = 10 + log_checkpoints = true +)); +$node_primary->start; +$node_primary->backup('primary_backup'); +$node_primary->psql('postgres', "CREATE TABLE t_012_tbl (id int)"); + +# Setup standby node +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, 'primary_backup', + has_streaming => 1); +$node_standby->start; + +# Switch to synchronous replication +$node_primary->append_conf( + 'postgresql.conf', qq( + synchronous_standby_names = '*' +)); +$node_primary->psql('postgres', "SELECT pg_reload_conf()"); + +my $psql_out = ''; +my $psql_rc = ''; + +############################################################################### +# Check that replay will correctly set SUBTRANS and properly advance nextXid +# so that it won't conflict with savepoint xids. 
+############################################################################### + +$node_primary->psql( + 'postgres', " + BEGIN; + DELETE FROM t_012_tbl; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s1; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s2; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s3; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s4; + INSERT INTO t_012_tbl VALUES (43); + SAVEPOINT s5; + INSERT INTO t_012_tbl VALUES (43); + PREPARE TRANSACTION 'xact_012_1'; + CHECKPOINT;"); + +$node_primary->stop; +$node_primary->start; +$node_primary->psql( + 'postgres', " + -- here we can get xid of previous savepoint if nextXid + -- wasn't properly advanced + BEGIN; + INSERT INTO t_012_tbl VALUES (142); + ROLLBACK; + COMMIT PREPARED 'xact_012_1';"); + +$node_primary->psql( + 'postgres', + "SELECT count(*) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '6', "Check nextXid handling for prepared subtransactions"); + +############################################################################### +# Check that replay will correctly set 2PC with more than +# PGPROC_MAX_CACHED_SUBXIDS subtransactions and also show data properly +# on promotion +############################################################################### +$node_primary->psql('postgres', "DELETE FROM t_012_tbl"); + +# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql +$node_primary->psql( + 'postgres', " + CREATE OR REPLACE FUNCTION hs_subxids (n integer) + RETURNS void + LANGUAGE plpgsql + AS \$\$ + BEGIN + IF n <= 0 THEN RETURN; END IF; + INSERT INTO t_012_tbl VALUES (n); + PERFORM hs_subxids(n - 1); + RETURN; + EXCEPTION WHEN raise_exception THEN NULL; END; + \$\$;"); +$node_primary->psql( + 'postgres', " + BEGIN; + SELECT hs_subxids(127); + COMMIT;"); +$node_primary->wait_for_catchup($node_standby); +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); +$node_primary->stop; +$node_standby->promote; + +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); + +# restore state +($node_primary, $node_standby) = ($node_standby, $node_primary); +$node_standby->enable_streaming($node_primary); +$node_standby->start; +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); + +$node_primary->psql('postgres', "DELETE FROM t_012_tbl"); + +# Function borrowed from src/test/regress/sql/hs_primary_extremes.sql +$node_primary->psql( + 'postgres', " + CREATE OR REPLACE FUNCTION hs_subxids (n integer) + RETURNS void + LANGUAGE plpgsql + AS \$\$ + BEGIN + IF n <= 0 THEN RETURN; END IF; + INSERT INTO t_012_tbl VALUES (n); + PERFORM hs_subxids(n - 1); + RETURN; + EXCEPTION WHEN raise_exception THEN NULL; END; + \$\$;"); +$node_primary->psql( + 'postgres', " + BEGIN; + SELECT hs_subxids(127); + PREPARE TRANSACTION 'xact_012_1';"); +$node_primary->wait_for_catchup($node_standby); +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); +$node_primary->stop; +$node_standby->promote; + +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); + +# restore state +($node_primary, $node_standby) = ($node_standby, $node_primary); 
+$node_standby->enable_streaming($node_primary); +$node_standby->start; +$psql_rc = $node_primary->psql('postgres', "COMMIT PREPARED 'xact_012_1'"); +is($psql_rc, '0', + "Restore of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby" +); + +$node_primary->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '8128', "Visible"); + +$node_primary->psql('postgres', "DELETE FROM t_012_tbl"); +$node_primary->psql( + 'postgres', " + BEGIN; + SELECT hs_subxids(201); + PREPARE TRANSACTION 'xact_012_1';"); +$node_primary->wait_for_catchup($node_standby); +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); +$node_primary->stop; +$node_standby->promote; + +$node_standby->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); + +# restore state +($node_primary, $node_standby) = ($node_standby, $node_primary); +$node_standby->enable_streaming($node_primary); +$node_standby->start; +$psql_rc = $node_primary->psql('postgres', "ROLLBACK PREPARED 'xact_012_1'"); +is($psql_rc, '0', + "Rollback of PGPROC_MAX_CACHED_SUBXIDS+ prepared transaction on promoted standby" +); + +$node_primary->psql( + 'postgres', + "SELECT coalesce(sum(id),-1) FROM t_012_tbl", + stdout => \$psql_out); +is($psql_out, '-1', "Not visible"); + +done_testing(); diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl new file mode 100644 index 0000000..c22844d --- /dev/null +++ b/src/test/recovery/t/013_crash_restart.pl @@ -0,0 +1,250 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# +# Tests restarts of postgres due to crashes of a subprocess. +# +# Two longer-running psql subprocesses are used: One to kill a +# backend, triggering a crash-restart cycle, one to detect when +# postmaster noticed the backend died. The second backend is +# necessary because it's otherwise hard to determine if postmaster is +# still accepting new sessions (because it hasn't noticed that the +# backend died), or because it's already restarted. +# +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); + +my $node = PostgreSQL::Test::Cluster->new('primary'); +$node->init(allows_streaming => 1); +$node->start(); + +# by default PostgreSQL::Test::Cluster doesn't restart after a crash +$node->safe_psql( + 'postgres', + q[ALTER SYSTEM SET restart_after_crash = 1; + ALTER SYSTEM SET log_connections = 1; + SELECT pg_reload_conf();]); + +# Run psql, keeping session alive, so we have an alive backend to kill. +my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +my $killme = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$killme_stdin, + '>', + \$killme_stdout, + '2>', + \$killme_stderr, + $psql_timeout); + +# Need a second psql to check if crash-restart happened. 
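The monitor session set up next is wired exactly like $killme above: a long-lived psql fed through its stdin, with pump_until() from PostgreSQL::Test::Utils used to wait until the expected output (or error) appears. The core of that pattern, stripped of the test-specific details, is (sketch):

    my ($in, $out, $err) = ('', '', '');
    my $session = IPC::Run::start(
        [
            'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-',
            '-d', $node->connstr('postgres')
        ],
        '<', \$in, '>', \$out, '2>', \$err, $psql_timeout);

    $in .= "SELECT 'marker';\n";    # queue a command on the session's stdin
    pump_until($session, $psql_timeout, \$out, qr/marker/m);    # wait for its output
    ($out, $err) = ('', '');        # reset captured output before the next step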
+my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', ''); +my $monitor = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$monitor_stdin, + '>', + \$monitor_stdout, + '2>', + \$monitor_stderr, + $psql_timeout); + +#create table, insert row that should survive +$killme_stdin .= q[ +CREATE TABLE alive(status text); +INSERT INTO alive VALUES($$committed-before-sigquit$$); +SELECT pg_backend_pid(); +]; +ok( pump_until( + $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + 'acquired pid for SIGQUIT'); +my $pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +#insert a row that should *not* survive, due to in-progress xact +$killme_stdin .= q[ +BEGIN; +INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status; +]; +ok( pump_until( + $killme, $psql_timeout, + \$killme_stdout, qr/in-progress-before-sigquit/m), + 'inserted in-progress-before-sigquit'); +$killme_stdout = ''; +$killme_stderr = ''; + + +# Start longrunning query in second session; its failure will signal that +# crash-restart has occurred. The initial wait for the trivial select is to +# be sure that psql successfully connected to backend. +$monitor_stdin .= q[ +SELECT $$psql-connected$$; +SELECT pg_sleep(3600); +]; +ok( pump_until( + $monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m), + 'monitor connected'); +$monitor_stdout = ''; +$monitor_stderr = ''; + +# kill once with QUIT - we expect psql to exit, while emitting error message first +my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'QUIT', $pid); + +# Exactly process should have been alive to be killed +is($ret, 0, "killed process with SIGQUIT"); + +# Check that psql sees the killed backend as having been terminated +$killme_stdin .= q[ +SELECT 1; +]; +ok( pump_until( + $killme, + $psql_timeout, + \$killme_stderr, + qr/WARNING: terminating connection because of unexpected SIGQUIT signal|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql query died successfully after SIGQUIT"); +$killme_stderr = ''; +$killme_stdout = ''; +$killme->finish; + +# Wait till server restarts - we should get the WARNING here, but +# sometimes the server is unable to send that, if interrupted while +# sending. 
+ok( pump_until( + $monitor, + $psql_timeout, + \$monitor_stderr, + qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql monitor died successfully after SIGQUIT"); +$monitor->finish; + +# Wait till server restarts +is($node->poll_query_until('postgres', undef, ''), + "1", "reconnected after SIGQUIT"); + + +# restart psql processes, now that the crash cycle finished +($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +$killme->run(); +($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', ''); +$monitor->run(); + + +# Acquire pid of new backend +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok( pump_until( + $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + "acquired pid for SIGKILL"); +$pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Insert test rows +$killme_stdin .= q[ +INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status; +BEGIN; +INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status; +]; +ok( pump_until( + $killme, $psql_timeout, + \$killme_stdout, qr/in-progress-before-sigkill/m), + 'inserted in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Re-start longrunning query in second session; its failure will signal that +# crash-restart has occurred. The initial wait for the trivial select is to +# be sure that psql successfully connected to backend. +$monitor_stdin .= q[ +SELECT $$psql-connected$$; +SELECT pg_sleep(3600); +]; +ok( pump_until( + $monitor, $psql_timeout, \$monitor_stdout, qr/psql-connected/m), + 'monitor connected'); +$monitor_stdout = ''; +$monitor_stderr = ''; + + +# kill with SIGKILL this time - we expect the backend to exit, without +# being able to emit an error message +$ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, "killed process with KILL"); + +# Check that psql sees the server as being terminated. No WARNING, +# because signal handlers aren't being run on SIGKILL. +$killme_stdin .= q[ +SELECT 1; +]; +ok( pump_until( + $killme, + $psql_timeout, + \$killme_stderr, + qr/server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql query died successfully after SIGKILL"); +$killme->finish; + +# Wait till server restarts - we should get the WARNING here, but +# sometimes the server is unable to send that, if interrupted while +# sending. 
+ok( pump_until( + $monitor, + $psql_timeout, + \$monitor_stderr, + qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "psql monitor died successfully after SIGKILL"); +$monitor->finish; + +# Wait till server restarts +is($node->poll_query_until('postgres', undef, ''), + "1", "reconnected after SIGKILL"); + +# Make sure the committed rows survived, in-progress ones not +is( $node->safe_psql('postgres', 'SELECT * FROM alive'), + "committed-before-sigquit\ncommitted-before-sigkill", + 'data survived'); + +is( $node->safe_psql( + 'postgres', + 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status' + ), + 'before-orderly-restart', + 'can still write after crash restart'); + +# Just to be sure, check that an orderly restart now still works +$node->restart(); + +is( $node->safe_psql('postgres', 'SELECT * FROM alive'), + "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart", + 'data survived'); + +is( $node->safe_psql( + 'postgres', + 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status' + ), + 'after-orderly-restart', + 'can still write after orderly restart'); + +$node->stop(); + +done_testing(); diff --git a/src/test/recovery/t/014_unlogged_reinit.pl b/src/test/recovery/t/014_unlogged_reinit.pl new file mode 100644 index 0000000..7289510 --- /dev/null +++ b/src/test/recovery/t/014_unlogged_reinit.pl @@ -0,0 +1,134 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Tests that unlogged tables are properly reinitialized after a crash. +# +# The behavior should be the same when restoring from a backup, but +# that is not tested here. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('main'); + +$node->init; +$node->start; +my $pgdata = $node->data_dir; + +# Create an unlogged table and an unlogged sequence to test that forks +# other than init are not copied. +$node->safe_psql('postgres', 'CREATE UNLOGGED TABLE base_unlogged (id int)'); +$node->safe_psql('postgres', 'CREATE UNLOGGED SEQUENCE seq_unlogged'); + +my $baseUnloggedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('base_unlogged')}); +my $seqUnloggedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('seq_unlogged')}); + +# Test that main and init forks exist. +ok(-f "$pgdata/${baseUnloggedPath}_init", 'table init fork exists'); +ok(-f "$pgdata/$baseUnloggedPath", 'table main fork exists'); +ok(-f "$pgdata/${seqUnloggedPath}_init", 'sequence init fork exists'); +ok(-f "$pgdata/$seqUnloggedPath", 'sequence main fork exists'); + +# Test the sequence +is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"), + 1, 'sequence nextval'); +is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"), + 2, 'sequence nextval again'); + +# Create an unlogged table in a tablespace. + +my $tablespaceDir = PostgreSQL::Test::Utils::tempdir; + +$node->safe_psql('postgres', + "CREATE TABLESPACE ts1 LOCATION '$tablespaceDir'"); +$node->safe_psql('postgres', + 'CREATE UNLOGGED TABLE ts1_unlogged (id int) TABLESPACE ts1'); + +my $ts1UnloggedPath = $node->safe_psql('postgres', + q{select pg_relation_filepath('ts1_unlogged')}); + +# Test that main and init forks exist. 
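These file-level assertions rely on the on-disk naming of relation forks: given pg_relation_filepath() = <path>, the main fork is <path> itself and the other forks are <path>_init, <path>_fsm and <path>_vm. The per-fork checks used throughout this file could be folded into one helper along these lines (sketch, equivalent to the individual ok() calls):

    sub check_forks
    {
        my ($pgdata, $path, %expected) = @_;
        while (my ($fork, $should_exist) = each %expected)
        {
            my $file = $fork eq 'main' ? "$pgdata/$path" : "$pgdata/${path}_$fork";
            ok($should_exist ? -f $file : !-f $file,
                ($should_exist ? "$fork fork exists" : "$fork fork absent")
                  . " for $path");
        }
    }

    # e.g. right after creation only the main and init forks are expected:
    check_forks($pgdata, $ts1UnloggedPath, main => 1, init => 1);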
+ok(-f "$pgdata/${ts1UnloggedPath}_init", 'init fork in tablespace exists'); +ok(-f "$pgdata/$ts1UnloggedPath", 'main fork in tablespace exists'); + +# Create more unlogged sequences for testing. +$node->safe_psql('postgres', 'CREATE UNLOGGED SEQUENCE seq_unlogged2'); +# This rewrites the sequence relation in AlterSequence(). +$node->safe_psql('postgres', 'ALTER SEQUENCE seq_unlogged2 INCREMENT 2'); +$node->safe_psql('postgres', "SELECT nextval('seq_unlogged2')"); + +$node->safe_psql('postgres', + 'CREATE UNLOGGED TABLE tab_seq_unlogged3 (a int GENERATED ALWAYS AS IDENTITY)' +); +# This rewrites the sequence relation in ResetSequence(). +$node->safe_psql('postgres', 'TRUNCATE tab_seq_unlogged3 RESTART IDENTITY'); +$node->safe_psql('postgres', 'INSERT INTO tab_seq_unlogged3 DEFAULT VALUES'); + +# Crash the postmaster. +$node->stop('immediate'); + +# Write fake forks to test that they are removed during recovery. +append_to_file("$pgdata/${baseUnloggedPath}_vm", 'TEST_VM'); +append_to_file("$pgdata/${baseUnloggedPath}_fsm", 'TEST_FSM'); + +# Remove main fork to test that it is recopied from init. +unlink("$pgdata/${baseUnloggedPath}") + or BAIL_OUT("could not remove \"${baseUnloggedPath}\": $!"); +unlink("$pgdata/${seqUnloggedPath}") + or BAIL_OUT("could not remove \"${seqUnloggedPath}\": $!"); + +# the same for the tablespace +append_to_file("$pgdata/${ts1UnloggedPath}_vm", 'TEST_VM'); +append_to_file("$pgdata/${ts1UnloggedPath}_fsm", 'TEST_FSM'); +unlink("$pgdata/${ts1UnloggedPath}") + or BAIL_OUT("could not remove \"${ts1UnloggedPath}\": $!"); + +$node->start; + +# check unlogged table in base +ok( -f "$pgdata/${baseUnloggedPath}_init", + 'table init fork in base still exists'); +ok(-f "$pgdata/$baseUnloggedPath", + 'table main fork in base recreated at startup'); +ok(!-f "$pgdata/${baseUnloggedPath}_vm", + 'vm fork in base removed at startup'); +ok( !-f "$pgdata/${baseUnloggedPath}_fsm", + 'fsm fork in base removed at startup'); + +# check unlogged sequence +ok(-f "$pgdata/${seqUnloggedPath}_init", 'sequence init fork still exists'); +ok(-f "$pgdata/$seqUnloggedPath", 'sequence main fork recreated at startup'); + +# Test the sequence after restart +is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"), + 1, 'sequence nextval after restart'); +is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged')"), + 2, 'sequence nextval after restart again'); + +# check unlogged table in tablespace +ok( -f "$pgdata/${ts1UnloggedPath}_init", + 'init fork still exists in tablespace'); +ok(-f "$pgdata/$ts1UnloggedPath", + 'main fork in tablespace recreated at startup'); +ok( !-f "$pgdata/${ts1UnloggedPath}_vm", + 'vm fork in tablespace removed at startup'); +ok( !-f "$pgdata/${ts1UnloggedPath}_fsm", + 'fsm fork in tablespace removed at startup'); + +# Test other sequences +is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged2')"), + 1, 'altered sequence nextval after restart'); +is($node->safe_psql('postgres', "SELECT nextval('seq_unlogged2')"), + 3, 'altered sequence nextval after restart again'); + +$node->safe_psql('postgres', + "INSERT INTO tab_seq_unlogged3 VALUES (DEFAULT), (DEFAULT)"); +is($node->safe_psql('postgres', "SELECT * FROM tab_seq_unlogged3"), + "1\n2", 'reset sequence nextval after restart'); + +done_testing(); diff --git a/src/test/recovery/t/015_promotion_pages.pl b/src/test/recovery/t/015_promotion_pages.pl new file mode 100644 index 0000000..8d57b1b --- /dev/null +++ b/src/test/recovery/t/015_promotion_pages.pl @@ -0,0 +1,89 @@ + +# Copyright (c) 
2021-2022, PostgreSQL Global Development Group + +# Test for promotion handling with WAL records generated post-promotion +# before the first checkpoint is generated. This test case checks for +# invalid page references at replay based on the minimum consistent +# recovery point defined. +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary node +my $alpha = PostgreSQL::Test::Cluster->new('alpha'); +$alpha->init(allows_streaming => 1); +# Setting wal_log_hints to off is important to get invalid page +# references. +$alpha->append_conf("postgresql.conf", <<EOF); +wal_log_hints = off +EOF + +# Start the primary +$alpha->start; + +# setup/start a standby +$alpha->backup('bkp'); +my $bravo = PostgreSQL::Test::Cluster->new('bravo'); +$bravo->init_from_backup($alpha, 'bkp', has_streaming => 1); +$bravo->append_conf('postgresql.conf', <<EOF); +checkpoint_timeout=1h +EOF +$bravo->start; + +# Dummy table for the upcoming tests. +$alpha->safe_psql('postgres', 'create table test1 (a int)'); +$alpha->safe_psql('postgres', + 'insert into test1 select generate_series(1, 10000)'); + +# take a checkpoint +$alpha->safe_psql('postgres', 'checkpoint'); + +# The following vacuum will set visibility map bits and create +# problematic WAL records. +$alpha->safe_psql('postgres', 'vacuum verbose test1'); +# Wait for last record to have been replayed on the standby. +$alpha->wait_for_catchup($bravo); + +# Now force a checkpoint on the standby. This seems unnecessary but for "some" +# reason, the previous checkpoint on the primary does not reflect on the standby +# and without an explicit checkpoint, it may start redo recovery from a much +# older point, which includes even create table and initial page additions. +$bravo->safe_psql('postgres', 'checkpoint'); + +# Now just use a dummy table and run some operations to move minRecoveryPoint +# beyond the previous vacuum. +$alpha->safe_psql('postgres', 'create table test2 (a int, b text)'); +$alpha->safe_psql('postgres', + 'insert into test2 select generate_series(1,10000), md5(random()::text)'); +$alpha->safe_psql('postgres', 'truncate test2'); + +# Wait again for all records to be replayed. +$alpha->wait_for_catchup($bravo); + +# Do the promotion, which reinitializes minRecoveryPoint in the control +# file so as WAL is replayed up to the end. +$bravo->promote; + +# Truncate the table on the promoted standby, vacuum and extend it +# again to create new page references. The first post-recovery checkpoint +# has not happened yet. +$bravo->safe_psql('postgres', 'truncate test1'); +$bravo->safe_psql('postgres', 'vacuum verbose test1'); +$bravo->safe_psql('postgres', + 'insert into test1 select generate_series(1,1000)'); + +# Now crash-stop the promoted standby and restart. This makes sure that +# replay does not see invalid page references because of an invalid +# minimum consistent recovery point. +$bravo->stop('immediate'); +$bravo->start; + +# Check state of the table after full crash recovery. All its data should +# be here. 
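Besides the row-count check that follows, the quantity this test is really about, the minimum consistent recovery point, can also be read from SQL through pg_control_recovery(), which exposes the relevant control file fields (illustrative sketch; the column is named min_recovery_end_lsn in recent releases):

    # Sketch: inspect the minimum recovery ending location on the promoted node.
    my $min_recovery_lsn = $bravo->safe_psql('postgres',
        'SELECT min_recovery_end_lsn FROM pg_control_recovery()');
    note "minimum recovery ending location: $min_recovery_lsn";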
+my $psql_out; +$bravo->psql('postgres', "SELECT count(*) FROM test1", stdout => \$psql_out); +is($psql_out, '1000', "Check that table state is correct"); + +done_testing(); diff --git a/src/test/recovery/t/016_min_consistency.pl b/src/test/recovery/t/016_min_consistency.pl new file mode 100644 index 0000000..5e0655c --- /dev/null +++ b/src/test/recovery/t/016_min_consistency.pl @@ -0,0 +1,143 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test for checking consistency of on-disk pages for a cluster with +# the minimum recovery LSN, ensuring that the updates happen across +# all processes. In this test, the updates from the startup process +# and the checkpointer (which triggers non-startup code paths) are +# both checked. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Find the largest LSN in the set of pages part of the given relation +# file. This is used for offline checks of page consistency. The LSN +# is historically stored as a set of two numbers of 4 byte-length +# located at the beginning of each page. +sub find_largest_lsn +{ + my $blocksize = int(shift); + my $filename = shift; + my ($max_hi, $max_lo) = (0, 0); + open(my $fh, "<:raw", $filename) + or die "failed to open $filename: $!"; + my ($buf, $len); + while ($len = read($fh, $buf, $blocksize)) + { + $len == $blocksize + or die "read only $len of $blocksize bytes from $filename"; + my ($hi, $lo) = unpack("LL", $buf); + + if ($hi > $max_hi or ($hi == $max_hi and $lo > $max_lo)) + { + ($max_hi, $max_lo) = ($hi, $lo); + } + } + defined($len) or die "read error on $filename: $!"; + close($fh); + + return sprintf("%X/%X", $max_hi, $max_lo); +} + +# Initialize primary node +my $primary = PostgreSQL::Test::Cluster->new('primary'); +$primary->init(allows_streaming => 1); + +# Set shared_buffers to a very low value to enforce discard and flush +# of PostgreSQL buffers on standby, enforcing other processes than the +# startup process to update the minimum recovery LSN in the control +# file. Autovacuum is disabled so as there is no risk of having other +# processes than the checkpointer doing page flushes. +$primary->append_conf("postgresql.conf", <<EOF); +shared_buffers = 128kB +autovacuum = off +EOF + +# Start the primary +$primary->start; + +# setup/start a standby +$primary->backup('bkp'); +my $standby = PostgreSQL::Test::Cluster->new('standby'); +$standby->init_from_backup($primary, 'bkp', has_streaming => 1); +$standby->start; + +# Create base table whose data consistency is checked. +$primary->safe_psql( + 'postgres', " +CREATE TABLE test1 (a int) WITH (fillfactor = 10); +INSERT INTO test1 SELECT generate_series(1, 10000);"); + +# Take a checkpoint and enforce post-checkpoint full page writes +# which makes the startup process replay those pages, updating +# minRecoveryPoint. +$primary->safe_psql('postgres', 'CHECKPOINT;'); +$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;'); + +# Wait for last record to have been replayed on the standby. +$primary->wait_for_catchup($standby); + +# Fill in the standby's shared buffers with the data filled in +# previously. +$standby->safe_psql('postgres', 'SELECT count(*) FROM test1;'); + +# Update the table again, this does not generate full page writes so +# the standby will replay records associated with it, but the startup +# process will not flush those pages. 
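As a reminder of what find_largest_lsn() above is decoding: the first eight bytes of every page hold pd_lsn, the LSN of the last WAL record that touched that page, stored as two 32-bit halves in native byte order (hence the unpack("LL", ...) call). A single-page version of the same extraction, as a sketch:

    sub page_lsn
    {
        my ($filename, $blocksize) = @_;
        open(my $fh, '<:raw', $filename) or die "could not open $filename: $!";
        read($fh, my $page, $blocksize) == $blocksize
            or die "short read on $filename";
        close($fh);
        my ($hi, $lo) = unpack('LL', $page);    # pd_lsn of the first page
        return sprintf('%X/%X', $hi, $lo);
    }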
+$primary->safe_psql('postgres', 'UPDATE test1 SET a = a + 1;'); + +# Extract from the relation the last block created and its relation +# file, this will be used at the end of the test for sanity checks. +my $blocksize = $primary->safe_psql('postgres', + "SELECT setting::int FROM pg_settings WHERE name = 'block_size';"); +my $last_block = $primary->safe_psql('postgres', + "SELECT pg_relation_size('test1')::int / $blocksize - 1;"); +my $relfilenode = $primary->safe_psql('postgres', + "SELECT pg_relation_filepath('test1'::regclass);"); + +# Wait for last record to have been replayed on the standby. +$primary->wait_for_catchup($standby); + +# Issue a restart point on the standby now, which makes the checkpointer +# update minRecoveryPoint. +$standby->safe_psql('postgres', 'CHECKPOINT;'); + +# Now shut down the primary violently so as the standby does not +# receive the shutdown checkpoint, making sure that the startup +# process does not flush any pages on its side. The standby is +# cleanly stopped, which makes the checkpointer update minRecoveryPoint +# with the restart point created at shutdown. +$primary->stop('immediate'); +$standby->stop('fast'); + +# Check the data consistency of the instance while offline. This is +# done by directly scanning the on-disk relation blocks and what +# pg_controldata lets know. +my $standby_data = $standby->data_dir; +my $offline_max_lsn = + find_largest_lsn($blocksize, "$standby_data/$relfilenode"); + +# Fetch minRecoveryPoint from the control file itself +my ($stdout, $stderr) = run_command([ 'pg_controldata', $standby_data ]); +my @control_data = split("\n", $stdout); +my $offline_recovery_lsn = undef; +foreach (@control_data) +{ + if ($_ =~ /^Minimum recovery ending location:\s*(.*)$/mg) + { + $offline_recovery_lsn = $1; + last; + } +} +die "No minRecoveryPoint in control file found\n" + unless defined($offline_recovery_lsn); + +# minRecoveryPoint should never be older than the maximum LSN for all +# the pages on disk. +ok($offline_recovery_lsn ge $offline_max_lsn, + "Check offline that table data is consistent with minRecoveryPoint"); + +done_testing(); diff --git a/src/test/recovery/t/017_shm.pl b/src/test/recovery/t/017_shm.pl new file mode 100644 index 0000000..875657b --- /dev/null +++ b/src/test/recovery/t/017_shm.pl @@ -0,0 +1,218 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# +# Tests of pg_shmem.h functions +# +use strict; +use warnings; +use File::stat qw(stat); +use IPC::Run 'run'; +use PostgreSQL::Test::Cluster; +use Test::More; +use PostgreSQL::Test::Utils; +use Time::HiRes qw(usleep); + +# If we don't have shmem support, skip the whole thing +eval { + require IPC::SharedMem; + IPC::SharedMem->import; + require IPC::SysV; + IPC::SysV->import(qw(IPC_CREAT IPC_EXCL S_IRUSR S_IWUSR)); +}; +if ($@ || $windows_os) +{ + plan skip_all => 'SysV shared memory not supported by this platform'; +} + +my $tempdir = PostgreSQL::Test::Utils::tempdir; + +# Log "ipcs" diffs on a best-effort basis, swallowing any error. +my $ipcs_before = "$tempdir/ipcs_before"; +eval { run_log [ 'ipcs', '-am' ], '>', $ipcs_before; }; + +sub log_ipcs +{ + eval { run_log [ 'ipcs', '-am' ], '|', [ 'diff', $ipcs_before, '-' ] }; + return; +} + +# Node setup. +my $gnat = PostgreSQL::Test::Cluster->new('gnat'); +$gnat->init; + +# Create a shmem segment that will conflict with gnat's first choice +# of shmem key. 
(If we fail to create it because something else is +# already using that key, that's perfectly fine, though the test will +# exercise a different scenario than it usually does.) +my $gnat_dir_stat = stat($gnat->data_dir); +defined($gnat_dir_stat) or die('unable to stat ' . $gnat->data_dir); +my $gnat_inode = $gnat_dir_stat->ino; +note "gnat's datadir inode = $gnat_inode"; + +# Note: must reference IPC::SysV's constants as functions, or this file +# fails to compile when that module is not available. +my $gnat_conflict_shm = + IPC::SharedMem->new($gnat_inode, 1024, + IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR()); +note "could not create conflicting shmem" if !defined($gnat_conflict_shm); +log_ipcs(); + +$gnat->start; +log_ipcs(); + +$gnat->restart; # should keep same shmem key +log_ipcs(); + +# Upon postmaster death, postmaster children exit automatically. +$gnat->kill9; +log_ipcs(); +poll_start($gnat); # gnat recycles its former shm key. +log_ipcs(); + +note "removing the conflicting shmem ..."; +$gnat_conflict_shm->remove if $gnat_conflict_shm; +log_ipcs(); + +# Upon postmaster death, postmaster children exit automatically. +$gnat->kill9; +log_ipcs(); + +# In this start, gnat will use its normal shmem key, and fail to remove +# the higher-keyed segment that the previous postmaster was using. +# That's not great, but key collisions should be rare enough to not +# make this a big problem. +poll_start($gnat); +log_ipcs(); +$gnat->stop; +log_ipcs(); + +# Re-create the conflicting segment, and start/stop normally, just so +# this test script doesn't leak the higher-keyed segment. +note "re-creating conflicting shmem ..."; +$gnat_conflict_shm = + IPC::SharedMem->new($gnat_inode, 1024, + IPC_CREAT() | IPC_EXCL() | S_IRUSR() | S_IWUSR()); +note "could not create conflicting shmem" if !defined($gnat_conflict_shm); +log_ipcs(); + +$gnat->start; +log_ipcs(); +$gnat->stop; +log_ipcs(); + +note "removing the conflicting shmem ..."; +$gnat_conflict_shm->remove if $gnat_conflict_shm; +log_ipcs(); + +# Scenarios involving no postmaster.pid, dead postmaster, and a live backend. +# Use a regress.c function to emulate the responsiveness of a backend working +# through a CPU-intensive task. +$gnat->start; +log_ipcs(); + +my $regress_shlib = $ENV{REGRESS_SHLIB}; +$gnat->safe_psql('postgres', <<EOSQL); +CREATE FUNCTION wait_pid(int) + RETURNS void + AS '$regress_shlib' + LANGUAGE C STRICT; +EOSQL +my $slow_query = 'SELECT wait_pid(pg_backend_pid())'; +my ($stdout, $stderr); +my $slow_client = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-d', $gnat->connstr('postgres'), + '-c', $slow_query + ], + '<', + \undef, + '>', + \$stdout, + '2>', + \$stderr, + IPC::Run::timeout(5 * $PostgreSQL::Test::Utils::timeout_default)); +ok( $gnat->poll_query_until( + 'postgres', + "SELECT 1 FROM pg_stat_activity WHERE query = '$slow_query'", '1'), + 'slow query started'); +my $slow_pid = $gnat->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE query = '$slow_query'"); +$gnat->kill9; +unlink($gnat->data_dir . '/postmaster.pid'); +$gnat->rotate_logfile; # on Windows, can't open old log for writing +log_ipcs(); +# Reject ordinary startup. Retry for the same reasons poll_start() does, +# every 0.1s for at least $PostgreSQL::Test::Utils::timeout_default seconds. 
+my $pre_existing_msg = qr/pre-existing shared memory block/; +{ + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; + my $attempts = 0; + while ($attempts < $max_attempts) + { + last + if $gnat->start(fail_ok => 1) + || slurp_file($gnat->logfile) =~ $pre_existing_msg; + usleep(100_000); + $attempts++; + } +} +like(slurp_file($gnat->logfile), + $pre_existing_msg, 'detected live backend via shared memory'); +# Reject single-user startup. +my $single_stderr; +ok( !run_log( + [ 'postgres', '--single', '-D', $gnat->data_dir, 'template1' ], + '<', \undef, '2>', \$single_stderr), + 'live query blocks --single'); +print STDERR $single_stderr; +like($single_stderr, $pre_existing_msg, + 'single-user mode detected live backend via shared memory'); +log_ipcs(); + +# cleanup slow backend +PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'QUIT', $slow_pid); +$slow_client->finish; # client has detected backend termination +log_ipcs(); + +# now startup should work +poll_start($gnat); +log_ipcs(); + +# finish testing +$gnat->stop; +log_ipcs(); + + +# We may need retries to start a new postmaster. Causes: +# - kernel is slow to deliver SIGKILL +# - postmaster parent is slow to waitpid() +# - postmaster child is slow to exit in response to SIGQUIT +# - postmaster child is slow to exit after postmaster death +sub poll_start +{ + my ($node) = @_; + + my $max_attempts = 10 * $PostgreSQL::Test::Utils::timeout_default; + my $attempts = 0; + + while ($attempts < $max_attempts) + { + $node->start(fail_ok => 1) && return 1; + + # Wait 0.1 second before retrying. + usleep(100_000); + + # Clean up in case the start attempt just timed out or some such. + $node->stop('fast', fail_ok => 1); + + $attempts++; + } + + # Try one last time without fail_ok, which will BAIL_OUT unless it + # succeeds. + $node->start && return 1; + return 0; +} + +done_testing(); diff --git a/src/test/recovery/t/018_wal_optimize.pl b/src/test/recovery/t/018_wal_optimize.pl new file mode 100644 index 0000000..4700d49 --- /dev/null +++ b/src/test/recovery/t/018_wal_optimize.pl @@ -0,0 +1,401 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test WAL replay when some operation has skipped WAL. +# +# These tests exercise code that once violated the mandate described in +# src/backend/access/transam/README section "Skipping WAL for New +# RelFileNode". The tests work by committing some transactions, initiating an +# immediate shutdown, and confirming that the expected data survives recovery. +# For many years, individual commands made the decision to skip WAL, hence the +# frequent appearance of COPY in these tests. +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +sub check_orphan_relfilenodes +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + + my ($node, $test_name) = @_; + + my $db_oid = $node->safe_psql('postgres', + "SELECT oid FROM pg_database WHERE datname = 'postgres'"); + my $prefix = "base/$db_oid/"; + my $filepaths_referenced = $node->safe_psql( + 'postgres', " + SELECT pg_relation_filepath(oid) FROM pg_class + WHERE reltablespace = 0 AND relpersistence <> 't' AND + pg_relation_filepath(oid) IS NOT NULL;"); + is_deeply( + [ + sort(map { "$prefix$_" } + grep(/^[0-9]+$/, slurp_dir($node->data_dir . "/$prefix"))) + ], + [ sort split /\n/, $filepaths_referenced ], + $test_name); + return; +} + +# We run this same test suite for both wal_level=minimal and replica. 
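+# (Editor's sketch, not part of the original file: every scenario inside
+# run_wal_optimize() below follows the same crash-and-verify shape, with
+# wal_skip_threshold = 0 forcing even small new relations onto the WAL-skip
+# path.  Roughly, for a hypothetical table "t" filled in a just-committed
+# transaction:
+#
+#     $node->stop('immediate');   # crash, no shutdown checkpoint
+#     $node->start;               # crash recovery replays or syncs as needed
+#     is($node->safe_psql('postgres', "SELECT count(*) FROM t;"),
+#         qq(42), "data survives crash recovery");
+#
+# The table name and expected count are placeholders; each scenario below
+# uses its own.)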
+sub run_wal_optimize +{ + my $wal_level = shift; + + my $node = PostgreSQL::Test::Cluster->new("node_$wal_level"); + $node->init; + $node->append_conf( + 'postgresql.conf', qq( +wal_level = $wal_level +max_prepared_transactions = 1 +wal_log_hints = on +wal_skip_threshold = 0 +#wal_debug = on +)); + $node->start; + + # Setup + my $tablespace_dir = $node->basedir . '/tablespace_other'; + mkdir($tablespace_dir); + my $result; + + # Test redo of CREATE TABLESPACE. + $node->safe_psql( + 'postgres', " + CREATE TABLE moved (id int); + INSERT INTO moved VALUES (1); + CREATE TABLESPACE other LOCATION '$tablespace_dir'; + BEGIN; + ALTER TABLE moved SET TABLESPACE other; + CREATE TABLE originated (id int); + INSERT INTO originated VALUES (1); + CREATE UNIQUE INDEX ON originated(id) TABLESPACE other; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM moved;"); + is($result, qq(1), "wal_level = $wal_level, CREATE+SET TABLESPACE"); + $result = $node->safe_psql( + 'postgres', " + INSERT INTO originated VALUES (1) ON CONFLICT (id) + DO UPDATE set id = originated.id + 1 + RETURNING id;"); + is($result, qq(2), + "wal_level = $wal_level, CREATE TABLESPACE, CREATE INDEX"); + + # Test direct truncation optimization. No tuples. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE trunc (id serial PRIMARY KEY); + TRUNCATE trunc; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM trunc;"); + is($result, qq(0), "wal_level = $wal_level, TRUNCATE with empty table"); + + # Test truncation with inserted tuples within the same transaction. + # Tuples inserted after the truncation should be seen. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE trunc_ins (id serial PRIMARY KEY); + INSERT INTO trunc_ins VALUES (DEFAULT); + TRUNCATE trunc_ins; + INSERT INTO trunc_ins VALUES (DEFAULT); + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', + "SELECT count(*), min(id) FROM trunc_ins;"); + is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT"); + + # Same for prepared transaction. + # Tuples inserted after the truncation should be seen. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE twophase (id serial PRIMARY KEY); + INSERT INTO twophase VALUES (DEFAULT); + TRUNCATE twophase; + INSERT INTO twophase VALUES (DEFAULT); + PREPARE TRANSACTION 't'; + COMMIT PREPARED 't';"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', + "SELECT count(*), min(id) FROM trunc_ins;"); + is($result, qq(1|2), "wal_level = $wal_level, TRUNCATE INSERT PREPARE"); + + # Writing WAL at end of xact, instead of syncing. + $node->safe_psql( + 'postgres', " + SET wal_skip_threshold = '1GB'; + BEGIN; + CREATE TABLE noskip (id serial PRIMARY KEY); + INSERT INTO noskip (SELECT FROM generate_series(1, 20000) a) ; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM noskip;"); + is($result, qq(20000), "wal_level = $wal_level, end-of-xact WAL"); + + # Data file for COPY query in subsequent tests + my $basedir = $node->basedir; + my $copy_file = "$basedir/copy_data.txt"; + PostgreSQL::Test::Utils::append_to_file( + $copy_file, qq(20000,30000 +20001,30001 +20002,30002)); + + # Test truncation with inserted tuples using both INSERT and COPY. Tuples + # inserted after the truncation should be seen. 
+ $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE ins_trunc (id serial PRIMARY KEY, id2 int); + INSERT INTO ins_trunc VALUES (DEFAULT, generate_series(1,10000)); + TRUNCATE ins_trunc; + INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000); + COPY ins_trunc FROM '$copy_file' DELIMITER ','; + INSERT INTO ins_trunc (id, id2) VALUES (DEFAULT, 10000); + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trunc;"); + is($result, qq(5), "wal_level = $wal_level, TRUNCATE COPY INSERT"); + + # Test truncation with inserted tuples using COPY. Tuples copied after + # the truncation should be seen. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE trunc_copy (id serial PRIMARY KEY, id2 int); + INSERT INTO trunc_copy VALUES (DEFAULT, generate_series(1,3000)); + TRUNCATE trunc_copy; + COPY trunc_copy FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = + $node->safe_psql('postgres', "SELECT count(*) FROM trunc_copy;"); + is($result, qq(3), "wal_level = $wal_level, TRUNCATE COPY"); + + # Like previous test, but rollback SET TABLESPACE in a subtransaction. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE spc_abort (id serial PRIMARY KEY, id2 int); + INSERT INTO spc_abort VALUES (DEFAULT, generate_series(1,3000)); + TRUNCATE spc_abort; + SAVEPOINT s; + ALTER TABLE spc_abort SET TABLESPACE other; ROLLBACK TO s; + COPY spc_abort FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_abort;"); + is($result, qq(3), + "wal_level = $wal_level, SET TABLESPACE abort subtransaction"); + + # in different subtransaction patterns + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE spc_commit (id serial PRIMARY KEY, id2 int); + INSERT INTO spc_commit VALUES (DEFAULT, generate_series(1,3000)); + TRUNCATE spc_commit; + SAVEPOINT s; ALTER TABLE spc_commit SET TABLESPACE other; RELEASE s; + COPY spc_commit FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = + $node->safe_psql('postgres', "SELECT count(*) FROM spc_commit;"); + is($result, qq(3), + "wal_level = $wal_level, SET TABLESPACE commit subtransaction"); + + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE spc_nest (id serial PRIMARY KEY, id2 int); + INSERT INTO spc_nest VALUES (DEFAULT, generate_series(1,3000)); + TRUNCATE spc_nest; + SAVEPOINT s; + ALTER TABLE spc_nest SET TABLESPACE other; + SAVEPOINT s2; + ALTER TABLE spc_nest SET TABLESPACE pg_default; + ROLLBACK TO s2; + SAVEPOINT s2; + ALTER TABLE spc_nest SET TABLESPACE pg_default; + RELEASE s2; + ROLLBACK TO s; + COPY spc_nest FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_nest;"); + is($result, qq(3), + "wal_level = $wal_level, SET TABLESPACE nested subtransaction"); + + $node->safe_psql( + 'postgres', " + CREATE TABLE spc_hint (id int); + INSERT INTO spc_hint VALUES (1); + BEGIN; + ALTER TABLE spc_hint SET TABLESPACE other; + CHECKPOINT; + SELECT * FROM spc_hint; -- set hint bit + INSERT INTO spc_hint VALUES (2); + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM spc_hint;"); + is($result, qq(2), "wal_level = $wal_level, SET TABLESPACE, hint bit"); + + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE 
idx_hint (c int PRIMARY KEY); + SAVEPOINT q; INSERT INTO idx_hint VALUES (1); ROLLBACK TO q; + CHECKPOINT; + INSERT INTO idx_hint VALUES (1); -- set index hint bit + INSERT INTO idx_hint VALUES (2); + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->psql('postgres',); + my ($ret, $stdout, $stderr) = + $node->psql('postgres', "INSERT INTO idx_hint VALUES (2);"); + is($ret, qq(3), "wal_level = $wal_level, unique index LP_DEAD"); + like( + $stderr, + qr/violates unique/, + "wal_level = $wal_level, unique index LP_DEAD message"); + + # UPDATE touches two buffers for one row. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE upd (id serial PRIMARY KEY, id2 int); + INSERT INTO upd (id, id2) VALUES (DEFAULT, generate_series(1,10000)); + COPY upd FROM '$copy_file' DELIMITER ','; + UPDATE upd SET id2 = id2 + 1; + DELETE FROM upd; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM upd;"); + is($result, qq(0), + "wal_level = $wal_level, UPDATE touches two buffers for one row"); + + # Test consistency of COPY with INSERT for table created in the same + # transaction. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE ins_copy (id serial PRIMARY KEY, id2 int); + INSERT INTO ins_copy VALUES (DEFAULT, 1); + COPY ins_copy FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_copy;"); + is($result, qq(4), "wal_level = $wal_level, INSERT COPY"); + + # Test consistency of COPY that inserts more to the same table using + # triggers. If the INSERTS from the trigger go to the same block data + # is copied to, and the INSERTs are WAL-logged, WAL replay will fail when + # it tries to replay the WAL record but the "before" image doesn't match, + # because not all changes were WAL-logged. + $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE ins_trig (id serial PRIMARY KEY, id2 text); + CREATE FUNCTION ins_trig_before_row_trig() RETURNS trigger + LANGUAGE plpgsql as \$\$ + BEGIN + IF new.id2 NOT LIKE 'triggered%' THEN + INSERT INTO ins_trig + VALUES (DEFAULT, 'triggered row before' || NEW.id2); + END IF; + RETURN NEW; + END; \$\$; + CREATE FUNCTION ins_trig_after_row_trig() RETURNS trigger + LANGUAGE plpgsql as \$\$ + BEGIN + IF new.id2 NOT LIKE 'triggered%' THEN + INSERT INTO ins_trig + VALUES (DEFAULT, 'triggered row after' || NEW.id2); + END IF; + RETURN NEW; + END; \$\$; + CREATE TRIGGER ins_trig_before_row_insert + BEFORE INSERT ON ins_trig + FOR EACH ROW EXECUTE PROCEDURE ins_trig_before_row_trig(); + CREATE TRIGGER ins_trig_after_row_insert + AFTER INSERT ON ins_trig + FOR EACH ROW EXECUTE PROCEDURE ins_trig_after_row_trig(); + COPY ins_trig FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = $node->safe_psql('postgres', "SELECT count(*) FROM ins_trig;"); + is($result, qq(9), "wal_level = $wal_level, COPY with INSERT triggers"); + + # Test consistency of INSERT, COPY and TRUNCATE in same transaction block + # with TRUNCATE triggers. 
+ $node->safe_psql( + 'postgres', " + BEGIN; + CREATE TABLE trunc_trig (id serial PRIMARY KEY, id2 text); + CREATE FUNCTION trunc_trig_before_stat_trig() RETURNS trigger + LANGUAGE plpgsql as \$\$ + BEGIN + INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before'); + RETURN NULL; + END; \$\$; + CREATE FUNCTION trunc_trig_after_stat_trig() RETURNS trigger + LANGUAGE plpgsql as \$\$ + BEGIN + INSERT INTO trunc_trig VALUES (DEFAULT, 'triggered stat before'); + RETURN NULL; + END; \$\$; + CREATE TRIGGER trunc_trig_before_stat_truncate + BEFORE TRUNCATE ON trunc_trig + FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_before_stat_trig(); + CREATE TRIGGER trunc_trig_after_stat_truncate + AFTER TRUNCATE ON trunc_trig + FOR EACH STATEMENT EXECUTE PROCEDURE trunc_trig_after_stat_trig(); + INSERT INTO trunc_trig VALUES (DEFAULT, 1); + TRUNCATE trunc_trig; + COPY trunc_trig FROM '$copy_file' DELIMITER ','; + COMMIT;"); + $node->stop('immediate'); + $node->start; + $result = + $node->safe_psql('postgres', "SELECT count(*) FROM trunc_trig;"); + is($result, qq(4), + "wal_level = $wal_level, TRUNCATE COPY with TRUNCATE triggers"); + + # Test redo of temp table creation. + $node->safe_psql( + 'postgres', " + CREATE TEMP TABLE temp (id serial PRIMARY KEY, id2 text);"); + $node->stop('immediate'); + $node->start; + check_orphan_relfilenodes($node, + "wal_level = $wal_level, no orphan relfilenode remains"); + + return; +} + +# Run same test suite for multiple wal_level values. +run_wal_optimize("minimal"); +run_wal_optimize("replica"); + +done_testing(); diff --git a/src/test/recovery/t/019_replslot_limit.pl b/src/test/recovery/t/019_replslot_limit.pl new file mode 100644 index 0000000..4ec1a9a --- /dev/null +++ b/src/test/recovery/t/019_replslot_limit.pl @@ -0,0 +1,444 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test for replication slot limit +# Ensure that max_slot_wal_keep_size limits the number of WAL files to +# be kept by replication slots. 
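+# (Editor's note, illustrative only: the checks below watch the physical
+# slot 'rep1' move through the wal_status values reserved -> extended ->
+# unreserved -> lost as WAL accumulates past max_slot_wal_keep_size.  The
+# state is read with queries of roughly this shape:
+#
+#     my $status = $node_primary->safe_psql('postgres', qq(
+#         SELECT wal_status, safe_wal_size
+#           FROM pg_replication_slots WHERE slot_name = 'rep1'));
+#
+# safe_wal_size is checked to be <= 0 once the slot reaches "unreserved".)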
+use strict; +use warnings; + +use PostgreSQL::Test::Utils; +use PostgreSQL::Test::Cluster; + +use File::Path qw(rmtree); +use Test::More; +use Time::HiRes qw(usleep); + +$ENV{PGDATABASE} = 'postgres'; + +# Initialize primary node, setting wal-segsize to 1MB +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1, extra => ['--wal-segsize=1']); +$node_primary->append_conf( + 'postgresql.conf', qq( +min_wal_size = 2MB +max_wal_size = 4MB +log_checkpoints = yes +)); +$node_primary->start; +$node_primary->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('rep1')"); + +# The slot state and remain should be null before the first connection +my $result = $node_primary->safe_psql('postgres', + "SELECT restart_lsn IS NULL, wal_status is NULL, safe_wal_size is NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "t|t|t", 'check the state of non-reserved slot is "unknown"'); + + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create a standby linking to it using the replication slot +my $node_standby = PostgreSQL::Test::Cluster->new('standby_1'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'"); + +$node_standby->start; + +# Wait until standby has replayed enough data +$node_primary->wait_for_catchup($node_standby); + +# Stop standby +$node_standby->stop; + +# Preparation done, the slot is the state "reserved" now +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "reserved|t", 'check the catching-up state'); + +# Advance WAL by five segments (= 5MB) on primary +advance_wal($node_primary, 1); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# The slot is always "safe" when fitting max_wal_size +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "reserved|t", + 'check that it is safe if WAL fits in max_wal_size'); + +advance_wal($node_primary, 4); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# The slot is always "safe" when max_slot_wal_keep_size is not set +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size IS NULL FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "reserved|t", 'check that slot is working'); + +# The standby can reconnect to primary +$node_standby->start; + +$node_primary->wait_for_catchup($node_standby); + +$node_standby->stop; + +# Set max_slot_wal_keep_size on primary +my $max_slot_wal_keep_size_mb = 6; +$node_primary->append_conf( + 'postgresql.conf', qq( +max_slot_wal_keep_size = ${max_slot_wal_keep_size_mb}MB +)); +$node_primary->reload; + +# The slot is in safe state. + +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); +is($result, "reserved", 'check that max_slot_wal_keep_size is working'); + +# Advance WAL again then checkpoint, reducing remain by 2 MB. 
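+# (Editor's note: advance_wal() is a small helper defined near the end of
+# this file; each iteration forces a WAL segment switch roughly like
+#
+#     $node->safe_psql('postgres',
+#         "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();");
+#
+# so with the --wal-segsize=1 used here, the call below consumes about 2MB
+# of WAL before the CHECKPOINT.)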
+advance_wal($node_primary, 2); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# The slot is still working +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); +is($result, "reserved", + 'check that safe_wal_size gets close to the current LSN'); + +# The standby can reconnect to primary +$node_standby->start; +$node_primary->wait_for_catchup($node_standby); +$node_standby->stop; + +# wal_keep_size overrides max_slot_wal_keep_size +$result = $node_primary->safe_psql('postgres', + "ALTER SYSTEM SET wal_keep_size to '8MB'; SELECT pg_reload_conf();"); +# Advance WAL again then checkpoint, reducing remain by 6 MB. +advance_wal($node_primary, 6); +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status as remain FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "extended", + 'check that wal_keep_size overrides max_slot_wal_keep_size'); +# restore wal_keep_size +$result = $node_primary->safe_psql('postgres', + "ALTER SYSTEM SET wal_keep_size to 0; SELECT pg_reload_conf();"); + +# The standby can reconnect to primary +$node_standby->start; +$node_primary->wait_for_catchup($node_standby); +$node_standby->stop; + +# Advance WAL again without checkpoint, reducing remain by 6 MB. +advance_wal($node_primary, 6); + +# Slot gets into 'reserved' state +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep1'"); +is($result, "extended", 'check that the slot state changes to "extended"'); + +# do checkpoint so that the next checkpoint runs too early +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# Advance WAL again without checkpoint; remain goes to 0. +advance_wal($node_primary, 1); + +# Slot gets into 'unreserved' state and safe_wal_size is negative +$result = $node_primary->safe_psql('postgres', + "SELECT wal_status, safe_wal_size <= 0 FROM pg_replication_slots WHERE slot_name = 'rep1'" +); +is($result, "unreserved|t", + 'check that the slot state changes to "unreserved"'); + +# The standby still can connect to primary before a checkpoint +$node_standby->start; + +$node_primary->wait_for_catchup($node_standby); + +$node_standby->stop; + +ok( !$node_standby->log_contains( + "requested WAL segment [0-9A-F]+ has already been removed"), + 'check that required WAL segments are still available'); + +# Create one checkpoint, to improve stability of the next steps +$node_primary->safe_psql('postgres', "CHECKPOINT;"); + +# Prevent other checkpoints from occurring while advancing WAL segments +$node_primary->safe_psql('postgres', + "ALTER SYSTEM SET max_wal_size='40MB'; SELECT pg_reload_conf()"); + +# Advance WAL again. 
The slot loses the oldest segment by the next checkpoint +my $logstart = get_log_size($node_primary); +advance_wal($node_primary, 7); + +# Now create another checkpoint and wait until the WARNING is issued +$node_primary->safe_psql('postgres', + 'ALTER SYSTEM RESET max_wal_size; SELECT pg_reload_conf()'); +$node_primary->safe_psql('postgres', "CHECKPOINT;"); +my $invalidated = 0; +for (my $i = 0; $i < 10000; $i++) +{ + if ($node_primary->log_contains( + "invalidating slot \"rep1\" because its restart_lsn [0-9A-F/]+ exceeds max_slot_wal_keep_size", + $logstart)) + { + $invalidated = 1; + last; + } + usleep(100_000); +} +ok($invalidated, 'check that slot invalidation has been logged'); + +$result = $node_primary->safe_psql( + 'postgres', + qq[ + SELECT slot_name, active, restart_lsn IS NULL, wal_status, safe_wal_size + FROM pg_replication_slots WHERE slot_name = 'rep1']); +is($result, "rep1|f|t|lost|", + 'check that the slot became inactive and the state "lost" persists'); + +# Wait until current checkpoint ends +my $checkpoint_ended = 0; +for (my $i = 0; $i < 10000; $i++) +{ + if ($node_primary->log_contains("checkpoint complete: ", $logstart)) + { + $checkpoint_ended = 1; + last; + } + usleep(100_000); +} +ok($checkpoint_ended, 'waited for checkpoint to end'); + +# The invalidated slot shouldn't keep the old-segment horizon back; +# see bug #17103: https://postgr.es/m/17103-004130e8f27782c9@postgresql.org +# Test for this by creating a new slot and comparing its restart LSN +# to the oldest existing file. +my $redoseg = $node_primary->safe_psql('postgres', + "SELECT pg_walfile_name(lsn) FROM pg_create_physical_replication_slot('s2', true)" +); +my $oldestseg = $node_primary->safe_psql('postgres', + "SELECT pg_ls_dir AS f FROM pg_ls_dir('pg_wal') WHERE pg_ls_dir ~ '^[0-9A-F]{24}\$' ORDER BY 1 LIMIT 1" +); +$node_primary->safe_psql('postgres', + qq[SELECT pg_drop_replication_slot('s2')]); +is($oldestseg, $redoseg, "check that segments have been removed"); + +# The standby no longer can connect to the primary +$logstart = get_log_size($node_standby); +$node_standby->start; + +my $failed = 0; +for (my $i = 0; $i < 10000; $i++) +{ + if ($node_standby->log_contains( + "requested WAL segment [0-9A-F]+ has already been removed", + $logstart)) + { + $failed = 1; + last; + } + usleep(100_000); +} +ok($failed, 'check that replication has been broken'); + +$node_primary->stop; +$node_standby->stop; + +my $node_primary2 = PostgreSQL::Test::Cluster->new('primary2'); +$node_primary2->init(allows_streaming => 1); +$node_primary2->append_conf( + 'postgresql.conf', qq( +min_wal_size = 32MB +max_wal_size = 32MB +log_checkpoints = yes +)); +$node_primary2->start; +$node_primary2->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('rep1')"); +$backup_name = 'my_backup2'; +$node_primary2->backup($backup_name); + +$node_primary2->stop; +$node_primary2->append_conf( + 'postgresql.conf', qq( +max_slot_wal_keep_size = 0 +)); +$node_primary2->start; + +$node_standby = PostgreSQL::Test::Cluster->new('standby_2'); +$node_standby->init_from_backup($node_primary2, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', "primary_slot_name = 'rep1'"); +$node_standby->start; +my @result = + split( + '\n', + $node_primary2->safe_psql( + 'postgres', + "CREATE TABLE tt(); + DROP TABLE tt; + SELECT pg_switch_wal(); + CHECKPOINT; + SELECT 'finished';", + timeout => $PostgreSQL::Test::Utils::timeout_default)); +is($result[1], 'finished', 'check if checkpoint command is not blocked'); 
+ +$node_primary2->stop; +$node_standby->stop; + +# The next test depends on Perl's `kill`, which apparently is not +# portable to Windows. (It would be nice to use Test::More's `subtest`, +# but that's not in the ancient version we require.) +if ($PostgreSQL::Test::Utils::windows_os) +{ + done_testing(); + exit; +} + +# Get a slot terminated while the walsender is active +# We do this by sending SIGSTOP to the walsender. Skip this on Windows. +my $node_primary3 = PostgreSQL::Test::Cluster->new('primary3'); +$node_primary3->init(allows_streaming => 1, extra => ['--wal-segsize=1']); +$node_primary3->append_conf( + 'postgresql.conf', qq( + min_wal_size = 2MB + max_wal_size = 2MB + log_checkpoints = yes + max_slot_wal_keep_size = 1MB + )); +$node_primary3->start; +$node_primary3->safe_psql('postgres', + "SELECT pg_create_physical_replication_slot('rep3')"); +# Take backup +$backup_name = 'my_backup'; +$node_primary3->backup($backup_name); +# Create standby +my $node_standby3 = PostgreSQL::Test::Cluster->new('standby_3'); +$node_standby3->init_from_backup($node_primary3, $backup_name, + has_streaming => 1); +$node_standby3->append_conf('postgresql.conf', "primary_slot_name = 'rep3'"); +$node_standby3->start; +$node_primary3->wait_for_catchup($node_standby3); + +my $senderpid; + +# We've seen occasional cases where multiple walsender pids are still active +# at this point, apparently just due to process shutdown being slow. To avoid +# spurious failures, retry a couple times. +my $i = 0; +while (1) +{ + my ($stdout, $stderr); + + $senderpid = $node_primary3->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walsender'"); + + last if $senderpid =~ qr/^[0-9]+$/; + + diag "multiple walsenders active in iteration $i"; + + # show information about all active connections + $node_primary3->psql( + 'postgres', + "\\a\\t\nSELECT * FROM pg_stat_activity", + stdout => \$stdout, + stderr => \$stderr); + diag $stdout, $stderr; + + # unlikely that the problem would resolve after 15s, so give up at point + if ($i++ == 150) + { + # An immediate shutdown may hide evidence of a locking bug. If + # retrying didn't resolve the issue, shut down in fast mode. + $node_primary3->stop('fast'); + $node_standby3->stop('fast'); + die "could not determine walsender pid, can't continue"; + } + + usleep(100_000); +} + +like($senderpid, qr/^[0-9]+$/, "have walsender pid $senderpid"); + +my $receiverpid = $node_standby3->safe_psql('postgres', + "SELECT pid FROM pg_stat_activity WHERE backend_type = 'walreceiver'"); +like($receiverpid, qr/^[0-9]+$/, "have walreceiver pid $receiverpid"); + +$logstart = get_log_size($node_primary3); +# freeze walsender and walreceiver. Slot will still be active, but walreceiver +# won't get anything anymore. +kill 'STOP', $senderpid, $receiverpid; +advance_wal($node_primary3, 2); + +my $max_attempts = $PostgreSQL::Test::Utils::timeout_default; +while ($max_attempts-- >= 0) +{ + if ($node_primary3->log_contains( + "terminating process $senderpid to release replication slot \"rep3\"", + $logstart)) + { + ok(1, "walsender termination logged"); + last; + } + sleep 1; +} + +# Now let the walsender continue; slot should be killed now. 
+# (Must not let walreceiver run yet; otherwise the standby could start another +# one before the slot can be killed) +kill 'CONT', $senderpid; +$node_primary3->poll_query_until('postgres', + "SELECT wal_status FROM pg_replication_slots WHERE slot_name = 'rep3'", + "lost") + or die "timed out waiting for slot to be lost"; + +$max_attempts = $PostgreSQL::Test::Utils::timeout_default; +while ($max_attempts-- >= 0) +{ + if ($node_primary3->log_contains( + 'invalidating slot "rep3" because its restart_lsn', $logstart)) + { + ok(1, "slot invalidation logged"); + last; + } + sleep 1; +} + +# Now let the walreceiver continue, so that the node can be stopped cleanly +kill 'CONT', $receiverpid; + +$node_primary3->stop; +$node_standby3->stop; + +##################################### +# Advance WAL of $node by $n segments +sub advance_wal +{ + my ($node, $n) = @_; + + # Advance by $n segments (= (16 * $n) MB) on primary + for (my $i = 0; $i < $n; $i++) + { + $node->safe_psql('postgres', + "CREATE TABLE t (); DROP TABLE t; SELECT pg_switch_wal();"); + } + return; +} + +# return the size of logfile of $node in bytes +sub get_log_size +{ + my ($node) = @_; + + return (stat $node->logfile)[7]; +} + +done_testing(); diff --git a/src/test/recovery/t/020_archive_status.pl b/src/test/recovery/t/020_archive_status.pl new file mode 100644 index 0000000..2108d50 --- /dev/null +++ b/src/test/recovery/t/020_archive_status.pl @@ -0,0 +1,251 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# +# Tests related to WAL archiving and recovery. +# +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $primary = PostgreSQL::Test::Cluster->new('primary'); +$primary->init( + has_archiving => 1, + allows_streaming => 1); +$primary->append_conf('postgresql.conf', 'autovacuum = off'); +$primary->start; +my $primary_data = $primary->data_dir; + +# Temporarily use an archive_command value to make the archiver fail, +# knowing that archiving is enabled. Note that we cannot use a command +# that does not exist as in this case the archiver process would just exit +# without reporting the failure to pg_stat_archiver. This also cannot +# use a plain "false" as that's unportable on Windows. So, instead, as +# a portable solution, use an archive command based on a command known to +# work but will fail: copy with an incorrect original path. +my $incorrect_command = + $PostgreSQL::Test::Utils::windows_os + ? qq{copy "%p_does_not_exist" "%f_does_not_exist"} + : qq{cp "%p_does_not_exist" "%f_does_not_exist"}; +$primary->safe_psql( + 'postgres', qq{ + ALTER SYSTEM SET archive_command TO '$incorrect_command'; + SELECT pg_reload_conf(); +}); + +# Save the WAL segment currently in use and switch to a new segment. +# This will be used to track the activity of the archiver. +my $segment_name_1 = $primary->safe_psql('postgres', + q{SELECT pg_walfile_name(pg_current_wal_lsn())}); +my $segment_path_1 = "pg_wal/archive_status/$segment_name_1"; +my $segment_path_1_ready = "$segment_path_1.ready"; +my $segment_path_1_done = "$segment_path_1.done"; +$primary->safe_psql( + 'postgres', q{ + CREATE TABLE mine AS SELECT generate_series(1,10) AS x; + SELECT pg_switch_wal(); + CHECKPOINT; +}); + +# Wait for an archive failure. 
+$primary->poll_query_until('postgres', + q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't') + or die "Timed out while waiting for archiving to fail"; +ok( -f "$primary_data/$segment_path_1_ready", + ".ready file exists for WAL segment $segment_name_1 waiting to be archived" +); +ok( !-f "$primary_data/$segment_path_1_done", + ".done file does not exist for WAL segment $segment_name_1 waiting to be archived" +); + +is( $primary->safe_psql( + 'postgres', q{ + SELECT archived_count, last_failed_wal + FROM pg_stat_archiver + }), + "0|$segment_name_1", + "pg_stat_archiver failed to archive $segment_name_1"); + +# Crash the cluster for the next test in charge of checking that non-archived +# WAL segments are not removed. +$primary->stop('immediate'); + +# Recovery tests for the archiving with a standby partially check +# the recovery behavior when restoring a backup taken using a +# snapshot with no pg_backup_start/stop. In this situation, +# the recovered standby should enter first crash recovery then +# switch to regular archive recovery. Note that the base backup +# is taken here so as archive_command will fail. This is necessary +# for the assumptions of the tests done with the standbys below. +$primary->backup_fs_cold('backup'); + +$primary->start; +ok( -f "$primary_data/$segment_path_1_ready", + ".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary" +); + +# Allow WAL archiving again and wait for a success. +$primary->safe_psql( + 'postgres', q{ + ALTER SYSTEM RESET archive_command; + SELECT pg_reload_conf(); +}); + +$primary->poll_query_until('postgres', + q{SELECT archived_count FROM pg_stat_archiver}, '1') + or die "Timed out while waiting for archiving to finish"; + +ok(!-f "$primary_data/$segment_path_1_ready", + ".ready file for archived WAL segment $segment_name_1 removed"); + +ok(-f "$primary_data/$segment_path_1_done", + ".done file for archived WAL segment $segment_name_1 exists"); + +is( $primary->safe_psql( + 'postgres', q{ SELECT last_archived_wal FROM pg_stat_archiver }), + $segment_name_1, + "archive success reported in pg_stat_archiver for WAL segment $segment_name_1" +); + +# Create some WAL activity and a new checkpoint so as the next standby can +# create a restartpoint. As this standby starts in crash recovery because +# of the cold backup taken previously, it needs a clean restartpoint to deal +# with existing status files. +my $segment_name_2 = $primary->safe_psql('postgres', + q{SELECT pg_walfile_name(pg_current_wal_lsn())}); +my $segment_path_2 = "pg_wal/archive_status/$segment_name_2"; +my $segment_path_2_ready = "$segment_path_2.ready"; +my $segment_path_2_done = "$segment_path_2.done"; +$primary->safe_psql( + 'postgres', q{ + INSERT INTO mine SELECT generate_series(10,20) AS x; + CHECKPOINT; +}); + +# Switch to a new segment and use the returned LSN to make sure that +# standbys have caught up to this point. +my $primary_lsn = $primary->safe_psql( + 'postgres', q{ + SELECT pg_switch_wal(); +}); + +$primary->poll_query_until('postgres', + q{ SELECT last_archived_wal FROM pg_stat_archiver }, + $segment_name_2) + or die "Timed out while waiting for archiving to finish"; + +# Test standby with archive_mode = on. 
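+# (Editor's summary, not part of the original file: the two standbys set up
+# below check the .ready/.done status files under pg_wal/archive_status.
+# With archive_mode = on, recovery removes .ready files inherited from the
+# backup and creates only .done files; with archive_mode = always, .ready
+# files are kept and created so the standby archives the segments itself.
+# The assertions are plain file-existence checks in the same style already
+# used for the primary above, e.g.:
+#
+#     ok(-f "$primary_data/$segment_path_1_done",
+#         ".done file for archived WAL segment exists");
+# )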
+my $standby1 = PostgreSQL::Test::Cluster->new('standby'); +$standby1->init_from_backup($primary, 'backup', has_restoring => 1); +$standby1->append_conf('postgresql.conf', "archive_mode = on"); +my $standby1_data = $standby1->data_dir; +$standby1->start; + +# Wait for the replay of the segment switch done previously, ensuring +# that all segments needed are restored from the archives. +$standby1->poll_query_until('postgres', + qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 } +) or die "Timed out while waiting for xlog replay on standby1"; + +$standby1->safe_psql('postgres', q{CHECKPOINT}); + +# Recovery with archive_mode=on does not keep .ready signal files inherited +# from backup. Note that this WAL segment existed in the backup. +ok( !-f "$standby1_data/$segment_path_1_ready", + ".ready file for WAL segment $segment_name_1 present in backup got removed with archive_mode=on on standby" +); + +# Recovery with archive_mode=on should not create .ready files. +# Note that this segment did not exist in the backup. +ok( !-f "$standby1_data/$segment_path_2_ready", + ".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby" +); + +# Recovery with archive_mode = on creates .done files. +ok( -f "$standby1_data/$segment_path_2_done", + ".done file for WAL segment $segment_name_2 created when archive_mode=on on standby" +); + +# Test recovery with archive_mode = always, which should always keep +# .ready files if archiving is enabled, though here we want the archive +# command to fail to persist the .ready files. Note that this node +# has inherited the archive command of the previous cold backup that +# will cause archiving failures. +my $standby2 = PostgreSQL::Test::Cluster->new('standby2'); +$standby2->init_from_backup($primary, 'backup', has_restoring => 1); +$standby2->append_conf('postgresql.conf', 'archive_mode = always'); +my $standby2_data = $standby2->data_dir; +$standby2->start; + +# Wait for the replay of the segment switch done previously, ensuring +# that all segments needed are restored from the archives. +$standby2->poll_query_until('postgres', + qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 } +) or die "Timed out while waiting for xlog replay on standby2"; + +$standby2->safe_psql('postgres', q{CHECKPOINT}); + +ok( -f "$standby2_data/$segment_path_1_ready", + ".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby" +); + +ok( -f "$standby2_data/$segment_path_2_ready", + ".ready file for WAL segment $segment_name_2 created with archive_mode=always on standby" +); + +# Reset statistics of the archiver for the next checks. +$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')}); + +# Now crash the cluster to check that recovery step does not +# remove non-archived WAL segments on a standby where archiving +# is enabled. +$standby2->stop('immediate'); +$standby2->start; + +ok( -f "$standby2_data/$segment_path_1_ready", + "WAL segment still ready to archive after crash recovery on standby with archive_mode=always" +); + +# Allow WAL archiving again, and wait for the segments to be archived. 
+$standby2->safe_psql( + 'postgres', q{ + ALTER SYSTEM RESET archive_command; + SELECT pg_reload_conf(); +}); +$standby2->poll_query_until('postgres', + q{SELECT last_archived_wal FROM pg_stat_archiver}, + $segment_name_2) + or die "Timed out while waiting for archiving to finish"; + +is( $standby2->safe_psql( + 'postgres', q{SELECT archived_count FROM pg_stat_archiver}), + '2', + "correct number of WAL segments archived from standby"); + +ok( !-f "$standby2_data/$segment_path_1_ready" + && !-f "$standby2_data/$segment_path_2_ready", + ".ready files removed after archive success with archive_mode=always on standby" +); + +ok( -f "$standby2_data/$segment_path_1_done" + && -f "$standby2_data/$segment_path_2_done", + ".done files created after archive success with archive_mode=always on standby" +); + +# Check that the archiver process calls the shell archive module's shutdown +# callback. +$standby2->append_conf('postgresql.conf', "log_min_messages = debug1"); +$standby2->reload; + +# Run a query to make sure that the reload has taken effect. +$standby2->safe_psql('postgres', q{SELECT 1}); +my $log_location = -s $standby2->logfile; + +$standby2->stop; +my $logfile = slurp_file($standby2->logfile, $log_location); +ok( $logfile =~ qr/archiver process shutting down/, + 'check shutdown callback of shell archive module'); + +done_testing(); diff --git a/src/test/recovery/t/021_row_visibility.pl b/src/test/recovery/t/021_row_visibility.pl new file mode 100644 index 0000000..aeaf37c --- /dev/null +++ b/src/test/recovery/t/021_row_visibility.pl @@ -0,0 +1,205 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Checks that snapshots on standbys behave in a minimally reasonable +# way. +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf('postgresql.conf', 'max_prepared_transactions=10'); +$node_primary->start; + +# Initialize with empty test table +$node_primary->safe_psql('postgres', + 'CREATE TABLE public.test_visibility (data text not null)'); + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create streaming standby from backup +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', 'max_prepared_transactions=10'); +$node_standby->start; + +my $psql_timeout = + IPC::Run::timer(2 * $PostgreSQL::Test::Utils::timeout_default); + +# One psql to primary and standby each, for all queries. That allows +# to check uncommitted changes being replicated and such. +my %psql_primary = (stdin => '', stdout => '', stderr => ''); +$psql_primary{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ], + '<', + \$psql_primary{stdin}, + '>', + \$psql_primary{stdout}, + '2>', + \$psql_primary{stderr}, + $psql_timeout); + +my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => ''); +$psql_standby{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ], + '<', + \$psql_standby{stdin}, + '>', + \$psql_standby{stdout}, + '2>', + \$psql_standby{stderr}, + $psql_timeout); + +# +# 1. 
Check initial data is the same +# +ok( send_query_and_wait( + \%psql_standby, + q/SELECT * FROM test_visibility ORDER BY data;/, + qr/^\(0 rows\)$/m), + 'data not visible'); + +# +# 2. Check if an INSERT is replayed and visible +# +$node_primary->psql('postgres', + "INSERT INTO test_visibility VALUES ('first insert')"); +$node_primary->wait_for_catchup($node_standby); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/first insert.*\n\(1 row\)/m), + 'insert visible'); + +# +# 3. Verify that uncommitted changes aren't visible. +# +ok( send_query_and_wait( + \%psql_primary, + q[ +BEGIN; +UPDATE test_visibility SET data = 'first update' RETURNING data; + ], + qr/^UPDATE 1$/m), + 'UPDATE'); + +$node_primary->psql('postgres', "SELECT txid_current();"); # ensure WAL flush +$node_primary->wait_for_catchup($node_standby); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/first insert.*\n\(1 row\)/m), + 'uncommitted update invisible'); + +# +# 4. That a commit turns 3. visible +# +ok(send_query_and_wait(\%psql_primary, q[COMMIT;], qr/^COMMIT$/m), 'COMMIT'); + +$node_primary->wait_for_catchup($node_standby); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/first update\n\(1 row\)$/m), + 'committed update visible'); + +# +# 5. Check that changes in prepared xacts is invisible +# +ok( send_query_and_wait( + \%psql_primary, q[ +DELETE from test_visibility; -- delete old data, so we start with clean slate +BEGIN; +INSERT INTO test_visibility VALUES('inserted in prepared will_commit'); +PREPARE TRANSACTION 'will_commit';], + qr/^PREPARE TRANSACTION$/m), + 'prepared will_commit'); + +ok( send_query_and_wait( + \%psql_primary, q[ +BEGIN; +INSERT INTO test_visibility VALUES('inserted in prepared will_abort'); +PREPARE TRANSACTION 'will_abort'; + ], + qr/^PREPARE TRANSACTION$/m), + 'prepared will_abort'); + +$node_primary->wait_for_catchup($node_standby); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/^\(0 rows\)$/m), + 'uncommitted prepared invisible'); + +# For some variation, finish prepared xacts via separate connections +$node_primary->safe_psql('postgres', "COMMIT PREPARED 'will_commit';"); +$node_primary->safe_psql('postgres', "ROLLBACK PREPARED 'will_abort';"); +$node_primary->wait_for_catchup($node_standby); + +ok( send_query_and_wait( + \%psql_standby, + q[SELECT * FROM test_visibility ORDER BY data;], + qr/will_commit.*\n\(1 row\)$/m), + 'finished prepared visible'); + +# explicitly shut down psql instances gracefully - to avoid hangs +# or worse on windows +$psql_primary{stdin} .= "\\q\n"; +$psql_primary{run}->finish; +$psql_standby{stdin} .= "\\q\n"; +$psql_standby{run}->finish; + +$node_primary->stop; +$node_standby->stop; + +# Send query, wait until string matches +sub send_query_and_wait +{ + my ($psql, $query, $untl) = @_; + my $ret; + + # send query + $$psql{stdin} .= $query; + $$psql{stdin} .= "\n"; + + # wait for query results + $$psql{run}->pump_nb(); + while (1) + { + last if $$psql{stdout} =~ /$untl/; + + if ($psql_timeout->is_expired) + { + BAIL_OUT("aborting wait: program timed out\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + if (not $$psql{run}->pumpable()) + { + BAIL_OUT("aborting wait: program died\n" + . "stream contents: >>$$psql{stdout}<<\n" + . 
"pattern searched for: $untl\n"); + return 0; + } + $$psql{run}->pump(); + } + + $$psql{stdout} = ''; + + return 1; +} + +done_testing(); diff --git a/src/test/recovery/t/022_crash_temp_files.pl b/src/test/recovery/t/022_crash_temp_files.pl new file mode 100644 index 0000000..53a55c7 --- /dev/null +++ b/src/test/recovery/t/022_crash_temp_files.pl @@ -0,0 +1,277 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test remove of temporary files after a crash. +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Config; + +if ($Config{osname} eq 'MSWin32') +{ + plan skip_all => 'tests hang on Windows'; + exit; +} + +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); + +my $node = PostgreSQL::Test::Cluster->new('node_crash'); +$node->init(); +$node->start(); + +# By default, PostgreSQL::Test::Cluster doesn't restart after crash +# Reduce work_mem to generate temporary file with a few number of rows +$node->safe_psql( + 'postgres', + q[ALTER SYSTEM SET remove_temp_files_after_crash = on; + ALTER SYSTEM SET log_connections = 1; + ALTER SYSTEM SET work_mem = '64kB'; + ALTER SYSTEM SET restart_after_crash = on; + SELECT pg_reload_conf();]); + +# create table, insert rows +$node->safe_psql('postgres', q[CREATE TABLE tab_crash (a integer UNIQUE);]); + +# Run psql, keeping session alive, so we have an alive backend to kill. +my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +my $killme = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$killme_stdin, + '>', + \$killme_stdout, + '2>', + \$killme_stderr, + $psql_timeout); + +# Get backend pid +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok( pump_until( + $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + 'acquired pid for SIGKILL'); +my $pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Open a 2nd session that will block the 1st one, using the UNIQUE constraint. +# This will prevent removal of the temporary file created by the 1st session. +my ($killme_stdin2, $killme_stdout2, $killme_stderr2) = ('', '', ''); +my $killme2 = IPC::Run::start( + [ + 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d', + $node->connstr('postgres') + ], + '<', + \$killme_stdin2, + '>', + \$killme_stdout2, + '2>', + \$killme_stderr2, + $psql_timeout); + +# Insert one tuple and leave the transaction open +$killme_stdin2 .= q[ +BEGIN; +INSERT INTO tab_crash (a) VALUES(1); +SELECT $$insert-tuple-to-lock-next-insert$$; +]; +pump_until($killme2, $psql_timeout, \$killme_stdout2, + qr/insert-tuple-to-lock-next-insert/m); +$killme_stdout2 = ''; +$killme_stderr2 = ''; + +# Run the query that generates a temporary file and that will be killed before +# it finishes. Since the query that generates the temporary file does not +# return before the connection is killed, use a SELECT before to trigger +# pump_until. +$killme_stdin .= q[ +BEGIN; +SELECT $$in-progress-before-sigkill$$; +INSERT INTO tab_crash (a) SELECT i FROM generate_series(1, 5000) s(i); +]; +ok( pump_until( + $killme, $psql_timeout, + \$killme_stdout, qr/in-progress-before-sigkill/m), + 'insert in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Wait until the batch insert gets stuck on the lock. +$killme_stdin2 .= q[ +DO $c$ +DECLARE + c INT; +BEGIN + LOOP + SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = ] . $pid + . 
q[ AND NOT granted; + IF c > 0 THEN + EXIT; + END IF; + END LOOP; +END; $c$; +SELECT $$insert-tuple-lock-waiting$$; +]; + +pump_until($killme2, $psql_timeout, \$killme_stdout2, + qr/insert-tuple-lock-waiting/m); +$killme_stdout2 = ''; +$killme_stderr2 = ''; + +# Kill with SIGKILL +my $ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, 'killed process with KILL'); + +# Close that psql session +$killme->finish; + +# Wait till the other session reports failure, ensuring that the postmaster +# has noticed its dead child and begun a restart cycle. +$killme_stdin2 .= qq[ +SELECT pg_sleep($PostgreSQL::Test::Utils::timeout_default); +]; +ok( pump_until( + $killme2, + $psql_timeout, + \$killme_stderr2, + qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "second psql session died successfully after SIGKILL"); +$killme2->finish; + +# Wait till server finishes restarting +$node->poll_query_until('postgres', undef, ''); + +# Check for temporary files +is( $node->safe_psql( + 'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'), + qq(0), + 'no temporary files'); + +# +# Test old behavior (don't remove temporary files after crash) +# +$node->safe_psql( + 'postgres', + q[ALTER SYSTEM SET remove_temp_files_after_crash = off; + SELECT pg_reload_conf();]); + +# Restart psql session +($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', ''); +$killme->run(); + +# Get backend pid +$killme_stdin .= q[ +SELECT pg_backend_pid(); +]; +ok( pump_until( + $killme, $psql_timeout, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m), + 'acquired pid for SIGKILL'); +$pid = $killme_stdout; +chomp($pid); +$killme_stdout = ''; +$killme_stderr = ''; + +# Restart the 2nd psql session +($killme_stdin2, $killme_stdout2, $killme_stderr2) = ('', '', ''); +$killme2->run(); + +# Insert one tuple and leave the transaction open +$killme_stdin2 .= q[ +BEGIN; +INSERT INTO tab_crash (a) VALUES(1); +SELECT $$insert-tuple-to-lock-next-insert$$; +]; +pump_until($killme2, $psql_timeout, \$killme_stdout2, + qr/insert-tuple-to-lock-next-insert/m); +$killme_stdout2 = ''; +$killme_stderr2 = ''; + +# Run the query that generates a temporary file and that will be killed before +# it finishes. Since the query that generates the temporary file does not +# return before the connection is killed, use a SELECT before to trigger +# pump_until. +$killme_stdin .= q[ +BEGIN; +SELECT $$in-progress-before-sigkill$$; +INSERT INTO tab_crash (a) SELECT i FROM generate_series(1, 5000) s(i); +]; +ok( pump_until( + $killme, $psql_timeout, + \$killme_stdout, qr/in-progress-before-sigkill/m), + 'insert in-progress-before-sigkill'); +$killme_stdout = ''; +$killme_stderr = ''; + +# Wait until the batch insert gets stuck on the lock. +$killme_stdin2 .= q[ +DO $c$ +DECLARE + c INT; +BEGIN + LOOP + SELECT COUNT(*) INTO c FROM pg_locks WHERE pid = ] . $pid + . 
q[ AND NOT granted; + IF c > 0 THEN + EXIT; + END IF; + END LOOP; +END; $c$; +SELECT $$insert-tuple-lock-waiting$$; +]; + +pump_until($killme2, $psql_timeout, \$killme_stdout2, + qr/insert-tuple-lock-waiting/m); +$killme_stdout2 = ''; +$killme_stderr2 = ''; + +# Kill with SIGKILL +$ret = PostgreSQL::Test::Utils::system_log('pg_ctl', 'kill', 'KILL', $pid); +is($ret, 0, 'killed process with KILL'); + +# Close that psql session +$killme->finish; + +# Wait till the other session reports failure, ensuring that the postmaster +# has noticed its dead child and begun a restart cycle. +$killme_stdin2 .= qq[ +SELECT pg_sleep($PostgreSQL::Test::Utils::timeout_default); +]; +ok( pump_until( + $killme2, + $psql_timeout, + \$killme_stderr2, + qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost|could not send data to server/m + ), + "second psql session died successfully after SIGKILL"); +$killme2->finish; + +# Wait till server finishes restarting +$node->poll_query_until('postgres', undef, ''); + +# Check for temporary files -- should be there +is( $node->safe_psql( + 'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'), + qq(1), + 'one temporary file'); + +# Restart should remove the temporary files +$node->restart(); + +# Check the temporary files -- should be gone +is( $node->safe_psql( + 'postgres', 'SELECT COUNT(1) FROM pg_ls_dir($$base/pgsql_tmp$$)'), + qq(0), + 'temporary file was removed'); + +$node->stop(); + +done_testing(); diff --git a/src/test/recovery/t/023_pitr_prepared_xact.pl b/src/test/recovery/t/023_pitr_prepared_xact.pl new file mode 100644 index 0000000..39e8a8f --- /dev/null +++ b/src/test/recovery/t/023_pitr_prepared_xact.pl @@ -0,0 +1,91 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test for point-in-time-recovery (PITR) with prepared transactions +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Compare; + +# Initialize and start primary node with WAL archiving +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(has_archiving => 1, allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', qq{ +max_prepared_transactions = 10}); +$node_primary->start; + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Initialize node for PITR targeting a very specific restore point, just +# after a PREPARE TRANSACTION is issued so as we finish with a promoted +# node where this 2PC transaction needs an explicit COMMIT PREPARED. +my $node_pitr = PostgreSQL::Test::Cluster->new('node_pitr'); +$node_pitr->init_from_backup( + $node_primary, $backup_name, + standby => 0, + has_restoring => 1); +$node_pitr->append_conf( + 'postgresql.conf', qq{ +recovery_target_name = 'rp' +recovery_target_action = 'promote'}); + +# Workload with a prepared transaction and the target restore point. +$node_primary->psql( + 'postgres', qq{ +CREATE TABLE foo(i int); +BEGIN; +INSERT INTO foo VALUES(1); +PREPARE TRANSACTION 'fooinsert'; +SELECT pg_create_restore_point('rp'); +INSERT INTO foo VALUES(2); +}); + +# Find next WAL segment to be archived +my $walfile_to_be_archived = $node_primary->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn());"); + +# Make WAL segment eligible for archival +$node_primary->safe_psql('postgres', 'SELECT pg_switch_wal()'); + +# Wait until the WAL segment has been archived. 
+my $archive_wait_query = + "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;"; +$node_primary->poll_query_until('postgres', $archive_wait_query) + or die "Timed out while waiting for WAL segment to be archived"; +my $last_archived_wal_file = $walfile_to_be_archived; + +# Now start the PITR node. +$node_pitr->start; + +# Wait until the PITR node exits recovery. +$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';") + or die "Timed out while waiting for PITR promotion"; + +# Commit the prepared transaction in the latest timeline and check its +# result. There should only be one row in the table, coming from the +# prepared transaction. The row from the INSERT after the restore point +# should not show up, since our recovery target was older than the second +# INSERT done. +$node_pitr->psql('postgres', qq{COMMIT PREPARED 'fooinsert';}); +my $result = $node_pitr->safe_psql('postgres', "SELECT * FROM foo;"); +is($result, qq{1}, "check table contents after COMMIT PREPARED"); + +# Insert more data and do a checkpoint. These should be generated on the +# timeline chosen after the PITR promotion. +$node_pitr->psql( + 'postgres', qq{ +INSERT INTO foo VALUES(3); +CHECKPOINT; +}); + +# Enforce recovery, the checkpoint record generated previously should +# still be found. +$node_pitr->stop('immediate'); +$node_pitr->start; + +done_testing(); diff --git a/src/test/recovery/t/024_archive_recovery.pl b/src/test/recovery/t/024_archive_recovery.pl new file mode 100644 index 0000000..ce347e0 --- /dev/null +++ b/src/test/recovery/t/024_archive_recovery.pl @@ -0,0 +1,105 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test for archive recovery of WAL generated with wal_level=minimal +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Time::HiRes qw(usleep); + +# Initialize and start node with wal_level = replica and WAL archiving +# enabled. +my $node = PostgreSQL::Test::Cluster->new('orig'); +$node->init(has_archiving => 1, allows_streaming => 1); +my $replica_config = q[ +wal_level = replica +archive_mode = on +max_wal_senders = 10 +hot_standby = off +]; +$node->append_conf('postgresql.conf', $replica_config); +$node->start; + +# Take backup +my $backup_name = 'my_backup'; +$node->backup($backup_name); + +# Restart node with wal_level = minimal and WAL archiving disabled +# to generate WAL with that setting. Note that such WAL has not been +# archived yet at this moment because WAL archiving is not enabled. +$node->append_conf( + 'postgresql.conf', q[ +wal_level = minimal +archive_mode = off +max_wal_senders = 0 +]); +$node->restart; + +# Restart node with wal_level = replica and WAL archiving enabled +# to archive WAL previously generated with wal_level = minimal. +# We ensure the WAL file containing the record indicating the change +# of wal_level to minimal is archived by checking pg_stat_archiver. +$node->append_conf('postgresql.conf', $replica_config); +$node->restart; + +# Find next WAL segment to be archived +my $walfile_to_be_archived = $node->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn());"); + +# Make WAL segment eligible for archival +$node->safe_psql('postgres', 'SELECT pg_switch_wal()'); +my $archive_wait_query = + "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver;"; + +# Wait until the WAL segment has been archived. 
+$node->poll_query_until('postgres', $archive_wait_query) + or die "Timed out while waiting for WAL segment to be archived"; + +$node->stop; + +# Initialize new node from backup, and start archive recovery. Check that +# archive recovery fails with an error when it detects the WAL record +# indicating the change of wal_level to minimal and node stops. +sub test_recovery_wal_level_minimal +{ + my ($node_name, $node_text, $standby_setting) = @_; + + my $recovery_node = PostgreSQL::Test::Cluster->new($node_name); + $recovery_node->init_from_backup( + $node, $backup_name, + has_restoring => 1, + standby => $standby_setting); + + # Use run_log instead of recovery_node->start because this test expects + # that the server ends with an error during recovery. + run_log( + [ + 'pg_ctl', '-D', + $recovery_node->data_dir, '-l', + $recovery_node->logfile, 'start' + ]); + + # wait for postgres to terminate + foreach my $i (0 .. 10 * $PostgreSQL::Test::Utils::timeout_default) + { + last if !-f $recovery_node->data_dir . '/postmaster.pid'; + usleep(100_000); + } + + # Confirm that the archive recovery fails with an expected error + my $logfile = slurp_file($recovery_node->logfile()); + ok( $logfile =~ + qr/FATAL: .* WAL was generated with wal_level=minimal, cannot continue recovering/, + "$node_text ends with an error because it finds WAL generated with wal_level=minimal" + ); +} + +# Test for archive recovery +test_recovery_wal_level_minimal('archive_recovery', 'archive recovery', 0); + +# Test for standby server +test_recovery_wal_level_minimal('standby', 'standby', 1); + +done_testing(); diff --git a/src/test/recovery/t/025_stuck_on_old_timeline.pl b/src/test/recovery/t/025_stuck_on_old_timeline.pl new file mode 100644 index 0000000..fd82124 --- /dev/null +++ b/src/test/recovery/t/025_stuck_on_old_timeline.pl @@ -0,0 +1,112 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Testing streaming replication where standby is promoted and a new cascading +# standby (without WAL) is connected to the promoted standby. Both archiving +# and streaming are enabled, but only the history file is available from the +# archive, so the WAL files all have to be streamed. Test that the cascading +# standby can follow the new primary (promoted standby). +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; + +use File::Basename; +use FindBin; +use Test::More; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); + +# Set up an archive command that will copy the history file but not the WAL +# files. No real archive command should behave this way; the point is to +# simulate a race condition where the new cascading standby starts up after +# the timeline history file reaches the archive but before any of the WAL files +# get there. +$node_primary->init(allows_streaming => 1, has_archiving => 1); + +# Note: consistent use of forward slashes here avoids any escaping problems +# that arise from use of backslashes. 
That means we need to double-quote all +# the paths in the archive_command +my $perlbin = $^X; +$perlbin =~ s!\\!/!g if $PostgreSQL::Test::Utils::windows_os; +my $archivedir_primary = $node_primary->archive_dir; +$archivedir_primary =~ s!\\!/!g if $PostgreSQL::Test::Utils::windows_os; +$node_primary->append_conf( + 'postgresql.conf', qq( +archive_command = '"$perlbin" "$FindBin::RealBin/cp_history_files" "%p" "$archivedir_primary/%f"' +wal_keep_size=128MB +)); +# Make sure that Msys perl doesn't complain about difficulty in setting locale +# when called from the archive_command. +local $ENV{PERL_BADLANG} = 0; +$node_primary->start; + +# Take backup from primary +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Create streaming standby linking to primary +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup( + $node_primary, $backup_name, + allows_streaming => 1, + has_streaming => 1, + has_archiving => 1); +$node_standby->start; + +# Take backup of standby, use -Xnone so that pg_wal is empty. +$node_standby->backup($backup_name, backup_options => ['-Xnone']); + +# Create cascading standby but don't start it yet. +# Must set up both streaming and archiving. +my $node_cascade = PostgreSQL::Test::Cluster->new('cascade'); +$node_cascade->init_from_backup($node_standby, $backup_name, + has_streaming => 1); +$node_cascade->enable_restoring($node_primary); +$node_cascade->append_conf( + 'postgresql.conf', qq( +recovery_target_timeline='latest' +)); + +# Promote the standby. +$node_standby->promote; + +# Wait for promotion to complete +$node_standby->poll_query_until('postgres', "SELECT NOT pg_is_in_recovery();") + or die "Timed out while waiting for promotion"; + +# Find next WAL segment to be archived +my $walfile_to_be_archived = $node_standby->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn());"); + +# Make WAL segment eligible for archival +$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()'); + +# Wait until the WAL segment has been archived. +# Since the history file gets created on promotion and is archived before any +# WAL segment, this is enough to guarantee that the history file was +# archived. +my $archive_wait_query = + "SELECT '$walfile_to_be_archived' <= last_archived_wal FROM pg_stat_archiver"; +$node_standby->poll_query_until('postgres', $archive_wait_query) + or die "Timed out while waiting for WAL segment to be archived"; +my $last_archived_wal_file = $walfile_to_be_archived; + +# Start cascade node +$node_cascade->start; + +# Create some content on promoted standby and check its presence on the +# cascading standby. +$node_standby->safe_psql('postgres', "CREATE TABLE tab_int AS SELECT 1 AS a"); + +# Wait for the replication to catch up +$node_standby->wait_for_catchup($node_cascade); + +# Check that cascading standby has the new content +my $result = + $node_cascade->safe_psql('postgres', "SELECT count(*) FROM tab_int"); +print "cascade: $result\n"; +is($result, 1, 'check streamed content on cascade standby'); + +done_testing(); diff --git a/src/test/recovery/t/026_overwrite_contrecord.pl b/src/test/recovery/t/026_overwrite_contrecord.pl new file mode 100644 index 0000000..78feccd --- /dev/null +++ b/src/test/recovery/t/026_overwrite_contrecord.pl @@ -0,0 +1,109 @@ +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Tests for already-propagated WAL segments ending in incomplete WAL records. 
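# [Editor's illustration -- not part of the patch. The script below first
# fills the current WAL segment and then emits a large logical message so
# that the record spills into the next segment. A minimal sketch of that
# boundary check, assuming $node is a running PostgreSQL::Test::Cluster
# instance whose current segment is already nearly full, could read:
#
#   my $seg_before = $node->safe_psql('postgres',
#       'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
#   $node->safe_psql('postgres',
#       q{SELECT pg_logical_emit_message(true, 'filler', repeat('x', 500000))});
#   my $seg_after = $node->safe_psql('postgres',
#       'SELECT pg_walfile_name(pg_current_wal_insert_lsn())');
#   ok($seg_before ne $seg_after, 'record crossed a WAL segment boundary');
# ]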
+ +use strict; +use warnings; + +use FindBin; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +# Test: Create a physical replica that's missing the last WAL file, +# then restart the primary to create a divergent WAL file and observe +# that the replica replays the "overwrite contrecord" from that new +# file and the standby promotes successfully. + +my $node = PostgreSQL::Test::Cluster->new('primary'); +$node->init(allows_streaming => 1); +# We need these settings for stability of WAL behavior. +$node->append_conf( + 'postgresql.conf', qq( +autovacuum = off +wal_keep_size = 1GB +)); +$node->start; + +$node->safe_psql('postgres', 'create table filler (a int, b text)'); + +# Now consume all remaining room in the current WAL segment, leaving +# space enough only for the start of a largish record. +$node->safe_psql( + 'postgres', q{ +DO $$ +DECLARE + wal_segsize int := setting::int FROM pg_settings WHERE name = 'wal_segment_size'; + remain int; + iters int := 0; +BEGIN + LOOP + INSERT into filler + select g, repeat(md5(g::text), (random() * 60 + 1)::int) + from generate_series(1, 10) g; + + remain := wal_segsize - (pg_current_wal_insert_lsn() - '0/0') % wal_segsize; + IF remain < 2 * setting::int from pg_settings where name = 'block_size' THEN + RAISE log 'exiting after % iterations, % bytes to end of WAL segment', iters, remain; + EXIT; + END IF; + iters := iters + 1; + END LOOP; +END +$$; +}); + +my $initfile = $node->safe_psql('postgres', + 'SELECT pg_walfile_name(pg_current_wal_insert_lsn())'); +$node->safe_psql('postgres', + qq{SELECT pg_logical_emit_message(true, 'test 026', repeat('xyzxz', 123456))} +); +#$node->safe_psql('postgres', qq{create table foo ()}); +my $endfile = $node->safe_psql('postgres', + 'SELECT pg_walfile_name(pg_current_wal_insert_lsn())'); +ok($initfile ne $endfile, "$initfile differs from $endfile"); + +# Now stop abruptly, to avoid a stop checkpoint. We can remove the tail file +# afterwards, and on startup the large message should be overwritten with new +# contents +$node->stop('immediate'); + +unlink $node->basedir . "/pgdata/pg_wal/$endfile" + or die "could not unlink " . $node->basedir . "/pgdata/pg_wal/$endfile: $!"; + +# OK, create a standby at this spot. 
+$node->backup_fs_cold('backup'); +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node, 'backup', has_streaming => 1); + +$node_standby->start; +$node->start; + +$node->safe_psql('postgres', + qq{create table foo (a text); insert into foo values ('hello')}); +$node->safe_psql('postgres', + qq{SELECT pg_logical_emit_message(true, 'test 026', 'AABBCC')}); + +my $until_lsn = $node->safe_psql('postgres', "SELECT pg_current_wal_lsn()"); +my $caughtup_query = + "SELECT '$until_lsn'::pg_lsn <= pg_last_wal_replay_lsn()"; +$node_standby->poll_query_until('postgres', $caughtup_query) + or die "Timed out while waiting for standby to catch up"; + +ok($node_standby->safe_psql('postgres', 'select * from foo') eq 'hello', + 'standby replays past overwritten contrecord'); + +# Verify message appears in standby's log +my $log = slurp_file($node_standby->logfile); +like( + $log, + qr[successfully skipped missing contrecord at], + "found log line in standby"); + +# Verify promotion is successful +$node_standby->promote; + +$node->stop; +$node_standby->stop; + +done_testing(); diff --git a/src/test/recovery/t/027_stream_regress.pl b/src/test/recovery/t/027_stream_regress.pl new file mode 100644 index 0000000..69d6ddf --- /dev/null +++ b/src/test/recovery/t/027_stream_regress.pl @@ -0,0 +1,113 @@ +# Run the standard regression tests with streaming replication +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Basename; + +# Initialize primary node +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); + +# Increase some settings that Cluster->new makes too low by default. +$node_primary->adjust_conf('postgresql.conf', 'max_connections', '25'); +$node_primary->append_conf('postgresql.conf', + 'max_prepared_transactions = 10'); +# We'll stick with Cluster->new's small default shared_buffers, but since that +# makes synchronized seqscans more probable, it risks changing the results of +# some test queries. Disable synchronized seqscans to prevent that. +$node_primary->append_conf('postgresql.conf', 'synchronize_seqscans = off'); + +# WAL consistency checking is resource intensive so require opt-in with the +# PG_TEST_EXTRA environment variable. +if ( $ENV{PG_TEST_EXTRA} + && $ENV{PG_TEST_EXTRA} =~ m/\bwal_consistency_checking\b/) +{ + $node_primary->append_conf('postgresql.conf', + 'wal_consistency_checking = all'); +} + +$node_primary->start; +is( $node_primary->psql( + 'postgres', + qq[SELECT pg_create_physical_replication_slot('standby_1');]), + 0, + 'physical slot created on primary'); +my $backup_name = 'my_backup'; + +# Take backup +$node_primary->backup($backup_name); + +# Create streaming standby linking to primary +my $node_standby_1 = PostgreSQL::Test::Cluster->new('standby_1'); +$node_standby_1->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby_1->append_conf('postgresql.conf', + "primary_slot_name = standby_1"); +$node_standby_1->append_conf('postgresql.conf', + 'max_standby_streaming_delay = 600s'); +$node_standby_1->start; + +my $dlpath = dirname($ENV{REGRESS_SHLIB}); +my $outputdir = $PostgreSQL::Test::Utils::tmp_check; + +# Run the regression tests against the primary. +my $extra_opts = $ENV{EXTRA_REGRESS_OPTS} || ""; +my $rc = + system($ENV{PG_REGRESS} + . " $extra_opts " + . "--dlpath=\"$dlpath\" " + . "--bindir= " + . "--host=" + . $node_primary->host . " " + . "--port=" + . 
$node_primary->port . " " + . "--schedule=../regress/parallel_schedule " + . "--max-concurrent-tests=20 " + . "--inputdir=../regress " + . "--outputdir=\"$outputdir\""); +if ($rc != 0) +{ + # Dump out the regression diffs file, if there is one + my $diffs = "$outputdir/regression.diffs"; + if (-e $diffs) + { + print "=== dumping $diffs ===\n"; + print slurp_file($diffs); + print "=== EOF ===\n"; + } +} +is($rc, 0, 'regression tests pass'); + +# Clobber all sequences with their next value, so that we don't have +# differences between nodes due to caching. +$node_primary->psql('regression', + "select setval(seqrelid, nextval(seqrelid)) from pg_sequence"); + +# Wait for standby to catch up +$node_primary->wait_for_catchup($node_standby_1, 'replay', + $node_primary->lsn('insert')); + +# Perform a logical dump of primary and standby, and check that they match +command_ok( + [ + 'pg_dumpall', '-f', $outputdir . '/primary.dump', + '--no-sync', '-p', $node_primary->port, + '--no-unlogged-table-data' # if unlogged, standby has schema only + ], + 'dump primary server'); +command_ok( + [ + 'pg_dumpall', '-f', $outputdir . '/standby.dump', + '--no-sync', '-p', $node_standby_1->port + ], + 'dump standby server'); +command_ok( + [ 'diff', $outputdir . '/primary.dump', $outputdir . '/standby.dump' ], + 'compare primary and standby dumps'); + +$node_standby_1->stop; +$node_primary->stop; + +done_testing(); diff --git a/src/test/recovery/t/028_pitr_timelines.pl b/src/test/recovery/t/028_pitr_timelines.pl new file mode 100644 index 0000000..bad02ed --- /dev/null +++ b/src/test/recovery/t/028_pitr_timelines.pl @@ -0,0 +1,176 @@ +# Copyright (c) 2022, PostgreSQL Global Development Group + +# Test recovering to a point-in-time using WAL archive, such that the +# target point is physically in a WAL segment with a higher TLI than +# the target point's TLI. For example, imagine that the following WAL +# segments exist in the WAL archive: +# +# 000000010000000000000001 +# 000000010000000000000002 +# 000000020000000000000003 +# +# The timeline switch happened in the middle of WAL segment 3, but it +# was never archived on timeline 1. The first half of +# 000000020000000000000003 contains the WAL from timeline 1 up to the +# point where the timeline switch happened. If you now perform +# archive recovery with recovery target point in that first half of +# segment 3, archive recovery will find the WAL up to that point in +# segment 000000020000000000000003, but it will not follow the +# timeline switch to timeline 2, and creates a timeline switching +# end-of-recovery record with TLI 1 -> 3. That's what this test case +# tests. +# +# The comments below contain lists of WAL segments at different points +# in the tests, to make it easier to follow along. They are correct +# as of this writing, but the exact WAL segment numbers could change +# if the backend logic for when it switches to a new segment changes. +# The actual checks are not sensitive to that. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Compare; + +# Initialize and start primary node with WAL archiving +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(has_archiving => 1, allows_streaming => 1); +$node_primary->start; + +# Take a backup. +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +# Workload with some transactions, and the target restore point. 
+$node_primary->psql( + 'postgres', qq{ +CREATE TABLE foo(i int); +INSERT INTO foo VALUES(1); +SELECT pg_create_restore_point('rp'); +INSERT INTO foo VALUES(2); +}); + +# Contents of the WAL archive at this point: +# +# 000000010000000000000001 +# 000000010000000000000002 +# 000000010000000000000002.00000028.backup +# +# The operations on the test table and the restore point went into WAL +# segment 3, but it hasn't been archived yet. + +# Start a standby node, and wait for it to catch up. +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup( + $node_primary, $backup_name, + standby => 1, + has_streaming => 1, + has_archiving => 1, + has_restoring => 0); +$node_standby->append_conf('postgresql.conf', 'archive_mode = always'); +$node_standby->start; +$node_primary->wait_for_catchup($node_standby); + +# Check that it's really caught up. +my $result = $node_standby->safe_psql('postgres', "SELECT max(i) FROM foo;"); +is($result, qq{2}, "check table contents after archive recovery"); + +# Kill the old primary, before it archives the most recent WAL segment that +# contains all the INSERTs. +$node_primary->stop('immediate'); + +# Promote the standby, and switch WAL so that it archives a WAL segment +# that contains all the INSERTs, on a new timeline. +$node_standby->promote; + +# Find next WAL segment to be archived. +my $walfile_to_be_archived = $node_standby->safe_psql('postgres', + "SELECT pg_walfile_name(pg_current_wal_lsn());"); + +# Make WAL segment eligible for archival +$node_standby->safe_psql('postgres', 'SELECT pg_switch_wal()'); + +# We don't need the standby anymore, request shutdown. The server will +# finish archiving all the WAL on timeline 2 before it exits. +$node_standby->stop; + +# Contents of the WAL archive at this point: +# +# 000000010000000000000001 +# 000000010000000000000002 +# 000000010000000000000002.00000028.backup +# 000000010000000000000003.partial +# 000000020000000000000003 +# 00000002.history +# +# The operations on the test table and the restore point are in +# segment 3. They are part of timeline 1, but were not archived by +# the primary yet. However, they were copied into the beginning of +# segment 000000020000000000000003, before the timeline switching +# record. (They are also present in the +# 000000010000000000000003.partial file, but .partial files are not +# used automatically.) + +# Now test PITR to the recovery target. It should find the WAL in +# segment 000000020000000000000003, but not follow the timeline switch +# to timeline 2. +my $node_pitr = PostgreSQL::Test::Cluster->new('node_pitr'); +$node_pitr->init_from_backup( + $node_primary, $backup_name, + standby => 0, + has_restoring => 1); +$node_pitr->append_conf( + 'postgresql.conf', qq{ +recovery_target_name = 'rp' +recovery_target_action = 'promote' +}); + +$node_pitr->start; + +# Wait until recovery finishes. +$node_pitr->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';") + or die "Timed out while waiting for PITR promotion"; + +# Check that we see the data we expect. +$result = $node_pitr->safe_psql('postgres', "SELECT max(i) FROM foo;"); +is($result, qq{1}, "check table contents after point-in-time recovery"); + +# Insert a row so that we can check later that we successfully recover +# back to this timeline. +$node_pitr->safe_psql('postgres', "INSERT INTO foo VALUES(3);"); + +# Wait for the archiver to be running. The startup process might have yet to +# exit, in which case the postmaster has not started the archiver. 
If we
+# stop() without an archiver, the archive will be incomplete.
+$node_pitr->poll_query_until('postgres',
+ "SELECT true FROM pg_stat_activity WHERE backend_type = 'archiver';")
+ or die "Timed out while waiting for archiver to start";
+
+# Stop the node. This archives the last segment.
+$node_pitr->stop();
+
+# Test archive recovery on the timeline created by the PITR. This
+# replays the end-of-recovery record that switches from timeline 1 to
+# 3.
+my $node_pitr2 = PostgreSQL::Test::Cluster->new('node_pitr2');
+$node_pitr2->init_from_backup(
+ $node_primary, $backup_name,
+ standby => 0,
+ has_restoring => 1);
+$node_pitr2->append_conf(
+ 'postgresql.conf', qq{
+recovery_target_action = 'promote'
+});
+
+$node_pitr2->start;
+
+# Wait until recovery finishes.
+$node_pitr2->poll_query_until('postgres', "SELECT pg_is_in_recovery() = 'f';")
+ or die "Timed out while waiting for PITR promotion";
+
+# Verify that we can see the row inserted after the PITR.
+$result = $node_pitr2->safe_psql('postgres', "SELECT max(i) FROM foo;");
+is($result, qq{3}, "check table contents after point-in-time recovery");
+
+done_testing();
diff --git a/src/test/recovery/t/029_stats_restart.pl b/src/test/recovery/t/029_stats_restart.pl
new file mode 100644
index 0000000..1bf7b56
--- /dev/null
+++ b/src/test/recovery/t/029_stats_restart.pl
@@ -0,0 +1,344 @@
+# Copyright (c) 2021-2022, PostgreSQL Global Development Group
+
+# Tests statistics handling around restarts, including handling of crashes and
+# invalid stats files, as well as restoring stats after "normal" restarts.
+
+use strict;
+use warnings;
+use PostgreSQL::Test::Cluster;
+use PostgreSQL::Test::Utils;
+use Test::More;
+use File::Copy;
+
+my $node = PostgreSQL::Test::Cluster->new('primary');
+$node->init(allows_streaming => 1);
+$node->append_conf('postgresql.conf', "track_functions = 'all'");
+$node->start;
+
+my $connect_db = 'postgres';
+my $db_under_test = 'test';
+
+# create test objects
+$node->safe_psql($connect_db, "CREATE DATABASE $db_under_test");
+$node->safe_psql($db_under_test,
+ "CREATE TABLE tab_stats_crash_discard_test1 AS SELECT generate_series(1,100) AS a"
+);
+$node->safe_psql($db_under_test,
+ "CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE"
+);
+
+# collect object oids
+my $dboid = $node->safe_psql($db_under_test,
+ "SELECT oid FROM pg_database WHERE datname = '$db_under_test'");
+my $funcoid = $node->safe_psql($db_under_test,
+ "SELECT 'func_stats_crash_discard1()'::regprocedure::oid");
+my $tableoid = $node->safe_psql($db_under_test,
+ "SELECT 'tab_stats_crash_discard_test1'::regclass::oid");
+
+# generate stats and flush them
+trigger_funcrel_stat();
+
+# verify stats objects exist
+my $sect = "initial";
+is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
+is(have_stats('function', $dboid, $funcoid),
+ 't', "$sect: function stats do exist");
+is(have_stats('relation', $dboid, $tableoid),
+ 't', "$sect: relation stats do exist");
+
+# regular shutdown
+$node->stop();
+
+# backup stats files
+my $statsfile = $PostgreSQL::Test::Utils::tmp_check . '/' .
"discard_stats1"; +ok(!-f "$statsfile", "backup statsfile cannot already exist"); + +my $datadir = $node->data_dir(); +my $og_stats = "$datadir/pg_stat/pgstat.stat"; +ok(-f "$og_stats", "origin stats file must exist"); +copy($og_stats, $statsfile) or die "Copy failed: $!"; + + +## test discarding of stats file after crash etc + +$node->start; + +$sect = "copy"; +is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); +is(have_stats('function', $dboid, $funcoid), + 't', "$sect: function stats do exist"); +is(have_stats('relation', $dboid, $tableoid), + 't', "$sect: relation stats do exist"); + +$node->stop('immediate'); + +ok(!-f "$og_stats", "no stats file should exist after immediate shutdown"); + +# copy the old stats back to test we discard stats after crash restart +copy($statsfile, $og_stats) or die "Copy failed: $!"; + +$node->start; + +# stats should have been discarded +$sect = "post immediate"; +is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist"); +is(have_stats('function', $dboid, $funcoid), + 'f', "$sect: function stats do exist"); +is(have_stats('relation', $dboid, $tableoid), + 'f', "$sect: relation stats do not exist"); + +# get rid of backup statsfile +unlink $statsfile or die "cannot unlink $statsfile $!"; + + +# generate new stats and flush them +trigger_funcrel_stat(); + +$sect = "post immediate, new"; +is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist"); +is(have_stats('function', $dboid, $funcoid), + 't', "$sect: function stats do exist"); +is(have_stats('relation', $dboid, $tableoid), + 't', "$sect: relation stats do exist"); + +# regular shutdown +$node->stop(); + + +## check an invalid stats file is handled + +overwrite_file($og_stats, "ZZZZZZZZZZZZZ"); + +# normal startup and no issues despite invalid stats file +$node->start; + +# no stats present due to invalid stats file +$sect = "invalid_overwrite"; +is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist"); +is(have_stats('function', $dboid, $funcoid), + 'f', "$sect: function stats do not exist"); +is(have_stats('relation', $dboid, $tableoid), + 'f', "$sect: relation stats do not exist"); + + +## check invalid stats file starting with valid contents, but followed by +## invalid content is handled. + +trigger_funcrel_stat(); +$node->stop; +append_file($og_stats, "XYZ"); +$node->start; + +$sect = "invalid_append"; +is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist"); +is(have_stats('function', $dboid, $funcoid), + 'f', "$sect: function stats do not exist"); +is(have_stats('relation', $dboid, $tableoid), + 'f', "$sect: relation stats do not exist"); + + +## checks related to stats persistency around restarts and resets + +# Ensure enough checkpoints to protect against races for test after reset, +# even on very slow machines. 
+$node->safe_psql($connect_db, "CHECKPOINT; CHECKPOINT;"); + + +## check checkpoint and wal stats are incremented due to restart + +my $ckpt_start = checkpoint_stats(); +my $wal_start = wal_stats(); +$node->restart; + +$sect = "post restart"; +my $ckpt_restart = checkpoint_stats(); +my $wal_restart = wal_stats(); + +cmp_ok( + $ckpt_start->{count}, '<', + $ckpt_restart->{count}, + "$sect: increased checkpoint count"); +cmp_ok( + $wal_start->{records}, '<', + $wal_restart->{records}, + "$sect: increased wal record count"); +cmp_ok($wal_start->{bytes}, '<', $wal_restart->{bytes}, + "$sect: increased wal bytes"); +is( $ckpt_start->{reset}, + $ckpt_restart->{reset}, + "$sect: checkpoint stats_reset equal"); +is($wal_start->{reset}, $wal_restart->{reset}, + "$sect: wal stats_reset equal"); + + +## Check that checkpoint stats are reset, WAL stats aren't affected + +$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('bgwriter')"); + +$sect = "post ckpt reset"; +my $ckpt_reset = checkpoint_stats(); +my $wal_ckpt_reset = wal_stats(); + +cmp_ok($ckpt_restart->{count}, + '>', $ckpt_reset->{count}, "$sect: checkpoint count smaller"); +cmp_ok($ckpt_start->{reset}, 'lt', $ckpt_reset->{reset}, + "$sect: stats_reset newer"); + +cmp_ok( + $wal_restart->{records}, + '<=', + $wal_ckpt_reset->{records}, + "$sect: wal record count not affected by reset"); +is( $wal_start->{reset}, + $wal_ckpt_reset->{reset}, + "$sect: wal stats_reset equal"); + + +## check that checkpoint stats stay reset after restart + +$node->restart; + +$sect = "post ckpt reset & restart"; +my $ckpt_restart_reset = checkpoint_stats(); +my $wal_restart2 = wal_stats(); + +# made sure above there's enough checkpoints that this will be stable even on slow machines +cmp_ok( + $ckpt_restart_reset->{count}, + '<', + $ckpt_restart->{count}, + "$sect: checkpoint still reset"); +is($ckpt_restart_reset->{reset}, + $ckpt_reset->{reset}, "$sect: stats_reset same"); + +cmp_ok( + $wal_ckpt_reset->{records}, + '<', + $wal_restart2->{records}, + "$sect: increased wal record count"); +cmp_ok( + $wal_ckpt_reset->{bytes}, + '<', + $wal_restart2->{bytes}, + "$sect: increased wal bytes"); +is( $wal_start->{reset}, + $wal_restart2->{reset}, + "$sect: wal stats_reset equal"); + + +## check WAL stats stay reset + +$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('wal')"); + +$sect = "post wal reset"; +my $wal_reset = wal_stats(); + +cmp_ok( + $wal_reset->{records}, '<', + $wal_restart2->{records}, + "$sect: smaller record count"); +cmp_ok( + $wal_reset->{bytes}, '<', + $wal_restart2->{bytes}, + "$sect: smaller bytes"); +cmp_ok( + $wal_reset->{reset}, 'gt', + $wal_restart2->{reset}, + "$sect: newer stats_reset"); + +$node->restart; + +$sect = "post wal reset & restart"; +my $wal_reset_restart = wal_stats(); + +# enough WAL generated during prior tests and initdb to make this not racy +cmp_ok( + $wal_reset_restart->{records}, + '<', + $wal_restart2->{records}, + "$sect: smaller record count"); +cmp_ok( + $wal_reset->{bytes}, '<', + $wal_restart2->{bytes}, + "$sect: smaller bytes"); +cmp_ok( + $wal_reset->{reset}, 'gt', + $wal_restart2->{reset}, + "$sect: newer stats_reset"); + +$node->stop('immediate'); +$node->start; + +$sect = "post immediate restart"; +my $wal_restart_immediate = wal_stats(); + +cmp_ok( + $wal_reset_restart->{reset}, + 'lt', + $wal_restart_immediate->{reset}, + "$sect: reset timestamp is new"); + +$node->stop; +done_testing(); + +sub trigger_funcrel_stat +{ + $node->safe_psql( + $db_under_test, q[ + SELECT * FROM 
tab_stats_crash_discard_test1; + SELECT func_stats_crash_discard1(); + SELECT pg_stat_force_next_flush();]); +} + +sub have_stats +{ + my ($kind, $dboid, $objoid) = @_; + + return $node->safe_psql($connect_db, + "SELECT pg_stat_have_stats('$kind', $dboid, $objoid)"); +} + +sub overwrite_file +{ + my ($filename, $str) = @_; + open my $fh, ">", $filename + or die "could not overwrite \"$filename\": $!"; + print $fh $str; + close $fh; + return; +} + +sub append_file +{ + my ($filename, $str) = @_; + open my $fh, ">>", $filename + or die "could not append to \"$filename\": $!"; + print $fh $str; + close $fh; + return; +} + +sub checkpoint_stats +{ + my %results; + + $results{count} = $node->safe_psql($connect_db, + "SELECT checkpoints_timed + checkpoints_req FROM pg_stat_bgwriter"); + $results{reset} = $node->safe_psql($connect_db, + "SELECT stats_reset FROM pg_stat_bgwriter"); + + return \%results; +} + +sub wal_stats +{ + my %results; + $results{records} = + $node->safe_psql($connect_db, "SELECT wal_records FROM pg_stat_wal"); + $results{bytes} = + $node->safe_psql($connect_db, "SELECT wal_bytes FROM pg_stat_wal"); + $results{reset} = + $node->safe_psql($connect_db, "SELECT stats_reset FROM pg_stat_wal"); + + return \%results; +} diff --git a/src/test/recovery/t/030_stats_cleanup_replica.pl b/src/test/recovery/t/030_stats_cleanup_replica.pl new file mode 100644 index 0000000..cc92ddb --- /dev/null +++ b/src/test/recovery/t/030_stats_cleanup_replica.pl @@ -0,0 +1,206 @@ +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Tests that standbys: +# - drop stats for objects when the those records are replayed +# - persist stats across graceful restarts +# - discard stats after immediate / crash restarts + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf('postgresql.conf', "track_functions = 'all'"); +$node_primary->start; + +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); + +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->start; + + +## Test that stats are cleaned up on standby after dropping table or function + +my $sect = 'initial'; + +my ($dboid, $tableoid, $funcoid) = + populate_standby_stats('postgres', 'public'); +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 't'); + +drop_table_by_oid('postgres', $tableoid); +drop_function_by_oid('postgres', $funcoid); + +$sect = 'post drop'; +my $primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 'f'); + + +## Test that stats are cleaned up on standby after dropping indirectly + +$sect = "schema creation"; + +$node_primary->safe_psql('postgres', "CREATE SCHEMA drop_schema_test1"); +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +($dboid, $tableoid, $funcoid) = + populate_standby_stats('postgres', 'drop_schema_test1'); + +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 't'); +$node_primary->safe_psql('postgres', "DROP SCHEMA drop_schema_test1 CASCADE"); + +$sect = "post schema drop"; + +$primary_lsn = $node_primary->lsn('flush'); 
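# [Editor's aside -- not part of the patch. The flush-LSN plus
# wait_for_catchup('replay', ...) sequence used around this point recurs
# throughout this script; a hypothetical helper (name invented for
# illustration) wrapping the $node_primary/$node_standby objects created
# above could consolidate it:]
sub wait_for_standby_replay
{
    my $lsn = $node_primary->lsn('flush');
    $node_primary->wait_for_catchup($node_standby, 'replay', $lsn);
    return;
}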
+$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +# verify table and function stats removed from standby +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 'f'); + + +## Test that stats are cleaned up on standby after dropping database + +$sect = "createdb"; + +$node_primary->safe_psql('postgres', "CREATE DATABASE test"); +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +($dboid, $tableoid, $funcoid) = populate_standby_stats('test', 'public'); + +# verify stats are present +test_standby_func_tab_stats_status('test', $dboid, $tableoid, $funcoid, 't'); +test_standby_db_stats_status('test', $dboid, 't'); + +$node_primary->safe_psql('postgres', "DROP DATABASE test"); +$sect = "post dropdb"; +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +# Test that the stats were cleaned up on standby +# Note that this connects to 'postgres' but provides the dboid of dropped db +# 'test' which we acquired previously +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 'f'); + +test_standby_db_stats_status('postgres', $dboid, 'f'); + + +## verify that stats persist across graceful restarts on a replica + +# NB: Can't test database stats, they're immediately repopulated when +# reconnecting... +$sect = "pre restart"; +($dboid, $tableoid, $funcoid) = populate_standby_stats('postgres', 'public'); +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 't'); + +$node_standby->restart(); + +$sect = "post non-immediate"; + +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 't'); + +# but gone after an immediate restart +$node_standby->stop('immediate'); +$node_standby->start(); + +$sect = "post immediate restart"; + +test_standby_func_tab_stats_status('postgres', + $dboid, $tableoid, $funcoid, 'f'); + + +done_testing(); + + +sub populate_standby_stats +{ + my ($connect_db, $schema) = @_; + + # create objects on primary + $node_primary->safe_psql($connect_db, + "CREATE TABLE $schema.drop_tab_test1 AS SELECT generate_series(1,100) AS a" + ); + $node_primary->safe_psql($connect_db, + "CREATE FUNCTION $schema.drop_func_test1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE" + ); + my $primary_lsn = $node_primary->lsn('flush'); + $node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + + # collect object oids + my $dboid = $node_standby->safe_psql($connect_db, + "SELECT oid FROM pg_database WHERE datname = '$connect_db'"); + my $tableoid = $node_standby->safe_psql($connect_db, + "SELECT '$schema.drop_tab_test1'::regclass::oid"); + my $funcoid = $node_standby->safe_psql($connect_db, + "SELECT '$schema.drop_func_test1()'::regprocedure::oid"); + + # generate stats on standby + $node_standby->safe_psql($connect_db, + "SELECT * FROM $schema.drop_tab_test1"); + $node_standby->safe_psql($connect_db, "SELECT $schema.drop_func_test1()"); + + return ($dboid, $tableoid, $funcoid); +} + +sub drop_function_by_oid +{ + my ($connect_db, $funcoid) = @_; + + # Get function name from returned oid + my $func_name = $node_primary->safe_psql($connect_db, + "SELECT '$funcoid'::regprocedure"); + $node_primary->safe_psql($connect_db, "DROP FUNCTION $func_name"); +} + +sub drop_table_by_oid +{ + my ($connect_db, $tableoid) = @_; + + # Get table name from returned oid + my $table_name = + $node_primary->safe_psql($connect_db, "SELECT 
'$tableoid'::regclass"); + $node_primary->safe_psql($connect_db, "DROP TABLE $table_name"); +} + +sub test_standby_func_tab_stats_status +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ($connect_db, $dboid, $tableoid, $funcoid, $present) = @_; + + my %expected = (rel => $present, func => $present); + my %stats; + + $stats{rel} = $node_standby->safe_psql($connect_db, + "SELECT pg_stat_have_stats('relation', $dboid, $tableoid)"); + $stats{func} = $node_standby->safe_psql($connect_db, + "SELECT pg_stat_have_stats('function', $dboid, $funcoid)"); + + is_deeply(\%stats, \%expected, "$sect: standby stats as expected"); + + return; +} + +sub test_standby_db_stats_status +{ + local $Test::Builder::Level = $Test::Builder::Level + 1; + my ($connect_db, $dboid, $present) = @_; + + is( $node_standby->safe_psql( + $connect_db, "SELECT pg_stat_have_stats('database', $dboid, 0)"), + $present, + "$sect: standby db stats as expected"); +} diff --git a/src/test/recovery/t/031_recovery_conflict.pl b/src/test/recovery/t/031_recovery_conflict.pl new file mode 100644 index 0000000..99f154a --- /dev/null +++ b/src/test/recovery/t/031_recovery_conflict.pl @@ -0,0 +1,383 @@ +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test that connections to a hot standby are correctly canceled when a +# recovery conflict is detected Also, test that statistics in +# pg_stat_database_conflicts are populated correctly + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +plan skip_all => "disabled due to instability"; + +# Set up nodes +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); + +my $tablespace1 = "test_recovery_conflict_tblspc"; + +$node_primary->append_conf( + 'postgresql.conf', qq[ +allow_in_place_tablespaces = on +log_temp_files = 0 + +# for deadlock test +max_prepared_transactions = 10 + +# wait some to test the wait paths as well, but not long for obvious reasons +max_standby_streaming_delay = 50ms + +temp_tablespaces = $tablespace1 +# Some of the recovery conflict logging code only gets exercised after +# deadlock_timeout. The test doesn't rely on that additional output, but it's +# nice to get some minimal coverage of that code. 
+log_recovery_conflict_waits = on +deadlock_timeout = 10ms +]); +$node_primary->start; + +my $backup_name = 'my_backup'; + +$node_primary->safe_psql('postgres', + qq[CREATE TABLESPACE $tablespace1 LOCATION '']); + +$node_primary->backup($backup_name); +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); + +$node_standby->start; + +my $test_db = "test_db"; + +# use a new database, to trigger database recovery conflict +$node_primary->safe_psql('postgres', "CREATE DATABASE $test_db"); + +# test schema / data +my $table1 = "test_recovery_conflict_table1"; +my $table2 = "test_recovery_conflict_table2"; +$node_primary->safe_psql( + $test_db, qq[ +CREATE TABLE ${table1}(a int, b int); +INSERT INTO $table1 SELECT i % 3, 0 FROM generate_series(1,20) i; +CREATE TABLE ${table2}(a int, b int); +]); +my $primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + + +# a longrunning psql that we can use to trigger conflicts +my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); +my %psql_standby = ('stdin' => '', 'stdout' => ''); +$psql_standby{run} = + $node_standby->background_psql($test_db, \$psql_standby{stdin}, + \$psql_standby{stdout}, + $psql_timeout); +$psql_standby{stdout} = ''; + +my $expected_conflicts = 0; + + +## RECOVERY CONFLICT 1: Buffer pin conflict +my $sect = "buffer pin conflict"; +$expected_conflicts++; + +# Aborted INSERT on primary that will be cleaned up by vacuum. Has to be old +# enough so that there's not a snapshot conflict before the buffer pin +# conflict. + +$node_primary->safe_psql( + $test_db, + qq[ + BEGIN; + INSERT INTO $table1 VALUES (1,0); + ROLLBACK; + -- ensure flush, rollback doesn't do so + BEGIN; LOCK $table1; COMMIT; + ]); + +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +my $cursor1 = "test_recovery_conflict_cursor"; + +# DECLARE and use a cursor on standby, causing buffer with the only block of +# the relation to be pinned on the standby +$psql_standby{stdin} .= qq[ + BEGIN; + DECLARE $cursor1 CURSOR FOR SELECT b FROM $table1; + FETCH FORWARD FROM $cursor1; + ]; +# FETCH FORWARD should have returned a 0 since all values of b in the table +# are 0 +ok(pump_until_standby(qr/^0$/m), + "$sect: cursor with conflicting pin established"); + +# to check the log starting now for recovery conflict messages +my $log_location = -s $node_standby->logfile; + +# VACUUM on the primary +$node_primary->safe_psql($test_db, qq[VACUUM $table1;]); + +# Wait for catchup. Existing connection will be terminated before replay is +# finished, so waiting for catchup ensures that there is no race between +# encountering the recovery conflict which causes the disconnect and checking +# the logfile for the terminated connection. 
+$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +check_conflict_log("User was holding shared buffer pin for too long"); +reconnect_and_clear(); +check_conflict_stat("bufferpin"); + + +## RECOVERY CONFLICT 2: Snapshot conflict +$sect = "snapshot conflict"; +$expected_conflicts++; + +$node_primary->safe_psql($test_db, + qq[INSERT INTO $table1 SELECT i, 0 FROM generate_series(1,20) i]); +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +# DECLARE and FETCH from cursor on the standby +$psql_standby{stdin} .= qq[ + BEGIN; + DECLARE $cursor1 CURSOR FOR SELECT b FROM $table1; + FETCH FORWARD FROM $cursor1; + ]; +ok( pump_until( + $psql_standby{run}, $psql_timeout, + \$psql_standby{stdout}, qr/^0$/m,), + "$sect: cursor with conflicting snapshot established"); + +# Do some HOT updates +$node_primary->safe_psql($test_db, + qq[UPDATE $table1 SET a = a + 1 WHERE a > 2;]); + +# VACUUM, pruning those dead tuples +$node_primary->safe_psql($test_db, qq[VACUUM $table1;]); + +# Wait for attempted replay of PRUNE records +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +check_conflict_log( + "User query might have needed to see row versions that must be removed"); +reconnect_and_clear(); +check_conflict_stat("snapshot"); + + +## RECOVERY CONFLICT 3: Lock conflict +$sect = "lock conflict"; +$expected_conflicts++; + +# acquire lock to conflict with +$psql_standby{stdin} .= qq[ + BEGIN; + LOCK TABLE $table1 IN ACCESS SHARE MODE; + SELECT 1; + ]; +ok(pump_until_standby(qr/^1$/m), "$sect: conflicting lock acquired"); + +# DROP TABLE containing block which standby has in a pinned buffer +$node_primary->safe_psql($test_db, qq[DROP TABLE $table1;]); + +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +check_conflict_log("User was holding a relation lock for too long"); +reconnect_and_clear(); +check_conflict_stat("lock"); + + +## RECOVERY CONFLICT 4: Tablespace conflict +$sect = "tablespace conflict"; +$expected_conflicts++; + +# DECLARE a cursor for a query which, with sufficiently low work_mem, will +# spill tuples into temp files in the temporary tablespace created during +# setup. +$psql_standby{stdin} .= qq[ + BEGIN; + SET work_mem = '64kB'; + DECLARE $cursor1 CURSOR FOR + SELECT count(*) FROM generate_series(1,6000); + FETCH FORWARD FROM $cursor1; + ]; +ok(pump_until_standby(qr/^6000$/m), + "$sect: cursor with conflicting temp file established"); + +# Drop the tablespace currently containing spill files for the query on the +# standby +$node_primary->safe_psql($test_db, qq[DROP TABLESPACE $tablespace1;]); + +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +check_conflict_log( + "User was or might have been using tablespace that must be dropped"); +reconnect_and_clear(); +check_conflict_stat("tablespace"); + + +## RECOVERY CONFLICT 5: Deadlock +$sect = "startup deadlock"; +$expected_conflicts++; + +# Want to test recovery deadlock conflicts, not buffer pin conflicts. 
Without +# changing max_standby_streaming_delay it'd be timing dependent what we hit +# first +$node_standby->adjust_conf( + 'postgresql.conf', + 'max_standby_streaming_delay', + "${PostgreSQL::Test::Utils::timeout_default}s"); +$node_standby->restart(); +reconnect_and_clear(); + +# Generate a few dead rows, to later be cleaned up by vacuum. Then acquire a +# lock on another relation in a prepared xact, so it's held continuously by +# the startup process. The standby psql will block acquiring that lock while +# holding a pin that vacuum needs, triggering the deadlock. +$node_primary->safe_psql( + $test_db, + qq[ +CREATE TABLE $table1(a int, b int); +INSERT INTO $table1 VALUES (1); +BEGIN; +INSERT INTO $table1(a) SELECT generate_series(1, 100) i; +ROLLBACK; +BEGIN; +LOCK TABLE $table2; +PREPARE TRANSACTION 'lock'; +INSERT INTO $table1(a) VALUES (170); +SELECT txid_current(); +]); + +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +$psql_standby{stdin} .= qq[ + BEGIN; + -- hold pin + DECLARE $cursor1 CURSOR FOR SELECT a FROM $table1; + FETCH FORWARD FROM $cursor1; + -- wait for lock held by prepared transaction + SELECT * FROM $table2; + ]; +ok( pump_until( + $psql_standby{run}, $psql_timeout, + \$psql_standby{stdout}, qr/^1$/m,), + "$sect: cursor holding conflicting pin, also waiting for lock, established" +); + +# just to make sure we're waiting for lock already +ok( $node_standby->poll_query_until( + 'postgres', qq[ +SELECT 'waiting' FROM pg_locks WHERE locktype = 'relation' AND NOT granted; +], 'waiting'), + "$sect: lock acquisition is waiting"); + +# VACUUM will prune away rows, causing a buffer pin conflict, while standby +# psql is waiting on lock +$node_primary->safe_psql($test_db, qq[VACUUM $table1;]); +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +check_conflict_log("User transaction caused buffer deadlock with recovery."); +reconnect_and_clear(); +check_conflict_stat("deadlock"); + +# clean up for next tests +$node_primary->safe_psql($test_db, qq[ROLLBACK PREPARED 'lock';]); +$node_standby->adjust_conf('postgresql.conf', 'max_standby_streaming_delay', + '50ms'); +$node_standby->restart(); +reconnect_and_clear(); + + +# Check that expected number of conflicts show in pg_stat_database. Needs to +# be tested before database is dropped, for obvious reasons. 
+is( $node_standby->safe_psql( + $test_db, + qq[SELECT conflicts FROM pg_stat_database WHERE datname='$test_db';]), + $expected_conflicts, + qq[$expected_conflicts recovery conflicts shown in pg_stat_database]); + + +## RECOVERY CONFLICT 6: Database conflict +$sect = "database conflict"; + +$node_primary->safe_psql('postgres', qq[DROP DATABASE $test_db;]); + +$primary_lsn = $node_primary->lsn('flush'); +$node_primary->wait_for_catchup($node_standby, 'replay', $primary_lsn); + +check_conflict_log("User was connected to a database that must be dropped"); + + +# explicitly shut down psql instances gracefully - to avoid hangs or worse on +# windows +$psql_standby{stdin} .= "\\q\n"; +$psql_standby{run}->finish; + +$node_standby->stop(); +$node_primary->stop(); + + +done_testing(); + + +sub pump_until_standby +{ + my $match = shift; + + return pump_until($psql_standby{run}, $psql_timeout, + \$psql_standby{stdout}, $match); +} + +sub reconnect_and_clear +{ + # If psql isn't dead already, tell it to quit as \q, when already dead, + # causes IPC::Run to unhelpfully error out with "ack Broken pipe:". + $psql_standby{run}->pump_nb(); + if ($psql_standby{run}->pumpable()) + { + $psql_standby{stdin} .= "\\q\n"; + } + $psql_standby{run}->finish; + + # restart + $psql_standby{run}->run(); + $psql_standby{stdin} = ''; + $psql_standby{stdout} = ''; + + # Run query to ensure connection has finished re-establishing + $psql_standby{stdin} .= qq[SELECT 1;\n]; + die unless pump_until_standby(qr/^1$/m); + $psql_standby{stdout} = ''; +} + +sub check_conflict_log +{ + my $message = shift; + my $old_log_location = $log_location; + + $log_location = $node_standby->wait_for_log(qr/$message/, $log_location); + + cmp_ok($log_location, '>', $old_log_location, + "$sect: logfile contains terminated connection due to recovery conflict" + ); +} + +sub check_conflict_stat +{ + my $conflict_type = shift; + my $count = $node_standby->safe_psql($test_db, + qq[SELECT confl_$conflict_type FROM pg_stat_database_conflicts WHERE datname='$test_db';] + ); + + is($count, 1, "$sect: stats show conflict on standby"); +} diff --git a/src/test/recovery/t/032_relfilenode_reuse.pl b/src/test/recovery/t/032_relfilenode_reuse.pl new file mode 100644 index 0000000..92ec510 --- /dev/null +++ b/src/test/recovery/t/032_relfilenode_reuse.pl @@ -0,0 +1,243 @@ +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use File::Basename; + + +my $node_primary = PostgreSQL::Test::Cluster->new('primary'); +$node_primary->init(allows_streaming => 1); +$node_primary->append_conf( + 'postgresql.conf', q[ +allow_in_place_tablespaces = true +log_connections=on +# to avoid "repairing" corruption +full_page_writes=off +log_min_messages=debug2 +shared_buffers=1MB +]); +$node_primary->start; + + +# Create streaming standby linking to primary +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); +my $node_standby = PostgreSQL::Test::Cluster->new('standby'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->start; + +# We'll reset this timeout for each individual query we run. 
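# [Editor's sketch -- not part of the patch. This is the minimal shape of the
# background-psql pattern used below (and in 022 and 031), assuming the
# $node_primary created above and the pump_until() helper exported by
# PostgreSQL::Test::Utils:]
my %bg = (stdin => '', stdout => '', stderr => '');
my $bg_timer = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default);
$bg{run} = IPC::Run::start(
    [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ],
    '<',  \$bg{stdin},
    '>',  \$bg{stdout},
    '2>', \$bg{stderr},
    $bg_timer);
# send a query to the long-lived session and wait for its output
$bg{stdin} .= "SELECT 'bg-ready';\n";
pump_until($bg{run}, $bg_timer, \$bg{stdout}, qr/bg-ready/m);
# shut the session down gracefully when done
$bg{stdin} .= "\\q\n";
$bg{run}->finish;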
+my $psql_timeout = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); + +my %psql_primary = (stdin => '', stdout => '', stderr => ''); +$psql_primary{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_primary->connstr('postgres') ], + '<', + \$psql_primary{stdin}, + '>', + \$psql_primary{stdout}, + '2>', + \$psql_primary{stderr}, + $psql_timeout); + +my %psql_standby = ('stdin' => '', 'stdout' => '', 'stderr' => ''); +$psql_standby{run} = IPC::Run::start( + [ 'psql', '-XA', '-f', '-', '-d', $node_standby->connstr('postgres') ], + '<', + \$psql_standby{stdin}, + '>', + \$psql_standby{stdout}, + '2>', + \$psql_standby{stderr}, + $psql_timeout); + + +# Create template database with a table that we'll update, to trigger dirty +# rows. Using a template database + preexisting rows makes it a bit easier to +# reproduce, because there's no cache invalidations generated. + +$node_primary->safe_psql('postgres', + "CREATE DATABASE conflict_db_template OID = 50000;"); +$node_primary->safe_psql( + 'conflict_db_template', q[ + CREATE TABLE large(id serial primary key, dataa text, datab text); + INSERT INTO large(dataa, datab) SELECT g.i::text, 1 FROM generate_series(1, 4000) g(i);] +); +$node_primary->safe_psql('postgres', + "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;"); + +$node_primary->safe_psql( + 'postgres', q[ + CREATE EXTENSION pg_prewarm; + CREATE TABLE replace_sb(data text); + INSERT INTO replace_sb(data) SELECT random()::text FROM generate_series(1, 15000);] +); + +$node_primary->wait_for_catchup($node_standby); + +# Use longrunning transactions, so that AtEOXact_SMgr doesn't close files +send_query_and_wait(\%psql_primary, q[BEGIN;], qr/BEGIN/m); +send_query_and_wait(\%psql_standby, q[BEGIN;], qr/BEGIN/m); + +# Cause lots of dirty rows in shared_buffers +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 1;"); + +# Now do a bunch of work in another database. That will end up needing to +# write back dirty data from the previous step, opening the relevant file +# descriptors +cause_eviction(\%psql_primary, \%psql_standby); + +# drop and recreate database +$node_primary->safe_psql('postgres', "DROP DATABASE conflict_db;"); +$node_primary->safe_psql('postgres', + "CREATE DATABASE conflict_db TEMPLATE conflict_db_template OID = 50001;"); + +verify($node_primary, $node_standby, 1, "initial contents as expected"); + +# Again cause lots of dirty rows in shared_buffers, but use a different update +# value so we can check everything is OK +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 2;"); + +# Again cause a lot of IO. That'll again write back dirty data, but uses newly +# opened file descriptors, so we don't confuse old files with new files despite +# recycling relfilenodes. 
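+# (The I/O is forced by cause_eviction(), defined near the end of this
+# script, which prewarms every relation that has storage, on both nodes:
+#   SELECT SUM(pg_prewarm(oid)) FROM pg_class
+#   WHERE pg_relation_filenode(oid) != 0;
+# with shared_buffers set to 1MB above, that is enough to evict and write
+# back the dirty buffers of conflict_db.)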
+cause_eviction(\%psql_primary, \%psql_standby); + +verify($node_primary, $node_standby, 2, + "update to reused relfilenode (due to DB oid conflict) is not lost"); + + +$node_primary->safe_psql('conflict_db', "VACUUM FULL large;"); +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 3;"); + +verify($node_primary, $node_standby, 3, "restored contents as expected"); + +# Test for old filehandles after moving a database in / out of tablespace +$node_primary->safe_psql('postgres', + q[CREATE TABLESPACE test_tablespace LOCATION '']); + +# cause dirty buffers +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 4;"); +# cause files to be opened in backend in other database +cause_eviction(\%psql_primary, \%psql_standby); + +# move database back / forth +$node_primary->safe_psql('postgres', + 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace'); +$node_primary->safe_psql('postgres', + 'ALTER DATABASE conflict_db SET TABLESPACE pg_default'); + +# cause dirty buffers +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 5;"); +cause_eviction(\%psql_primary, \%psql_standby); + +verify($node_primary, $node_standby, 5, "post move contents as expected"); + +$node_primary->safe_psql('postgres', + 'ALTER DATABASE conflict_db SET TABLESPACE test_tablespace'); + +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 7;"); +cause_eviction(\%psql_primary, \%psql_standby); +$node_primary->safe_psql('conflict_db', "UPDATE large SET datab = 8;"); +$node_primary->safe_psql('postgres', 'DROP DATABASE conflict_db'); +$node_primary->safe_psql('postgres', 'DROP TABLESPACE test_tablespace'); + +$node_primary->safe_psql('postgres', 'REINDEX TABLE pg_database'); + + +# explicitly shut down psql instances gracefully - to avoid hangs +# or worse on windows +$psql_primary{stdin} .= "\\q\n"; +$psql_primary{run}->finish; +$psql_standby{stdin} .= "\\q\n"; +$psql_standby{run}->finish; + +$node_primary->stop(); +$node_standby->stop(); + +# Make sure that there weren't crashes during shutdown + +command_like( + [ 'pg_controldata', $node_primary->data_dir ], + qr/Database cluster state:\s+shut down\n/, + 'primary shut down ok'); +command_like( + [ 'pg_controldata', $node_standby->data_dir ], + qr/Database cluster state:\s+shut down in recovery\n/, + 'standby shut down ok'); +done_testing(); + +sub verify +{ + my ($primary, $standby, $counter, $message) = @_; + + my $query = + "SELECT datab, count(*) FROM large GROUP BY 1 ORDER BY 1 LIMIT 10"; + is($primary->safe_psql('conflict_db', $query), + "$counter|4000", "primary: $message"); + + $primary->wait_for_catchup($standby); + is($standby->safe_psql('conflict_db', $query), + "$counter|4000", "standby: $message"); +} + +sub cause_eviction +{ + my ($psql_primary, $psql_standby) = @_; + + send_query_and_wait( + $psql_primary, + q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;], + qr/warmed_buffers/m); + + send_query_and_wait( + $psql_standby, + q[SELECT SUM(pg_prewarm(oid)) warmed_buffers FROM pg_class WHERE pg_relation_filenode(oid) != 0;], + qr/warmed_buffers/m); +} + +# Send query, wait until string matches +sub send_query_and_wait +{ + my ($psql, $query, $untl) = @_; + my $ret; + + # For each query we run, we'll restart the timeout. Otherwise the timeout + # would apply to the whole test script, and would need to be set very high + # to survive when running under Valgrind. 
+ $psql_timeout->reset(); + $psql_timeout->start(); + + # send query + $$psql{stdin} .= $query; + $$psql{stdin} .= "\n"; + + # wait for query results + $$psql{run}->pump_nb(); + while (1) + { + last if $$psql{stdout} =~ /$untl/; + + if ($psql_timeout->is_expired) + { + BAIL_OUT("aborting wait: program timed out\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + if (not $$psql{run}->pumpable()) + { + BAIL_OUT("aborting wait: program died\n" + . "stream contents: >>$$psql{stdout}<<\n" + . "pattern searched for: $untl\n"); + return 0; + } + $$psql{run}->pump(); + } + + $$psql{stdout} = ''; + + return 1; +} diff --git a/src/test/recovery/t/033_replay_tsp_drops.pl b/src/test/recovery/t/033_replay_tsp_drops.pl new file mode 100644 index 0000000..64c3296 --- /dev/null +++ b/src/test/recovery/t/033_replay_tsp_drops.pl @@ -0,0 +1,148 @@ + +# Copyright (c) 2021-2022, PostgreSQL Global Development Group + +# Test replay of tablespace/database creation/drop + +use strict; +use warnings; + +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Time::HiRes qw(usleep); + +sub test_tablespace +{ + my ($strategy) = @_; + + my $node_primary = PostgreSQL::Test::Cluster->new("primary1_$strategy"); + $node_primary->init(allows_streaming => 1); + $node_primary->start; + $node_primary->psql( + 'postgres', + qq[ + SET allow_in_place_tablespaces=on; + CREATE TABLESPACE dropme_ts1 LOCATION ''; + CREATE TABLESPACE dropme_ts2 LOCATION ''; + CREATE TABLESPACE source_ts LOCATION ''; + CREATE TABLESPACE target_ts LOCATION ''; + CREATE DATABASE template_db IS_TEMPLATE = true; + SELECT pg_create_physical_replication_slot('slot', true); + ]); + my $backup_name = 'my_backup'; + $node_primary->backup($backup_name); + + my $node_standby = PostgreSQL::Test::Cluster->new("standby2_$strategy"); + $node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); + $node_standby->append_conf('postgresql.conf', + "allow_in_place_tablespaces = on"); + $node_standby->append_conf('postgresql.conf', + "primary_slot_name = slot"); + $node_standby->start; + + # Make sure the connection is made + $node_primary->wait_for_catchup($node_standby, 'write', + $node_primary->lsn('write')); + + # Do immediate shutdown just after a sequence of CREATE DATABASE / DROP + # DATABASE / DROP TABLESPACE. This causes CREATE DATABASE WAL records + # to be applied to already-removed directories. + my $query = q[ + CREATE DATABASE dropme_db1 WITH TABLESPACE dropme_ts1 STRATEGY=<STRATEGY>; + CREATE TABLE t (a int) TABLESPACE dropme_ts2; + CREATE DATABASE dropme_db2 WITH TABLESPACE dropme_ts2 STRATEGY=<STRATEGY>; + CREATE DATABASE moveme_db TABLESPACE source_ts STRATEGY=<STRATEGY>; + ALTER DATABASE moveme_db SET TABLESPACE target_ts; + CREATE DATABASE newdb TEMPLATE template_db STRATEGY=<STRATEGY>; + ALTER DATABASE template_db IS_TEMPLATE = false; + DROP DATABASE dropme_db1; + DROP TABLE t; + DROP DATABASE dropme_db2; DROP TABLESPACE dropme_ts2; + DROP TABLESPACE source_ts; + DROP DATABASE template_db; + ]; + $query =~ s/<STRATEGY>/$strategy/g; + + $node_primary->safe_psql('postgres', $query); + $node_primary->wait_for_catchup($node_standby, 'write', + $node_primary->lsn('write')); + + # show "create missing directory" log message + $node_standby->safe_psql('postgres', + "ALTER SYSTEM SET log_min_messages TO debug1;"); + $node_standby->stop('immediate'); + # Should restart ignoring directory creation error. 
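+	# (While the standby has not yet reached a consistent state, replaying a
+	# CREATE DATABASE record that points at an already-removed directory is
+	# tolerated: the startup process recreates the missing directory and logs
+	# a debug message instead of failing, which is why the start() below is
+	# expected to succeed for both strategies.)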
+ is($node_standby->start(fail_ok => 1), + 1, "standby node started for $strategy"); + $node_standby->stop('immediate'); +} + +test_tablespace("FILE_COPY"); +test_tablespace("WAL_LOG"); + +# Ensure that a missing tablespace directory during create database +# replay immediately causes panic if the standby has already reached +# consistent state (archive recovery is in progress). This is +# effective only for CREATE DATABASE WITH STRATEGY=FILE_COPY. + +my $node_primary = PostgreSQL::Test::Cluster->new('primary2'); +$node_primary->init(allows_streaming => 1); +$node_primary->start; + +# Create tablespace +$node_primary->safe_psql( + 'postgres', q[ + SET allow_in_place_tablespaces=on; + CREATE TABLESPACE ts1 LOCATION '' + ]); +$node_primary->safe_psql('postgres', + "CREATE DATABASE db1 WITH TABLESPACE ts1 STRATEGY=FILE_COPY"); + +# Take backup +my $backup_name = 'my_backup'; +$node_primary->backup($backup_name); +my $node_standby = PostgreSQL::Test::Cluster->new('standby3'); +$node_standby->init_from_backup($node_primary, $backup_name, + has_streaming => 1); +$node_standby->append_conf('postgresql.conf', + "allow_in_place_tablespaces = on"); +$node_standby->start; + +# Make sure standby reached consistency and starts accepting connections +$node_standby->poll_query_until('postgres', 'SELECT 1', '1'); + +# Remove standby tablespace directory so it will be missing when +# replay resumes. +my $tspoid = $node_standby->safe_psql('postgres', + "SELECT oid FROM pg_tablespace WHERE spcname = 'ts1';"); +my $tspdir = $node_standby->data_dir . "/pg_tblspc/$tspoid"; +File::Path::rmtree($tspdir); + +my $logstart = -s $node_standby->logfile; + +# Create a database in the tablespace and a table in default tablespace +$node_primary->safe_psql( + 'postgres', + q[ + CREATE TABLE should_not_replay_insertion(a int); + CREATE DATABASE db2 WITH TABLESPACE ts1 STRATEGY=FILE_COPY; + INSERT INTO should_not_replay_insertion VALUES (1); + ]); + +# Standby should fail and should not silently skip replaying the wal +# In this test, PANIC turns into WARNING by allow_in_place_tablespaces. +# Check the log messages instead of confirming standby failure. +my $max_attempts = $PostgreSQL::Test::Utils::timeout_default * 10; +while ($max_attempts-- >= 0) +{ + last + if ( + $node_standby->log_contains( + qr!WARNING: ( [A-Z0-9]+:)? creating missing directory: pg_tblspc/!, + $logstart)); + usleep(100_000); +} +ok($max_attempts > 0, "invalid directory creation is detected"); + +done_testing(); diff --git a/src/test/recovery/t/034_create_database.pl b/src/test/recovery/t/034_create_database.pl new file mode 100644 index 0000000..4698cbc --- /dev/null +++ b/src/test/recovery/t/034_create_database.pl @@ -0,0 +1,45 @@ + +# Copyright (c) 2023, PostgreSQL Global Development Group + +# Test WAL replay for CREATE DATABASE .. STRATEGY WAL_LOG. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; +$node->start; + +# This checks that any DDLs run on the template database that modify pg_class +# are persisted after creating a database from it using the WAL_LOG strategy, +# as a direct copy of the template database's pg_class is used in this case. +my $db_template = "template1"; +my $db_new = "test_db_1"; + +# Create table. It should persist on the template database. 
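+# (Note the ordering below: the new database is created from the template
+# first, and only then is the table added to the template.  After the crash
+# triggered later in this test, the template must therefore still contain
+# the table, while the database copied with STRATEGY WAL_LOG must not; the
+# two checks at the end verify exactly that.)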
+$node->safe_psql("postgres", + "CREATE DATABASE $db_new STRATEGY WAL_LOG TEMPLATE $db_template;"); + +$node->safe_psql($db_template, "CREATE TABLE tab_db_after_create_1 (a INT);"); + +# Flush the changes affecting the template database, then replay them. +$node->safe_psql("postgres", "CHECKPOINT;"); + +$node->stop('immediate'); +$node->start; +my $result = $node->safe_psql($db_template, + "SELECT count(*) FROM pg_class WHERE relname LIKE 'tab_db_%';"); +is($result, "1", + "check that table exists on template after crash, with checkpoint"); + +# The new database should have no tables. +$result = $node->safe_psql($db_new, + "SELECT count(*) FROM pg_class WHERE relname LIKE 'tab_db_%';"); +is($result, "0", + "check that there are no tables from template on new database after crash" +); + +done_testing(); diff --git a/src/test/recovery/t/037_invalid_database.pl b/src/test/recovery/t/037_invalid_database.pl new file mode 100644 index 0000000..a061fab --- /dev/null +++ b/src/test/recovery/t/037_invalid_database.pl @@ -0,0 +1,157 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +# +# Test we handle interrupted DROP DATABASE correctly. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; + +my $node = PostgreSQL::Test::Cluster->new('node'); +$node->init; +$node->append_conf( + "postgresql.conf", qq( +autovacuum = off +max_prepared_transactions=5 +log_min_duration_statement=0 +log_connections=on +log_disconnections=on +)); + +$node->start; + + +# First verify that we can't connect to or ALTER an invalid database. Just +# mark the database as invalid ourselves, that's more reliable than hitting the +# required race conditions (see testing further down)... + +$node->safe_psql( + "postgres", qq( +CREATE DATABASE regression_invalid; +UPDATE pg_database SET datconnlimit = -2 WHERE datname = 'regression_invalid'; +)); + +my $psql_stdout = ''; +my $psql_stderr = ''; + +is($node->psql('regression_invalid', '', stderr => \$psql_stderr), + 2, "can't connect to invalid database - error code"); +like( + $psql_stderr, + qr/FATAL:\s+cannot connect to invalid database "regression_invalid"/, + "can't connect to invalid database - error message"); + +is($node->psql('postgres', 'ALTER DATABASE regression_invalid CONNECTION LIMIT 10'), + 2, "can't ALTER invalid database"); + +# check invalid database can't be used as a template +is( $node->psql('postgres', 'CREATE DATABASE copy_invalid TEMPLATE regression_invalid'), + 3, + "can't use invalid database as template"); + + +# Verify that VACUUM ignores an invalid database when computing how much of +# the clog is needed (vac_truncate_clog()). For that we modify the pg_database +# row of the invalid database to have an outdated datfrozenxid. +$psql_stderr = ''; +$node->psql( + 'postgres', + qq( +UPDATE pg_database SET datfrozenxid = '123456' WHERE datname = 'regression_invalid'; +DROP TABLE IF EXISTS foo_tbl; CREATE TABLE foo_tbl(); +VACUUM FREEZE;), + stderr => \$psql_stderr); +unlike( + $psql_stderr, + qr/some databases have not been vacuumed in over 2 billion transactions/, + "invalid databases are ignored by vac_truncate_clog"); + + +# But we need to be able to drop an invalid database. 
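+# (An invalid database can be spotted in the catalog via the marker this
+# test set above, e.g.
+#   SELECT datname FROM pg_database WHERE datconnlimit = -2;
+# and dropping it is the one operation that must still work.)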
+is( $node->psql( + 'postgres', 'DROP DATABASE regression_invalid', + stdout => \$psql_stdout, + stderr => \$psql_stderr), + 0, + "can DROP invalid database"); + +# Ensure database is gone +is($node->psql('postgres', 'DROP DATABASE regression_invalid'), + 3, "can't drop already dropped database"); + + +# Test that interruption of DROP DATABASE is handled properly. To ensure the +# interruption happens at the appropriate moment, we lock pg_tablespace. DROP +# DATABASE scans pg_tablespace once it has reached the "irreversible" part of +# dropping the database, making it a suitable point to wait. +my $bgpsql_in = ''; +my $bgpsql_out = ''; +my $bgpsql_err = ''; +my $bgpsql_timer = IPC::Run::timer($PostgreSQL::Test::Utils::timeout_default); +my $bgpsql = $node->background_psql('postgres', \$bgpsql_in, \$bgpsql_out, + $bgpsql_timer, on_error_stop => 0); +$bgpsql_out = ''; +$bgpsql_in .= "SELECT pg_backend_pid();\n"; + +pump_until($bgpsql, $bgpsql_timer, \$bgpsql_out, qr/\d/); + +my $pid = $bgpsql_out; +$bgpsql_out = ''; + +# create the database, prevent drop database via lock held by a 2PC transaction +$bgpsql_in .= qq( + CREATE DATABASE regression_invalid_interrupt; + BEGIN; + LOCK pg_tablespace; + PREPARE TRANSACTION 'lock_tblspc'; + \\echo done +); + +ok(pump_until($bgpsql, $bgpsql_timer, \$bgpsql_out, qr/done/), + "blocked DROP DATABASE completion"); +$bgpsql_out = ''; + +# Try to drop. This will wait due to the still held lock. +$bgpsql_in .= qq( + DROP DATABASE regression_invalid_interrupt; + \\echo DROP DATABASE completed +); +$bgpsql->pump_nb; + +# Ensure we're waiting for the lock +$node->poll_query_until('postgres', + qq(SELECT EXISTS(SELECT * FROM pg_locks WHERE NOT granted AND relation = 'pg_tablespace'::regclass AND mode = 'AccessShareLock');) +); + +# and finally interrupt the DROP DATABASE +ok($node->safe_psql('postgres', "SELECT pg_cancel_backend($pid)"), + "canceling DROP DATABASE"); + +# wait for cancellation to be processed +ok( pump_until( + $bgpsql, $bgpsql_timer, \$bgpsql_out, qr/DROP DATABASE completed/), + "cancel processed"); +$bgpsql_out = ''; + +# verify that connection to the database aren't allowed +is($node->psql('regression_invalid_interrupt', ''), + 2, "can't connect to invalid_interrupt database"); + +# To properly drop the database, we need to release the lock previously preventing +# doing so. +$bgpsql_in .= qq( + ROLLBACK PREPARED 'lock_tblspc'; + \\echo ROLLBACK PREPARED +); +ok(pump_until($bgpsql, $bgpsql_timer, \$bgpsql_out, qr/ROLLBACK PREPARED/), + "unblock DROP DATABASE"); +$bgpsql_out = ''; + +is($node->psql('postgres', "DROP DATABASE regression_invalid_interrupt"), + 0, "DROP DATABASE invalid_interrupt"); + +$bgpsql_in .= "\\q\n"; +$bgpsql->finish(); + +done_testing(); diff --git a/src/test/recovery/t/039_end_of_wal.pl b/src/test/recovery/t/039_end_of_wal.pl new file mode 100644 index 0000000..b3b813d --- /dev/null +++ b/src/test/recovery/t/039_end_of_wal.pl @@ -0,0 +1,485 @@ +# Copyright (c) 2023, PostgreSQL Global Development Group +# +# Test detecting end-of-WAL conditions. This test suite generates +# fake defective page and record headers to trigger various failure +# scenarios. + +use strict; +use warnings; +use PostgreSQL::Test::Cluster; +use PostgreSQL::Test::Utils; +use Test::More; +use Fcntl qw(SEEK_SET); + +use integer; # causes / operator to use integer math + +# Is this a big-endian system ("network" byte order)? 
We can't use 'Q' in +# pack() calls because it's not available in some perl builds, so we need to +# break 64 bit LSN values into two 'I' values. Fortunately we don't need to +# deal with high values, so we can just write 0 for the high order 32 bits, but +# we need to know the endianness to do that. +my $BIG_ENDIAN = pack("L", 0x12345678) eq pack("N", 0x12345678); + +# Header size of record header. +my $RECORD_HEADER_SIZE = 24; + +# Fields retrieved from code headers. +my @scan_result = scan_server_header('access/xlog_internal.h', + '#define\s+XLOG_PAGE_MAGIC\s+(\w+)'); +my $XLP_PAGE_MAGIC = hex($scan_result[0]); +@scan_result = scan_server_header('access/xlog_internal.h', + '#define\s+XLP_FIRST_IS_CONTRECORD\s+(\w+)'); +my $XLP_FIRST_IS_CONTRECORD = hex($scan_result[0]); + +# Values queried from the server +my $WAL_SEGMENT_SIZE; +my $WAL_BLOCK_SIZE; +my $TLI; + +# Build path of a WAL segment. +sub wal_segment_path +{ + my $node = shift; + my $tli = shift; + my $segment = shift; + my $wal_path = + sprintf("%s/pg_wal/%08X%08X%08X", $node->data_dir, $tli, 0, $segment); + return $wal_path; +} + +# Calculate from a LSN (in bytes) its segment number and its offset. +sub lsn_to_segment_and_offset +{ + my $lsn = shift; + return ($lsn / $WAL_SEGMENT_SIZE, $lsn % $WAL_SEGMENT_SIZE); +} + +# Write some arbitrary data in WAL for the given segment at LSN. +# This should be called while the cluster is not running. +sub write_wal +{ + my $node = shift; + my $tli = shift; + my $lsn = shift; + my $data = shift; + + my ($segment, $offset) = lsn_to_segment_and_offset($lsn); + my $path = wal_segment_path($node, $tli, $segment); + + open my $fh, "+<:raw", $path or die; + seek($fh, $offset, SEEK_SET) or die; + print $fh $data; + close $fh; +} + +# Emit a WAL record of arbitrary size. Returns the end LSN of the +# record inserted, in bytes. +sub emit_message +{ + my $node = shift; + my $size = shift; + return int( + $node->safe_psql( + 'postgres', + "SELECT pg_logical_emit_message(true, '', repeat('a', $size)) - '0/0'" + )); +} + +# Get the current insert LSN of a node, in bytes. +sub get_insert_lsn +{ + my $node = shift; + return int( + $node->safe_psql( + 'postgres', "SELECT pg_current_wal_insert_lsn() - '0/0'")); +} + +# Get GUC value, converted to an int. +sub get_int_setting +{ + my $node = shift; + my $name = shift; + return int( + $node->safe_psql( + 'postgres', + "SELECT setting FROM pg_settings WHERE name = '$name'")); +} + +sub start_of_page +{ + my $lsn = shift; + return $lsn & ~($WAL_BLOCK_SIZE - 1); +} + +sub start_of_next_page +{ + my $lsn = shift; + return start_of_page($lsn) + $WAL_BLOCK_SIZE; +} + +# Build a fake WAL record header based on the data given by the caller. +# This needs to follow the format of the C structure XLogRecord. To +# be inserted with write_wal(). +sub build_record_header +{ + my $xl_tot_len = shift; + my $xl_xid = shift || 0; + my $xl_prev = shift || 0; + my $xl_info = shift || 0; + my $xl_rmid = shift || 0; + my $xl_crc = shift || 0; + + # This needs to follow the structure XLogRecord: + # I for xl_tot_len + # I for xl_xid + # II for xl_prev + # C for xl_info + # C for xl_rmid + # BB for two bytes of padding + # I for xl_crc + return pack("IIIICCBBI", + $xl_tot_len, $xl_xid, + $BIG_ENDIAN ? 0 : $xl_prev, + $BIG_ENDIAN ? $xl_prev : 0, + $xl_info, $xl_rmid, 0, 0, $xl_crc); +} + +# Build a fake WAL page header, based on the data given by the caller +# This needs to follow the format of the C structure XLogPageHeaderData. +# To be inserted with write_wal(). 
+sub build_page_header +{ + my $xlp_magic = shift; + my $xlp_info = shift || 0; + my $xlp_tli = shift || 0; + my $xlp_pageaddr = shift || 0; + my $xlp_rem_len = shift || 0; + + # This needs to follow the structure XLogPageHeaderData: + # S for xlp_magic + # S for xlp_info + # I for xlp_tli + # II for xlp_pageaddr + # I for xlp_rem_len + return pack("SSIIII", + $xlp_magic, $xlp_info, $xlp_tli, + $BIG_ENDIAN ? 0 : $xlp_pageaddr, + $BIG_ENDIAN ? $xlp_pageaddr : 0, $xlp_rem_len); +} + +# Make sure we are far away enough from the end of a page that we could insert +# a couple of small records. This inserts a few records of a fixed size, until +# the threshold gets close enough to the end of the WAL page inserting records +# to. +sub advance_out_of_record_splitting_zone +{ + my $node = shift; + + my $page_threshold = $WAL_BLOCK_SIZE / 4; + my $end_lsn = get_insert_lsn($node); + my $page_offset = $end_lsn % $WAL_BLOCK_SIZE; + while ($page_offset >= $WAL_BLOCK_SIZE - $page_threshold) + { + emit_message($node, $page_threshold); + $end_lsn = get_insert_lsn($node); + $page_offset = $end_lsn % $WAL_BLOCK_SIZE; + } + return $end_lsn; +} + +# Advance so close to the end of a page that an XLogRecordHeader would not +# fit on it. +sub advance_to_record_splitting_zone +{ + my $node = shift; + + my $end_lsn = get_insert_lsn($node); + my $page_offset = $end_lsn % $WAL_BLOCK_SIZE; + + # Get fairly close to the end of a page in big steps + while ($page_offset <= $WAL_BLOCK_SIZE - 512) + { + emit_message($node, $WAL_BLOCK_SIZE - $page_offset - 256); + $end_lsn = get_insert_lsn($node); + $page_offset = $end_lsn % $WAL_BLOCK_SIZE; + } + + # Calibrate our message size so that we can get closer 8 bytes at + # a time. + my $message_size = $WAL_BLOCK_SIZE - 80; + while ($page_offset <= $WAL_BLOCK_SIZE - $RECORD_HEADER_SIZE) + { + emit_message($node, $message_size); + $end_lsn = get_insert_lsn($node); + + my $old_offset = $page_offset; + $page_offset = $end_lsn % $WAL_BLOCK_SIZE; + + # Adjust the message size until it causes 8 bytes changes in + # offset, enough to be able to split a record header. + my $delta = $page_offset - $old_offset; + if ($delta > 8) + { + $message_size -= 8; + } + elsif ($delta <= 0) + { + $message_size += 8; + } + } + return $end_lsn; +} + +# Setup a new node. The configuration chosen here minimizes the number +# of arbitrary records that could get generated in a cluster. Enlarging +# checkpoint_timeout avoids noise with checkpoint activity. wal_level +# set to "minimal" avoids random standby snapshot records. Autovacuum +# could also trigger randomly, generating random WAL activity of its own. +my $node = PostgreSQL::Test::Cluster->new("node"); +$node->init; +$node->append_conf( + 'postgresql.conf', + q[wal_level = minimal + autovacuum = off + checkpoint_timeout = '30min' +]); +$node->start; +$node->safe_psql('postgres', "CREATE TABLE t AS SELECT 42"); + +$WAL_SEGMENT_SIZE = get_int_setting($node, 'wal_segment_size'); +$WAL_BLOCK_SIZE = get_int_setting($node, 'wal_block_size'); +$TLI = $node->safe_psql('postgres', + "SELECT timeline_id FROM pg_control_checkpoint();"); + +my $end_lsn; +my $prev_lsn; + +########################################################################### +note "Single-page end-of-WAL detection"; +########################################################################### + +# xl_tot_len is 0 (a common case, we hit trailing zeroes). 
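+# (Nothing is overwritten for this case: after the immediate stop, the bytes
+# following the last valid record are still zero, so recovery reads
+# xl_tot_len == 0 and should stop with the "invalid record length ...
+# wanted 24, got 0" message checked for below.)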
+emit_message($node, 0); +$end_lsn = advance_out_of_record_splitting_zone($node); +$node->stop('immediate'); +my $log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "invalid record length at .*: wanted 24, got 0", $log_size + ), + "xl_tot_len zero"); + +# xl_tot_len is < 24 (presumably recycled garbage). +emit_message($node, 0); +$end_lsn = advance_out_of_record_splitting_zone($node); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, build_record_header(23)); +$log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "invalid record length at .*: wanted 24, got 23", + $log_size), + "xl_tot_len short"); + +# xl_tot_len in final position, not big enough to span into a new page but +# also not eligible for regular record header validation +emit_message($node, 0); +$end_lsn = advance_to_record_splitting_zone($node); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, build_record_header(1)); +$log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "invalid record length at .*: wanted 24, got 1", $log_size + ), + "xl_tot_len short at end-of-page"); + +# Need more pages, but xl_prev check fails first. +emit_message($node, 0); +$end_lsn = advance_out_of_record_splitting_zone($node); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef)); +$log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "record with incorrect prev-link 0/DEADBEEF at .*", $log_size), + "xl_prev bad"); + +# xl_crc check fails. +emit_message($node, 0); +advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 10); +$node->stop('immediate'); +# Corrupt a byte in that record, breaking its CRC. +write_wal($node, $TLI, $end_lsn - 8, '!'); +$log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "incorrect resource manager data checksum in record at .*", $log_size + ), + "xl_crc bad"); + + +########################################################################### +note "Multi-page end-of-WAL detection, header is not split"; +########################################################################### + +# This series of tests requires a valid xl_prev set in the record header +# written to WAL. + +# Good xl_prev, we hit zero page next (zero magic). +emit_message($node, 0); +$prev_lsn = advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 0); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 0, $prev_lsn)); +$log_size = -s $node->logfile; +$node->start; +ok($node->log_contains("invalid magic number 0000 ", $log_size), + "xlp_magic zero"); + +# Good xl_prev, we hit garbage page next (bad magic). +emit_message($node, 0); +$prev_lsn = advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 0); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 0, $prev_lsn)); +write_wal( + $node, $TLI, + start_of_next_page($end_lsn), + build_page_header(0xcafe, 0, 1, 0)); +$log_size = -s $node->logfile; +$node->start; +ok($node->log_contains("invalid magic number CAFE ", $log_size), + "xlp_magic bad"); + +# Good xl_prev, we hit typical recycled page (good xlp_magic, bad +# xlp_pageaddr). 
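+# (A recycled segment still starts with a valid-looking page header from its
+# previous life: xlp_magic matches, but xlp_pageaddr points at the old
+# location.  The bogus 0xbaaaaaad address written below simulates that, and
+# recovery should report it as "unexpected pageaddr".)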
+emit_message($node, 0); +$prev_lsn = advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 0); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 0, $prev_lsn)); +write_wal( + $node, $TLI, + start_of_next_page($end_lsn), + build_page_header($XLP_PAGE_MAGIC, 0, 1, 0xbaaaaaad)); +$log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "unexpected pageaddr 0/BAAAAAAD ", $log_size), + "xlp_pageaddr bad"); + +# Good xl_prev, xlp_magic, xlp_pageaddr, but bogus xlp_info. +emit_message($node, 0); +$prev_lsn = advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 0); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 42, $prev_lsn)); +write_wal( + $node, $TLI, + start_of_next_page($end_lsn), + build_page_header( + $XLP_PAGE_MAGIC, 0x1234, 1, start_of_next_page($end_lsn))); +$log_size = -s $node->logfile; +$node->start; +ok($node->log_contains("invalid info bits 1234 ", $log_size), + "xlp_info bad"); + +# Good xl_prev, xlp_magic, xlp_pageaddr, but xlp_info doesn't mention +# continuation record. +emit_message($node, 0); +$prev_lsn = advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 0); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 42, $prev_lsn)); +write_wal( + $node, $TLI, + start_of_next_page($end_lsn), + build_page_header($XLP_PAGE_MAGIC, 0, 1, start_of_next_page($end_lsn))); +$log_size = -s $node->logfile; +$node->start; +ok($node->log_contains("there is no contrecord flag at .*", $log_size), + "xlp_info lacks XLP_FIRST_IS_CONTRECORD"); + +# Good xl_prev, xlp_magic, xlp_pageaddr, xlp_info but xlp_rem_len doesn't add +# up. +emit_message($node, 0); +$prev_lsn = advance_out_of_record_splitting_zone($node); +$end_lsn = emit_message($node, 0); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 42, $prev_lsn)); +write_wal( + $node, $TLI, + start_of_next_page($end_lsn), + build_page_header( + $XLP_PAGE_MAGIC, $XLP_FIRST_IS_CONTRECORD, + 1, start_of_next_page($end_lsn), + 123456)); +$log_size = -s $node->logfile; +$node->start; +ok( $node->log_contains( + "invalid contrecord length 123456 .* at .*", $log_size), + "xlp_rem_len bad"); + + +########################################################################### +note "Multi-page, but header is split, so page checks are done first"; +########################################################################### + +# xl_prev is bad and xl_tot_len is too big, but we'll check xlp_magic first. +emit_message($node, 0); +$end_lsn = advance_to_record_splitting_zone($node); +$node->stop('immediate'); +write_wal($node, $TLI, $end_lsn, + build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef)); +$log_size = -s $node->logfile; +$node->start; +ok($node->log_contains("invalid magic number 0000 ", $log_size), + "xlp_magic zero (split record header)"); + +# And we'll also check xlp_pageaddr before any header checks. 
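+# (In this block the record header starts so close to the page boundary that
+# it continues onto the next page, so recovery must validate that page's
+# header before it can reassemble the record header; the bad xlp_pageaddr is
+# therefore reported even though xl_prev is bogus as well.)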
+emit_message($node, 0);
+$end_lsn = advance_to_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+	build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef));
+write_wal(
+	$node, $TLI,
+	start_of_next_page($end_lsn),
+	build_page_header(
+		$XLP_PAGE_MAGIC, $XLP_FIRST_IS_CONTRECORD, 1, 0xbaaaaaad));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+		"unexpected pageaddr 0/BAAAAAAD ", $log_size),
+	"xlp_pageaddr bad (split record header)");
+
+# We'll also discover that xlp_rem_len doesn't add up before any
+# header checks.
+emit_message($node, 0);
+$end_lsn = advance_to_record_splitting_zone($node);
+$node->stop('immediate');
+write_wal($node, $TLI, $end_lsn,
+	build_record_header(2 * 1024 * 1024 * 1024, 0, 0xdeadbeef));
+write_wal(
+	$node, $TLI,
+	start_of_next_page($end_lsn),
+	build_page_header(
+		$XLP_PAGE_MAGIC, $XLP_FIRST_IS_CONTRECORD,
+		1, start_of_next_page($end_lsn),
+		123456));
+$log_size = -s $node->logfile;
+$node->start;
+ok( $node->log_contains(
+		"invalid contrecord length 123456 .* at .*", $log_size),
+	"xlp_rem_len bad (split record header)");
+
+done_testing();
diff --git a/src/test/recovery/t/cp_history_files b/src/test/recovery/t/cp_history_files
new file mode 100644
index 0000000..cfeea41
--- /dev/null
+++ b/src/test/recovery/t/cp_history_files
@@ -0,0 +1,10 @@
+#!/usr/bin/perl
+
+use File::Copy;
+use strict;
+use warnings;
+
+die "wrong number of arguments" if @ARGV != 2;
+my ($source, $target) = @ARGV;
+exit if $source !~ /history/;
+copy($source, $target) or die "couldn't copy $source to $target: $!";
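+
+# (Illustrative, assumed usage: this helper is written so it can serve as an
+# archive_command that archives only timeline history files, e.g. something
+# like
+#   archive_command = 'perl cp_history_files "%p" "/path/to/archive/%f"'
+# where the archive path is hypothetical; any file whose name does not
+# contain "history" is silently skipped by the regexp test above.)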