summaryrefslogtreecommitdiffstats
path: root/src/test/recovery/t/002_archiving.pl
blob: d69da4e5efd30f01f5e7af23b806f8254c2fc65c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
# Copyright (c) 2021-2022, PostgreSQL Global Development Group

# test for archiving with hot standby
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use File::Copy;

# Initialize primary node, doing archives
my $node_primary = PostgreSQL::Test::Cluster->new('primary');
# has_archiving enables WAL archiving; allows_streaming makes a base
# backup possible.  Both are needed so the standby below can restore
# exclusively from the primary's archives.
$node_primary->init(
	has_archiving    => 1,
	allows_streaming => 1);
my $backup_name = 'my_backup';

# Start it
$node_primary->start;

# Take backup for standby
$node_primary->backup($backup_name);

# Initialize standby node from backup, fetching WAL from archives
my $node_standby = PostgreSQL::Test::Cluster->new('standby');
# Note that this makes the standby store its contents on the archives
# of the primary.
$node_standby->init_from_backup($node_primary, $backup_name,
	has_restoring => 1);
# Shorten the retry interval so the standby picks up newly-archived
# segments quickly, keeping the poll loops below fast.
$node_standby->append_conf('postgresql.conf',
	"wal_retrieve_retry_interval = '100ms'");

# Set archive_cleanup_command and recovery_end_command, checking their
# execution by the backend with dummy commands.
# The marker files are created with paths relative to the standby's data
# directory (the server's working directory), so their presence or
# absence under $data_dir tells us whether each command has run.
my $data_dir                     = $node_standby->data_dir;
my $archive_cleanup_command_file = "archive_cleanup_command.done";
my $recovery_end_command_file    = "recovery_end_command.done";
$node_standby->append_conf(
	'postgresql.conf', qq(
archive_cleanup_command = 'echo archive_cleanup_done > $archive_cleanup_command_file'
recovery_end_command = 'echo recovery_ended_done > $recovery_end_command_file'
));
$node_standby->start;

# Create some content on primary
$node_primary->safe_psql('postgres',
	"CREATE TABLE tab_int AS SELECT generate_series(1,1000) AS a");

# Note the presence of this checkpoint for the archive_cleanup_command
# check done below, before switching to a new segment.
$node_primary->safe_psql('postgres', "CHECKPOINT");

# Done after the checkpoint to ensure that it is replayed on the standby,
# for archive_cleanup_command.
my $current_lsn =
  $node_primary->safe_psql('postgres', "SELECT pg_current_wal_lsn();");

# Force archiving of WAL file to make it present on primary
$node_primary->safe_psql('postgres', "SELECT pg_switch_wal()");

# Add some more content, it should not be present on standby
# (these rows live in the segment opened after the switch, which has not
# been archived yet, and the standby only reads from the archives).
$node_primary->safe_psql('postgres',
	"INSERT INTO tab_int VALUES (generate_series(1001,2000))");

# Wait until necessary replay has been done on standby
# ($current_lsn was captured before the switch, so reaching it proves
# the archived segment with the first 1000 rows has been replayed).
my $caughtup_query =
  "SELECT '$current_lsn'::pg_lsn <= pg_last_wal_replay_lsn()";
$node_standby->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for standby to catch up";

# Only the pre-switch rows should be visible.
my $result =
  $node_standby->safe_psql('postgres', "SELECT count(*) FROM tab_int");
is($result, qq(1000), 'check content from archives');

# archive_cleanup_command is executed after generating a restart point,
# with a checkpoint.
# The CHECKPOINT replayed above gives the standby a restart point to
# create, which is what triggers archive_cleanup_command here.
$node_standby->safe_psql('postgres', q{CHECKPOINT});
ok( -f "$data_dir/$archive_cleanup_command_file",
	'archive_cleanup_command executed on checkpoint');
# recovery_end_command only fires at the end of recovery, which has not
# happened yet — the standby is still in recovery at this point.
ok( !-f "$data_dir/$recovery_end_command_file",
	'recovery_end_command not executed yet');

# Check the presence of temporary files specifically generated during
# archive recovery.  To ensure the presence of the temporary history
# file, switch to a timeline large enough to allow a standby to recover
# a history file from an archive.  As this requires at least two timeline
# switches, promote the existing standby first.  Then create a second
# standby based on the primary, using its archives.  Finally, the second
# standby is promoted.
$node_standby->promote;

# Wait until the history file has been stored on the archives of the
# primary once the promotion of the standby completes.  This ensures that
# the second standby created below will be able to restore this file,
# creating a RECOVERYHISTORY.
# NOTE(review): pg_stat_file() raises an error while the file is absent;
# this relies on poll_query_until() retrying on query failure — confirm.
my $primary_archive = $node_primary->archive_dir;
$caughtup_query =
  "SELECT size IS NOT NULL FROM pg_stat_file('$primary_archive/00000002.history')";
$node_primary->poll_query_until('postgres', $caughtup_query)
  or die "Timed out while waiting for archiving of 00000002.history";

# recovery_end_command should have been triggered on promotion.
ok( -f "$data_dir/$recovery_end_command_file",
	'recovery_end_command executed after promotion');

# Create a second standby from the same base backup, restoring from the
# primary's archives.  This one checks that the temporary files created
# during archive recovery (RECOVERYHISTORY, RECOVERYXLOG) are removed at
# the end of recovery, and that a recovery_end_command failure is logged
# without blocking promotion.
my $node_standby2 = PostgreSQL::Test::Cluster->new('standby2');
$node_standby2->init_from_backup($node_primary, $backup_name,
	has_restoring => 1);

# Make execution of recovery_end_command fail.  This should not affect
# promotion, and its failure should be logged.
$node_standby2->append_conf(
	'postgresql.conf', qq(
recovery_end_command = 'echo recovery_end_failed > missing_dir/xyz.file'
));

$node_standby2->start;

# Save the log location, to see the failure of recovery_end_command.
# (-s gives the current log file size, used as an offset so that only
# log output produced after this point is examined.)
my $log_location = -s $node_standby2->logfile;

# Now promote standby2, and check that temporary files specifically
# generated during archive recovery are removed by the end of recovery.
$node_standby2->promote;

# Check the logs of the standby to see that the commands have failed.
my $log_contents = slurp_file($node_standby2->logfile, $log_location);
my $node_standby2_data = $node_standby2->data_dir;

# The history file for timeline 2 must have been fetched from the
# archives during recovery.  Note the escaped dot: an unescaped "."
# would match any character in the file name.
like(
	$log_contents,
	qr/restored log file "00000002\.history" from archive/s,
	"00000002.history retrieved from the archives");
# Both temporary recovery files must be gone once recovery ends.
ok( !-f "$node_standby2_data/pg_wal/RECOVERYHISTORY",
	"RECOVERYHISTORY removed after promotion");
ok( !-f "$node_standby2_data/pg_wal/RECOVERYXLOG",
	"RECOVERYXLOG removed after promotion");
# The failing recovery_end_command must be reported as a WARNING.
like(
	$log_contents,
	qr/WARNING:.*recovery_end_command/s,
	"recovery_end_command failure detected in logs after promotion");

done_testing();