summaryrefslogtreecommitdiffstats
path: root/src/test/recovery/t/020_archive_status.pl
blob: 2108d50073abf6291033187448997ccd64e1b75f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# Copyright (c) 2021-2022, PostgreSQL Global Development Group

#
# Tests related to WAL archiving and recovery.
#
use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;

# Set up the primary node used throughout this test.  The node is
# initialized with the has_archiving and allows_streaming options, and
# autovacuum is switched off in postgresql.conf before the node is started.
# NOTE(review): autovacuum is presumably disabled to keep the WAL activity
# of this test predictable -- confirm.
my $primary = PostgreSQL::Test::Cluster->new('primary');
$primary->init(
	has_archiving    => 1,
	allows_streaming => 1);
$primary->append_conf('postgresql.conf', 'autovacuum = off');
$primary->start;
# Data directory of the primary, used below as the prefix of the paths
# to the archive status files.
my $primary_data = $primary->data_dir;

# Make the archiver fail on purpose while archiving stays enabled.  A
# command that does not exist cannot be used here, as the archiver process
# would then just exit without reporting the failure to pg_stat_archiver.
# A plain "false" is not an option either, as it is not portable to
# Windows.  The portable approach is a copy command that is known to work
# but is given an incorrect source path, so that it always fails.
my $incorrect_command;
if ($PostgreSQL::Test::Utils::windows_os)
{
	$incorrect_command = qq{copy "%p_does_not_exist" "%f_does_not_exist"};
}
else
{
	$incorrect_command = qq{cp "%p_does_not_exist" "%f_does_not_exist"};
}

# Install the failing command and reload the configuration.
my $set_failing_command_sql = qq{
    ALTER SYSTEM SET archive_command TO '$incorrect_command';
    SELECT pg_reload_conf();
};
$primary->safe_psql('postgres', $set_failing_command_sql);

# Save the name of the WAL segment currently in use, then switch to a new
# segment.  The saved name is used to track the activity of the archiver
# for this segment in the checks below.  The name has to be fetched before
# pg_switch_wal() is called, as it refers to the segment being completed.
my $segment_name_1 = $primary->safe_psql('postgres',
	q{SELECT pg_walfile_name(pg_current_wal_lsn())});
# Paths of the archive status files of this segment, relative to a data
# directory so that they can be reused for the primary and the standbys.
my $segment_path_1       = "pg_wal/archive_status/$segment_name_1";
my $segment_path_1_ready = "$segment_path_1.ready";
my $segment_path_1_done  = "$segment_path_1.done";
# Generate some WAL, force the segment switch and run a checkpoint.
$primary->safe_psql(
	'postgres', q{
	CREATE TABLE mine AS SELECT generate_series(1,10) AS x;
	SELECT pg_switch_wal();
	CHECKPOINT;
});

# Block until pg_stat_archiver reports at least one archiving failure.
die "Timed out while waiting for archiving to fail"
  unless $primary->poll_query_until('postgres',
	q{SELECT failed_count > 0 FROM pg_stat_archiver}, 't');

# The segment that failed to be archived must still be marked as pending:
# its .ready file is present and no .done file has been created.
my $failed_ready_file = "$primary_data/$segment_path_1_ready";
my $failed_done_file  = "$primary_data/$segment_path_1_done";
ok(-f $failed_ready_file,
	".ready file exists for WAL segment $segment_name_1 waiting to be archived");
ok(!-f $failed_done_file,
	".done file does not exist for WAL segment $segment_name_1 waiting to be archived");

# Nothing has been archived yet, and the last failure reported is for the
# segment whose name was saved before the switch.
my $archiver_failure_sql = q{
		SELECT archived_count, last_failed_wal
		FROM pg_stat_archiver
	};
is($primary->safe_psql('postgres', $archiver_failure_sql),
	"0|$segment_name_1",
	"pg_stat_archiver failed to archive $segment_name_1");

# Crash the cluster (immediate stop) for the next test, which checks that
# non-archived WAL segments are not removed.
$primary->stop('immediate');

# The recovery tests done with the standbys below partially check the
# recovery behavior when restoring a backup taken using a snapshot with
# no pg_backup_start/stop.  In this situation, the recovered standby
# should first enter crash recovery and then switch to regular archive
# recovery.  Note that the base backup is taken here, while the node is
# stopped and archive_command is still set to fail.  This is necessary
# for the assumptions of the tests done with the standbys below.
$primary->backup_fs_cold('backup');

# Restart the primary, and check that the .ready file of the segment that
# could not be archived is still there once the node is back up.
$primary->start;
ok( -f "$primary_data/$segment_path_1_ready",
	".ready file for WAL segment $segment_name_1 still exists after crash recovery on primary"
);

# Drop the failing archive_command override so that archiving can succeed
# again, then wait until exactly one segment is reported as archived.
my $primary_reset_sql = q{
	ALTER SYSTEM RESET archive_command;
	SELECT pg_reload_conf();
};
$primary->safe_psql('postgres', $primary_reset_sql);

if (!$primary->poll_query_until(
		'postgres', q{SELECT archived_count FROM pg_stat_archiver}, '1'))
{
	die "Timed out while waiting for archiving to finish";
}

# Once archived, the status file of the segment flips from .ready to .done.
my $archived_ready_file = "$primary_data/$segment_path_1_ready";
my $archived_done_file  = "$primary_data/$segment_path_1_done";
ok(!-f $archived_ready_file,
	".ready file for archived WAL segment $segment_name_1 removed");
ok(-f $archived_done_file,
	".done file for archived WAL segment $segment_name_1 exists");

# The statistics view must report the same segment as the last one archived.
is($primary->safe_psql('postgres',
		q{ SELECT last_archived_wal FROM pg_stat_archiver }),
	$segment_name_1,
	"archive success reported in pg_stat_archiver for WAL segment $segment_name_1");

# Create some WAL activity and a new checkpoint so that the next standby
# can create a restartpoint.  As this standby starts in crash recovery
# because of the cold backup taken previously, it needs a clean
# restartpoint to deal with existing status files.
#
# As for the first segment, save the name of the segment currently in use
# and the relative paths of its archive status files before switching.
my $segment_name_2 = $primary->safe_psql('postgres',
	q{SELECT pg_walfile_name(pg_current_wal_lsn())});
my $segment_path_2       = "pg_wal/archive_status/$segment_name_2";
my $segment_path_2_ready = "$segment_path_2.ready";
my $segment_path_2_done  = "$segment_path_2.done";
$primary->safe_psql(
	'postgres', q{
	INSERT INTO mine SELECT generate_series(10,20) AS x;
	CHECKPOINT;
});

# Switch to a new segment and use the returned LSN to make sure that
# standbys have caught up to this point.
my $primary_lsn = $primary->safe_psql(
	'postgres', q{
	SELECT pg_switch_wal();
});

# Wait until the second segment is reported as the last one archived, so
# that it is available in the archives before the standbys are started.
$primary->poll_query_until('postgres',
	q{ SELECT last_archived_wal FROM pg_stat_archiver },
	$segment_name_2)
  or die "Timed out while waiting for archiving to finish";

# First standby, created from the cold backup with archive_mode = on.
my $standby1 = PostgreSQL::Test::Cluster->new('standby');
$standby1->init_from_backup($primary, 'backup', has_restoring => 1);
$standby1->append_conf('postgresql.conf', "archive_mode = on");
my $standby1_data = $standby1->data_dir;
$standby1->start;

# Wait until the segment switch done previously on the primary has been
# replayed, which ensures that all the segments needed have been restored
# from the archives.
my $standby1_caught_up_sql =
  qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 };
if (!$standby1->poll_query_until('postgres', $standby1_caught_up_sql))
{
	die "Timed out while waiting for xlog replay on standby1";
}

$standby1->safe_psql('postgres', q{CHECKPOINT});

# Status files to look at in the data directory of this standby.
my $standby1_ready_1 = "$standby1_data/$segment_path_1_ready";
my $standby1_ready_2 = "$standby1_data/$segment_path_2_ready";
my $standby1_done_2  = "$standby1_data/$segment_path_2_done";

# With archive_mode=on, recovery does not keep the .ready signal files
# inherited from the backup.  This WAL segment existed in the backup.
ok(!-f $standby1_ready_1,
	".ready file for WAL segment $segment_name_1 present in backup got removed with archive_mode=on on standby");

# With archive_mode=on, recovery should not create .ready files either.
# This segment did not exist in the backup.
ok(!-f $standby1_ready_2,
	".ready file for WAL segment $segment_name_2 not created on standby when archive_mode=on on standby");

# With archive_mode=on, recovery creates .done files.
ok(-f $standby1_done_2,
	".done file for WAL segment $segment_name_2 created when archive_mode=on on standby");

# Second standby, using archive_mode = always.  This mode should always
# keep .ready files if archiving is enabled, and here the archive command
# is wanted to fail so that the .ready files persist.  This node has
# inherited from the cold backup the archive command that causes archiving
# failures.
my $standby2 = PostgreSQL::Test::Cluster->new('standby2');
$standby2->init_from_backup($primary, 'backup', has_restoring => 1);
$standby2->append_conf('postgresql.conf', 'archive_mode = always');
my $standby2_data = $standby2->data_dir;
$standby2->start;

# Wait until the segment switch done previously on the primary has been
# replayed, which ensures that all the segments needed have been restored
# from the archives.
my $standby2_caught_up_sql =
  qq{ SELECT pg_wal_lsn_diff(pg_last_wal_replay_lsn(), '$primary_lsn') >= 0 };
if (!$standby2->poll_query_until('postgres', $standby2_caught_up_sql))
{
	die "Timed out while waiting for xlog replay on standby2";
}

$standby2->safe_psql('postgres', q{CHECKPOINT});

# Both the segment inherited from the backup and the one restored from
# the archives must be flagged as ready to be archived.
my $standby2_ready_1 = "$standby2_data/$segment_path_1_ready";
my $standby2_ready_2 = "$standby2_data/$segment_path_2_ready";
ok(-f $standby2_ready_1,
	".ready file for WAL segment $segment_name_1 existing in backup is kept with archive_mode=always on standby");
ok(-f $standby2_ready_2,
	".ready file for WAL segment $segment_name_2 created with archive_mode=always on standby");

# Reset the statistics of the archiver, so that the counters checked later
# only reflect what happens from this point on.
$standby2->safe_psql('postgres', q{SELECT pg_stat_reset_shared('archiver')});

# Now crash the cluster (immediate stop followed by a restart) to check
# that the recovery step does not remove non-archived WAL segments on a
# standby where archiving is enabled.
$standby2->stop('immediate');
$standby2->start;

# The .ready file of the first segment must have survived crash recovery.
ok( -f "$standby2_data/$segment_path_1_ready",
	"WAL segment still ready to archive after crash recovery on standby with archive_mode=always"
);

# Drop the failing archive_command override on the standby, and wait until
# the second segment is reported as the last one archived.
my $standby2_reset_sql = q{
	ALTER SYSTEM RESET archive_command;
	SELECT pg_reload_conf();
};
$standby2->safe_psql('postgres', $standby2_reset_sql);
die "Timed out while waiting for archiving to finish"
  unless $standby2->poll_query_until('postgres',
	q{SELECT last_archived_wal FROM pg_stat_archiver},
	$segment_name_2);

# Both segments are expected to have been archived by the standby.
is($standby2->safe_psql('postgres',
		q{SELECT archived_count FROM pg_stat_archiver}),
	'2',
	"correct number of WAL segments archived from standby");

# Status files of the two segments in the data directory of the standby.
my @standby2_ready_files = (
	"$standby2_data/$segment_path_1_ready",
	"$standby2_data/$segment_path_2_ready");
my @standby2_done_files = (
	"$standby2_data/$segment_path_1_done",
	"$standby2_data/$segment_path_2_done");

# None of the .ready files may be left behind.
my $ready_files_left = grep { -f $_ } @standby2_ready_files;
ok($ready_files_left == 0,
	".ready files removed after archive success with archive_mode=always on standby");

# All of the .done files must exist.
my $done_files_found = grep { -f $_ } @standby2_done_files;
ok($done_files_found == scalar(@standby2_done_files),
	".done files created after archive success with archive_mode=always on standby");

# Check that the archiver process calls the shell archive module's shutdown
# callback.  The message looked for is only searched in the part of the
# server log written after log_min_messages has been raised to debug1.
$standby2->append_conf('postgresql.conf', "log_min_messages = debug1");
$standby2->reload;

# Run a query to make sure that the reload has taken effect, then remember
# the current size of the log file (-s returns the size in bytes) to use
# it as the starting point of the log contents checked below.
$standby2->safe_psql('postgres', q{SELECT 1});
my $log_location = -s $standby2->logfile;

# Stop the node and fetch the log contents.
# NOTE(review): slurp_file() is presumably reading the file from the
# offset given as second argument -- confirm against
# PostgreSQL::Test::Utils.
$standby2->stop;
my $logfile = slurp_file($standby2->logfile, $log_location);

# Use like() rather than ok() on a regex match, so that a failure reports
# the log contents and the pattern that was expected.
like(
	$logfile,
	qr/archiver process shutting down/,
	'check shutdown callback of shell archive module');

done_testing();