src/test/recovery/t/029_stats_restart.pl


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344

# Copyright (c) 2021-2022, PostgreSQL Global Development Group

# Tests statistics handling around restarts, including handling of crashes and
# invalid stats files, as well as restorting stats after "normal" restarts.

use strict;
use warnings;
use PostgreSQL::Test::Cluster;
use PostgreSQL::Test::Utils;
use Test::More;
use File::Copy;

my $node = PostgreSQL::Test::Cluster->new('primary');
$node->init(allows_streaming => 1);
$node->append_conf('postgresql.conf', "track_functions = 'all'");
$node->start;

my $connect_db    = 'postgres';
my $db_under_test = 'test';

# create test objects
$node->safe_psql($connect_db, "CREATE DATABASE $db_under_test");
$node->safe_psql($db_under_test,
	"CREATE TABLE tab_stats_crash_discard_test1 AS SELECT generate_series(1,100) AS a"
);
$node->safe_psql($db_under_test,
	"CREATE FUNCTION func_stats_crash_discard1() RETURNS VOID AS 'select 2;' LANGUAGE SQL IMMUTABLE"
);

# collect object oids
my $dboid = $node->safe_psql($db_under_test,
	"SELECT oid FROM pg_database WHERE datname = '$db_under_test'");
my $funcoid = $node->safe_psql($db_under_test,
	"SELECT 'func_stats_crash_discard1()'::regprocedure::oid");
my $tableoid = $node->safe_psql($db_under_test,
	"SELECT 'tab_stats_crash_discard_test1'::regclass::oid");

# generate stats and flush them
trigger_funcrel_stat();

# verify stats objects exist
my $sect = "initial";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
	't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
	't', "$sect: relation stats do exist");

# regular shutdown
$node->stop();

# backup stats files
my $statsfile = $PostgreSQL::Test::Utils::tmp_check . '/' . "discard_stats1";
ok(!-f "$statsfile", "backup statsfile cannot already exist");

my $datadir  = $node->data_dir();
my $og_stats = "$datadir/pg_stat/pgstat.stat";
ok(-f "$og_stats", "origin stats file must exist");
copy($og_stats, $statsfile) or die "Copy failed: $!";


## test discarding of stats file after crash etc

$node->start;

$sect = "copy";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
	't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
	't', "$sect: relation stats do exist");

$node->stop('immediate');

ok(!-f "$og_stats", "no stats file should exist after immediate shutdown");

# copy the old stats back to test we discard stats after crash restart
copy($statsfile, $og_stats) or die "Copy failed: $!";

$node->start;

# stats should have been discarded
$sect = "post immediate";
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
is(have_stats('function', $dboid, $funcoid),
	'f', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
	'f', "$sect: relation stats do not exist");

# get rid of backup statsfile
unlink $statsfile or die "cannot unlink $statsfile $!";


# generate new stats and flush them
trigger_funcrel_stat();

$sect = "post immediate, new";
is(have_stats('database', $dboid, 0), 't', "$sect: db stats do exist");
is(have_stats('function', $dboid, $funcoid),
	't', "$sect: function stats do exist");
is(have_stats('relation', $dboid, $tableoid),
	't', "$sect: relation stats do exist");

# regular shutdown
$node->stop();


## check an invalid stats file is handled

overwrite_file($og_stats, "ZZZZZZZZZZZZZ");

# normal startup and no issues despite invalid stats file
$node->start;

# no stats present due to invalid stats file
$sect = "invalid_overwrite";
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
is(have_stats('function', $dboid, $funcoid),
	'f', "$sect: function stats do not exist");
is(have_stats('relation', $dboid, $tableoid),
	'f', "$sect: relation stats do not exist");


## check invalid stats file starting with valid contents, but followed by
## invalid content is handled.

trigger_funcrel_stat();
$node->stop;
append_file($og_stats, "XYZ");
$node->start;

$sect = "invalid_append";
is(have_stats('database', $dboid, 0), 'f', "$sect: db stats do not exist");
is(have_stats('function', $dboid, $funcoid),
	'f', "$sect: function stats do not exist");
is(have_stats('relation', $dboid, $tableoid),
	'f', "$sect: relation stats do not exist");


## checks related to stats persistency around restarts and resets

# Ensure enough checkpoints to protect against races for test after reset,
# even on very slow machines.
$node->safe_psql($connect_db, "CHECKPOINT; CHECKPOINT;");


## check checkpoint and wal stats are incremented due to restart

my $ckpt_start = checkpoint_stats();
my $wal_start  = wal_stats();
$node->restart;

$sect = "post restart";
my $ckpt_restart = checkpoint_stats();
my $wal_restart  = wal_stats();

cmp_ok(
	$ckpt_start->{count}, '<',
	$ckpt_restart->{count},
	"$sect: increased checkpoint count");
cmp_ok(
	$wal_start->{records}, '<',
	$wal_restart->{records},
	"$sect: increased wal record count");
cmp_ok($wal_start->{bytes}, '<', $wal_restart->{bytes},
	"$sect: increased wal bytes");
is( $ckpt_start->{reset},
	$ckpt_restart->{reset},
	"$sect: checkpoint stats_reset equal");
is($wal_start->{reset}, $wal_restart->{reset},
	"$sect: wal stats_reset equal");


## Check that checkpoint stats are reset, WAL stats aren't affected

$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('bgwriter')");

$sect = "post ckpt reset";
my $ckpt_reset     = checkpoint_stats();
my $wal_ckpt_reset = wal_stats();

cmp_ok($ckpt_restart->{count},
	'>', $ckpt_reset->{count}, "$sect: checkpoint count smaller");
cmp_ok($ckpt_start->{reset}, 'lt', $ckpt_reset->{reset},
	"$sect: stats_reset newer");

cmp_ok(
	$wal_restart->{records},
	'<=',
	$wal_ckpt_reset->{records},
	"$sect: wal record count not affected by reset");
is( $wal_start->{reset},
	$wal_ckpt_reset->{reset},
	"$sect: wal stats_reset equal");


## check that checkpoint stats stay reset after restart

$node->restart;

$sect = "post ckpt reset & restart";
my $ckpt_restart_reset = checkpoint_stats();
my $wal_restart2       = wal_stats();

# made sure above there's enough checkpoints that this will be stable even on slow machines
cmp_ok(
	$ckpt_restart_reset->{count},
	'<',
	$ckpt_restart->{count},
	"$sect: checkpoint still reset");
is($ckpt_restart_reset->{reset},
	$ckpt_reset->{reset}, "$sect: stats_reset same");

cmp_ok(
	$wal_ckpt_reset->{records},
	'<',
	$wal_restart2->{records},
	"$sect: increased wal record count");
cmp_ok(
	$wal_ckpt_reset->{bytes},
	'<',
	$wal_restart2->{bytes},
	"$sect: increased wal bytes");
is( $wal_start->{reset},
	$wal_restart2->{reset},
	"$sect: wal stats_reset equal");


## check WAL stats stay reset

$node->safe_psql($connect_db, "SELECT pg_stat_reset_shared('wal')");

$sect = "post wal reset";
my $wal_reset = wal_stats();

cmp_ok(
	$wal_reset->{records}, '<',
	$wal_restart2->{records},
	"$sect: smaller record count");
cmp_ok(
	$wal_reset->{bytes}, '<',
	$wal_restart2->{bytes},
	"$sect: smaller bytes");
cmp_ok(
	$wal_reset->{reset}, 'gt',
	$wal_restart2->{reset},
	"$sect: newer stats_reset");

$node->restart;

$sect = "post wal reset & restart";
my $wal_reset_restart = wal_stats();

# enough WAL generated during prior tests and initdb to make this not racy
cmp_ok(
	$wal_reset_restart->{records},
	'<',
	$wal_restart2->{records},
	"$sect: smaller record count");
cmp_ok(
	$wal_reset->{bytes}, '<',
	$wal_restart2->{bytes},
	"$sect: smaller bytes");
cmp_ok(
	$wal_reset->{reset}, 'gt',
	$wal_restart2->{reset},
	"$sect: newer stats_reset");

$node->stop('immediate');
$node->start;

$sect = "post immediate restart";
my $wal_restart_immediate = wal_stats();

cmp_ok(
	$wal_reset_restart->{reset},
	'lt',
	$wal_restart_immediate->{reset},
	"$sect: reset timestamp is new");

$node->stop;
done_testing();

sub trigger_funcrel_stat
{
	$node->safe_psql(
		$db_under_test, q[
	SELECT * FROM tab_stats_crash_discard_test1;
	SELECT func_stats_crash_discard1();
    SELECT pg_stat_force_next_flush();]);
}

sub have_stats
{
	my ($kind, $dboid, $objoid) = @_;

	return $node->safe_psql($connect_db,
		"SELECT pg_stat_have_stats('$kind', $dboid, $objoid)");
}

sub overwrite_file
{
	my ($filename, $str) = @_;
	open my $fh, ">", $filename
	  or die "could not overwrite \"$filename\": $!";
	print $fh $str;
	close $fh;
	return;
}

sub append_file
{
	my ($filename, $str) = @_;
	open my $fh, ">>", $filename
	  or die "could not append to \"$filename\": $!";
	print $fh $str;
	close $fh;
	return;
}

sub checkpoint_stats
{
	my %results;

	$results{count} = $node->safe_psql($connect_db,
		"SELECT checkpoints_timed + checkpoints_req FROM pg_stat_bgwriter");
	$results{reset} = $node->safe_psql($connect_db,
		"SELECT stats_reset FROM pg_stat_bgwriter");

	return \%results;
}

sub wal_stats
{
	my %results;
	$results{records} =
	  $node->safe_psql($connect_db, "SELECT wal_records FROM pg_stat_wal");
	$results{bytes} =
	  $node->safe_psql($connect_db, "SELECT wal_bytes FROM pg_stat_wal");
	$results{reset} =
	  $node->safe_psql($connect_db, "SELECT stats_reset FROM pg_stat_wal");

	return \%results;
}