summaryrefslogtreecommitdiffstats
path: root/src/test/recovery/t/013_crash_restart.pl
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/recovery/t/013_crash_restart.pl')
-rw-r--r--src/test/recovery/t/013_crash_restart.pl272
1 files changed, 272 insertions, 0 deletions
diff --git a/src/test/recovery/t/013_crash_restart.pl b/src/test/recovery/t/013_crash_restart.pl
new file mode 100644
index 0000000..3e3d50b
--- /dev/null
+++ b/src/test/recovery/t/013_crash_restart.pl
@@ -0,0 +1,272 @@
+#
+# Tests restarts of postgres due to crashes of a subprocess.
+#
+# Two longer-running psql subprocesses are used: One to kill a
+# backend, triggering a crash-restart cycle, one to detect when
+# postmaster noticed the backend died. The second backend is
+# necessary because it's otherwise hard to determine if postmaster is
+# still accepting new sessions (because it hasn't noticed that the
+# backend died), or because it's already restarted.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More;
+use Config;
+use Time::HiRes qw(usleep);
+
+plan tests => 18;
+
+
+# To avoid hanging while expecting some specific input from a psql
+# instance being driven by us, add a timeout high enough that it
+# should never trigger even on very slow machines, unless something
+# is really wrong.
+my $psql_timeout = IPC::Run::timer(60);
+
+my $node = get_new_node('master');
+$node->init(allows_streaming => 1);
+$node->start();
+
+# by default PostgresNode doesn't doesn't restart after a crash
+$node->safe_psql(
+ 'postgres',
+ q[ALTER SYSTEM SET restart_after_crash = 1;
+ ALTER SYSTEM SET log_connections = 1;
+ SELECT pg_reload_conf();]);
+
+# Run psql, keeping session alive, so we have an alive backend to kill.
+my ($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+my $killme = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$killme_stdin,
+ '>',
+ \$killme_stdout,
+ '2>',
+ \$killme_stderr,
+ $psql_timeout);
+
+# Need a second psql to check if crash-restart happened.
+my ($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+my $monitor = IPC::Run::start(
+ [
+ 'psql', '-X', '-qAt', '-v', 'ON_ERROR_STOP=1', '-f', '-', '-d',
+ $node->connstr('postgres')
+ ],
+ '<',
+ \$monitor_stdin,
+ '>',
+ \$monitor_stdout,
+ '2>',
+ \$monitor_stderr,
+ $psql_timeout);
+
+#create table, insert row that should survive
+$killme_stdin .= q[
+CREATE TABLE alive(status text);
+INSERT INTO alive VALUES($$committed-before-sigquit$$);
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ 'acquired pid for SIGQUIT');
+my $pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+#insert a row that should *not* survive, due to in-progress xact
+$killme_stdin .= q[
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigquit$$) RETURNING status;
+];
+ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigquit/m),
+ 'inserted in-progress-before-sigquit');
+$killme_stdout = '';
+$killme_stderr = '';
+
+
+# Start longrunning query in second session; its failure will signal that
+# crash-restart has occurred. The initial wait for the trivial select is to
+# be sure that psql successfully connected to backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok(pump_until($monitor, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+# kill once with QUIT - we expect psql to exit, while emitting error message first
+my $ret = TestLib::system_log('pg_ctl', 'kill', 'QUIT', $pid);
+
+# Exactly process should have been alive to be killed
+is($ret, 0, "killed process with SIGQUIT");
+
+# Check that psql sees the killed backend as having been terminated
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ \$killme_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql query died successfully after SIGQUIT");
+$killme_stderr = '';
+$killme_stdout = '';
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
+ok( pump_until(
+ $monitor,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql monitor died successfully after SIGQUIT");
+$monitor->finish;
+
+# Wait till server restarts
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGQUIT");
+
+
+# restart psql processes, now that the crash cycle finished
+($killme_stdin, $killme_stdout, $killme_stderr) = ('', '', '');
+$killme->run();
+($monitor_stdin, $monitor_stdout, $monitor_stderr) = ('', '', '');
+$monitor->run();
+
+
+# Acquire pid of new backend
+$killme_stdin .= q[
+SELECT pg_backend_pid();
+];
+ok(pump_until($killme, \$killme_stdout, qr/[[:digit:]]+[\r\n]$/m),
+ "acquired pid for SIGKILL");
+$pid = $killme_stdout;
+chomp($pid);
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Insert test rows
+$killme_stdin .= q[
+INSERT INTO alive VALUES($$committed-before-sigkill$$) RETURNING status;
+BEGIN;
+INSERT INTO alive VALUES($$in-progress-before-sigkill$$) RETURNING status;
+];
+ok(pump_until($killme, \$killme_stdout, qr/in-progress-before-sigkill/m),
+ 'inserted in-progress-before-sigkill');
+$killme_stdout = '';
+$killme_stderr = '';
+
+# Re-start longrunning query in second session; its failure will signal that
+# crash-restart has occurred. The initial wait for the trivial select is to
+# be sure that psql successfully connected to backend.
+$monitor_stdin .= q[
+SELECT $$psql-connected$$;
+SELECT pg_sleep(3600);
+];
+ok(pump_until($monitor, \$monitor_stdout, qr/psql-connected/m),
+ 'monitor connected');
+$monitor_stdout = '';
+$monitor_stderr = '';
+
+
+# kill with SIGKILL this time - we expect the backend to exit, without
+# being able to emit an error message
+$ret = TestLib::system_log('pg_ctl', 'kill', 'KILL', $pid);
+is($ret, 0, "killed process with KILL");
+
+# Check that psql sees the server as being terminated. No WARNING,
+# because signal handlers aren't being run on SIGKILL.
+$killme_stdin .= q[
+SELECT 1;
+];
+ok( pump_until(
+ $killme,
+ \$killme_stderr,
+ qr/server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql query died successfully after SIGKILL");
+$killme->finish;
+
+# Wait till server restarts - we should get the WARNING here, but
+# sometimes the server is unable to send that, if interrupted while
+# sending.
+ok( pump_until(
+ $monitor,
+ \$monitor_stderr,
+ qr/WARNING: terminating connection because of crash of another server process|server closed the connection unexpectedly|connection to server was lost/m
+ ),
+ "psql monitor died successfully after SIGKILL");
+$monitor->finish;
+
+# Wait till server restarts
+is($node->poll_query_until('postgres', undef, ''),
+ "1", "reconnected after SIGKILL");
+
+# Make sure the committed rows survived, in-progress ones not
+is( $node->safe_psql('postgres', 'SELECT * FROM alive'),
+ "committed-before-sigquit\ncommitted-before-sigkill",
+ 'data survived');
+
+is( $node->safe_psql(
+ 'postgres',
+ 'INSERT INTO alive VALUES($$before-orderly-restart$$) RETURNING status'
+ ),
+ 'before-orderly-restart',
+ 'can still write after crash restart');
+
+# Just to be sure, check that an orderly restart now still works
+$node->restart();
+
+is( $node->safe_psql('postgres', 'SELECT * FROM alive'),
+ "committed-before-sigquit\ncommitted-before-sigkill\nbefore-orderly-restart",
+ 'data survived');
+
+is( $node->safe_psql(
+ 'postgres',
+ 'INSERT INTO alive VALUES($$after-orderly-restart$$) RETURNING status'
+ ),
+ 'after-orderly-restart',
+ 'can still write after orderly restart');
+
+$node->stop();
+
+# Pump until string is matched, or timeout occurs
+sub pump_until
+{
+ my ($proc, $stream, $untl) = @_;
+ $proc->pump_nb();
+ while (1)
+ {
+ last if $$stream =~ /$untl/;
+ if ($psql_timeout->is_expired)
+ {
+ diag("aborting wait: program timed out");
+ diag("stream contents: >>", $$stream, "<<");
+ diag("pattern searched for: ", $untl);
+
+ return 0;
+ }
+ if (not $proc->pumpable())
+ {
+ diag("aborting wait: program died");
+ diag("stream contents: >>", $$stream, "<<");
+ diag("pattern searched for: ", $untl);
+
+ return 0;
+ }
+ $proc->pump();
+ }
+ return 1;
+
+}