summaryrefslogtreecommitdiffstats
path: root/src/bin/pg_amcheck/t
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-05-04 12:15:05 +0000
commit46651ce6fe013220ed397add242004d764fc0153 (patch)
tree6e5299f990f88e60174a1d3ae6e48eedd2688b2b /src/bin/pg_amcheck/t
parentInitial commit. (diff)
downloadpostgresql-14-upstream.tar.xz
postgresql-14-upstream.zip
Adding upstream version 14.5.upstream/14.5upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/bin/pg_amcheck/t')
-rw-r--r--src/bin/pg_amcheck/t/001_basic.pl12
-rw-r--r--src/bin/pg_amcheck/t/002_nonesuch.pl359
-rw-r--r--src/bin/pg_amcheck/t/003_check.pl518
-rw-r--r--src/bin/pg_amcheck/t/004_verify_heapam.pl529
-rw-r--r--src/bin/pg_amcheck/t/005_opclass_damage.pl59
5 files changed, 1477 insertions, 0 deletions
diff --git a/src/bin/pg_amcheck/t/001_basic.pl b/src/bin/pg_amcheck/t/001_basic.pl
new file mode 100644
index 0000000..6f60e3e
--- /dev/null
+++ b/src/bin/pg_amcheck/t/001_basic.pl
@@ -0,0 +1,12 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use TestLib;
+use Test::More tests => 8;
+
+program_help_ok('pg_amcheck');
+program_version_ok('pg_amcheck');
+program_options_handling_ok('pg_amcheck');
diff --git a/src/bin/pg_amcheck/t/002_nonesuch.pl b/src/bin/pg_amcheck/t/002_nonesuch.pl
new file mode 100644
index 0000000..df0cb03
--- /dev/null
+++ b/src/bin/pg_amcheck/t/002_nonesuch.pl
@@ -0,0 +1,359 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+use Test::More tests => 100;
+
+# Test set-up
+my ($node, $port);
+$node = get_new_node('test');
+$node->init;
+$node->start;
+$port = $node->port;
+
+# Load the amcheck extension, upon which pg_amcheck depends
+$node->safe_psql('postgres', q(CREATE EXTENSION amcheck));
+
+#########################################
+# Test non-existent databases
+
+# Failing to connect to the initial database is an error.
+$node->command_checks_all(
+ [ 'pg_amcheck', 'qqq' ],
+ 1, [qr/^$/],
+ [qr/FATAL: database "qqq" does not exist/],
+ 'checking a non-existent database');
+
+# Failing to resolve a database pattern is an error by default.
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'qqq', '-d', 'postgres' ],
+ 1,
+ [qr/^$/],
+ [qr/pg_amcheck: error: no connectable databases to check matching "qqq"/],
+ 'checking an unresolvable database pattern');
+
+# But only a warning under --no-strict-names
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-d', 'qqq', '-d', 'postgres' ],
+ 0,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: no connectable databases to check matching "qqq"/
+ ],
+ 'checking an unresolvable database pattern under --no-strict-names');
+
+# Check that a substring of an existent database name does not get interpreted
+# as a matching pattern.
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'post', '-d', 'postgres' ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: no connectable databases to check matching "post"/
+ ],
+ 'checking an unresolvable database pattern (substring of existent database)'
+);
+
+# Check that a superstring of an existent database name does not get interpreted
+# as a matching pattern.
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'postgresql', '-d', 'postgres' ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: no connectable databases to check matching "postgresql"/
+ ],
+ 'checking an unresolvable database pattern (superstring of existent database)'
+);
+
+#########################################
+# Test connecting with a non-existent user
+
+# Failing to connect to the initial database due to bad username is an error.
+$node->command_checks_all([ 'pg_amcheck', '-U', 'no_such_user', 'postgres' ],
+ 1, [qr/^$/], [], 'checking with a non-existent user');
+
+#########################################
+# Test checking databases without amcheck installed
+
+# Attempting to check a database by name where amcheck is not installed should
+# raise a warning. If all databases are skipped, having no relations to check
+# raises an error.
+$node->command_checks_all(
+ [ 'pg_amcheck', 'template1' ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: skipping database "template1": amcheck is not installed/,
+ qr/pg_amcheck: error: no relations to check/
+ ],
+ 'checking a database by name without amcheck installed, no other databases'
+);
+
+# Again, but this time with another database to check, so no error is raised.
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'template1', '-d', 'postgres' ],
+ 0,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: skipping database "template1": amcheck is not installed/
+ ],
+ 'checking a database by name without amcheck installed, with other databases'
+);
+
+# Again, but by way of checking all databases
+$node->command_checks_all(
+ [ 'pg_amcheck', '--all' ],
+ 0,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: skipping database "template1": amcheck is not installed/
+ ],
+ 'checking a database by pattern without amcheck installed, with other databases'
+);
+
+#########################################
+# Test unreasonable patterns
+
+# Check three-part unreasonable pattern that has zero-length names
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'postgres', '-t', '..' ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: no connectable databases to check matching "\.\."/
+ ],
+ 'checking table pattern ".."');
+
+# Again, but with non-trivial schema and relation parts
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'postgres', '-t', '.foo.bar' ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: no connectable databases to check matching "\.foo\.bar"/
+ ],
+ 'checking table pattern ".foo.bar"');
+
+# Check two-part unreasonable pattern that has zero-length names
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'postgres', '-t', '.' ],
+ 1,
+ [qr/^$/],
+ [qr/pg_amcheck: error: no heap tables to check matching "\."/],
+ 'checking table pattern "."');
+
+# Check that a multipart database name is rejected
+$node->command_checks_all(
+ [ 'pg_amcheck', '-d', 'localhost.postgres' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper qualified name \(too many dotted names\): localhost\.postgres/
+ ],
+ 'multipart database patterns are rejected'
+);
+
+# Check that a three-part schema name is rejected
+$node->command_checks_all(
+ [ 'pg_amcheck', '-s', 'localhost.postgres.pg_catalog' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper qualified name \(too many dotted names\): localhost\.postgres\.pg_catalog/
+ ],
+ 'three part schema patterns are rejected'
+);
+
+# Check that a four-part table name is rejected
+$node->command_checks_all(
+ [ 'pg_amcheck', '-t', 'localhost.postgres.pg_catalog.pg_class' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper relation name \(too many dotted names\): localhost\.postgres\.pg_catalog\.pg_class/
+ ],
+ 'four part table patterns are rejected'
+);
+
+# Check that too many dotted names still draws an error under --no-strict-names
+# That flag means that it is ok for the object to be missing, not that it is ok
+# for the object name to be ungrammatical
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-t', 'this.is.a.really.long.dotted.string' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper relation name \(too many dotted names\): this\.is\.a\.really\.long\.dotted\.string/
+ ],
+ 'ungrammatical table names still draw errors under --no-strict-names'
+);
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-s', 'postgres.long.dotted.string' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper qualified name \(too many dotted names\): postgres\.long\.dotted\.string/
+ ],
+ 'ungrammatical schema names still draw errors under --no-strict-names'
+);
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-d', 'postgres.long.dotted.string' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper qualified name \(too many dotted names\): postgres\.long\.dotted\.string/
+ ],
+ 'ungrammatical database names still draw errors under --no-strict-names'
+);
+
+# Likewise for exclusion patterns
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-T', 'a.b.c.d' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper relation name \(too many dotted names\): a\.b\.c\.d/
+ ],
+ 'ungrammatical table exclusions still draw errors under --no-strict-names'
+);
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-S', 'a.b.c' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper qualified name \(too many dotted names\): a\.b\.c/
+ ],
+ 'ungrammatical schema exclusions still draw errors under --no-strict-names'
+);
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-strict-names', '-D', 'a.b' ],
+ 2,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: error: improper qualified name \(too many dotted names\): a\.b/
+ ],
+ 'ungrammatical database exclusions still draw errors under --no-strict-names'
+);
+
+
+#########################################
+# Test checking non-existent databases, schemas, tables, and indexes
+
+# Use --no-strict-names and a single existent table so we only get warnings
+# about the failed pattern matches
+$node->command_checks_all(
+ [
+ 'pg_amcheck', '--no-strict-names',
+ '-t', 'no_such_table',
+ '-t', 'no*such*table',
+ '-i', 'no_such_index',
+ '-i', 'no*such*index',
+ '-r', 'no_such_relation',
+ '-r', 'no*such*relation',
+ '-d', 'no_such_database',
+ '-d', 'no*such*database',
+ '-r', 'none.none',
+ '-r', 'none.none.none',
+ '-r', 'postgres.none.none',
+ '-r', 'postgres.pg_catalog.none',
+ '-r', 'postgres.none.pg_class',
+ '-t', 'postgres.pg_catalog.pg_class', # This exists
+ ],
+ 0,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: no heap tables to check matching "no_such_table"/,
+ qr/pg_amcheck: warning: no heap tables to check matching "no\*such\*table"/,
+ qr/pg_amcheck: warning: no btree indexes to check matching "no_such_index"/,
+ qr/pg_amcheck: warning: no btree indexes to check matching "no\*such\*index"/,
+ qr/pg_amcheck: warning: no relations to check matching "no_such_relation"/,
+ qr/pg_amcheck: warning: no relations to check matching "no\*such\*relation"/,
+ qr/pg_amcheck: warning: no heap tables to check matching "no\*such\*table"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "no\*such\*database"/,
+ qr/pg_amcheck: warning: no relations to check matching "none\.none"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "none\.none\.none"/,
+ qr/pg_amcheck: warning: no relations to check matching "postgres\.none\.none"/,
+ qr/pg_amcheck: warning: no relations to check matching "postgres\.pg_catalog\.none"/,
+ qr/pg_amcheck: warning: no relations to check matching "postgres\.none\.pg_class"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "no\*such\*database"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "none\.none\.none"/,
+ ],
+ 'many unmatched patterns and one matched pattern under --no-strict-names'
+);
+
+#########################################
+# Test checking otherwise existent objects but in databases where they do not exist
+
+$node->safe_psql(
+ 'postgres', q(
+ CREATE TABLE public.foo (f integer);
+ CREATE INDEX foo_idx ON foo(f);
+));
+$node->safe_psql('postgres', q(CREATE DATABASE another_db));
+
+$node->command_checks_all(
+ [
+ 'pg_amcheck', '-d',
+ 'postgres', '--no-strict-names',
+ '-t', 'template1.public.foo',
+ '-t', 'another_db.public.foo',
+ '-t', 'no_such_database.public.foo',
+ '-i', 'template1.public.foo_idx',
+ '-i', 'another_db.public.foo_idx',
+ '-i', 'no_such_database.public.foo_idx',
+ ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: skipping database "template1": amcheck is not installed/,
+ qr/pg_amcheck: warning: no heap tables to check matching "template1\.public\.foo"/,
+ qr/pg_amcheck: warning: no heap tables to check matching "another_db\.public\.foo"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo"/,
+ qr/pg_amcheck: warning: no btree indexes to check matching "template1\.public\.foo_idx"/,
+ qr/pg_amcheck: warning: no btree indexes to check matching "another_db\.public\.foo_idx"/,
+ qr/pg_amcheck: warning: no connectable databases to check matching "no_such_database\.public\.foo_idx"/,
+ qr/pg_amcheck: error: no relations to check/,
+ ],
+ 'checking otherwise existent objets in the wrong databases');
+
+
+#########################################
+# Test schema exclusion patterns
+
+# Check with only schema exclusion patterns
+$node->command_checks_all(
+ [
+ 'pg_amcheck', '--all', '--no-strict-names', '-S',
+ 'public', '-S', 'pg_catalog', '-S',
+ 'pg_toast', '-S', 'information_schema',
+ ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: skipping database "template1": amcheck is not installed/,
+ qr/pg_amcheck: error: no relations to check/
+ ],
+ 'schema exclusion patterns exclude all relations');
+
+# Check with schema exclusion patterns overriding relation and schema inclusion patterns
+$node->command_checks_all(
+ [
+ 'pg_amcheck', '--all', '--no-strict-names', '-s',
+ 'public', '-s', 'pg_catalog', '-s',
+ 'pg_toast', '-s', 'information_schema', '-t',
+ 'pg_catalog.pg_class', '-S*'
+ ],
+ 1,
+ [qr/^$/],
+ [
+ qr/pg_amcheck: warning: skipping database "template1": amcheck is not installed/,
+ qr/pg_amcheck: error: no relations to check/
+ ],
+ 'schema exclusion pattern overrides all inclusion patterns');
diff --git a/src/bin/pg_amcheck/t/003_check.pl b/src/bin/pg_amcheck/t/003_check.pl
new file mode 100644
index 0000000..4122d72
--- /dev/null
+++ b/src/bin/pg_amcheck/t/003_check.pl
@@ -0,0 +1,518 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+
+use Test::More tests => 63;
+
+my ($node, $port, %corrupt_page, %remove_relation);
+
+# Returns the filesystem path for the named relation.
+#
+# Assumes the test node is running
+sub relation_filepath
+{
+ my ($dbname, $relname) = @_;
+
+ my $pgdata = $node->data_dir;
+ my $rel =
+ $node->safe_psql($dbname, qq(SELECT pg_relation_filepath('$relname')));
+ die "path not found for relation $relname" unless defined $rel;
+ return "$pgdata/$rel";
+}
+
+# Returns the name of the toast relation associated with the named relation.
+#
+# Assumes the test node is running
+sub relation_toast
+{
+ my ($dbname, $relname) = @_;
+
+ my $rel = $node->safe_psql(
+ $dbname, qq(
+ SELECT c.reltoastrelid::regclass
+ FROM pg_catalog.pg_class c
+ WHERE c.oid = '$relname'::regclass
+ AND c.reltoastrelid != 0
+ ));
+ return $rel;
+}
+
+# Adds the relation file for the given (dbname, relname) to the list
+# to be corrupted by means of overwriting junk in the first page.
+#
+# Assumes the test node is running.
+sub plan_to_corrupt_first_page
+{
+ my ($dbname, $relname) = @_;
+ my $relpath = relation_filepath($dbname, $relname);
+ $corrupt_page{$relpath} = 1;
+}
+
+# Adds the relation file for the given (dbname, relname) to the list
+# to be corrupted by means of removing the file..
+#
+# Assumes the test node is running
+sub plan_to_remove_relation_file
+{
+ my ($dbname, $relname) = @_;
+ my $relpath = relation_filepath($dbname, $relname);
+ $remove_relation{$relpath} = 1;
+}
+
+# For the given (dbname, relname), if a corresponding toast table
+# exists, adds that toast table's relation file to the list to be
+# corrupted by means of removing the file.
+#
+# Assumes the test node is running.
+sub plan_to_remove_toast_file
+{
+ my ($dbname, $relname) = @_;
+ my $toastname = relation_toast($dbname, $relname);
+ plan_to_remove_relation_file($dbname, $toastname) if ($toastname);
+}
+
+# Corrupts the first page of the given file path
+sub corrupt_first_page
+{
+ my ($relpath) = @_;
+
+ my $fh;
+ open($fh, '+<', $relpath)
+ or BAIL_OUT("open failed: $!");
+ binmode $fh;
+
+ # Corrupt some line pointers. The values are chosen to hit the
+ # various line-pointer-corruption checks in verify_heapam.c
+ # on both little-endian and big-endian architectures.
+ sysseek($fh, 32, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ syswrite(
+ $fh,
+ pack("L*",
+ 0xAAA15550, 0xAAA0D550, 0x00010000, 0x00008000,
+ 0x0000800F, 0x001e8000, 0xFFFFFFFF)
+ ) or BAIL_OUT("syswrite failed: $!");
+ close($fh)
+ or BAIL_OUT("close failed: $!");
+}
+
+# Stops the node, performs all the corruptions previously planned, and
+# starts the node again.
+#
+sub perform_all_corruptions()
+{
+ $node->stop();
+ for my $relpath (keys %corrupt_page)
+ {
+ corrupt_first_page($relpath);
+ }
+ for my $relpath (keys %remove_relation)
+ {
+ unlink($relpath);
+ }
+ $node->start;
+}
+
+# Test set-up
+$node = get_new_node('test');
+$node->init;
+$node->append_conf('postgresql.conf', 'autovacuum=off');
+$node->start;
+$port = $node->port;
+
+for my $dbname (qw(db1 db2 db3))
+{
+ # Create the database
+ $node->safe_psql('postgres', qq(CREATE DATABASE $dbname));
+
+ # Load the amcheck extension, upon which pg_amcheck depends. Put the
+ # extension in an unexpected location to test that pg_amcheck finds it
+ # correctly. Create tables with names that look like pg_catalog names to
+ # check that pg_amcheck does not get confused by them. Create functions in
+ # schema public that look like amcheck functions to check that pg_amcheck
+ # does not use them.
+ $node->safe_psql(
+ $dbname, q(
+ CREATE SCHEMA amcheck_schema;
+ CREATE EXTENSION amcheck WITH SCHEMA amcheck_schema;
+ CREATE TABLE amcheck_schema.pg_database (junk text);
+ CREATE TABLE amcheck_schema.pg_namespace (junk text);
+ CREATE TABLE amcheck_schema.pg_class (junk text);
+ CREATE TABLE amcheck_schema.pg_operator (junk text);
+ CREATE TABLE amcheck_schema.pg_proc (junk text);
+ CREATE TABLE amcheck_schema.pg_tablespace (junk text);
+
+ CREATE FUNCTION public.bt_index_check(index regclass,
+ heapallindexed boolean default false)
+ RETURNS VOID AS $$
+ BEGIN
+ RAISE EXCEPTION 'Invoked wrong bt_index_check!';
+ END;
+ $$ LANGUAGE plpgsql;
+
+ CREATE FUNCTION public.bt_index_parent_check(index regclass,
+ heapallindexed boolean default false,
+ rootdescend boolean default false)
+ RETURNS VOID AS $$
+ BEGIN
+ RAISE EXCEPTION 'Invoked wrong bt_index_parent_check!';
+ END;
+ $$ LANGUAGE plpgsql;
+
+ CREATE FUNCTION public.verify_heapam(relation regclass,
+ on_error_stop boolean default false,
+ check_toast boolean default false,
+ skip text default 'none',
+ startblock bigint default null,
+ endblock bigint default null,
+ blkno OUT bigint,
+ offnum OUT integer,
+ attnum OUT integer,
+ msg OUT text)
+ RETURNS SETOF record AS $$
+ BEGIN
+ RAISE EXCEPTION 'Invoked wrong verify_heapam!';
+ END;
+ $$ LANGUAGE plpgsql;
+ ));
+
+ # Create schemas, tables and indexes in five separate
+ # schemas. The schemas are all identical to start, but
+ # we will corrupt them differently later.
+ #
+ for my $schema (qw(s1 s2 s3 s4 s5))
+ {
+ $node->safe_psql(
+ $dbname, qq(
+ CREATE SCHEMA $schema;
+ CREATE SEQUENCE $schema.seq1;
+ CREATE SEQUENCE $schema.seq2;
+ CREATE TABLE $schema.t1 (
+ i INTEGER,
+ b BOX,
+ ia int4[],
+ ir int4range,
+ t TEXT
+ );
+ CREATE TABLE $schema.t2 (
+ i INTEGER,
+ b BOX,
+ ia int4[],
+ ir int4range,
+ t TEXT
+ );
+ CREATE VIEW $schema.t2_view AS (
+ SELECT i*2, t FROM $schema.t2
+ );
+ ALTER TABLE $schema.t2
+ ALTER COLUMN t
+ SET STORAGE EXTERNAL;
+
+ INSERT INTO $schema.t1 (i, b, ia, ir, t)
+ (SELECT gs::INTEGER AS i,
+ box(point(gs,gs+5),point(gs*2,gs*3)) AS b,
+ array[gs, gs + 1]::int4[] AS ia,
+ int4range(gs, gs+100) AS ir,
+ repeat('foo', gs) AS t
+ FROM generate_series(1,10000,3000) AS gs);
+
+ INSERT INTO $schema.t2 (i, b, ia, ir, t)
+ (SELECT gs::INTEGER AS i,
+ box(point(gs,gs+5),point(gs*2,gs*3)) AS b,
+ array[gs, gs + 1]::int4[] AS ia,
+ int4range(gs, gs+100) AS ir,
+ repeat('foo', gs) AS t
+ FROM generate_series(1,10000,3000) AS gs);
+
+ CREATE MATERIALIZED VIEW $schema.t1_mv AS SELECT * FROM $schema.t1;
+ CREATE MATERIALIZED VIEW $schema.t2_mv AS SELECT * FROM $schema.t2;
+
+ create table $schema.p1 (a int, b int) PARTITION BY list (a);
+ create table $schema.p2 (a int, b int) PARTITION BY list (a);
+
+ create table $schema.p1_1 partition of $schema.p1 for values in (1, 2, 3);
+ create table $schema.p1_2 partition of $schema.p1 for values in (4, 5, 6);
+ create table $schema.p2_1 partition of $schema.p2 for values in (1, 2, 3);
+ create table $schema.p2_2 partition of $schema.p2 for values in (4, 5, 6);
+
+ CREATE INDEX t1_btree ON $schema.t1 USING BTREE (i);
+ CREATE INDEX t2_btree ON $schema.t2 USING BTREE (i);
+
+ CREATE INDEX t1_hash ON $schema.t1 USING HASH (i);
+ CREATE INDEX t2_hash ON $schema.t2 USING HASH (i);
+
+ CREATE INDEX t1_brin ON $schema.t1 USING BRIN (i);
+ CREATE INDEX t2_brin ON $schema.t2 USING BRIN (i);
+
+ CREATE INDEX t1_gist ON $schema.t1 USING GIST (b);
+ CREATE INDEX t2_gist ON $schema.t2 USING GIST (b);
+
+ CREATE INDEX t1_gin ON $schema.t1 USING GIN (ia);
+ CREATE INDEX t2_gin ON $schema.t2 USING GIN (ia);
+
+ CREATE INDEX t1_spgist ON $schema.t1 USING SPGIST (ir);
+ CREATE INDEX t2_spgist ON $schema.t2 USING SPGIST (ir);
+ ));
+ }
+}
+
+# Database 'db1' corruptions
+#
+
+# Corrupt indexes in schema "s1"
+plan_to_remove_relation_file('db1', 's1.t1_btree');
+plan_to_corrupt_first_page('db1', 's1.t2_btree');
+
+# Corrupt tables in schema "s2"
+plan_to_remove_relation_file('db1', 's2.t1');
+plan_to_corrupt_first_page('db1', 's2.t2');
+
+# Corrupt tables, partitions, matviews, and btrees in schema "s3"
+plan_to_remove_relation_file('db1', 's3.t1');
+plan_to_corrupt_first_page('db1', 's3.t2');
+
+plan_to_remove_relation_file('db1', 's3.t1_mv');
+plan_to_remove_relation_file('db1', 's3.p1_1');
+
+plan_to_corrupt_first_page('db1', 's3.t2_mv');
+plan_to_corrupt_first_page('db1', 's3.p2_1');
+
+plan_to_remove_relation_file('db1', 's3.t1_btree');
+plan_to_corrupt_first_page('db1', 's3.t2_btree');
+
+# Corrupt toast table, partitions, and materialized views in schema "s4"
+plan_to_remove_toast_file('db1', 's4.t2');
+
+# Corrupt all other object types in schema "s5". We don't have amcheck support
+# for these types, but we check that their corruption does not trigger any
+# errors in pg_amcheck
+plan_to_remove_relation_file('db1', 's5.seq1');
+plan_to_remove_relation_file('db1', 's5.t1_hash');
+plan_to_remove_relation_file('db1', 's5.t1_gist');
+plan_to_remove_relation_file('db1', 's5.t1_gin');
+plan_to_remove_relation_file('db1', 's5.t1_brin');
+plan_to_remove_relation_file('db1', 's5.t1_spgist');
+
+plan_to_corrupt_first_page('db1', 's5.seq2');
+plan_to_corrupt_first_page('db1', 's5.t2_hash');
+plan_to_corrupt_first_page('db1', 's5.t2_gist');
+plan_to_corrupt_first_page('db1', 's5.t2_gin');
+plan_to_corrupt_first_page('db1', 's5.t2_brin');
+plan_to_corrupt_first_page('db1', 's5.t2_spgist');
+
+
+# Database 'db2' corruptions
+#
+plan_to_remove_relation_file('db2', 's1.t1');
+plan_to_remove_relation_file('db2', 's1.t1_btree');
+
+
+# Leave 'db3' uncorrupted
+#
+
+# Standard first arguments to TestLib functions
+my @cmd = ('pg_amcheck', '-p', $port);
+
+# Regular expressions to match various expected output
+my $no_output_re = qr/^$/;
+my $line_pointer_corruption_re = qr/line pointer/;
+my $missing_file_re = qr/could not open file ".*": No such file or directory/;
+my $index_missing_relation_fork_re =
+ qr/index ".*" lacks a main relation fork/;
+
+# We have created test databases with tables populated with data, but have not
+# yet corrupted anything. As such, we expect no corruption and verify that
+# none is reported
+#
+$node->command_checks_all([ @cmd, '-d', 'db1', '-d', 'db2', '-d', 'db3' ],
+ 0, [$no_output_re], [$no_output_re], 'pg_amcheck prior to corruption');
+
+# Perform the corruptions we planned above using only a single database restart.
+#
+perform_all_corruptions();
+
+
+# Checking databases with amcheck installed and corrupt relations, pg_amcheck
+# command itself should return exit status = 2, because tables and indexes are
+# corrupt, not exit status = 1, which would mean the pg_amcheck command itself
+# failed. Corruption messages should go to stdout, and nothing to stderr.
+#
+$node->command_checks_all(
+ [ @cmd, 'db1' ],
+ 2,
+ [
+ $index_missing_relation_fork_re, $line_pointer_corruption_re,
+ $missing_file_re,
+ ],
+ [$no_output_re],
+ 'pg_amcheck all schemas, tables and indexes in database db1');
+
+$node->command_checks_all(
+ [ @cmd, '-d', 'db1', '-d', 'db2', '-d', 'db3' ],
+ 2,
+ [
+ $index_missing_relation_fork_re, $line_pointer_corruption_re,
+ $missing_file_re,
+ ],
+ [$no_output_re],
+ 'pg_amcheck all schemas, tables and indexes in databases db1, db2, and db3'
+);
+
+# Scans of indexes in s1 should detect the specific corruption that we created
+# above. For missing relation forks, we know what the error message looks
+# like. For corrupted index pages, the error might vary depending on how the
+# page was formatted on disk, including variations due to alignment differences
+# between platforms, so we accept any non-empty error message.
+#
+# If we don't limit the check to databases with amcheck installed, we expect
+# complaint on stderr, but otherwise stderr should be quiet.
+#
+$node->command_checks_all(
+ [ @cmd, '--all', '-s', 's1', '-i', 't1_btree' ],
+ 2,
+ [$index_missing_relation_fork_re],
+ [
+ qr/pg_amcheck: warning: skipping database "postgres": amcheck is not installed/
+ ],
+ 'pg_amcheck index s1.t1_btree reports missing main relation fork');
+
+$node->command_checks_all(
+ [ @cmd, '-d', 'db1', '-s', 's1', '-i', 't2_btree' ],
+ 2,
+ [qr/.+/], # Any non-empty error message is acceptable
+ [$no_output_re],
+ 'pg_amcheck index s1.s2 reports index corruption');
+
+# Checking db1.s1 with indexes excluded should show no corruptions because we
+# did not corrupt any tables in db1.s1. Verify that both stdout and stderr
+# are quiet.
+#
+$node->command_checks_all(
+ [ @cmd, '-t', 's1.*', '--no-dependent-indexes', 'db1' ],
+ 0, [$no_output_re], [$no_output_re],
+ 'pg_amcheck of db1.s1 excluding indexes');
+
+# Checking db2.s1 should show table corruptions if indexes are excluded
+#
+$node->command_checks_all(
+ [ @cmd, '-t', 's1.*', '--no-dependent-indexes', 'db2' ],
+ 2, [$missing_file_re], [$no_output_re],
+ 'pg_amcheck of db2.s1 excluding indexes');
+
+# In schema db1.s3, the tables and indexes are both corrupt. We should see
+# corruption messages on stdout, and nothing on stderr.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's3', 'db1' ],
+ 2,
+ [
+ $index_missing_relation_fork_re, $line_pointer_corruption_re,
+ $missing_file_re,
+ ],
+ [$no_output_re],
+ 'pg_amcheck schema s3 reports table and index errors');
+
+# In schema db1.s4, only toast tables are corrupt. Check that under default
+# options the toast corruption is reported, but when excluding toast we get no
+# error reports.
+$node->command_checks_all([ @cmd, '-s', 's4', 'db1' ],
+ 2, [$missing_file_re], [$no_output_re],
+ 'pg_amcheck in schema s4 reports toast corruption');
+
+$node->command_checks_all(
+ [
+ @cmd, '--no-dependent-toast', '--exclude-toast-pointers', '-s', 's4',
+ 'db1'
+ ],
+ 0,
+ [$no_output_re],
+ [$no_output_re],
+ 'pg_amcheck in schema s4 excluding toast reports no corruption');
+
+# Check that no corruption is reported in schema db1.s5
+$node->command_checks_all([ @cmd, '-s', 's5', 'db1' ],
+ 0, [$no_output_re], [$no_output_re],
+ 'pg_amcheck over schema s5 reports no corruption');
+
+# In schema db1.s1, only indexes are corrupt. Verify that when we exclude
+# the indexes, no corruption is reported about the schema.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's1', '-I', 't1_btree', '-I', 't2_btree', 'db1' ],
+ 0,
+ [$no_output_re],
+ [$no_output_re],
+ 'pg_amcheck over schema s1 with corrupt indexes excluded reports no corruption'
+);
+
+# In schema db1.s1, only indexes are corrupt. Verify that when we provide only
+# table inclusions, and disable index expansion, no corruption is reported
+# about the schema.
+#
+$node->command_checks_all(
+ [ @cmd, '-t', 's1.*', '--no-dependent-indexes', 'db1' ],
+ 0,
+ [$no_output_re],
+ [$no_output_re],
+ 'pg_amcheck over schema s1 with all indexes excluded reports no corruption'
+);
+
+# In schema db1.s2, only tables are corrupt. Verify that when we exclude those
+# tables that no corruption is reported.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's2', '-T', 't1', '-T', 't2', 'db1' ],
+ 0,
+ [$no_output_re],
+ [$no_output_re],
+ 'pg_amcheck over schema s2 with corrupt tables excluded reports no corruption'
+);
+
+# Check errors about bad block range command line arguments. We use schema s5
+# to avoid getting messages about corrupt tables or indexes.
+#
+command_fails_like(
+ [ @cmd, '-s', 's5', '--startblock', 'junk', 'db1' ],
+ qr/invalid start block/,
+ 'pg_amcheck rejects garbage startblock');
+
+command_fails_like(
+ [ @cmd, '-s', 's5', '--endblock', '1234junk', 'db1' ],
+ qr/invalid end block/,
+ 'pg_amcheck rejects garbage endblock');
+
+command_fails_like(
+ [ @cmd, '-s', 's5', '--startblock', '5', '--endblock', '4', 'db1' ],
+ qr/end block precedes start block/,
+ 'pg_amcheck rejects invalid block range');
+
+# Check bt_index_parent_check alternates. We don't create any index corruption
+# that would behave differently under these modes, so just smoke test that the
+# arguments are handled sensibly.
+#
+$node->command_checks_all(
+ [ @cmd, '-s', 's1', '-i', 't1_btree', '--parent-check', 'db1' ],
+ 2,
+ [$index_missing_relation_fork_re],
+ [$no_output_re],
+ 'pg_amcheck smoke test --parent-check');
+
+$node->command_checks_all(
+ [
+ @cmd, '-s', 's1', '-i', 't1_btree', '--heapallindexed',
+ '--rootdescend', 'db1'
+ ],
+ 2,
+ [$index_missing_relation_fork_re],
+ [$no_output_re],
+ 'pg_amcheck smoke test --heapallindexed --rootdescend');
+
+$node->command_checks_all(
+ [ @cmd, '-d', 'db1', '-d', 'db2', '-d', 'db3', '-S', 's*' ],
+ 0, [$no_output_re], [$no_output_re],
+ 'pg_amcheck excluding all corrupt schemas');
diff --git a/src/bin/pg_amcheck/t/004_verify_heapam.pl b/src/bin/pg_amcheck/t/004_verify_heapam.pl
new file mode 100644
index 0000000..b603efa
--- /dev/null
+++ b/src/bin/pg_amcheck/t/004_verify_heapam.pl
@@ -0,0 +1,529 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+use strict;
+use warnings;
+
+use PostgresNode;
+use TestLib;
+
+use Test::More;
+
+# This regression test demonstrates that the pg_amcheck binary correctly
+# identifies specific kinds of corruption within pages. To test this, we need
+# a mechanism to create corrupt pages with predictable, repeatable corruption.
+# The postgres backend cannot be expected to help us with this, as its design
+# is not consistent with the goal of intentionally corrupting pages.
+#
+# Instead, we create a table to corrupt, and with careful consideration of how
+# postgresql lays out heap pages, we seek to offsets within the page and
+# overwrite deliberately chosen bytes with specific values calculated to
+# corrupt the page in expected ways. We then verify that pg_amcheck reports
+# the corruption, and that it runs without crashing. Note that the backend
+# cannot simply be started to run queries against the corrupt table, as the
+# backend will crash, at least for some of the corruption types we generate.
+#
+# Autovacuum potentially touching the table in the background makes the exact
+# behavior of this test harder to reason about. We turn it off to keep things
+# simpler. We use a "belt and suspenders" approach, turning it off for the
+# system generally in postgresql.conf, and turning it off specifically for the
+# test table.
+#
+# This test depends on the table being written to the heap file exactly as we
+# expect it to be, so we take care to arrange the columns of the table, and
+# insert rows of the table, that give predictable sizes and locations within
+# the table page.
+#
+# The HeapTupleHeaderData has 23 bytes of fixed size fields before the variable
+# length t_bits[] array. We have exactly 3 columns in the table, so natts = 3,
+# t_bits is 1 byte long, and t_hoff = MAXALIGN(23 + 1) = 24.
+#
+# We're not too fussy about which datatypes we use for the test, but we do care
+# about some specific properties. We'd like to test both fixed size and
+# varlena types. We'd like some varlena data inline and some toasted. And
+# we'd like the layout of the table such that the datums land at predictable
+# offsets within the tuple. We choose a structure without padding on all
+# supported architectures:
+#
+# a BIGINT
+# b TEXT
+# c TEXT
+#
+# We always insert a 7-ascii character string into field 'b', which with a
+# 1-byte varlena header gives an 8 byte inline value. We always insert a long
+# text string in field 'c', long enough to force toast storage.
+#
+# We choose to read and write binary copies of our table's tuples, using perl's
+# pack() and unpack() functions. Perl uses a packing code system in which:
+#
+# l = "signed 32-bit Long",
+# L = "Unsigned 32-bit Long",
+# S = "Unsigned 16-bit Short",
+# C = "Unsigned 8-bit Octet",
+#
+# Each tuple in our table has a layout as follows:
+#
+# xx xx xx xx t_xmin: xxxx offset = 0 L
+# xx xx xx xx t_xmax: xxxx offset = 4 L
+# xx xx xx xx t_field3: xxxx offset = 8 L
+# xx xx bi_hi: xx offset = 12 S
+# xx xx bi_lo: xx offset = 14 S
+# xx xx ip_posid: xx offset = 16 S
+# xx xx t_infomask2: xx offset = 18 S
+# xx xx t_infomask: xx offset = 20 S
+# xx t_hoff: x offset = 22 C
+# xx t_bits: x offset = 23 C
+# xx xx xx xx xx xx xx xx 'a': xxxxxxxx offset = 24 LL
+# xx xx xx xx xx xx xx xx 'b': xxxxxxxx offset = 32 CCCCCCCC
+# xx xx xx xx xx xx xx xx 'c': xxxxxxxx offset = 40 CCllLL
+# xx xx xx xx xx xx xx xx : xxxxxxxx ...continued
+# xx xx : xx ...continued
+#
+# We could choose to read and write columns 'b' and 'c' in other ways, but
+# it is convenient enough to do it this way. We define packing code
+# constants here, where they can be compared easily against the layout.
+
+use constant HEAPTUPLE_PACK_CODE => 'LLLSSSSSCCLLCCCCCCCCCCllLL';
+use constant HEAPTUPLE_PACK_LENGTH => 58; # Total size
+
+# Read a tuple of our table from a heap page.
+#
+# Takes an open filehandle to the heap file, and the offset of the tuple.
+#
+# Rather than returning the binary data from the file, unpacks the data into a
+# perl hash with named fields. These fields exactly match the ones understood
+# by write_tuple(), below. Returns a reference to this hash.
+#
+sub read_tuple
+{
+ my ($fh, $offset) = @_;
+ my ($buffer, %tup);
+ sysseek($fh, $offset, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ defined(sysread($fh, $buffer, HEAPTUPLE_PACK_LENGTH))
+ or BAIL_OUT("sysread failed: $!");
+
+ @_ = unpack(HEAPTUPLE_PACK_CODE, $buffer);
+ %tup = (
+ t_xmin => shift,
+ t_xmax => shift,
+ t_field3 => shift,
+ bi_hi => shift,
+ bi_lo => shift,
+ ip_posid => shift,
+ t_infomask2 => shift,
+ t_infomask => shift,
+ t_hoff => shift,
+ t_bits => shift,
+ a_1 => shift,
+ a_2 => shift,
+ b_header => shift,
+ b_body1 => shift,
+ b_body2 => shift,
+ b_body3 => shift,
+ b_body4 => shift,
+ b_body5 => shift,
+ b_body6 => shift,
+ b_body7 => shift,
+ c_va_header => shift,
+ c_va_vartag => shift,
+ c_va_rawsize => shift,
+ c_va_extinfo => shift,
+ c_va_valueid => shift,
+ c_va_toastrelid => shift);
+ # Stitch together the text for column 'b'
+ $tup{b} = join('', map { chr($tup{"b_body$_"}) } (1 .. 7));
+ return \%tup;
+}
+
+# Write a tuple of our table to a heap page.
+#
+# Takes an open filehandle to the heap file, the offset of the tuple, and a
+# reference to a hash with the tuple values, as returned by read_tuple().
+# Writes the tuple fields from the hash into the heap file.
+#
+# The purpose of this function is to write a tuple back to disk with some
+# subset of fields modified. The function does no error checking. Use
+# cautiously.
+#
+sub write_tuple
+{
+ my ($fh, $offset, $tup) = @_;
+ my $buffer = pack(
+ HEAPTUPLE_PACK_CODE,
+ $tup->{t_xmin}, $tup->{t_xmax},
+ $tup->{t_field3}, $tup->{bi_hi},
+ $tup->{bi_lo}, $tup->{ip_posid},
+ $tup->{t_infomask2}, $tup->{t_infomask},
+ $tup->{t_hoff}, $tup->{t_bits},
+ $tup->{a_1}, $tup->{a_2},
+ $tup->{b_header}, $tup->{b_body1},
+ $tup->{b_body2}, $tup->{b_body3},
+ $tup->{b_body4}, $tup->{b_body5},
+ $tup->{b_body6}, $tup->{b_body7},
+ $tup->{c_va_header}, $tup->{c_va_vartag},
+ $tup->{c_va_rawsize}, $tup->{c_va_extinfo},
+ $tup->{c_va_valueid}, $tup->{c_va_toastrelid});
+ sysseek($fh, $offset, 0)
+ or BAIL_OUT("sysseek failed: $!");
+ defined(syswrite($fh, $buffer, HEAPTUPLE_PACK_LENGTH))
+ or BAIL_OUT("syswrite failed: $!");
+ return;
+}
+
+# Set umask so test directories and files are created with default permissions
+umask(0077);
+
+# Set up the node. Once we create and corrupt the table,
+# autovacuum workers visiting the table could crash the backend.
+# Disable autovacuum so that won't happen.
+my $node = get_new_node('test');
+$node->init;
+$node->append_conf('postgresql.conf', 'autovacuum=off');
+
+# Start the node and load the extensions. We depend on both
+# amcheck and pageinspect for this test.
+$node->start;
+my $port = $node->port;
+my $pgdata = $node->data_dir;
+$node->safe_psql('postgres', "CREATE EXTENSION amcheck");
+$node->safe_psql('postgres', "CREATE EXTENSION pageinspect");
+
+# Get a non-zero datfrozenxid
+$node->safe_psql('postgres', qq(VACUUM FREEZE));
+
+# Create the test table with precisely the schema that our corruption function
+# expects.
+$node->safe_psql(
+ 'postgres', qq(
+ CREATE TABLE public.test (a BIGINT, b TEXT, c TEXT);
+ ALTER TABLE public.test SET (autovacuum_enabled=false);
+ ALTER TABLE public.test ALTER COLUMN c SET STORAGE EXTERNAL;
+ CREATE INDEX test_idx ON public.test(a, b);
+ ));
+
+# We want (0 < datfrozenxid < test.relfrozenxid). To achieve this, we freeze
+# an otherwise unused table, public.junk, prior to inserting data and freezing
+# public.test
+$node->safe_psql(
+ 'postgres', qq(
+ CREATE TABLE public.junk AS SELECT 'junk'::TEXT AS junk_column;
+ ALTER TABLE public.junk SET (autovacuum_enabled=false);
+ VACUUM FREEZE public.junk
+ ));
+
+my $rel = $node->safe_psql('postgres',
+ qq(SELECT pg_relation_filepath('public.test')));
+my $relpath = "$pgdata/$rel";
+
+# Insert data and freeze public.test
+use constant ROWCOUNT => 16;
+$node->safe_psql(
+ 'postgres', qq(
+ INSERT INTO public.test (a, b, c)
+ VALUES (
+ x'DEADF9F9DEADF9F9'::bigint,
+ 'abcdefg',
+ repeat('w', 10000)
+ );
+ VACUUM FREEZE public.test
+ )) for (1 .. ROWCOUNT);
+
+my $relfrozenxid = $node->safe_psql('postgres',
+ q(select relfrozenxid from pg_class where relname = 'test'));
+my $datfrozenxid = $node->safe_psql('postgres',
+ q(select datfrozenxid from pg_database where datname = 'postgres'));
+
+# Sanity check that our 'test' table has a relfrozenxid newer than the
+# datfrozenxid for the database, and that the datfrozenxid is greater than the
+# first normal xid. We rely on these invariants in some of our tests.
+if ($datfrozenxid <= 3 || $datfrozenxid >= $relfrozenxid)
+{
+ $node->clean_node;
+ plan skip_all =>
+ "Xid thresholds not as expected: got datfrozenxid = $datfrozenxid, relfrozenxid = $relfrozenxid";
+ exit;
+}
+
+# Find where each of the tuples is located on the page.
+my @lp_off;
+for my $tup (0 .. ROWCOUNT - 1)
+{
+ push(
+ @lp_off,
+ $node->safe_psql(
+ 'postgres', qq(
+select lp_off from heap_page_items(get_raw_page('test', 'main', 0))
+ offset $tup limit 1)));
+}
+
+# Sanity check that our 'test' table on disk layout matches expectations. If
+# this is not so, we will have to skip the test until somebody updates the test
+# to work on this platform.
+$node->stop;
+my $file;
+open($file, '+<', $relpath)
+ or BAIL_OUT("open failed: $!");
+binmode $file;
+
+my $ENDIANNESS;
+for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
+{
+ my $offnum = $tupidx + 1; # offnum is 1-based, not zero-based
+ my $offset = $lp_off[$tupidx];
+ my $tup = read_tuple($file, $offset);
+
+ # Sanity-check that the data appears on the page where we expect.
+ my $a_1 = $tup->{a_1};
+ my $a_2 = $tup->{a_2};
+ my $b = $tup->{b};
+ if ($a_1 != 0xDEADF9F9 || $a_2 != 0xDEADF9F9 || $b ne 'abcdefg')
+ {
+ close($file); # ignore errors on close; we're exiting anyway
+ $node->clean_node;
+ plan skip_all =>
+ sprintf(
+ "Page layout differs from our expectations: expected (%x, %x, \"%s\"), got (%x, %x, \"%s\")",
+ 0xDEADF9F9, 0xDEADF9F9, "abcdefg", $a_1, $a_2, $b);
+ exit;
+ }
+
+ # Determine endianness of current platform from the 1-byte varlena header
+ $ENDIANNESS = $tup->{b_header} == 0x11 ? "little" : "big";
+}
+close($file)
+ or BAIL_OUT("close failed: $!");
+$node->start;
+
+# Ok, Xids and page layout look ok. We can run corruption tests.
+plan tests => 19;
+
+# Check that pg_amcheck runs against the uncorrupted table without error.
+$node->command_ok(
+ [ 'pg_amcheck', '-p', $port, 'postgres' ],
+ 'pg_amcheck test table, prior to corruption');
+
+# Check that pg_amcheck runs against the uncorrupted table and index without error.
+$node->command_ok([ 'pg_amcheck', '-p', $port, 'postgres' ],
+ 'pg_amcheck test table and index, prior to corruption');
+
+$node->stop;
+
+# Some #define constants from access/htup_details.h for use while corrupting.
+use constant HEAP_HASNULL => 0x0001;
+use constant HEAP_XMAX_LOCK_ONLY => 0x0080;
+use constant HEAP_XMIN_COMMITTED => 0x0100;
+use constant HEAP_XMIN_INVALID => 0x0200;
+use constant HEAP_XMAX_COMMITTED => 0x0400;
+use constant HEAP_XMAX_INVALID => 0x0800;
+use constant HEAP_NATTS_MASK => 0x07FF;
+use constant HEAP_XMAX_IS_MULTI => 0x1000;
+use constant HEAP_KEYS_UPDATED => 0x2000;
+
+# Helper function to generate a regular expression matching the header we
+# expect verify_heapam() to return given which fields we expect to be non-null.
+sub header
+{
+ my ($blkno, $offnum, $attnum) = @_;
+ return
+ qr/heap table "postgres\.public\.test", block $blkno, offset $offnum, attribute $attnum:\s+/ms
+ if (defined $attnum);
+ return
+ qr/heap table "postgres\.public\.test", block $blkno, offset $offnum:\s+/ms
+ if (defined $offnum);
+ return qr/heap table "postgres\.public\.test", block $blkno:\s+/ms
+ if (defined $blkno);
+ return qr/heap table "postgres\.public\.test":\s+/ms;
+}
+
+# Corrupt the tuples, one type of corruption per tuple. Some types of
+# corruption cause verify_heapam to skip to the next tuple without
+# performing any remaining checks, so we can't exercise the system properly if
+# we focus all our corruption on a single tuple.
+#
+my @expected;
+open($file, '+<', $relpath)
+ or BAIL_OUT("open failed: $!");
+binmode $file;
+
+for (my $tupidx = 0; $tupidx < ROWCOUNT; $tupidx++)
+{
+ my $offnum = $tupidx + 1; # offnum is 1-based, not zero-based
+ my $offset = $lp_off[$tupidx];
+ my $tup = read_tuple($file, $offset);
+
+ my $header = header(0, $offnum, undef);
+ if ($offnum == 1)
+ {
+ # Corruptly set xmin < relfrozenxid
+ my $xmin = $relfrozenxid - 1;
+ $tup->{t_xmin} = $xmin;
+ $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+ $tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
+
+ # Expected corruption report
+ push @expected,
+ qr/${header}xmin $xmin precedes relation freeze threshold 0:\d+/;
+ }
+ if ($offnum == 2)
+ {
+ # Corruptly set xmin < datfrozenxid
+ my $xmin = 3;
+ $tup->{t_xmin} = $xmin;
+ $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+ $tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
+
+ push @expected,
+ qr/${$header}xmin $xmin precedes oldest valid transaction ID 0:\d+/;
+ }
+ elsif ($offnum == 3)
+ {
+ # Corruptly set xmin < datfrozenxid, further back, noting circularity
+ # of xid comparison. For a new cluster with epoch = 0, the corrupt
+ # xmin will be interpreted as in the future
+ $tup->{t_xmin} = 4026531839;
+ $tup->{t_infomask} &= ~HEAP_XMIN_COMMITTED;
+ $tup->{t_infomask} &= ~HEAP_XMIN_INVALID;
+
+ push @expected,
+ qr/${$header}xmin 4026531839 equals or exceeds next valid transaction ID 0:\d+/;
+ }
+ elsif ($offnum == 4)
+ {
+ # Corruptly set xmax < relminmxid;
+ $tup->{t_xmax} = 4026531839;
+ $tup->{t_infomask} &= ~HEAP_XMAX_INVALID;
+
+ push @expected,
+ qr/${$header}xmax 4026531839 equals or exceeds next valid transaction ID 0:\d+/;
+ }
+ elsif ($offnum == 5)
+ {
+ # Corrupt the tuple t_hoff, but keep it aligned properly
+ $tup->{t_hoff} += 128;
+
+ push @expected,
+ qr/${$header}data begins at offset 152 beyond the tuple length 58/,
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 152 \(3 attributes, no nulls\)/;
+ }
+ elsif ($offnum == 6)
+ {
+ # Corrupt the tuple t_hoff, wrong alignment
+ $tup->{t_hoff} += 3;
+
+ push @expected,
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 27 \(3 attributes, no nulls\)/;
+ }
+ elsif ($offnum == 7)
+ {
+ # Corrupt the tuple t_hoff, underflow but correct alignment
+ $tup->{t_hoff} -= 8;
+
+ push @expected,
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 16 \(3 attributes, no nulls\)/;
+ }
+ elsif ($offnum == 8)
+ {
+ # Corrupt the tuple t_hoff, underflow and wrong alignment
+ $tup->{t_hoff} -= 3;
+
+ push @expected,
+ qr/${$header}tuple data should begin at byte 24, but actually begins at byte 21 \(3 attributes, no nulls\)/;
+ }
+ elsif ($offnum == 9)
+ {
+ # Corrupt the tuple to look like it has lots of attributes, not just 3
+ $tup->{t_infomask2} |= HEAP_NATTS_MASK;
+
+ push @expected,
+ qr/${$header}number of attributes 2047 exceeds maximum expected for table 3/;
+ }
+ elsif ($offnum == 10)
+ {
+ # Corrupt the tuple to look like it has lots of attributes, some of
+ # them null. This falsely creates the impression that the t_bits
+ # array is longer than just one byte, but t_hoff still says otherwise.
+ $tup->{t_infomask} |= HEAP_HASNULL;
+ $tup->{t_infomask2} |= HEAP_NATTS_MASK;
+ $tup->{t_bits} = 0xAA;
+
+ push @expected,
+ qr/${$header}tuple data should begin at byte 280, but actually begins at byte 24 \(2047 attributes, has nulls\)/;
+ }
+ elsif ($offnum == 11)
+ {
+ # Same as above, but this time t_hoff plays along
+ $tup->{t_infomask} |= HEAP_HASNULL;
+ $tup->{t_infomask2} |= (HEAP_NATTS_MASK & 0x40);
+ $tup->{t_bits} = 0xAA;
+ $tup->{t_hoff} = 32;
+
+ push @expected,
+ qr/${$header}number of attributes 67 exceeds maximum expected for table 3/;
+ }
+ elsif ($offnum == 12)
+ {
+ # Overwrite column 'b' 1-byte varlena header and initial characters to
+ # look like a long 4-byte varlena
+ #
+ # On little endian machines, bytes ending in two zero bits (xxxxxx00 bytes)
+ # are 4-byte length word, aligned, uncompressed data (up to 1G). We set the
+ # high six bits to 111111 and the lower two bits to 00, then the next three
+ # bytes with 0xFF using 0xFCFFFFFF.
+ #
+ # On big endian machines, bytes starting in two zero bits (00xxxxxx bytes)
+ # are 4-byte length word, aligned, uncompressed data (up to 1G). We set the
+ # low six bits to 111111 and the high two bits to 00, then the next three
+ # bytes with 0xFF using 0x3FFFFFFF.
+ #
+ $tup->{b_header} = $ENDIANNESS eq 'little' ? 0xFC : 0x3F;
+ $tup->{b_body1} = 0xFF;
+ $tup->{b_body2} = 0xFF;
+ $tup->{b_body3} = 0xFF;
+
+ $header = header(0, $offnum, 1);
+ push @expected,
+ qr/${header}attribute with length \d+ ends at offset \d+ beyond total tuple length \d+/;
+ }
+ elsif ($offnum == 13)
+ {
+ # Corrupt the bits in column 'c' toast pointer
+ $tup->{c_va_valueid} = 0xFFFFFFFF;
+
+ $header = header(0, $offnum, 2);
+ push @expected, qr/${header}toast value \d+ not found in toast table/;
+ }
+ elsif ($offnum == 14)
+ {
+ # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
+ $tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
+ $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI;
+ $tup->{t_xmax} = 4;
+
+ push @expected,
+ qr/${header}multitransaction ID 4 equals or exceeds next valid multitransaction ID 1/;
+ }
+ elsif ($offnum == 15) # Last offnum must equal ROWCOUNT
+ {
+ # Set both HEAP_XMAX_COMMITTED and HEAP_XMAX_IS_MULTI
+ $tup->{t_infomask} |= HEAP_XMAX_COMMITTED;
+ $tup->{t_infomask} |= HEAP_XMAX_IS_MULTI;
+ $tup->{t_xmax} = 4000000000;
+
+ push @expected,
+ qr/${header}multitransaction ID 4000000000 precedes relation minimum multitransaction ID threshold 1/;
+ }
+ write_tuple($file, $offset, $tup);
+}
+close($file)
+ or BAIL_OUT("close failed: $!");
+$node->start;
+
+# Run pg_amcheck against the corrupt table with epoch=0, comparing actual
+# corruption messages against the expected messages
+$node->command_checks_all(
+ [ 'pg_amcheck', '--no-dependent-indexes', '-p', $port, 'postgres' ],
+ 2, [@expected], [], 'Expected corruption message output');
+
+$node->teardown_node;
+$node->clean_node;
diff --git a/src/bin/pg_amcheck/t/005_opclass_damage.pl b/src/bin/pg_amcheck/t/005_opclass_damage.pl
new file mode 100644
index 0000000..30be684
--- /dev/null
+++ b/src/bin/pg_amcheck/t/005_opclass_damage.pl
@@ -0,0 +1,59 @@
+
+# Copyright (c) 2021, PostgreSQL Global Development Group
+
+# This regression test checks the behavior of the btree validation in the
+# presence of breaking sort order changes.
+#
+use strict;
+use warnings;
+use PostgresNode;
+use TestLib;
+use Test::More tests => 5;
+
+my $node = get_new_node('test');
+$node->init;
+$node->start;
+
+# Create a custom operator class and an index which uses it.
+$node->safe_psql(
+ 'postgres', q(
+ CREATE EXTENSION amcheck;
+
+ CREATE FUNCTION int4_asc_cmp (a int4, b int4) RETURNS int LANGUAGE sql AS $$
+ SELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN 1 ELSE -1 END; $$;
+
+ CREATE OPERATOR CLASS int4_fickle_ops FOR TYPE int4 USING btree AS
+ OPERATOR 1 < (int4, int4), OPERATOR 2 <= (int4, int4),
+ OPERATOR 3 = (int4, int4), OPERATOR 4 >= (int4, int4),
+ OPERATOR 5 > (int4, int4), FUNCTION 1 int4_asc_cmp(int4, int4);
+
+ CREATE TABLE int4tbl (i int4);
+ INSERT INTO int4tbl (SELECT * FROM generate_series(1,1000) gs);
+ CREATE INDEX fickleidx ON int4tbl USING btree (i int4_fickle_ops);
+));
+
+# We have not yet broken the index, so we should get no corruption
+$node->command_like(
+ [ 'pg_amcheck', '-p', $node->port, 'postgres' ],
+ qr/^$/,
+ 'pg_amcheck all schemas, tables and indexes reports no corruption');
+
+# Change the operator class to use a function which sorts in a different
+# order to corrupt the btree index
+$node->safe_psql(
+ 'postgres', q(
+ CREATE FUNCTION int4_desc_cmp (int4, int4) RETURNS int LANGUAGE sql AS $$
+ SELECT CASE WHEN $1 = $2 THEN 0 WHEN $1 > $2 THEN -1 ELSE 1 END; $$;
+ UPDATE pg_catalog.pg_amproc
+ SET amproc = 'int4_desc_cmp'::regproc
+ WHERE amproc = 'int4_asc_cmp'::regproc
+));
+
+# Index corruption should now be reported
+$node->command_checks_all(
+ [ 'pg_amcheck', '-p', $node->port, 'postgres' ],
+ 2,
+ [qr/item order invariant violated for index "fickleidx"/],
+ [],
+ 'pg_amcheck all schemas, tables and indexes reports fickleidx corruption'
+);