diff options
Diffstat (limited to '')
-rw-r--r-- | contrib/pg_visibility/.gitignore | 4 | ||||
-rw-r--r-- | contrib/pg_visibility/Makefile | 24 | ||||
-rw-r--r-- | contrib/pg_visibility/expected/pg_visibility.out | 177 | ||||
-rw-r--r-- | contrib/pg_visibility/pg_visibility--1.0--1.1.sql | 24 | ||||
-rw-r--r-- | contrib/pg_visibility/pg_visibility--1.1--1.2.sql | 13 | ||||
-rw-r--r-- | contrib/pg_visibility/pg_visibility--1.1.sql | 75 | ||||
-rw-r--r-- | contrib/pg_visibility/pg_visibility.c | 788 | ||||
-rw-r--r-- | contrib/pg_visibility/pg_visibility.control | 5 | ||||
-rw-r--r-- | contrib/pg_visibility/sql/pg_visibility.sql | 102 |
9 files changed, 1212 insertions, 0 deletions
diff --git a/contrib/pg_visibility/.gitignore b/contrib/pg_visibility/.gitignore new file mode 100644 index 0000000..5dcb3ff --- /dev/null +++ b/contrib/pg_visibility/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/pg_visibility/Makefile b/contrib/pg_visibility/Makefile new file mode 100644 index 0000000..b3b1a89 --- /dev/null +++ b/contrib/pg_visibility/Makefile @@ -0,0 +1,24 @@ +# contrib/pg_visibility/Makefile + +MODULE_big = pg_visibility +OBJS = \ + $(WIN32RES) \ + pg_visibility.o + +EXTENSION = pg_visibility +DATA = pg_visibility--1.1.sql pg_visibility--1.1--1.2.sql \ + pg_visibility--1.0--1.1.sql +PGFILEDESC = "pg_visibility - page visibility information" + +REGRESS = pg_visibility + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pg_visibility +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pg_visibility/expected/pg_visibility.out b/contrib/pg_visibility/expected/pg_visibility.out new file mode 100644 index 0000000..843550a --- /dev/null +++ b/contrib/pg_visibility/expected/pg_visibility.out @@ -0,0 +1,177 @@ +CREATE EXTENSION pg_visibility; +-- +-- recently-dropped table +-- +\set VERBOSITY sqlstate +BEGIN; +CREATE TABLE droppedtest (c int); +SELECT 'droppedtest'::regclass::oid AS oid \gset +SAVEPOINT q; DROP TABLE droppedtest; RELEASE q; +SAVEPOINT q; SELECT * FROM pg_visibility_map(:oid); ROLLBACK TO q; +ERROR: XX000 +-- ERROR: could not open relation with OID 16xxx +SAVEPOINT q; SELECT 1; ROLLBACK TO q; + ?column? +---------- + 1 +(1 row) + +SAVEPOINT q; SELECT 1; ROLLBACK TO q; + ?column? 
+---------- + 1 +(1 row) + +SELECT pg_relation_size(:oid), pg_relation_filepath(:oid), + has_table_privilege(:oid, 'SELECT'); + pg_relation_size | pg_relation_filepath | has_table_privilege +------------------+----------------------+--------------------- + | | +(1 row) + +SELECT * FROM pg_visibility_map(:oid); +ERROR: XX000 +-- ERROR: could not open relation with OID 16xxx +ROLLBACK; +\set VERBOSITY default +-- +-- check that using the module's functions with unsupported relations will fail +-- +-- partitioned tables (the parent ones) don't have visibility maps +create table test_partitioned (a int) partition by list (a); +-- these should all fail +select pg_visibility('test_partitioned', 0); +ERROR: "test_partitioned" is not a table, materialized view, or TOAST table +select pg_visibility_map('test_partitioned'); +ERROR: "test_partitioned" is not a table, materialized view, or TOAST table +select pg_visibility_map_summary('test_partitioned'); +ERROR: "test_partitioned" is not a table, materialized view, or TOAST table +select pg_check_frozen('test_partitioned'); +ERROR: "test_partitioned" is not a table, materialized view, or TOAST table +select pg_truncate_visibility_map('test_partitioned'); +ERROR: "test_partitioned" is not a table, materialized view, or TOAST table +create table test_partition partition of test_partitioned for values in (1); +create index test_index on test_partition (a); +-- indexes do not, so these all fail +select pg_visibility('test_index', 0); +ERROR: "test_index" is not a table, materialized view, or TOAST table +select pg_visibility_map('test_index'); +ERROR: "test_index" is not a table, materialized view, or TOAST table +select pg_visibility_map_summary('test_index'); +ERROR: "test_index" is not a table, materialized view, or TOAST table +select pg_check_frozen('test_index'); +ERROR: "test_index" is not a table, materialized view, or TOAST table +select pg_truncate_visibility_map('test_index'); +ERROR: "test_index" is not a table, 
materialized view, or TOAST table +create view test_view as select 1; +-- views do not have VMs, so these all fail +select pg_visibility('test_view', 0); +ERROR: "test_view" is not a table, materialized view, or TOAST table +select pg_visibility_map('test_view'); +ERROR: "test_view" is not a table, materialized view, or TOAST table +select pg_visibility_map_summary('test_view'); +ERROR: "test_view" is not a table, materialized view, or TOAST table +select pg_check_frozen('test_view'); +ERROR: "test_view" is not a table, materialized view, or TOAST table +select pg_truncate_visibility_map('test_view'); +ERROR: "test_view" is not a table, materialized view, or TOAST table +create sequence test_sequence; +-- sequences do not have VMs, so these all fail +select pg_visibility('test_sequence', 0); +ERROR: "test_sequence" is not a table, materialized view, or TOAST table +select pg_visibility_map('test_sequence'); +ERROR: "test_sequence" is not a table, materialized view, or TOAST table +select pg_visibility_map_summary('test_sequence'); +ERROR: "test_sequence" is not a table, materialized view, or TOAST table +select pg_check_frozen('test_sequence'); +ERROR: "test_sequence" is not a table, materialized view, or TOAST table +select pg_truncate_visibility_map('test_sequence'); +ERROR: "test_sequence" is not a table, materialized view, or TOAST table +create foreign data wrapper dummy; +create server dummy_server foreign data wrapper dummy; +create foreign table test_foreign_table () server dummy_server; +-- foreign tables do not have VMs, so these all fail +select pg_visibility('test_foreign_table', 0); +ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table +select pg_visibility_map('test_foreign_table'); +ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table +select pg_visibility_map_summary('test_foreign_table'); +ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table +select 
pg_check_frozen('test_foreign_table'); +ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table +select pg_truncate_visibility_map('test_foreign_table'); +ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table +-- check some of the allowed relkinds +create table regular_table (a int); +insert into regular_table values (1), (2); +vacuum (disable_page_skipping) regular_table; +select count(*) > 0 from pg_visibility('regular_table'); + ?column? +---------- + t +(1 row) + +truncate regular_table; +select count(*) > 0 from pg_visibility('regular_table'); + ?column? +---------- + f +(1 row) + +create materialized view matview_visibility_test as select * from regular_table; +vacuum (disable_page_skipping) matview_visibility_test; +select count(*) > 0 from pg_visibility('matview_visibility_test'); + ?column? +---------- + f +(1 row) + +insert into regular_table values (1), (2); +refresh materialized view matview_visibility_test; +select count(*) > 0 from pg_visibility('matview_visibility_test'); + ?column? +---------- + t +(1 row) + +-- regular tables which are part of a partition *do* have visibility maps +insert into test_partition values (1); +vacuum (disable_page_skipping) test_partition; +select count(*) > 0 from pg_visibility('test_partition', 0); + ?column? +---------- + t +(1 row) + +select count(*) > 0 from pg_visibility_map('test_partition'); + ?column? +---------- + t +(1 row) + +select count(*) > 0 from pg_visibility_map_summary('test_partition'); + ?column? 
+---------- + t +(1 row) + +select * from pg_check_frozen('test_partition'); -- hopefully none + t_ctid +-------- +(0 rows) + +select pg_truncate_visibility_map('test_partition'); + pg_truncate_visibility_map +---------------------------- + +(1 row) + +-- cleanup +drop table test_partitioned; +drop view test_view; +drop sequence test_sequence; +drop foreign table test_foreign_table; +drop server dummy_server; +drop foreign data wrapper dummy; +drop materialized view matview_visibility_test; +drop table regular_table; diff --git a/contrib/pg_visibility/pg_visibility--1.0--1.1.sql b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql new file mode 100644 index 0000000..378824c --- /dev/null +++ b/contrib/pg_visibility/pg_visibility--1.0--1.1.sql @@ -0,0 +1,24 @@ +/* contrib/pg_visibility/pg_visibility--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_visibility UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION pg_check_frozen(regclass, t_ctid OUT tid) +RETURNS SETOF tid +AS 'MODULE_PATHNAME', 'pg_check_frozen' +LANGUAGE C STRICT; + +CREATE FUNCTION pg_check_visible(regclass, t_ctid OUT tid) +RETURNS SETOF tid +AS 'MODULE_PATHNAME', 'pg_check_visible' +LANGUAGE C STRICT; + +CREATE FUNCTION pg_truncate_visibility_map(regclass) +RETURNS void +AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map' +LANGUAGE C STRICT +PARALLEL UNSAFE; -- let's not make this any more dangerous + +REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC; diff --git a/contrib/pg_visibility/pg_visibility--1.1--1.2.sql b/contrib/pg_visibility/pg_visibility--1.1--1.2.sql new file mode 100644 index 0000000..a5a4fe7 --- /dev/null +++ b/contrib/pg_visibility/pg_visibility--1.1--1.2.sql @@ -0,0 +1,13 @@ +/* contrib/pg_visibility/pg_visibility--1.1--1.2.sql */ + +-- complain if 
script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pg_visibility UPDATE TO '1.2'" to load this file. \quit + +-- Allow use of monitoring functions by pg_monitor members +GRANT EXECUTE ON FUNCTION pg_visibility_map(regclass, bigint) TO pg_stat_scan_tables; +GRANT EXECUTE ON FUNCTION pg_visibility(regclass, bigint) TO pg_stat_scan_tables; +GRANT EXECUTE ON FUNCTION pg_visibility_map(regclass) TO pg_stat_scan_tables; +GRANT EXECUTE ON FUNCTION pg_visibility(regclass) TO pg_stat_scan_tables; +GRANT EXECUTE ON FUNCTION pg_visibility_map_summary(regclass) TO pg_stat_scan_tables; +GRANT EXECUTE ON FUNCTION pg_check_frozen(regclass) TO pg_stat_scan_tables; +GRANT EXECUTE ON FUNCTION pg_check_visible(regclass) TO pg_stat_scan_tables; diff --git a/contrib/pg_visibility/pg_visibility--1.1.sql b/contrib/pg_visibility/pg_visibility--1.1.sql new file mode 100644 index 0000000..0a29967 --- /dev/null +++ b/contrib/pg_visibility/pg_visibility--1.1.sql @@ -0,0 +1,75 @@ +/* contrib/pg_visibility/pg_visibility--1.1.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pg_visibility" to load this file. \quit + +-- Show visibility map information. +CREATE FUNCTION pg_visibility_map(regclass, blkno bigint, + all_visible OUT boolean, + all_frozen OUT boolean) +RETURNS record +AS 'MODULE_PATHNAME', 'pg_visibility_map' +LANGUAGE C STRICT; + +-- Show visibility map and page-level visibility information. +CREATE FUNCTION pg_visibility(regclass, blkno bigint, + all_visible OUT boolean, + all_frozen OUT boolean, + pd_all_visible OUT boolean) +RETURNS record +AS 'MODULE_PATHNAME', 'pg_visibility' +LANGUAGE C STRICT; + +-- Show visibility map information for each block in a relation. 
+CREATE FUNCTION pg_visibility_map(regclass, blkno OUT bigint, + all_visible OUT boolean, + all_frozen OUT boolean) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_visibility_map_rel' +LANGUAGE C STRICT; + +-- Show visibility map and page-level visibility information for each block. +CREATE FUNCTION pg_visibility(regclass, blkno OUT bigint, + all_visible OUT boolean, + all_frozen OUT boolean, + pd_all_visible OUT boolean) +RETURNS SETOF record +AS 'MODULE_PATHNAME', 'pg_visibility_rel' +LANGUAGE C STRICT; + +-- Show summary of visibility map bits for a relation. +CREATE FUNCTION pg_visibility_map_summary(regclass, + OUT all_visible bigint, OUT all_frozen bigint) +RETURNS record +AS 'MODULE_PATHNAME', 'pg_visibility_map_summary' +LANGUAGE C STRICT; + +-- Show tupleids of non-frozen tuples if any in all_frozen pages +-- for a relation. +CREATE FUNCTION pg_check_frozen(regclass, t_ctid OUT tid) +RETURNS SETOF tid +AS 'MODULE_PATHNAME', 'pg_check_frozen' +LANGUAGE C STRICT; + +-- Show tupleids of dead tuples if any in all_visible pages for a relation. +CREATE FUNCTION pg_check_visible(regclass, t_ctid OUT tid) +RETURNS SETOF tid +AS 'MODULE_PATHNAME', 'pg_check_visible' +LANGUAGE C STRICT; + +-- Truncate the visibility map fork. +CREATE FUNCTION pg_truncate_visibility_map(regclass) +RETURNS void +AS 'MODULE_PATHNAME', 'pg_truncate_visibility_map' +LANGUAGE C STRICT +PARALLEL UNSAFE; -- let's not make this any more dangerous + +-- Don't want these to be available to public. 
+REVOKE ALL ON FUNCTION pg_visibility_map(regclass, bigint) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_visibility(regclass, bigint) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_visibility_map(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_visibility(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_visibility_map_summary(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_check_frozen(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_check_visible(regclass) FROM PUBLIC; +REVOKE ALL ON FUNCTION pg_truncate_visibility_map(regclass) FROM PUBLIC; diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c new file mode 100644 index 0000000..68d580e --- /dev/null +++ b/contrib/pg_visibility/pg_visibility.c @@ -0,0 +1,788 @@ +/*------------------------------------------------------------------------- + * + * pg_visibility.c + * display visibility map information and page-level visibility bits + * + * Copyright (c) 2016-2020, PostgreSQL Global Development Group + * + * contrib/pg_visibility/pg_visibility.c + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/visibilitymap.h" +#include "catalog/pg_type.h" +#include "catalog/storage_xlog.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/procarray.h" +#include "storage/smgr.h" +#include "utils/rel.h" +#include "utils/snapmgr.h" + +PG_MODULE_MAGIC; + +typedef struct vbits +{ + BlockNumber next; + BlockNumber count; + uint8 bits[FLEXIBLE_ARRAY_MEMBER]; +} vbits; + +typedef struct corrupt_items +{ + BlockNumber next; + BlockNumber count; + ItemPointer tids; +} corrupt_items; + +PG_FUNCTION_INFO_V1(pg_visibility_map); +PG_FUNCTION_INFO_V1(pg_visibility_map_rel); +PG_FUNCTION_INFO_V1(pg_visibility); +PG_FUNCTION_INFO_V1(pg_visibility_rel); +PG_FUNCTION_INFO_V1(pg_visibility_map_summary); +PG_FUNCTION_INFO_V1(pg_check_frozen); 
+PG_FUNCTION_INFO_V1(pg_check_visible); +PG_FUNCTION_INFO_V1(pg_truncate_visibility_map); + +static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd); +static vbits *collect_visibility_data(Oid relid, bool include_pd); +static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible, + bool all_frozen); +static void record_corrupt_item(corrupt_items *items, ItemPointer tid); +static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, + Buffer buffer); +static void check_relation_relkind(Relation rel); + +/* + * Visibility map information for a single block of a relation. + * + * Note: the VM code will silently return zeroes for pages past the end + * of the map, so we allow probes up to MaxBlockNumber regardless of the + * actual relation size. + */ +Datum +pg_visibility_map(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 blkno = PG_GETARG_INT64(1); + int32 mapbits; + Relation rel; + Buffer vmbuffer = InvalidBuffer; + TupleDesc tupdesc; + Datum values[2]; + bool nulls[2]; + + rel = relation_open(relid, AccessShareLock); + + /* Only some relkinds have a visibility map */ + check_relation_relkind(rel); + + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + + tupdesc = pg_visibility_tupdesc(false, false); + MemSet(nulls, 0, sizeof(nulls)); + + mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0); + values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0); + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Visibility map information for a single block of a relation, plus the + * page-level information for the same block. 
+ */ +Datum +pg_visibility(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 blkno = PG_GETARG_INT64(1); + int32 mapbits; + Relation rel; + Buffer vmbuffer = InvalidBuffer; + Buffer buffer; + Page page; + TupleDesc tupdesc; + Datum values[3]; + bool nulls[3]; + + rel = relation_open(relid, AccessShareLock); + + /* Only some relkinds have a visibility map */ + check_relation_relkind(rel); + + if (blkno < 0 || blkno > MaxBlockNumber) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("invalid block number"))); + + tupdesc = pg_visibility_tupdesc(false, true); + MemSet(nulls, 0, sizeof(nulls)); + + mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0); + values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0); + + /* Here we have to explicitly check rel size ... */ + if (blkno < RelationGetNumberOfBlocks(rel)) + { + buffer = ReadBuffer(rel, blkno); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buffer); + values[2] = BoolGetDatum(PageIsAllVisible(page)); + + UnlockReleaseBuffer(buffer); + } + else + { + /* As with the vismap, silently return 0 for pages past EOF */ + values[2] = BoolGetDatum(false); + } + + relation_close(rel, AccessShareLock); + + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Visibility map information for every block in a relation. 
+ */ +Datum +pg_visibility_map_rel(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + vbits *info; + + if (SRF_IS_FIRSTCALL()) + { + Oid relid = PG_GETARG_OID(0); + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + funcctx->tuple_desc = pg_visibility_tupdesc(true, false); + /* collect_visibility_data will verify the relkind */ + funcctx->user_fctx = collect_visibility_data(relid, false); + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + info = (vbits *) funcctx->user_fctx; + + if (info->next < info->count) + { + Datum values[3]; + bool nulls[3]; + HeapTuple tuple; + + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int64GetDatum(info->next); + values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0); + values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0); + info->next++; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * Visibility map information for every block in a relation, plus the page + * level information for each block. 
+ */ +Datum +pg_visibility_rel(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + vbits *info; + + if (SRF_IS_FIRSTCALL()) + { + Oid relid = PG_GETARG_OID(0); + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + funcctx->tuple_desc = pg_visibility_tupdesc(true, true); + /* collect_visibility_data will verify the relkind */ + funcctx->user_fctx = collect_visibility_data(relid, true); + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + info = (vbits *) funcctx->user_fctx; + + if (info->next < info->count) + { + Datum values[4]; + bool nulls[4]; + HeapTuple tuple; + + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int64GetDatum(info->next); + values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0); + values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0); + values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0); + info->next++; + + tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls); + SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple)); + } + + SRF_RETURN_DONE(funcctx); +} + +/* + * Count the number of all-visible and all-frozen pages in the visibility + * map for a particular relation. + */ +Datum +pg_visibility_map_summary(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + BlockNumber nblocks; + BlockNumber blkno; + Buffer vmbuffer = InvalidBuffer; + int64 all_visible = 0; + int64 all_frozen = 0; + TupleDesc tupdesc; + Datum values[2]; + bool nulls[2]; + + rel = relation_open(relid, AccessShareLock); + + /* Only some relkinds have a visibility map */ + check_relation_relkind(rel); + + nblocks = RelationGetNumberOfBlocks(rel); + + for (blkno = 0; blkno < nblocks; ++blkno) + { + int32 mapbits; + + /* Make sure we are interruptible. */ + CHECK_FOR_INTERRUPTS(); + + /* Get map info. 
*/ + mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); + if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0) + ++all_visible; + if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0) + ++all_frozen; + } + + /* Clean up. */ + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + relation_close(rel, AccessShareLock); + + tupdesc = CreateTemplateTupleDesc(2); + TupleDescInitEntry(tupdesc, (AttrNumber) 1, "all_visible", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, (AttrNumber) 2, "all_frozen", INT8OID, -1, 0); + tupdesc = BlessTupleDesc(tupdesc); + + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int64GetDatum(all_visible); + values[1] = Int64GetDatum(all_frozen); + + PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls))); +} + +/* + * Return the TIDs of non-frozen tuples present in pages marked all-frozen + * in the visibility map. We hope no one will ever find any, but there could + * be bugs, database corruption, etc. + */ +Datum +pg_check_frozen(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + corrupt_items *items; + + if (SRF_IS_FIRSTCALL()) + { + Oid relid = PG_GETARG_OID(0); + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* collect_corrupt_items will verify the relkind */ + funcctx->user_fctx = collect_corrupt_items(relid, false, true); + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + items = (corrupt_items *) funcctx->user_fctx; + + if (items->next < items->count) + SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++])); + + SRF_RETURN_DONE(funcctx); +} + +/* + * Return the TIDs of not-all-visible tuples in pages marked all-visible + * in the visibility map. We hope no one will ever find any, but there could + * be bugs, database corruption, etc. 
+ */ +Datum +pg_check_visible(PG_FUNCTION_ARGS) +{ + FuncCallContext *funcctx; + corrupt_items *items; + + if (SRF_IS_FIRSTCALL()) + { + Oid relid = PG_GETARG_OID(0); + MemoryContext oldcontext; + + funcctx = SRF_FIRSTCALL_INIT(); + oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx); + /* collect_corrupt_items will verify the relkind */ + funcctx->user_fctx = collect_corrupt_items(relid, true, false); + MemoryContextSwitchTo(oldcontext); + } + + funcctx = SRF_PERCALL_SETUP(); + items = (corrupt_items *) funcctx->user_fctx; + + if (items->next < items->count) + SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++])); + + SRF_RETURN_DONE(funcctx); +} + +/* + * Remove the visibility map fork for a relation. If there turn out to be + * any bugs in the visibility map code that require rebuilding the VM, this + * provides users with a way to do it that is cleaner than shutting down the + * server and removing files by hand. + * + * This is a cut-down version of RelationTruncate. + */ +Datum +pg_truncate_visibility_map(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + ForkNumber fork; + BlockNumber block; + + rel = relation_open(relid, AccessExclusiveLock); + + /* Only some relkinds have a visibility map */ + check_relation_relkind(rel); + + RelationOpenSmgr(rel); + rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber; + + block = visibilitymap_prepare_truncate(rel, 0); + if (BlockNumberIsValid(block)) + { + fork = VISIBILITYMAP_FORKNUM; + smgrtruncate(rel->rd_smgr, &fork, 1, &block); + } + + if (RelationNeedsWAL(rel)) + { + xl_smgr_truncate xlrec; + + xlrec.blkno = 0; + xlrec.rnode = rel->rd_node; + xlrec.flags = SMGR_TRUNCATE_VM; + + XLogBeginInsert(); + XLogRegisterData((char *) &xlrec, sizeof(xlrec)); + + XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE); + } + + /* + * Release the lock right away, not at commit time. 
+ * + * It would be a problem to release the lock prior to commit if this + * truncate operation sends any transactional invalidation messages. Other + * backends would potentially be able to lock the relation without + * processing them in the window of time between when we release the lock + * here and when we send the messages at our eventual commit. However, + * we're currently only sending a non-transactional smgr invalidation, + * which will have been posted to shared memory immediately from within + * smgrtruncate. Therefore, there should be no race here. + * + * The reason why it's desirable to release the lock early here is because + * of the possibility that someone will need to use this to blow away many + * visibility map forks at once. If we can't release the lock until + * commit time, the transaction doing this will accumulate + * AccessExclusiveLocks on all of those relations at the same time, which + * is undesirable. However, if this turns out to be unsafe we may have no + * choice... + */ + relation_close(rel, AccessExclusiveLock); + + /* Nothing to return. */ + PG_RETURN_VOID(); +} + +/* + * Helper function to construct whichever TupleDesc we need for a particular + * call. + */ +static TupleDesc +pg_visibility_tupdesc(bool include_blkno, bool include_pd) +{ + TupleDesc tupdesc; + AttrNumber maxattr = 2; + AttrNumber a = 0; + + if (include_blkno) + ++maxattr; + if (include_pd) + ++maxattr; + tupdesc = CreateTemplateTupleDesc(maxattr); + if (include_blkno) + TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0); + TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0); + TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0); + if (include_pd) + TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0); + Assert(a == maxattr); + + return BlessTupleDesc(tupdesc); +} + +/* + * Collect visibility data about a relation. + * + * Checks relkind of relid and will throw an error if the relation does not + * have a VM. 
+ */ +static vbits * +collect_visibility_data(Oid relid, bool include_pd) +{ + Relation rel; + BlockNumber nblocks; + vbits *info; + BlockNumber blkno; + Buffer vmbuffer = InvalidBuffer; + BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); + + rel = relation_open(relid, AccessShareLock); + + /* Only some relkinds have a visibility map */ + check_relation_relkind(rel); + + nblocks = RelationGetNumberOfBlocks(rel); + info = palloc0(offsetof(vbits, bits) + nblocks); + info->next = 0; + info->count = nblocks; + + for (blkno = 0; blkno < nblocks; ++blkno) + { + int32 mapbits; + + /* Make sure we are interruptible. */ + CHECK_FOR_INTERRUPTS(); + + /* Get map info. */ + mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer); + if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0) + info->bits[blkno] |= (1 << 0); + if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0) + info->bits[blkno] |= (1 << 1); + + /* + * Page-level data requires reading every block, so only get it if the + * caller needs it. Use a buffer access strategy, too, to prevent + * cache-trashing. + */ + if (include_pd) + { + Buffer buffer; + Page page; + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, + bstrategy); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buffer); + if (PageIsAllVisible(page)) + info->bits[blkno] |= (1 << 2); + + UnlockReleaseBuffer(buffer); + } + } + + /* Clean up. */ + if (vmbuffer != InvalidBuffer) + ReleaseBuffer(vmbuffer); + relation_close(rel, AccessShareLock); + + return info; +} + +/* + * Returns a list of items whose visibility map information does not match + * the status of the tuples on the page. + * + * If all_visible is passed as true, this will include all items which are + * on pages marked as all-visible in the visibility map but which do not + * seem to in fact be all-visible. 
+ *
+ * If all_frozen is passed as true, this will include all items which are
+ * on pages marked as all-frozen but which do not seem to in fact be frozen.
+ *
+ * A TID is reported at most once, even when both checks are requested and
+ * the item fails both of them.
+ *
+ * Checks relkind of relid and will throw an error if the relation does not
+ * have a VM.
+ *
+ * On return, items->count is the number of TIDs collected and items->next
+ * is reset to 0 so that the caller can use it as a read position.
+ */
+static corrupt_items *
+collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
+{
+	Relation	rel;
+	BlockNumber nblocks;
+	corrupt_items *items;
+	BlockNumber blkno;
+	Buffer		vmbuffer = InvalidBuffer;
+	BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+	TransactionId OldestXmin = InvalidTransactionId;
+
+	if (all_visible)
+	{
+		/* Don't pass rel; that will fail in recovery. */
+		OldestXmin = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
+	}
+
+	rel = relation_open(relid, AccessShareLock);
+
+	/* Only some relkinds have a visibility map */
+	check_relation_relkind(rel);
+
+	nblocks = RelationGetNumberOfBlocks(rel);
+
+	/*
+	 * Guess an initial array size. We don't expect many corrupted tuples, so
+	 * start with a small array.  This function uses the "next" field to track
+	 * the next offset where we can store an item (which is the same thing as
+	 * the number of items found so far) and the "count" field to track the
+	 * number of entries allocated.  We'll repurpose these fields before
+	 * returning.
+	 */
+	items = palloc0(sizeof(corrupt_items));
+	items->next = 0;
+	items->count = 64;
+	items->tids = palloc(items->count * sizeof(ItemPointerData));
+
+	/* Loop over every block in the relation. */
+	for (blkno = 0; blkno < nblocks; ++blkno)
+	{
+		bool		check_frozen = false;
+		bool		check_visible = false;
+		Buffer		buffer;
+		Page		page;
+		OffsetNumber offnum,
+					maxoff;
+
+		/* Make sure we are interruptible. */
+		CHECK_FOR_INTERRUPTS();
+
+		/* Use the visibility map to decide whether to check this page. */
+		if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+			check_frozen = true;
+		if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+			check_visible = true;
+		if (!check_visible && !check_frozen)
+			continue;
+
+		/* Read and lock the page (share lock: we only inspect it). */
+		buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+									bstrategy);
+		LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+		page = BufferGetPage(buffer);
+		maxoff = PageGetMaxOffsetNumber(page);
+
+		/*
+		 * The visibility map bits might have changed while we were acquiring
+		 * the page lock.  Recheck to avoid returning spurious results.
+		 */
+		if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+			check_frozen = false;
+		if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+			check_visible = false;
+		if (!check_visible && !check_frozen)
+		{
+			UnlockReleaseBuffer(buffer);
+			continue;
+		}
+
+		/* Iterate over each tuple on the page. */
+		for (offnum = FirstOffsetNumber;
+			 offnum <= maxoff;
+			 offnum = OffsetNumberNext(offnum))
+		{
+			HeapTupleData tuple;
+			ItemId		itemid;
+
+			itemid = PageGetItemId(page, offnum);
+
+			/* Unused or redirect line pointers are of no interest. */
+			if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
+				continue;
+
+			/* Dead line pointers are neither all-visible nor frozen. */
+			if (ItemIdIsDead(itemid))
+			{
+				ItemPointerSet(&(tuple.t_self), blkno, offnum);
+				record_corrupt_item(items, &tuple.t_self);
+				continue;
+			}
+
+			/* Initialize a HeapTupleData structure for checks below. */
+			ItemPointerSet(&(tuple.t_self), blkno, offnum);
+			tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+			tuple.t_len = ItemIdGetLength(itemid);
+			tuple.t_tableOid = relid;
+
+			/*
+			 * If we're checking whether the page is all-visible, we expect
+			 * the tuple to be all-visible.
+			 */
+			if (check_visible &&
+				!tuple_all_visible(&tuple, OldestXmin, buffer))
+			{
+				TransactionId RecomputedOldestXmin;
+				bool		not_visible = true;
+
+				/*
+				 * Time has passed since we computed OldestXmin, so it's
+				 * possible that this tuple is all-visible in reality even
+				 * though it doesn't appear so based on our
+				 * previously-computed value.  Let's compute a new value so we
+				 * can be certain whether there is a problem.
+				 *
+				 * From a concurrency point of view, it sort of sucks to
+				 * retake ProcArrayLock here while we're holding a share lock
+				 * on the buffer, but it should be safe against deadlocks,
+				 * because surely GetOldestXmin() should never take a buffer
+				 * lock.  And this shouldn't happen often, so it's worth being
+				 * careful so as to avoid false positives.
+				 */
+				RecomputedOldestXmin = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
+
+				/* Only a horizon that actually advanced can change the verdict. */
+				if (TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
+				{
+					OldestXmin = RecomputedOldestXmin;
+					not_visible = !tuple_all_visible(&tuple, OldestXmin, buffer);
+				}
+
+				if (not_visible)
+				{
+					/*
+					 * Record the TID once and move on; without this, a tuple
+					 * that also fails the all-frozen check below would be
+					 * reported twice when both checks are requested.
+					 */
+					record_corrupt_item(items, &tuple.t_self);
+					continue;
+				}
+			}
+
+			/*
+			 * If we're checking whether the page is all-frozen, we expect the
+			 * tuple to be in a state where it will never need freezing.
+			 */
+			if (check_frozen &&
+				heap_tuple_needs_eventual_freeze(tuple.t_data))
+				record_corrupt_item(items, &tuple.t_self);
+		}
+
+		UnlockReleaseBuffer(buffer);
+	}
+
+	/* Clean up. */
+	if (vmbuffer != InvalidBuffer)
+		ReleaseBuffer(vmbuffer);
+	relation_close(rel, AccessShareLock);
+
+	/*
+	 * Before returning, repurpose the fields to match caller's expectations.
+	 * next is now the next item that should be read (rather than written) and
+	 * count is now the number of items we wrote (rather than the number we
+	 * allocated).
+	 */
+	items->count = items->next;
+	items->next = 0;
+
+	return items;
+}
+
+/*
+ * Remember one corrupt item.
+ */
+static void
+record_corrupt_item(corrupt_items *items, ItemPointer tid)
+{
+	/*
+	 * enlarge output array if needed: while collecting, "next" is the write
+	 * position and "count" the allocated size, so double the allocation once
+	 * next has reached count.
+	 */
+	if (items->next >= items->count)
+	{
+		items->count *= 2;
+		items->tids = repalloc(items->tids,
+							   items->count * sizeof(ItemPointerData));
+	}
+	/* and add the new item, advancing the write position past it */
+	items->tids[items->next++] = *tid;
+}
+
+/*
+ * Check whether a tuple is all-visible relative to a given OldestXmin value.
+ * The buffer should contain the tuple and should be locked and pinned.
+ *
+ * Returns true only when HeapTupleSatisfiesVacuum() reports HEAPTUPLE_LIVE
+ * and the xmin of the tuple precedes OldestXmin; returns false otherwise.
+ */
+static bool
+tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
+{
+	HTSV_Result state;
+	TransactionId xmin;
+
+	state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
+	if (state != HEAPTUPLE_LIVE)
+		return false;			/* all-visible implies live */
+
+	/*
+	 * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
+	 * all-visible unless every tuple is hinted committed. However, those hint
+	 * bits could be lost after a crash, so we can't be certain that they'll
+	 * be set here.  So just check the xmin.
+	 */
+
+	xmin = HeapTupleHeaderGetXmin(tup->t_data);
+	if (!TransactionIdPrecedes(xmin, OldestXmin))
+		return false;			/* xmin not old enough for all to see */
+
+	return true;
+}
+
+/*
+ * check_relation_relkind - convenience routine to check that relation
+ * is of the relkind supported by the callers
+ *
+ * Accepts RELKIND_RELATION, RELKIND_MATVIEW and RELKIND_TOASTVALUE; any
+ * other relkind raises an ERROR with ERRCODE_WRONG_OBJECT_TYPE.
+ */
+static void
+check_relation_relkind(Relation rel)
+{
+	if (rel->rd_rel->relkind != RELKIND_RELATION &&
+		rel->rd_rel->relkind != RELKIND_MATVIEW &&
+		rel->rd_rel->relkind != RELKIND_TOASTVALUE)
+		ereport(ERROR,
+				(errcode(ERRCODE_WRONG_OBJECT_TYPE),
+				 errmsg("\"%s\" is not a table, materialized view, or TOAST table",
+						RelationGetRelationName(rel))));
+}
diff --git a/contrib/pg_visibility/pg_visibility.control b/contrib/pg_visibility/pg_visibility.control
new file mode 100644
index 0000000..3cffa08
--- /dev/null
+++ b/contrib/pg_visibility/pg_visibility.control
@@ -0,0 +1,5 @@
+# pg_visibility extension
+comment = 'examine the visibility map (VM) and page-level visibility info'
+default_version = '1.2'
+module_pathname = '$libdir/pg_visibility'
+relocatable = true
diff --git a/contrib/pg_visibility/sql/pg_visibility.sql b/contrib/pg_visibility/sql/pg_visibility.sql
new file mode 100644
index 0000000..f1e917b
--- /dev/null
+++ b/contrib/pg_visibility/sql/pg_visibility.sql
@@ -0,0 +1,102 @@
+CREATE EXTENSION pg_visibility;
+
+--
+-- recently-dropped table
+--
+\set VERBOSITY sqlstate
+BEGIN;
+CREATE TABLE droppedtest (c int);
+SELECT 'droppedtest'::regclass::oid AS oid \gset
+SAVEPOINT q; DROP TABLE droppedtest; RELEASE q;
+SAVEPOINT q; SELECT * FROM pg_visibility_map(:oid); ROLLBACK TO q;
+-- ERROR: could not open relation with OID 16xxx
+SAVEPOINT q; SELECT 1; ROLLBACK TO q;
+SAVEPOINT q; SELECT 1; ROLLBACK TO q;
+SELECT pg_relation_size(:oid), pg_relation_filepath(:oid),
+  has_table_privilege(:oid, 'SELECT');
+SELECT * FROM pg_visibility_map(:oid);
+-- ERROR: could not open relation with OID 16xxx
+ROLLBACK;
+\set VERBOSITY default
+
+--
+-- check that using
the module's functions with unsupported relations will fail
+--
+
+-- partitioned tables (the parent ones) don't have visibility maps
+create table test_partitioned (a int) partition by list (a);
+-- these should all fail
+select pg_visibility('test_partitioned', 0);
+select pg_visibility_map('test_partitioned');
+select pg_visibility_map_summary('test_partitioned');
+select pg_check_frozen('test_partitioned');
+select pg_truncate_visibility_map('test_partitioned');
+
+create table test_partition partition of test_partitioned for values in (1);
+create index test_index on test_partition (a);
+-- indexes do not, so these all fail
+select pg_visibility('test_index', 0);
+select pg_visibility_map('test_index');
+select pg_visibility_map_summary('test_index');
+select pg_check_frozen('test_index');
+select pg_truncate_visibility_map('test_index');
+
+create view test_view as select 1;
+-- views do not have VMs, so these all fail
+select pg_visibility('test_view', 0);
+select pg_visibility_map('test_view');
+select pg_visibility_map_summary('test_view');
+select pg_check_frozen('test_view');
+select pg_truncate_visibility_map('test_view');
+
+create sequence test_sequence;
+-- sequences do not have VMs, so these all fail
+select pg_visibility('test_sequence', 0);
+select pg_visibility_map('test_sequence');
+select pg_visibility_map_summary('test_sequence');
+select pg_check_frozen('test_sequence');
+select pg_truncate_visibility_map('test_sequence');
+
+create foreign data wrapper dummy;
+create server dummy_server foreign data wrapper dummy;
+create foreign table test_foreign_table () server dummy_server;
+-- foreign tables do not have VMs, so these all fail
+select pg_visibility('test_foreign_table', 0);
+select pg_visibility_map('test_foreign_table');
+select pg_visibility_map_summary('test_foreign_table');
+select pg_check_frozen('test_foreign_table');
+select pg_truncate_visibility_map('test_foreign_table');
+
+-- check some of the allowed relkinds
+create table regular_table (a int);
+insert into regular_table values (1), (2);
+vacuum (disable_page_skipping) regular_table;
+select count(*) > 0 from pg_visibility('regular_table');
+truncate regular_table;
+select count(*) > 0 from pg_visibility('regular_table');
+
+create materialized view matview_visibility_test as select * from regular_table;
+vacuum (disable_page_skipping) matview_visibility_test;
+select count(*) > 0 from pg_visibility('matview_visibility_test');
+insert into regular_table values (1), (2);
+refresh materialized view matview_visibility_test;
+select count(*) > 0 from pg_visibility('matview_visibility_test');
+
+-- regular tables which are part of a partition *do* have visibility maps
+insert into test_partition values (1);
+vacuum (disable_page_skipping) test_partition;
+select count(*) > 0 from pg_visibility('test_partition', 0);
+select count(*) > 0 from pg_visibility_map('test_partition');
+select count(*) > 0 from pg_visibility_map_summary('test_partition');
+select * from pg_check_frozen('test_partition'); -- hopefully none
+select pg_truncate_visibility_map('test_partition');
+
+-- cleanup
+drop table test_partitioned;
+drop view test_view;
+drop sequence test_sequence;
+drop foreign table test_foreign_table;
+drop server dummy_server;
+drop foreign data wrapper dummy;
+drop materialized view matview_visibility_test;
+drop table regular_table; |