diff options
Diffstat (limited to '')
-rw-r--r-- | contrib/pgstattuple/.gitignore | 4 | ||||
-rw-r--r-- | contrib/pgstattuple/Makefile | 27 | ||||
-rw-r--r-- | contrib/pgstattuple/expected/pgstattuple.out | 267 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstatapprox.c | 318 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstatindex.c | 774 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple--1.0--1.1.sql | 11 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple--1.1--1.2.sql | 39 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple--1.2--1.3.sql | 18 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple--1.3--1.4.sql | 13 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple--1.4--1.5.sql | 136 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple--1.4.sql | 95 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple.c | 590 | ||||
-rw-r--r-- | contrib/pgstattuple/pgstattuple.control | 5 | ||||
-rw-r--r-- | contrib/pgstattuple/sql/pgstattuple.sql | 124 |
14 files changed, 2421 insertions, 0 deletions
diff --git a/contrib/pgstattuple/.gitignore b/contrib/pgstattuple/.gitignore new file mode 100644 index 0000000..5dcb3ff --- /dev/null +++ b/contrib/pgstattuple/.gitignore @@ -0,0 +1,4 @@ +# Generated subdirectories +/log/ +/results/ +/tmp_check/ diff --git a/contrib/pgstattuple/Makefile b/contrib/pgstattuple/Makefile new file mode 100644 index 0000000..c5b17fc --- /dev/null +++ b/contrib/pgstattuple/Makefile @@ -0,0 +1,27 @@ +# contrib/pgstattuple/Makefile + +MODULE_big = pgstattuple +OBJS = \ + $(WIN32RES) \ + pgstatapprox.o \ + pgstatindex.o \ + pgstattuple.o + +EXTENSION = pgstattuple +DATA = pgstattuple--1.4.sql pgstattuple--1.4--1.5.sql \ + pgstattuple--1.3--1.4.sql pgstattuple--1.2--1.3.sql \ + pgstattuple--1.1--1.2.sql pgstattuple--1.0--1.1.sql +PGFILEDESC = "pgstattuple - tuple-level statistics" + +REGRESS = pgstattuple + +ifdef USE_PGXS +PG_CONFIG = pg_config +PGXS := $(shell $(PG_CONFIG) --pgxs) +include $(PGXS) +else +subdir = contrib/pgstattuple +top_builddir = ../.. +include $(top_builddir)/src/Makefile.global +include $(top_srcdir)/contrib/contrib-global.mk +endif diff --git a/contrib/pgstattuple/expected/pgstattuple.out b/contrib/pgstattuple/expected/pgstattuple.out new file mode 100644 index 0000000..40f7825 --- /dev/null +++ b/contrib/pgstattuple/expected/pgstattuple.out @@ -0,0 +1,267 @@ +CREATE EXTENSION pgstattuple; +-- +-- It's difficult to come up with platform-independent test cases for +-- the pgstattuple functions, but the results for empty tables and +-- indexes should be that. +-- +create table test (a int primary key, b int[]); +select * from pgstattuple('test'); + table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent +-----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+-------------- + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(1 row) + +select * from pgstattuple('test'::text); + table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent +-----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+-------------- + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(1 row) + +select * from pgstattuple('test'::name); + table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent +-----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+-------------- + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(1 row) + +select * from pgstattuple('test'::regclass); + table_len | tuple_count | tuple_len | tuple_percent | dead_tuple_count | dead_tuple_len | dead_tuple_percent | free_space | free_percent +-----------+-------------+-----------+---------------+------------------+----------------+--------------------+------------+-------------- + 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 +(1 row) + +select pgstattuple(oid) from pg_class where relname = 'test'; + pgstattuple +--------------------- + (0,0,0,0,0,0,0,0,0) +(1 row) + +select pgstattuple(relname) from pg_class where relname = 'test'; + pgstattuple +--------------------- + (0,0,0,0,0,0,0,0,0) +(1 row) + +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'); + version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation +---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- + 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN +(1 row) + +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'::text); + version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation +---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- + 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN +(1 row) + +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'::name); + version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation +---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- + 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN +(1 row) + +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'::regclass); + version | tree_level | index_size | root_block_no | internal_pages | leaf_pages | empty_pages | deleted_pages | avg_leaf_density | leaf_fragmentation +---------+------------+------------+---------------+----------------+------------+-------------+---------------+------------------+-------------------- + 4 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | NaN | NaN +(1 row) + +select pg_relpages('test'); + pg_relpages +------------- + 0 +(1 row) + +select pg_relpages('test_pkey'); + pg_relpages +------------- + 1 +(1 row) + +select pg_relpages('test_pkey'::text); + pg_relpages +------------- + 1 +(1 row) + +select pg_relpages('test_pkey'::name); + pg_relpages +------------- + 1 +(1 row) + +select pg_relpages('test_pkey'::regclass); + pg_relpages +------------- + 1 +(1 row) + +select pg_relpages(oid) from pg_class where relname = 'test_pkey'; + pg_relpages +------------- + 1 +(1 row) + +select pg_relpages(relname) from pg_class where relname = 'test_pkey'; + pg_relpages +------------- + 1 +(1 row) + +create index test_ginidx on test using gin (b); +select * from pgstatginindex('test_ginidx'); + version | pending_pages | pending_tuples +---------+---------------+---------------- + 2 | 0 | 0 +(1 row) + +create index test_hashidx on test using hash (b); +select * from pgstathashindex('test_hashidx'); + version | bucket_pages | overflow_pages | bitmap_pages | unused_pages | live_items | dead_items | free_percent +---------+--------------+----------------+--------------+--------------+------------+------------+-------------- + 4 | 4 | 0 | 1 | 0 | 0 | 0 | 100 +(1 row) + +-- these should error with the wrong type +select pgstatginindex('test_pkey'); +ERROR: relation "test_pkey" is not a GIN index +select pgstathashindex('test_pkey'); +ERROR: relation "test_pkey" is not a hash index +select pgstatindex('test_ginidx'); +ERROR: relation "test_ginidx" is not a btree index +select pgstathashindex('test_ginidx'); +ERROR: relation "test_ginidx" is not a hash index +select pgstatindex('test_hashidx'); +ERROR: relation "test_hashidx" is not a btree index +select pgstatginindex('test_hashidx'); +ERROR: relation "test_hashidx" is not a GIN index +-- check that using any of these functions with unsupported relations will fail +create table test_partitioned (a int) partition by range (a); +create index test_partitioned_index on test_partitioned(a); +-- these should all fail +select pgstattuple('test_partitioned'); +ERROR: "test_partitioned" (partitioned table) is not supported +select pgstattuple('test_partitioned_index'); +ERROR: "test_partitioned_index" (partitioned index) is not supported +select pgstattuple_approx('test_partitioned'); +ERROR: "test_partitioned" is not a table, materialized view, or TOAST table +select pg_relpages('test_partitioned'); +ERROR: "test_partitioned" is not a table, index, materialized view, sequence, or TOAST table +select pgstatindex('test_partitioned'); +ERROR: relation "test_partitioned" is not a btree index +select pgstatginindex('test_partitioned'); +ERROR: relation "test_partitioned" is not a GIN index +select pgstathashindex('test_partitioned'); +ERROR: "test_partitioned" is not an index +create view test_view as select 1; +-- these should all fail +select pgstattuple('test_view'); +ERROR: "test_view" (view) is not supported +select pgstattuple_approx('test_view'); +ERROR: "test_view" is not a table, materialized view, or TOAST table +select pg_relpages('test_view'); +ERROR: "test_view" is not a table, index, materialized view, sequence, or TOAST table +select pgstatindex('test_view'); +ERROR: relation "test_view" is not a btree index +select pgstatginindex('test_view'); +ERROR: relation "test_view" is not a GIN index +select pgstathashindex('test_view'); +ERROR: "test_view" is not an index +create foreign data wrapper dummy; +create server dummy_server foreign data wrapper dummy; +create foreign table test_foreign_table () server dummy_server; +-- these should all fail +select pgstattuple('test_foreign_table'); +ERROR: "test_foreign_table" (foreign table) is not supported +select pgstattuple_approx('test_foreign_table'); +ERROR: "test_foreign_table" is not a table, materialized view, or TOAST table +select pg_relpages('test_foreign_table'); +ERROR: "test_foreign_table" is not a table, index, materialized view, sequence, or TOAST table +select pgstatindex('test_foreign_table'); +ERROR: relation "test_foreign_table" is not a btree index +select pgstatginindex('test_foreign_table'); +ERROR: relation "test_foreign_table" is not a GIN index +select pgstathashindex('test_foreign_table'); +ERROR: "test_foreign_table" is not an index +-- a partition of a partitioned table should work though +create table test_partition partition of test_partitioned for values from (1) to (100); +select pgstattuple('test_partition'); + pgstattuple +--------------------- + (0,0,0,0,0,0,0,0,0) +(1 row) + +select pgstattuple_approx('test_partition'); + pgstattuple_approx +----------------------- + (0,0,0,0,0,0,0,0,0,0) +(1 row) + +select pg_relpages('test_partition'); + pg_relpages +------------- + 0 +(1 row) + +-- toast tables should work +select pgstattuple((select reltoastrelid from pg_class where relname = 'test')); + pgstattuple +--------------------- + (0,0,0,0,0,0,0,0,0) +(1 row) + +select pgstattuple_approx((select reltoastrelid from pg_class where relname = 'test')); + pgstattuple_approx +----------------------- + (0,0,0,0,0,0,0,0,0,0) +(1 row) + +select pg_relpages((select reltoastrelid from pg_class where relname = 'test')); + pg_relpages +------------- + 0 +(1 row) + +-- not for the index calls though, of course +select pgstatindex('test_partition'); +ERROR: relation "test_partition" is not a btree index +select pgstatginindex('test_partition'); +ERROR: relation "test_partition" is not a GIN index +select pgstathashindex('test_partition'); +ERROR: "test_partition" is not an index +-- an actual index of a partitioned table should work though +create index test_partition_idx on test_partition(a); +create index test_partition_hash_idx on test_partition using hash (a); +-- these should work +select pgstatindex('test_partition_idx'); + pgstatindex +------------------------------ + (4,0,8192,0,0,0,0,0,NaN,NaN) +(1 row) + +select pgstathashindex('test_partition_hash_idx'); + pgstathashindex +--------------------- + (4,8,0,1,0,0,0,100) +(1 row) + +drop table test_partitioned; +drop view test_view; +drop foreign table test_foreign_table; +drop server dummy_server; +drop foreign data wrapper dummy; diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c new file mode 100644 index 0000000..1fe193b --- /dev/null +++ b/contrib/pgstattuple/pgstatapprox.c @@ -0,0 +1,318 @@ +/*------------------------------------------------------------------------- + * + * pgstatapprox.c + * Bloat estimation functions + * + * Copyright (c) 2014-2021, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pgstattuple/pgstatapprox.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/heapam.h" +#include "access/htup_details.h" +#include "access/multixact.h" +#include "access/relation.h" +#include "access/transam.h" +#include "access/visibilitymap.h" +#include "access/xact.h" +#include "catalog/namespace.h" +#include "catalog/pg_am_d.h" +#include "commands/vacuum.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/freespace.h" +#include "storage/lmgr.h" +#include "storage/procarray.h" +#include "utils/builtins.h" + +PG_FUNCTION_INFO_V1(pgstattuple_approx); +PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5); + +Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo); + +typedef struct output_type +{ + uint64 table_len; + uint64 scanned_percent; + uint64 tuple_count; + uint64 tuple_len; + double tuple_percent; + uint64 dead_tuple_count; + uint64 dead_tuple_len; + double dead_tuple_percent; + uint64 free_space; + double free_percent; +} output_type; + +#define NUM_OUTPUT_COLUMNS 10 + +/* + * This function takes an already open relation and scans its pages, + * skipping those that have the corresponding visibility map bit set. + * For pages we skip, we find the free space from the free space map + * and approximate tuple_len on that basis. For the others, we count + * the exact number of dead tuples etc. + * + * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but + * we do not try to avoid skipping single pages. + */ +static void +statapprox_heap(Relation rel, output_type *stat) +{ + BlockNumber scanned, + nblocks, + blkno; + Buffer vmbuffer = InvalidBuffer; + BufferAccessStrategy bstrategy; + TransactionId OldestXmin; + + OldestXmin = GetOldestNonRemovableTransactionId(rel); + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + nblocks = RelationGetNumberOfBlocks(rel); + scanned = 0; + + for (blkno = 0; blkno < nblocks; blkno++) + { + Buffer buf; + Page page; + OffsetNumber offnum, + maxoff; + Size freespace; + + CHECK_FOR_INTERRUPTS(); + + /* + * If the page has only visible tuples, then we can find out the free + * space from the FSM and move on. + */ + if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer)) + { + freespace = GetRecordedFreeSpace(rel, blkno); + stat->tuple_len += BLCKSZ - freespace; + stat->free_space += freespace; + continue; + } + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, + RBM_NORMAL, bstrategy); + + LockBuffer(buf, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buf); + + /* + * It's not safe to call PageGetHeapFreeSpace() on new pages, so we + * treat them as being free space for our purposes. + */ + if (!PageIsNew(page)) + stat->free_space += PageGetHeapFreeSpace(page); + else + stat->free_space += BLCKSZ - SizeOfPageHeaderData; + + /* We may count the page as scanned even if it's new/empty */ + scanned++; + + if (PageIsNew(page) || PageIsEmpty(page)) + { + UnlockReleaseBuffer(buf); + continue; + } + + /* + * Look at each tuple on the page and decide whether it's live or + * dead, then count it and its size. Unlike lazy_scan_heap, we can + * afford to ignore problems and special cases. + */ + maxoff = PageGetMaxOffsetNumber(page); + + for (offnum = FirstOffsetNumber; + offnum <= maxoff; + offnum = OffsetNumberNext(offnum)) + { + ItemId itemid; + HeapTupleData tuple; + + itemid = PageGetItemId(page, offnum); + + if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) || + ItemIdIsDead(itemid)) + { + continue; + } + + Assert(ItemIdIsNormal(itemid)); + + ItemPointerSet(&(tuple.t_self), blkno, offnum); + + tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid); + tuple.t_len = ItemIdGetLength(itemid); + tuple.t_tableOid = RelationGetRelid(rel); + + /* + * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples + * as "dead" while DELETE_IN_PROGRESS tuples are "live". We don't + * bother distinguishing tuples inserted/deleted by our own + * transaction. + */ + switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf)) + { + case HEAPTUPLE_LIVE: + case HEAPTUPLE_DELETE_IN_PROGRESS: + stat->tuple_len += tuple.t_len; + stat->tuple_count++; + break; + case HEAPTUPLE_DEAD: + case HEAPTUPLE_RECENTLY_DEAD: + case HEAPTUPLE_INSERT_IN_PROGRESS: + stat->dead_tuple_len += tuple.t_len; + stat->dead_tuple_count++; + break; + default: + elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result"); + break; + } + } + + UnlockReleaseBuffer(buf); + } + + stat->table_len = (uint64) nblocks * BLCKSZ; + + /* + * We don't know how many tuples are in the pages we didn't scan, so + * extrapolate the live-tuple count to the whole table in the same way + * that VACUUM does. (Like VACUUM, we're not taking a random sample, so + * just extrapolating linearly seems unsafe.) There should be no dead + * tuples in all-visible pages, so no correction is needed for that, and + * we already accounted for the space in those pages, too. + */ + stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned, + stat->tuple_count); + + /* It's not clear if we could get -1 here, but be safe. */ + stat->tuple_count = Max(stat->tuple_count, 0); + + /* + * Calculate percentages if the relation has one or more pages. + */ + if (nblocks != 0) + { + stat->scanned_percent = 100 * scanned / nblocks; + stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len; + stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; + stat->free_percent = 100.0 * stat->free_space / stat->table_len; + } + + if (BufferIsValid(vmbuffer)) + { + ReleaseBuffer(vmbuffer); + vmbuffer = InvalidBuffer; + } +} + +/* + * Returns estimated live/dead tuple statistics for the given relid. + * + * The superuser() check here must be kept as the library might be upgraded + * without the extension being upgraded, meaning that in pre-1.5 installations + * these functions could be called by any user. + */ +Datum +pgstattuple_approx(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo)); +} + +/* + * As of pgstattuple version 1.5, we no longer need to check if the user + * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC. + * Users can then grant access to it based on their policies. + * + * Otherwise identical to pgstattuple_approx (above). + */ +Datum +pgstattuple_approx_v1_5(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo)); +} + +Datum +pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo) +{ + Relation rel; + output_type stat = {0}; + TupleDesc tupdesc; + bool nulls[NUM_OUTPUT_COLUMNS]; + Datum values[NUM_OUTPUT_COLUMNS]; + HeapTuple ret; + int i = 0; + + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + if (tupdesc->natts != NUM_OUTPUT_COLUMNS) + elog(ERROR, "incorrect number of output arguments"); + + rel = relation_open(relid, AccessShareLock); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + /* + * We support only relation kinds with a visibility map and a free space + * map. + */ + if (!(rel->rd_rel->relkind == RELKIND_RELATION || + rel->rd_rel->relkind == RELKIND_MATVIEW || + rel->rd_rel->relkind == RELKIND_TOASTVALUE)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" is not a table, materialized view, or TOAST table", + RelationGetRelationName(rel)))); + + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + + statapprox_heap(rel, &stat); + + relation_close(rel, AccessShareLock); + + memset(nulls, 0, sizeof(nulls)); + + values[i++] = Int64GetDatum(stat.table_len); + values[i++] = Float8GetDatum(stat.scanned_percent); + values[i++] = Int64GetDatum(stat.tuple_count); + values[i++] = Int64GetDatum(stat.tuple_len); + values[i++] = Float8GetDatum(stat.tuple_percent); + values[i++] = Int64GetDatum(stat.dead_tuple_count); + values[i++] = Int64GetDatum(stat.dead_tuple_len); + values[i++] = Float8GetDatum(stat.dead_tuple_percent); + values[i++] = Int64GetDatum(stat.free_space); + values[i++] = Float8GetDatum(stat.free_percent); + + ret = heap_form_tuple(tupdesc, values, nulls); + return HeapTupleGetDatum(ret); +} diff --git a/contrib/pgstattuple/pgstatindex.c b/contrib/pgstattuple/pgstatindex.c new file mode 100644 index 0000000..5368bb3 --- /dev/null +++ b/contrib/pgstattuple/pgstatindex.c @@ -0,0 +1,774 @@ +/* + * contrib/pgstattuple/pgstatindex.c + * + * + * pgstatindex + * + * Copyright (c) 2006 Satoshi Nagayasu <nagayasus@nttdata.co.jp> + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "access/gin_private.h" +#include "access/hash.h" +#include "access/htup_details.h" +#include "access/nbtree.h" +#include "access/relation.h" +#include "access/table.h" +#include "catalog/namespace.h" +#include "catalog/pg_am.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/rel.h" +#include "utils/varlena.h" + + +/* + * Because of backward-compatibility issue, we have decided to have + * two types of interfaces, with regclass-type input arg and text-type + * input arg, for each function. + * + * Those functions which have text-type input arg will be deprecated + * in the future release. + */ +PG_FUNCTION_INFO_V1(pgstatindex); +PG_FUNCTION_INFO_V1(pgstatindexbyid); +PG_FUNCTION_INFO_V1(pg_relpages); +PG_FUNCTION_INFO_V1(pg_relpagesbyid); +PG_FUNCTION_INFO_V1(pgstatginindex); +PG_FUNCTION_INFO_V1(pgstathashindex); + +PG_FUNCTION_INFO_V1(pgstatindex_v1_5); +PG_FUNCTION_INFO_V1(pgstatindexbyid_v1_5); +PG_FUNCTION_INFO_V1(pg_relpages_v1_5); +PG_FUNCTION_INFO_V1(pg_relpagesbyid_v1_5); +PG_FUNCTION_INFO_V1(pgstatginindex_v1_5); + +Datum pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo); + +#define IS_INDEX(r) ((r)->rd_rel->relkind == RELKIND_INDEX) +#define IS_BTREE(r) ((r)->rd_rel->relam == BTREE_AM_OID) +#define IS_GIN(r) ((r)->rd_rel->relam == GIN_AM_OID) +#define IS_HASH(r) ((r)->rd_rel->relam == HASH_AM_OID) + +/* ------------------------------------------------ + * A structure for a whole btree index statistics + * used by pgstatindex(). + * ------------------------------------------------ + */ +typedef struct BTIndexStat +{ + uint32 version; + uint32 level; + BlockNumber root_blkno; + + uint64 internal_pages; + uint64 leaf_pages; + uint64 empty_pages; + uint64 deleted_pages; + + uint64 max_avail; + uint64 free_space; + + uint64 fragments; +} BTIndexStat; + +/* ------------------------------------------------ + * A structure for a whole GIN index statistics + * used by pgstatginindex(). + * ------------------------------------------------ + */ +typedef struct GinIndexStat +{ + int32 version; + + BlockNumber pending_pages; + int64 pending_tuples; +} GinIndexStat; + +/* ------------------------------------------------ + * A structure for a whole HASH index statistics + * used by pgstathashindex(). + * ------------------------------------------------ + */ +typedef struct HashIndexStat +{ + int32 version; + int32 space_per_page; + + BlockNumber bucket_pages; + BlockNumber overflow_pages; + BlockNumber bitmap_pages; + BlockNumber unused_pages; + + int64 live_items; + int64 dead_items; + uint64 free_space; +} HashIndexStat; + +static Datum pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo); +static void GetHashPageStats(Page page, HashIndexStat *stats); +static void check_relation_relkind(Relation rel); + +/* ------------------------------------------------------ + * pgstatindex() + * + * Usage: SELECT * FROM pgstatindex('t1_pkey'); + * + * The superuser() check here must be kept as the library might be upgraded + * without the extension being upgraded, meaning that in pre-1.5 installations + * these functions could be called by any user. + * ------------------------------------------------------ + */ +Datum +pgstatindex(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + Relation rel; + RangeVar *relrv; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); +} + +/* + * As of pgstattuple version 1.5, we no longer need to check if the user + * is a superuser because we REVOKE EXECUTE on the function from PUBLIC. + * Users can then grant access to it based on their policies. + * + * Otherwise identical to pgstatindex (above). + */ +Datum +pgstatindex_v1_5(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + Relation rel; + RangeVar *relrv; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); +} + +/* + * The superuser() check here must be kept as the library might be upgraded + * without the extension being upgraded, meaning that in pre-1.5 installations + * these functions could be called by any user. + */ +Datum +pgstatindexbyid(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + rel = relation_open(relid, AccessShareLock); + + PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); +} + +/* No need for superuser checks in v1.5, see above */ +Datum +pgstatindexbyid_v1_5(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + rel = relation_open(relid, AccessShareLock); + + PG_RETURN_DATUM(pgstatindex_impl(rel, fcinfo)); +} + +static Datum +pgstatindex_impl(Relation rel, FunctionCallInfo fcinfo) +{ + Datum result; + BlockNumber nblocks; + BlockNumber blkno; + BTIndexStat indexStat; + BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD); + + if (!IS_INDEX(rel) || !IS_BTREE(rel)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a btree index", + RelationGetRelationName(rel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + /* + * Read metapage + */ + { + Buffer buffer = ReadBufferExtended(rel, MAIN_FORKNUM, 0, RBM_NORMAL, bstrategy); + Page page = BufferGetPage(buffer); + BTMetaPageData *metad = BTPageGetMeta(page); + + indexStat.version = metad->btm_version; + indexStat.level = metad->btm_level; + indexStat.root_blkno = metad->btm_root; + + ReleaseBuffer(buffer); + } + + /* -- init counters -- */ + indexStat.internal_pages = 0; + indexStat.leaf_pages = 0; + indexStat.empty_pages = 0; + indexStat.deleted_pages = 0; + + indexStat.max_avail = 0; + indexStat.free_space = 0; + + indexStat.fragments = 0; + + /* + * Scan all blocks except the metapage + */ + nblocks = RelationGetNumberOfBlocks(rel); + + for (blkno = 1; blkno < nblocks; blkno++) + { + Buffer buffer; + Page page; + BTPageOpaque opaque; + + CHECK_FOR_INTERRUPTS(); + + /* Read and lock buffer */ + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + + page = BufferGetPage(buffer); + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + + /* + * Determine page type, and update totals. + * + * Note that we arbitrarily bucket deleted pages together without + * considering if they're leaf pages or internal pages. + */ + if (P_ISDELETED(opaque)) + indexStat.deleted_pages++; + else if (P_IGNORE(opaque)) + indexStat.empty_pages++; /* this is the "half dead" state */ + else if (P_ISLEAF(opaque)) + { + int max_avail; + + max_avail = BLCKSZ - (BLCKSZ - ((PageHeader) page)->pd_special + SizeOfPageHeaderData); + indexStat.max_avail += max_avail; + indexStat.free_space += PageGetFreeSpace(page); + + indexStat.leaf_pages++; + + /* + * If the next leaf is on an earlier block, it means a + * fragmentation. + */ + if (opaque->btpo_next != P_NONE && opaque->btpo_next < blkno) + indexStat.fragments++; + } + else + indexStat.internal_pages++; + + /* Unlock and release buffer */ + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + ReleaseBuffer(buffer); + } + + relation_close(rel, AccessShareLock); + + /*---------------------------- + * Build a result tuple + *---------------------------- + */ + { + TupleDesc tupleDesc; + int j; + char *values[10]; + HeapTuple tuple; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + j = 0; + values[j++] = psprintf("%d", indexStat.version); + values[j++] = psprintf("%d", indexStat.level); + values[j++] = psprintf(INT64_FORMAT, + (1 + /* include the metapage in index_size */ + indexStat.leaf_pages + + indexStat.internal_pages + + indexStat.deleted_pages + + indexStat.empty_pages) * BLCKSZ); + values[j++] = psprintf("%u", indexStat.root_blkno); + values[j++] = psprintf(INT64_FORMAT, indexStat.internal_pages); + values[j++] = psprintf(INT64_FORMAT, indexStat.leaf_pages); + values[j++] = psprintf(INT64_FORMAT, indexStat.empty_pages); + values[j++] = psprintf(INT64_FORMAT, indexStat.deleted_pages); + if (indexStat.max_avail > 0) + values[j++] = psprintf("%.2f", + 100.0 - (double) indexStat.free_space / (double) indexStat.max_avail * 100.0); + else + values[j++] = pstrdup("NaN"); + if (indexStat.leaf_pages > 0) + values[j++] = psprintf("%.2f", + (double) indexStat.fragments / (double) indexStat.leaf_pages * 100.0); + else + values[j++] = pstrdup("NaN"); + + tuple = BuildTupleFromCStrings(TupleDescGetAttInMetadata(tupleDesc), + values); + + result = HeapTupleGetDatum(tuple); + } + + return result; +} + +/* -------------------------------------------------------- + * pg_relpages() + * + * Get the number of pages of the table/index. + * + * Usage: SELECT pg_relpages('t1'); + * SELECT pg_relpages('t1_pkey'); + * + * Must keep superuser() check, see above. + * -------------------------------------------------------- + */ +Datum +pg_relpages(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + int64 relpages; + Relation rel; + RangeVar *relrv; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* only some relkinds have storage */ + check_relation_relkind(rel); + + /* note: this will work OK on non-local temp tables */ + + relpages = RelationGetNumberOfBlocks(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(relpages); +} + +/* No need for superuser checks in v1.5, see above */ +Datum +pg_relpages_v1_5(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + int64 relpages; + Relation rel; + RangeVar *relrv; + + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + /* only some relkinds have storage */ + check_relation_relkind(rel); + + /* note: this will work OK on non-local temp tables */ + + relpages = RelationGetNumberOfBlocks(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(relpages); +} + +/* Must keep superuser() check, see above. */ +Datum +pg_relpagesbyid(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 relpages; + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + rel = relation_open(relid, AccessShareLock); + + /* only some relkinds have storage */ + check_relation_relkind(rel); + + /* note: this will work OK on non-local temp tables */ + + relpages = RelationGetNumberOfBlocks(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(relpages); +} + +/* No need for superuser checks in v1.5, see above */ +Datum +pg_relpagesbyid_v1_5(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + int64 relpages; + Relation rel; + + rel = relation_open(relid, AccessShareLock); + + /* only some relkinds have storage */ + check_relation_relkind(rel); + + /* note: this will work OK on non-local temp tables */ + + relpages = RelationGetNumberOfBlocks(rel); + + relation_close(rel, AccessShareLock); + + PG_RETURN_INT64(relpages); +} + +/* ------------------------------------------------------ + * pgstatginindex() + * + * Usage: SELECT * FROM pgstatginindex('ginindex'); + * + * Must keep superuser() check, see above. + * ------------------------------------------------------ + */ +Datum +pgstatginindex(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + PG_RETURN_DATUM(pgstatginindex_internal(relid, fcinfo)); +} + +/* No need for superuser checks in v1.5, see above */ +Datum +pgstatginindex_v1_5(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + + PG_RETURN_DATUM(pgstatginindex_internal(relid, fcinfo)); +} + +Datum +pgstatginindex_internal(Oid relid, FunctionCallInfo fcinfo) +{ + Relation rel; + Buffer buffer; + Page page; + GinMetaPageData *metadata; + GinIndexStat stats; + HeapTuple tuple; + TupleDesc tupleDesc; + Datum values[3]; + bool nulls[3] = {false, false, false}; + Datum result; + + rel = relation_open(relid, AccessShareLock); + + if (!IS_INDEX(rel) || !IS_GIN(rel)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a GIN index", + RelationGetRelationName(rel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary indexes of other sessions"))); + + /* + * Read metapage + */ + buffer = ReadBuffer(rel, GIN_METAPAGE_BLKNO); + LockBuffer(buffer, GIN_SHARE); + page = BufferGetPage(buffer); + metadata = GinPageGetMeta(page); + + stats.version = metadata->ginVersion; + stats.pending_pages = metadata->nPendingPages; + stats.pending_tuples = metadata->nPendingHeapTuples; + + UnlockReleaseBuffer(buffer); + relation_close(rel, AccessShareLock); + + /* + * Build a tuple descriptor for our result type + */ + if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + values[0] = Int32GetDatum(stats.version); + values[1] = UInt32GetDatum(stats.pending_pages); + values[2] = Int64GetDatum(stats.pending_tuples); + + /* + * Build and return the tuple + */ + tuple = heap_form_tuple(tupleDesc, values, nulls); + result = HeapTupleGetDatum(tuple); + + return result; +} + +/* ------------------------------------------------------ + * pgstathashindex() + * + * Usage: SELECT * FROM pgstathashindex('hashindex'); + * ------------------------------------------------------ + */ +Datum +pgstathashindex(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + BlockNumber nblocks; + BlockNumber blkno; + Relation rel; + HashIndexStat stats; + BufferAccessStrategy bstrategy; + HeapTuple tuple; + TupleDesc tupleDesc; + Datum values[8]; + bool nulls[8]; + Buffer metabuf; + HashMetaPage metap; + float8 free_percent; + uint64 total_space; + + rel = index_open(relid, AccessShareLock); + + /* index_open() checks that it's an index */ + if (!IS_HASH(rel)) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("relation \"%s\" is not a hash index", + RelationGetRelationName(rel)))); + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary indexes of other sessions"))); + + /* Get the information we need from the metapage. */ + memset(&stats, 0, sizeof(stats)); + metabuf = _hash_getbuf(rel, HASH_METAPAGE, HASH_READ, LH_META_PAGE); + metap = HashPageGetMeta(BufferGetPage(metabuf)); + stats.version = metap->hashm_version; + stats.space_per_page = metap->hashm_bsize; + _hash_relbuf(rel, metabuf); + + /* Get the current relation length */ + nblocks = RelationGetNumberOfBlocks(rel); + + /* prepare access strategy for this index */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + /* Start from blkno 1 as 0th block is metapage */ + for (blkno = 1; blkno < nblocks; blkno++) + { + Buffer buf; + Page page; + + CHECK_FOR_INTERRUPTS(); + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, + bstrategy); + LockBuffer(buf, BUFFER_LOCK_SHARE); + page = (Page) BufferGetPage(buf); + + if (PageIsNew(page)) + stats.unused_pages++; + else if (PageGetSpecialSize(page) != + MAXALIGN(sizeof(HashPageOpaqueData))) + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("index \"%s\" contains corrupted page at block %u", + RelationGetRelationName(rel), + BufferGetBlockNumber(buf)))); + else + { + HashPageOpaque opaque; + int pagetype; + + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + pagetype = opaque->hasho_flag & LH_PAGE_TYPE; + + if (pagetype == LH_BUCKET_PAGE) + { + stats.bucket_pages++; + GetHashPageStats(page, &stats); + } + else if (pagetype == LH_OVERFLOW_PAGE) + { + stats.overflow_pages++; + GetHashPageStats(page, &stats); + } + else if (pagetype == LH_BITMAP_PAGE) + stats.bitmap_pages++; + else if (pagetype == LH_UNUSED_PAGE) + stats.unused_pages++; + else + ereport(ERROR, + (errcode(ERRCODE_INDEX_CORRUPTED), + errmsg("unexpected page type 0x%04X in HASH index \"%s\" block %u", + opaque->hasho_flag, RelationGetRelationName(rel), + BufferGetBlockNumber(buf)))); + } + UnlockReleaseBuffer(buf); + } + + /* Done accessing the index */ + index_close(rel, AccessShareLock); + + /* Count unused pages as free space. */ + stats.free_space += (uint64) stats.unused_pages * stats.space_per_page; + + /* + * Total space available for tuples excludes the metapage and the bitmap + * pages. + */ + total_space = (uint64) (nblocks - (stats.bitmap_pages + 1)) * + stats.space_per_page; + + if (total_space == 0) + free_percent = 0.0; + else + free_percent = 100.0 * stats.free_space / total_space; + + /* + * Build a tuple descriptor for our result type + */ + if (get_call_result_type(fcinfo, NULL, &tupleDesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + tupleDesc = BlessTupleDesc(tupleDesc); + + /* + * Build and return the tuple + */ + MemSet(nulls, 0, sizeof(nulls)); + values[0] = Int32GetDatum(stats.version); + values[1] = Int64GetDatum((int64) stats.bucket_pages); + values[2] = Int64GetDatum((int64) stats.overflow_pages); + values[3] = Int64GetDatum((int64) stats.bitmap_pages); + values[4] = Int64GetDatum((int64) stats.unused_pages); + values[5] = Int64GetDatum(stats.live_items); + values[6] = Int64GetDatum(stats.dead_items); + values[7] = Float8GetDatum(free_percent); + tuple = heap_form_tuple(tupleDesc, values, nulls); + + PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); +} + +/* ------------------------------------------------- + * GetHashPageStats() + * + * Collect statistics of single hash page + * ------------------------------------------------- + */ +static void +GetHashPageStats(Page page, HashIndexStat *stats) +{ + OffsetNumber maxoff = PageGetMaxOffsetNumber(page); + int off; + + /* count live and dead tuples, and free space */ + for (off = FirstOffsetNumber; off <= maxoff; off++) + { + ItemId id = PageGetItemId(page, off); + + if (!ItemIdIsDead(id)) + stats->live_items++; + else + stats->dead_items++; + } + stats->free_space += PageGetExactFreeSpace(page); +} + +/* + * check_relation_relkind - convenience routine to check that relation + * is of the relkind supported by the callers + */ +static void +check_relation_relkind(Relation rel) +{ + if (rel->rd_rel->relkind != RELKIND_RELATION && + rel->rd_rel->relkind != RELKIND_INDEX && + rel->rd_rel->relkind != RELKIND_MATVIEW && + rel->rd_rel->relkind != RELKIND_SEQUENCE && + rel->rd_rel->relkind != RELKIND_TOASTVALUE) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not a table, index, materialized view, sequence, or TOAST table", + RelationGetRelationName(rel)))); +} diff --git a/contrib/pgstattuple/pgstattuple--1.0--1.1.sql b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql new file mode 100644 index 0000000..cf582a0 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.0--1.1.sql @@ -0,0 +1,11 @@ +/* contrib/pgstattuple/pgstattuple--1.0--1.1.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.1'" to load this file. \quit + +CREATE FUNCTION pgstatginindex(IN relname regclass, + OUT version INT4, + OUT pending_pages INT4, + OUT pending_tuples BIGINT) +AS 'MODULE_PATHNAME', 'pgstatginindex' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.1--1.2.sql b/contrib/pgstattuple/pgstattuple--1.1--1.2.sql new file mode 100644 index 0000000..2783a63 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.1--1.2.sql @@ -0,0 +1,39 @@ +/* contrib/pgstattuple/pgstattuple--1.1--1.2.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.2'" to load this file. \quit + +ALTER EXTENSION pgstattuple DROP FUNCTION pgstattuple(oid); +DROP FUNCTION pgstattuple(oid); + +CREATE FUNCTION pgstattuple(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstattuplebyid' +LANGUAGE C STRICT; + +CREATE FUNCTION pgstatindex(IN relname regclass, + OUT version INT, + OUT tree_level INT, + OUT index_size BIGINT, + OUT root_block_no BIGINT, + OUT internal_pages BIGINT, + OUT leaf_pages BIGINT, + OUT empty_pages BIGINT, + OUT deleted_pages BIGINT, + OUT avg_leaf_density FLOAT8, + OUT leaf_fragmentation FLOAT8) +AS 'MODULE_PATHNAME', 'pgstatindexbyid' +LANGUAGE C STRICT; + +CREATE FUNCTION pg_relpages(IN relname regclass) +RETURNS BIGINT +AS 'MODULE_PATHNAME', 'pg_relpagesbyid' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.2--1.3.sql b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql new file mode 100644 index 0000000..99301a2 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.2--1.3.sql @@ -0,0 +1,18 @@ +/* contrib/pgstattuple/pgstattuple--1.2--1.3.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.3'" to load this file. \quit + +CREATE FUNCTION pgstattuple_approx(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT approx_free_space BIGINT, -- estimated free space in bytes + OUT approx_free_percent FLOAT8) -- free space in % (based on estimate) +AS 'MODULE_PATHNAME', 'pgstattuple_approx' +LANGUAGE C STRICT; diff --git a/contrib/pgstattuple/pgstattuple--1.3--1.4.sql b/contrib/pgstattuple/pgstattuple--1.3--1.4.sql new file mode 100644 index 0000000..9130650 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.3--1.4.sql @@ -0,0 +1,13 @@ +/* contrib/pgstattuple/pgstattuple--1.3--1.4.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.4'" to load this file. \quit + +ALTER FUNCTION pgstattuple(text) PARALLEL SAFE; +ALTER FUNCTION pgstatindex(text) PARALLEL SAFE; +ALTER FUNCTION pg_relpages(text) PARALLEL SAFE; +ALTER FUNCTION pgstatginindex(regclass) PARALLEL SAFE; +ALTER FUNCTION pgstattuple(regclass) PARALLEL SAFE; +ALTER FUNCTION pgstatindex(regclass) PARALLEL SAFE; +ALTER FUNCTION pg_relpages(regclass) PARALLEL SAFE; +ALTER FUNCTION pgstattuple_approx(regclass) PARALLEL SAFE; diff --git a/contrib/pgstattuple/pgstattuple--1.4--1.5.sql b/contrib/pgstattuple/pgstattuple--1.4--1.5.sql new file mode 100644 index 0000000..5d03544 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.4--1.5.sql @@ -0,0 +1,136 @@ +/* contrib/pgstattuple/pgstattuple--1.4--1.5.sql */ + +-- complain if script is sourced in psql, rather than via ALTER EXTENSION +\echo Use "ALTER EXTENSION pgstattuple UPDATE TO '1.5'" to load this file. \quit + +CREATE OR REPLACE FUNCTION pgstattuple(IN relname text, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstattuple_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstattuple(text) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstattuple(text) TO pg_stat_scan_tables; + +CREATE OR REPLACE FUNCTION pgstatindex(IN relname text, + OUT version INT, + OUT tree_level INT, + OUT index_size BIGINT, + OUT root_block_no BIGINT, + OUT internal_pages BIGINT, + OUT leaf_pages BIGINT, + OUT empty_pages BIGINT, + OUT deleted_pages BIGINT, + OUT avg_leaf_density FLOAT8, + OUT leaf_fragmentation FLOAT8) +AS 'MODULE_PATHNAME', 'pgstatindex_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstatindex(text) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstatindex(text) TO pg_stat_scan_tables; + +CREATE OR REPLACE FUNCTION pg_relpages(IN relname text) +RETURNS BIGINT +AS 'MODULE_PATHNAME', 'pg_relpages_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_relpages(text) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_relpages(text) TO pg_stat_scan_tables; + +/* New stuff in 1.1 begins here */ + +CREATE OR REPLACE FUNCTION pgstatginindex(IN relname regclass, + OUT version INT4, + OUT pending_pages INT4, + OUT pending_tuples BIGINT) +AS 'MODULE_PATHNAME', 'pgstatginindex_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstatginindex(regclass) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstatginindex(regclass) TO pg_stat_scan_tables; + +/* New stuff in 1.2 begins here */ + +CREATE OR REPLACE FUNCTION pgstattuple(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstattuplebyid_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstattuple(regclass) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstattuple(regclass) TO pg_stat_scan_tables; + +CREATE OR REPLACE FUNCTION pgstatindex(IN relname regclass, + OUT version INT, + OUT tree_level INT, + OUT index_size BIGINT, + OUT root_block_no BIGINT, + OUT internal_pages BIGINT, + OUT leaf_pages BIGINT, + OUT empty_pages BIGINT, + OUT deleted_pages BIGINT, + OUT avg_leaf_density FLOAT8, + OUT leaf_fragmentation FLOAT8) +AS 'MODULE_PATHNAME', 'pgstatindexbyid_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstatindex(regclass) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstatindex(regclass) TO pg_stat_scan_tables; + +CREATE OR REPLACE FUNCTION pg_relpages(IN relname regclass) +RETURNS BIGINT +AS 'MODULE_PATHNAME', 'pg_relpagesbyid_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pg_relpages(regclass) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pg_relpages(regclass) TO pg_stat_scan_tables; + +/* New stuff in 1.3 begins here */ + +CREATE OR REPLACE FUNCTION pgstattuple_approx(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT approx_free_space BIGINT, -- estimated free space in bytes + OUT approx_free_percent FLOAT8) -- free space in % (based on estimate) +AS 'MODULE_PATHNAME', 'pgstattuple_approx_v1_5' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstattuple_approx(regclass) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstattuple_approx(regclass) TO pg_stat_scan_tables; + +/* New stuff in 1.5 begins here */ + +CREATE OR REPLACE FUNCTION pgstathashindex(IN relname regclass, + OUT version INTEGER, + OUT bucket_pages BIGINT, + OUT overflow_pages BIGINT, + OUT bitmap_pages BIGINT, + OUT unused_pages BIGINT, + OUT live_items BIGINT, + OUT dead_items BIGINT, + OUT free_percent FLOAT8) +AS 'MODULE_PATHNAME', 'pgstathashindex' +LANGUAGE C STRICT PARALLEL SAFE; + +REVOKE EXECUTE ON FUNCTION pgstathashindex(regclass) FROM PUBLIC; +GRANT EXECUTE ON FUNCTION pgstathashindex(regclass) TO pg_stat_scan_tables; diff --git a/contrib/pgstattuple/pgstattuple--1.4.sql b/contrib/pgstattuple/pgstattuple--1.4.sql new file mode 100644 index 0000000..47377eb --- /dev/null +++ b/contrib/pgstattuple/pgstattuple--1.4.sql @@ -0,0 +1,95 @@ +/* contrib/pgstattuple/pgstattuple--1.4.sql */ + +-- complain if script is sourced in psql, rather than via CREATE EXTENSION +\echo Use "CREATE EXTENSION pgstattuple" to load this file. \quit + +CREATE FUNCTION pgstattuple(IN relname text, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstattuple' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE FUNCTION pgstatindex(IN relname text, + OUT version INT, + OUT tree_level INT, + OUT index_size BIGINT, + OUT root_block_no BIGINT, + OUT internal_pages BIGINT, + OUT leaf_pages BIGINT, + OUT empty_pages BIGINT, + OUT deleted_pages BIGINT, + OUT avg_leaf_density FLOAT8, + OUT leaf_fragmentation FLOAT8) +AS 'MODULE_PATHNAME', 'pgstatindex' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE FUNCTION pg_relpages(IN relname text) +RETURNS BIGINT +AS 'MODULE_PATHNAME', 'pg_relpages' +LANGUAGE C STRICT PARALLEL SAFE; + +/* New stuff in 1.1 begins here */ + +CREATE FUNCTION pgstatginindex(IN relname regclass, + OUT version INT4, + OUT pending_pages INT4, + OUT pending_tuples BIGINT) +AS 'MODULE_PATHNAME', 'pgstatginindex' +LANGUAGE C STRICT PARALLEL SAFE; + +/* New stuff in 1.2 begins here */ + +CREATE FUNCTION pgstattuple(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT tuple_count BIGINT, -- number of live tuples + OUT tuple_len BIGINT, -- total tuples length in bytes + OUT tuple_percent FLOAT8, -- live tuples in % + OUT dead_tuple_count BIGINT, -- number of dead tuples + OUT dead_tuple_len BIGINT, -- total dead tuples length in bytes + OUT dead_tuple_percent FLOAT8, -- dead tuples in % + OUT free_space BIGINT, -- free space in bytes + OUT free_percent FLOAT8) -- free space in % +AS 'MODULE_PATHNAME', 'pgstattuplebyid' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE FUNCTION pgstatindex(IN relname regclass, + OUT version INT, + OUT tree_level INT, + OUT index_size BIGINT, + OUT root_block_no BIGINT, + OUT internal_pages BIGINT, + OUT leaf_pages BIGINT, + OUT empty_pages BIGINT, + OUT deleted_pages BIGINT, + OUT avg_leaf_density FLOAT8, + OUT leaf_fragmentation FLOAT8) +AS 'MODULE_PATHNAME', 'pgstatindexbyid' +LANGUAGE C STRICT PARALLEL SAFE; + +CREATE FUNCTION pg_relpages(IN relname regclass) +RETURNS BIGINT +AS 'MODULE_PATHNAME', 'pg_relpagesbyid' +LANGUAGE C STRICT PARALLEL SAFE; + +/* New stuff in 1.3 begins here */ + +CREATE FUNCTION pgstattuple_approx(IN reloid regclass, + OUT table_len BIGINT, -- physical table length in bytes + OUT scanned_percent FLOAT8, -- what percentage of the table's pages was scanned + OUT approx_tuple_count BIGINT, -- estimated number of live tuples + OUT approx_tuple_len BIGINT, -- estimated total length in bytes of live tuples + OUT approx_tuple_percent FLOAT8, -- live tuples in % (based on estimate) + OUT dead_tuple_count BIGINT, -- exact number of dead tuples + OUT dead_tuple_len BIGINT, -- exact total length in bytes of dead tuples + OUT dead_tuple_percent FLOAT8, -- dead tuples in % (based on estimate) + OUT approx_free_space BIGINT, -- estimated free space in bytes + OUT approx_free_percent FLOAT8) -- free space in % (based on estimate) +AS 'MODULE_PATHNAME', 'pgstattuple_approx' +LANGUAGE C STRICT PARALLEL SAFE; diff --git a/contrib/pgstattuple/pgstattuple.c b/contrib/pgstattuple/pgstattuple.c new file mode 100644 index 0000000..21fdeff --- /dev/null +++ b/contrib/pgstattuple/pgstattuple.c @@ -0,0 +1,590 @@ +/* + * contrib/pgstattuple/pgstattuple.c + * + * Copyright (c) 2001,2002 Tatsuo Ishii + * + * Permission to use, copy, modify, and distribute this software and + * its documentation for any purpose, without fee, and without a + * written agreement is hereby granted, provided that the above + * copyright notice and this paragraph and the following two + * paragraphs appear in all copies. + * + * IN NO EVENT SHALL THE AUTHOR BE LIABLE TO ANY PARTY FOR DIRECT, + * INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING + * LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS + * DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. + * + * THE AUTHOR SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS + * IS" BASIS, AND THE AUTHOR HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, + * SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. + */ + +#include "postgres.h" + +#include "access/gist_private.h" +#include "access/hash.h" +#include "access/heapam.h" +#include "access/nbtree.h" +#include "access/relscan.h" +#include "access/tableam.h" +#include "catalog/namespace.h" +#include "catalog/pg_am_d.h" +#include "funcapi.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/varlena.h" + +PG_MODULE_MAGIC; + +PG_FUNCTION_INFO_V1(pgstattuple); +PG_FUNCTION_INFO_V1(pgstattuple_v1_5); +PG_FUNCTION_INFO_V1(pgstattuplebyid); +PG_FUNCTION_INFO_V1(pgstattuplebyid_v1_5); + +/* + * struct pgstattuple_type + * + * tuple_percent, dead_tuple_percent and free_percent are computable, + * so not defined here. + */ +typedef struct pgstattuple_type +{ + uint64 table_len; + uint64 tuple_count; + uint64 tuple_len; + uint64 dead_tuple_count; + uint64 dead_tuple_len; + uint64 free_space; /* free/reusable space in bytes */ +} pgstattuple_type; + +typedef void (*pgstat_page) (pgstattuple_type *, Relation, BlockNumber, + BufferAccessStrategy); + +static Datum build_pgstattuple_type(pgstattuple_type *stat, + FunctionCallInfo fcinfo); +static Datum pgstat_relation(Relation rel, FunctionCallInfo fcinfo); +static Datum pgstat_heap(Relation rel, FunctionCallInfo fcinfo); +static void pgstat_btree_page(pgstattuple_type *stat, + Relation rel, BlockNumber blkno, + BufferAccessStrategy bstrategy); +static void pgstat_hash_page(pgstattuple_type *stat, + Relation rel, BlockNumber blkno, + BufferAccessStrategy bstrategy); +static void pgstat_gist_page(pgstattuple_type *stat, + Relation rel, BlockNumber blkno, + BufferAccessStrategy bstrategy); +static Datum pgstat_index(Relation rel, BlockNumber start, + pgstat_page pagefn, FunctionCallInfo fcinfo); +static void pgstat_index_page(pgstattuple_type *stat, Page page, + OffsetNumber minoff, OffsetNumber maxoff); + +/* + * build_pgstattuple_type -- build a pgstattuple_type tuple + */ +static Datum +build_pgstattuple_type(pgstattuple_type *stat, FunctionCallInfo fcinfo) +{ +#define NCOLUMNS 9 +#define NCHARS 314 + + HeapTuple tuple; + char *values[NCOLUMNS]; + char values_buf[NCOLUMNS][NCHARS]; + int i; + double tuple_percent; + double dead_tuple_percent; + double free_percent; /* free/reusable space in % */ + TupleDesc tupdesc; + AttInMetadata *attinmeta; + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* + * Generate attribute metadata needed later to produce tuples from raw C + * strings + */ + attinmeta = TupleDescGetAttInMetadata(tupdesc); + + if (stat->table_len == 0) + { + tuple_percent = 0.0; + dead_tuple_percent = 0.0; + free_percent = 0.0; + } + else + { + tuple_percent = 100.0 * stat->tuple_len / stat->table_len; + dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len; + free_percent = 100.0 * stat->free_space / stat->table_len; + } + + /* + * Prepare a values array for constructing the tuple. This should be an + * array of C strings which will be processed later by the appropriate + * "in" functions. + */ + for (i = 0; i < NCOLUMNS; i++) + values[i] = values_buf[i]; + i = 0; + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->table_len); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->tuple_len); + snprintf(values[i++], NCHARS, "%.2f", tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_count); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->dead_tuple_len); + snprintf(values[i++], NCHARS, "%.2f", dead_tuple_percent); + snprintf(values[i++], NCHARS, INT64_FORMAT, stat->free_space); + snprintf(values[i++], NCHARS, "%.2f", free_percent); + + /* build a tuple */ + tuple = BuildTupleFromCStrings(attinmeta, values); + + /* make the tuple into a datum */ + return HeapTupleGetDatum(tuple); +} + +/* ---------- + * pgstattuple: + * returns live/dead tuples info + * + * C FUNCTION definition + * pgstattuple(text) returns pgstattuple_type + * + * The superuser() check here must be kept as the library might be upgraded + * without the extension being upgraded, meaning that in pre-1.5 installations + * these functions could be called by any user. + * ---------- + */ + +Datum +pgstattuple(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + RangeVar *relrv; + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + /* open relation */ + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + PG_RETURN_DATUM(pgstat_relation(rel, fcinfo)); +} + +/* + * As of pgstattuple version 1.5, we no longer need to check if the user + * is a superuser because we REVOKE EXECUTE on the function from PUBLIC. + * Users can then grant access to it based on their policies. + * + * Otherwise identical to pgstattuple (above). + */ +Datum +pgstattuple_v1_5(PG_FUNCTION_ARGS) +{ + text *relname = PG_GETARG_TEXT_PP(0); + RangeVar *relrv; + Relation rel; + + /* open relation */ + relrv = makeRangeVarFromNameList(textToQualifiedNameList(relname)); + rel = relation_openrv(relrv, AccessShareLock); + + PG_RETURN_DATUM(pgstat_relation(rel, fcinfo)); +} + +/* Must keep superuser() check, see above. */ +Datum +pgstattuplebyid(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use pgstattuple functions"))); + + /* open relation */ + rel = relation_open(relid, AccessShareLock); + + PG_RETURN_DATUM(pgstat_relation(rel, fcinfo)); +} + +/* Remove superuser() check for 1.5 version, see above */ +Datum +pgstattuplebyid_v1_5(PG_FUNCTION_ARGS) +{ + Oid relid = PG_GETARG_OID(0); + Relation rel; + + /* open relation */ + rel = relation_open(relid, AccessShareLock); + + PG_RETURN_DATUM(pgstat_relation(rel, fcinfo)); +} + +/* + * pgstat_relation + */ +static Datum +pgstat_relation(Relation rel, FunctionCallInfo fcinfo) +{ + const char *err; + + /* + * Reject attempts to read non-local temporary relations; we would be + * likely to get wrong data since we have no visibility into the owning + * session's local buffers. + */ + if (RELATION_IS_OTHER_TEMP(rel)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("cannot access temporary tables of other sessions"))); + + switch (rel->rd_rel->relkind) + { + case RELKIND_RELATION: + case RELKIND_MATVIEW: + case RELKIND_TOASTVALUE: + case RELKIND_SEQUENCE: + return pgstat_heap(rel, fcinfo); + case RELKIND_INDEX: + switch (rel->rd_rel->relam) + { + case BTREE_AM_OID: + return pgstat_index(rel, BTREE_METAPAGE + 1, + pgstat_btree_page, fcinfo); + case HASH_AM_OID: + return pgstat_index(rel, HASH_METAPAGE + 1, + pgstat_hash_page, fcinfo); + case GIST_AM_OID: + return pgstat_index(rel, GIST_ROOT_BLKNO + 1, + pgstat_gist_page, fcinfo); + case GIN_AM_OID: + err = "gin index"; + break; + case SPGIST_AM_OID: + err = "spgist index"; + break; + case BRIN_AM_OID: + err = "brin index"; + break; + default: + err = "unknown index"; + break; + } + break; + case RELKIND_VIEW: + err = "view"; + break; + case RELKIND_COMPOSITE_TYPE: + err = "composite type"; + break; + case RELKIND_FOREIGN_TABLE: + err = "foreign table"; + break; + case RELKIND_PARTITIONED_TABLE: + err = "partitioned table"; + break; + case RELKIND_PARTITIONED_INDEX: + err = "partitioned index"; + break; + default: + err = "unknown"; + break; + } + + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("\"%s\" (%s) is not supported", + RelationGetRelationName(rel), err))); + return 0; /* should not happen */ +} + +/* + * pgstat_heap -- returns live/dead tuples info in a heap + */ +static Datum +pgstat_heap(Relation rel, FunctionCallInfo fcinfo) +{ + TableScanDesc scan; + HeapScanDesc hscan; + HeapTuple tuple; + BlockNumber nblocks; + BlockNumber block = 0; /* next block to count free space in */ + BlockNumber tupblock; + Buffer buffer; + pgstattuple_type stat = {0}; + SnapshotData SnapshotDirty; + + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + + /* Disable syncscan because we assume we scan from block zero upwards */ + scan = table_beginscan_strat(rel, SnapshotAny, 0, NULL, true, false); + hscan = (HeapScanDesc) scan; + + InitDirtySnapshot(SnapshotDirty); + + nblocks = hscan->rs_nblocks; /* # blocks to be scanned */ + + /* scan the relation */ + while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL) + { + CHECK_FOR_INTERRUPTS(); + + /* must hold a buffer lock to call HeapTupleSatisfiesVisibility */ + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_SHARE); + + if (HeapTupleSatisfiesVisibility(tuple, &SnapshotDirty, hscan->rs_cbuf)) + { + stat.tuple_len += tuple->t_len; + stat.tuple_count++; + } + else + { + stat.dead_tuple_len += tuple->t_len; + stat.dead_tuple_count++; + } + + LockBuffer(hscan->rs_cbuf, BUFFER_LOCK_UNLOCK); + + /* + * To avoid physically reading the table twice, try to do the + * free-space scan in parallel with the heap scan. However, + * heap_getnext may find no tuples on a given page, so we cannot + * simply examine the pages returned by the heap scan. + */ + tupblock = ItemPointerGetBlockNumber(&tuple->t_self); + + while (block <= tupblock) + { + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, + RBM_NORMAL, hscan->rs_strategy); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); + UnlockReleaseBuffer(buffer); + block++; + } + } + + while (block < nblocks) + { + CHECK_FOR_INTERRUPTS(); + + buffer = ReadBufferExtended(rel, MAIN_FORKNUM, block, + RBM_NORMAL, hscan->rs_strategy); + LockBuffer(buffer, BUFFER_LOCK_SHARE); + stat.free_space += PageGetHeapFreeSpace((Page) BufferGetPage(buffer)); + UnlockReleaseBuffer(buffer); + block++; + } + + table_endscan(scan); + relation_close(rel, AccessShareLock); + + stat.table_len = (uint64) nblocks * BLCKSZ; + + return build_pgstattuple_type(&stat, fcinfo); +} + +/* + * pgstat_btree_page -- check tuples in a btree page + */ +static void +pgstat_btree_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, + BufferAccessStrategy bstrategy) +{ + Buffer buf; + Page page; + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); + LockBuffer(buf, BT_READ); + page = BufferGetPage(buf); + + /* Page is valid, see what to do with it */ + if (PageIsNew(page)) + { + /* fully empty page */ + stat->free_space += BLCKSZ; + } + else + { + BTPageOpaque opaque; + + opaque = (BTPageOpaque) PageGetSpecialPointer(page); + if (P_IGNORE(opaque)) + { + /* deleted or half-dead page */ + stat->free_space += BLCKSZ; + } + else if (P_ISLEAF(opaque)) + { + pgstat_index_page(stat, page, P_FIRSTDATAKEY(opaque), + PageGetMaxOffsetNumber(page)); + } + else + { + /* internal page */ + } + } + + _bt_relbuf(rel, buf); +} + +/* + * pgstat_hash_page -- check tuples in a hash page + */ +static void +pgstat_hash_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, + BufferAccessStrategy bstrategy) +{ + Buffer buf; + Page page; + + buf = _hash_getbuf_with_strategy(rel, blkno, HASH_READ, 0, bstrategy); + page = BufferGetPage(buf); + + if (PageGetSpecialSize(page) == MAXALIGN(sizeof(HashPageOpaqueData))) + { + HashPageOpaque opaque; + + opaque = (HashPageOpaque) PageGetSpecialPointer(page); + switch (opaque->hasho_flag & LH_PAGE_TYPE) + { + case LH_UNUSED_PAGE: + stat->free_space += BLCKSZ; + break; + case LH_BUCKET_PAGE: + case LH_OVERFLOW_PAGE: + pgstat_index_page(stat, page, FirstOffsetNumber, + PageGetMaxOffsetNumber(page)); + break; + case LH_BITMAP_PAGE: + case LH_META_PAGE: + default: + break; + } + } + else + { + /* maybe corrupted */ + } + + _hash_relbuf(rel, buf); +} + +/* + * pgstat_gist_page -- check tuples in a gist page + */ +static void +pgstat_gist_page(pgstattuple_type *stat, Relation rel, BlockNumber blkno, + BufferAccessStrategy bstrategy) +{ + Buffer buf; + Page page; + + buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL, bstrategy); + LockBuffer(buf, GIST_SHARE); + gistcheckpage(rel, buf); + page = BufferGetPage(buf); + + if (GistPageIsLeaf(page)) + { + pgstat_index_page(stat, page, FirstOffsetNumber, + PageGetMaxOffsetNumber(page)); + } + else + { + /* root or node */ + } + + UnlockReleaseBuffer(buf); +} + +/* + * pgstat_index -- returns live/dead tuples info in a generic index + */ +static Datum +pgstat_index(Relation rel, BlockNumber start, pgstat_page pagefn, + FunctionCallInfo fcinfo) +{ + BlockNumber nblocks; + BlockNumber blkno; + BufferAccessStrategy bstrategy; + pgstattuple_type stat = {0}; + + /* prepare access strategy for this index */ + bstrategy = GetAccessStrategy(BAS_BULKREAD); + + blkno = start; + for (;;) + { + /* Get the current relation length */ + LockRelationForExtension(rel, ExclusiveLock); + nblocks = RelationGetNumberOfBlocks(rel); + UnlockRelationForExtension(rel, ExclusiveLock); + + /* Quit if we've scanned the whole relation */ + if (blkno >= nblocks) + { + stat.table_len = (uint64) nblocks * BLCKSZ; + + break; + } + + for (; blkno < nblocks; blkno++) + { + CHECK_FOR_INTERRUPTS(); + + pagefn(&stat, rel, blkno, bstrategy); + } + } + + relation_close(rel, AccessShareLock); + + return build_pgstattuple_type(&stat, fcinfo); +} + +/* + * pgstat_index_page -- for generic index page + */ +static void +pgstat_index_page(pgstattuple_type *stat, Page page, + OffsetNumber minoff, OffsetNumber maxoff) +{ + OffsetNumber i; + + stat->free_space += PageGetFreeSpace(page); + + for (i = minoff; i <= maxoff; i = OffsetNumberNext(i)) + { + ItemId itemid = PageGetItemId(page, i); + + if (ItemIdIsDead(itemid)) + { + stat->dead_tuple_count++; + stat->dead_tuple_len += ItemIdGetLength(itemid); + } + else + { + stat->tuple_count++; + stat->tuple_len += ItemIdGetLength(itemid); + } + } +} diff --git a/contrib/pgstattuple/pgstattuple.control b/contrib/pgstattuple/pgstattuple.control new file mode 100644 index 0000000..6af4075 --- /dev/null +++ b/contrib/pgstattuple/pgstattuple.control @@ -0,0 +1,5 @@ +# pgstattuple extension +comment = 'show tuple-level statistics' +default_version = '1.5' +module_pathname = '$libdir/pgstattuple' +relocatable = true diff --git a/contrib/pgstattuple/sql/pgstattuple.sql b/contrib/pgstattuple/sql/pgstattuple.sql new file mode 100644 index 0000000..5111be0 --- /dev/null +++ b/contrib/pgstattuple/sql/pgstattuple.sql @@ -0,0 +1,124 @@ +CREATE EXTENSION pgstattuple; + +-- +-- It's difficult to come up with platform-independent test cases for +-- the pgstattuple functions, but the results for empty tables and +-- indexes should be that. +-- + +create table test (a int primary key, b int[]); + +select * from pgstattuple('test'); +select * from pgstattuple('test'::text); +select * from pgstattuple('test'::name); +select * from pgstattuple('test'::regclass); +select pgstattuple(oid) from pg_class where relname = 'test'; +select pgstattuple(relname) from pg_class where relname = 'test'; + +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'); +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'::text); +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'::name); +select version, tree_level, + index_size / current_setting('block_size')::int as index_size, + root_block_no, internal_pages, leaf_pages, empty_pages, deleted_pages, + avg_leaf_density, leaf_fragmentation + from pgstatindex('test_pkey'::regclass); + +select pg_relpages('test'); +select pg_relpages('test_pkey'); +select pg_relpages('test_pkey'::text); +select pg_relpages('test_pkey'::name); +select pg_relpages('test_pkey'::regclass); +select pg_relpages(oid) from pg_class where relname = 'test_pkey'; +select pg_relpages(relname) from pg_class where relname = 'test_pkey'; + +create index test_ginidx on test using gin (b); + +select * from pgstatginindex('test_ginidx'); + +create index test_hashidx on test using hash (b); + +select * from pgstathashindex('test_hashidx'); + +-- these should error with the wrong type +select pgstatginindex('test_pkey'); +select pgstathashindex('test_pkey'); + +select pgstatindex('test_ginidx'); +select pgstathashindex('test_ginidx'); + +select pgstatindex('test_hashidx'); +select pgstatginindex('test_hashidx'); + +-- check that using any of these functions with unsupported relations will fail +create table test_partitioned (a int) partition by range (a); +create index test_partitioned_index on test_partitioned(a); +-- these should all fail +select pgstattuple('test_partitioned'); +select pgstattuple('test_partitioned_index'); +select pgstattuple_approx('test_partitioned'); +select pg_relpages('test_partitioned'); +select pgstatindex('test_partitioned'); +select pgstatginindex('test_partitioned'); +select pgstathashindex('test_partitioned'); + +create view test_view as select 1; +-- these should all fail +select pgstattuple('test_view'); +select pgstattuple_approx('test_view'); +select pg_relpages('test_view'); +select pgstatindex('test_view'); +select pgstatginindex('test_view'); +select pgstathashindex('test_view'); + +create foreign data wrapper dummy; +create server dummy_server foreign data wrapper dummy; +create foreign table test_foreign_table () server dummy_server; +-- these should all fail +select pgstattuple('test_foreign_table'); +select pgstattuple_approx('test_foreign_table'); +select pg_relpages('test_foreign_table'); +select pgstatindex('test_foreign_table'); +select pgstatginindex('test_foreign_table'); +select pgstathashindex('test_foreign_table'); + +-- a partition of a partitioned table should work though +create table test_partition partition of test_partitioned for values from (1) to (100); +select pgstattuple('test_partition'); +select pgstattuple_approx('test_partition'); +select pg_relpages('test_partition'); + +-- toast tables should work +select pgstattuple((select reltoastrelid from pg_class where relname = 'test')); +select pgstattuple_approx((select reltoastrelid from pg_class where relname = 'test')); +select pg_relpages((select reltoastrelid from pg_class where relname = 'test')); + +-- not for the index calls though, of course +select pgstatindex('test_partition'); +select pgstatginindex('test_partition'); +select pgstathashindex('test_partition'); + +-- an actual index of a partitioned table should work though +create index test_partition_idx on test_partition(a); +create index test_partition_hash_idx on test_partition using hash (a); +-- these should work +select pgstatindex('test_partition_idx'); +select pgstathashindex('test_partition_hash_idx'); + +drop table test_partitioned; +drop view test_view; +drop foreign table test_foreign_table; +drop server dummy_server; +drop foreign data wrapper dummy; |