summaryrefslogtreecommitdiffstats
path: root/contrib/pgstattuple/pgstatapprox.c
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/pgstattuple/pgstatapprox.c')
-rw-r--r--contrib/pgstattuple/pgstatapprox.c319
1 files changed, 319 insertions, 0 deletions
diff --git a/contrib/pgstattuple/pgstatapprox.c b/contrib/pgstattuple/pgstatapprox.c
new file mode 100644
index 0000000..f601dc6
--- /dev/null
+++ b/contrib/pgstattuple/pgstatapprox.c
@@ -0,0 +1,319 @@
+/*-------------------------------------------------------------------------
+ *
+ * pgstatapprox.c
+ * Bloat estimation functions
+ *
+ * Copyright (c) 2014-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pgstattuple/pgstatapprox.c
+ *
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/multixact.h"
+#include "access/relation.h"
+#include "access/transam.h"
+#include "access/visibilitymap.h"
+#include "access/xact.h"
+#include "catalog/namespace.h"
+#include "catalog/pg_am_d.h"
+#include "commands/vacuum.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/freespace.h"
+#include "storage/lmgr.h"
+#include "storage/procarray.h"
+#include "utils/builtins.h"
+
+PG_FUNCTION_INFO_V1(pgstattuple_approx);
+PG_FUNCTION_INFO_V1(pgstattuple_approx_v1_5);
+
+Datum pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo);
+
+typedef struct output_type
+{
+ uint64 table_len;
+ double scanned_percent;
+ uint64 tuple_count;
+ uint64 tuple_len;
+ double tuple_percent;
+ uint64 dead_tuple_count;
+ uint64 dead_tuple_len;
+ double dead_tuple_percent;
+ uint64 free_space;
+ double free_percent;
+} output_type;
+
+#define NUM_OUTPUT_COLUMNS 10
+
+/*
+ * This function takes an already open relation and scans its pages,
+ * skipping those that have the corresponding visibility map bit set.
+ * For pages we skip, we find the free space from the free space map
+ * and approximate tuple_len on that basis. For the others, we count
+ * the exact number of dead tuples etc.
+ *
+ * This scan is loosely based on vacuumlazy.c:lazy_scan_heap(), but
+ * we do not try to avoid skipping single pages.
+ */
+static void
+statapprox_heap(Relation rel, output_type *stat)
+{
+ BlockNumber scanned,
+ nblocks,
+ blkno;
+ Buffer vmbuffer = InvalidBuffer;
+ BufferAccessStrategy bstrategy;
+ TransactionId OldestXmin;
+
+ OldestXmin = GetOldestNonRemovableTransactionId(rel);
+ bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+ scanned = 0;
+
+ for (blkno = 0; blkno < nblocks; blkno++)
+ {
+ Buffer buf;
+ Page page;
+ OffsetNumber offnum,
+ maxoff;
+ Size freespace;
+
+ CHECK_FOR_INTERRUPTS();
+
+ /*
+ * If the page has only visible tuples, then we can find out the free
+ * space from the FSM and move on.
+ */
+ if (VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+ {
+ freespace = GetRecordedFreeSpace(rel, blkno);
+ stat->tuple_len += BLCKSZ - freespace;
+ stat->free_space += freespace;
+ continue;
+ }
+
+ buf = ReadBufferExtended(rel, MAIN_FORKNUM, blkno,
+ RBM_NORMAL, bstrategy);
+
+ LockBuffer(buf, BUFFER_LOCK_SHARE);
+
+ page = BufferGetPage(buf);
+
+ /*
+ * It's not safe to call PageGetHeapFreeSpace() on new pages, so we
+ * treat them as being free space for our purposes.
+ */
+ if (!PageIsNew(page))
+ stat->free_space += PageGetHeapFreeSpace(page);
+ else
+ stat->free_space += BLCKSZ - SizeOfPageHeaderData;
+
+ /* We may count the page as scanned even if it's new/empty */
+ scanned++;
+
+ if (PageIsNew(page) || PageIsEmpty(page))
+ {
+ UnlockReleaseBuffer(buf);
+ continue;
+ }
+
+ /*
+ * Look at each tuple on the page and decide whether it's live or
+ * dead, then count it and its size. Unlike lazy_scan_heap, we can
+ * afford to ignore problems and special cases.
+ */
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ ItemId itemid;
+ HeapTupleData tuple;
+
+ itemid = PageGetItemId(page, offnum);
+
+ if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid) ||
+ ItemIdIsDead(itemid))
+ {
+ continue;
+ }
+
+ Assert(ItemIdIsNormal(itemid));
+
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = RelationGetRelid(rel);
+
+ /*
+ * We follow VACUUM's lead in counting INSERT_IN_PROGRESS tuples
+ * as "dead" while DELETE_IN_PROGRESS tuples are "live". We don't
+ * bother distinguishing tuples inserted/deleted by our own
+ * transaction.
+ */
+ switch (HeapTupleSatisfiesVacuum(&tuple, OldestXmin, buf))
+ {
+ case HEAPTUPLE_LIVE:
+ case HEAPTUPLE_DELETE_IN_PROGRESS:
+ stat->tuple_len += tuple.t_len;
+ stat->tuple_count++;
+ break;
+ case HEAPTUPLE_DEAD:
+ case HEAPTUPLE_RECENTLY_DEAD:
+ case HEAPTUPLE_INSERT_IN_PROGRESS:
+ stat->dead_tuple_len += tuple.t_len;
+ stat->dead_tuple_count++;
+ break;
+ default:
+ elog(ERROR, "unexpected HeapTupleSatisfiesVacuum result");
+ break;
+ }
+ }
+
+ UnlockReleaseBuffer(buf);
+ }
+
+ stat->table_len = (uint64) nblocks * BLCKSZ;
+
+ /*
+ * We don't know how many tuples are in the pages we didn't scan, so
+ * extrapolate the live-tuple count to the whole table in the same way
+ * that VACUUM does. (Like VACUUM, we're not taking a random sample, so
+ * just extrapolating linearly seems unsafe.) There should be no dead
+ * tuples in all-visible pages, so no correction is needed for that, and
+ * we already accounted for the space in those pages, too.
+ */
+ stat->tuple_count = vac_estimate_reltuples(rel, nblocks, scanned,
+ stat->tuple_count);
+
+ /* It's not clear if we could get -1 here, but be safe. */
+ stat->tuple_count = Max(stat->tuple_count, 0);
+
+ /*
+ * Calculate percentages if the relation has one or more pages.
+ */
+ if (nblocks != 0)
+ {
+ stat->scanned_percent = 100.0 * scanned / nblocks;
+ stat->tuple_percent = 100.0 * stat->tuple_len / stat->table_len;
+ stat->dead_tuple_percent = 100.0 * stat->dead_tuple_len / stat->table_len;
+ stat->free_percent = 100.0 * stat->free_space / stat->table_len;
+ }
+
+ if (BufferIsValid(vmbuffer))
+ {
+ ReleaseBuffer(vmbuffer);
+ vmbuffer = InvalidBuffer;
+ }
+}
+
+/*
+ * Returns estimated live/dead tuple statistics for the given relid.
+ *
+ * The superuser() check here must be kept as the library might be upgraded
+ * without the extension being upgraded, meaning that in pre-1.5 installations
+ * these functions could be called by any user.
+ */
+Datum
+pgstattuple_approx(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use pgstattuple functions")));
+
+ PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
+}
+
+/*
+ * As of pgstattuple version 1.5, we no longer need to check if the user
+ * is a superuser because we REVOKE EXECUTE on the SQL function from PUBLIC.
+ * Users can then grant access to it based on their policies.
+ *
+ * Otherwise identical to pgstattuple_approx (above).
+ */
+Datum
+pgstattuple_approx_v1_5(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+
+ PG_RETURN_DATUM(pgstattuple_approx_internal(relid, fcinfo));
+}
+
+Datum
+pgstattuple_approx_internal(Oid relid, FunctionCallInfo fcinfo)
+{
+ Relation rel;
+ output_type stat = {0};
+ TupleDesc tupdesc;
+ bool nulls[NUM_OUTPUT_COLUMNS];
+ Datum values[NUM_OUTPUT_COLUMNS];
+ HeapTuple ret;
+ int i = 0;
+
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ if (tupdesc->natts != NUM_OUTPUT_COLUMNS)
+ elog(ERROR, "incorrect number of output arguments");
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /*
+ * Reject attempts to read non-local temporary relations; we would be
+ * likely to get wrong data since we have no visibility into the owning
+ * session's local buffers.
+ */
+ if (RELATION_IS_OTHER_TEMP(rel))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("cannot access temporary tables of other sessions")));
+
+ /*
+ * We support only relation kinds with a visibility map and a free space
+ * map.
+ */
+ if (!(rel->rd_rel->relkind == RELKIND_RELATION ||
+ rel->rd_rel->relkind == RELKIND_MATVIEW ||
+ rel->rd_rel->relkind == RELKIND_TOASTVALUE))
+ ereport(ERROR,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("relation \"%s\" is of wrong relation kind",
+ RelationGetRelationName(rel)),
+ errdetail_relkind_not_supported(rel->rd_rel->relkind)));
+
+ if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only heap AM is supported")));
+
+ statapprox_heap(rel, &stat);
+
+ relation_close(rel, AccessShareLock);
+
+ memset(nulls, 0, sizeof(nulls));
+
+ values[i++] = Int64GetDatum(stat.table_len);
+ values[i++] = Float8GetDatum(stat.scanned_percent);
+ values[i++] = Int64GetDatum(stat.tuple_count);
+ values[i++] = Int64GetDatum(stat.tuple_len);
+ values[i++] = Float8GetDatum(stat.tuple_percent);
+ values[i++] = Int64GetDatum(stat.dead_tuple_count);
+ values[i++] = Int64GetDatum(stat.dead_tuple_len);
+ values[i++] = Float8GetDatum(stat.dead_tuple_percent);
+ values[i++] = Int64GetDatum(stat.free_space);
+ values[i++] = Float8GetDatum(stat.free_percent);
+
+ ret = heap_form_tuple(tupdesc, values, nulls);
+ return HeapTupleGetDatum(ret);
+}