author		Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:19:15 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-05-04 12:19:15 +0000
commit		6eb9c5a5657d1fe77b55cc261450f3538d35a94d (patch)
tree		657d8194422a5daccecfd42d654b8a245ef7b4c8 /contrib/pg_visibility/pg_visibility.c
parent		Initial commit. (diff)
Adding upstream version 13.4. (upstream/13.4, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/pg_visibility/pg_visibility.c')
-rw-r--r--	contrib/pg_visibility/pg_visibility.c	788
1 file changed, 788 insertions(+), 0 deletions(-)
diff --git a/contrib/pg_visibility/pg_visibility.c b/contrib/pg_visibility/pg_visibility.c
new file mode 100644
index 0000000..68d580e
--- /dev/null
+++ b/contrib/pg_visibility/pg_visibility.c
@@ -0,0 +1,788 @@
+/*-------------------------------------------------------------------------
+ *
+ * pg_visibility.c
+ * display visibility map information and page-level visibility bits
+ *
+ * Copyright (c) 2016-2020, PostgreSQL Global Development Group
+ *
+ * contrib/pg_visibility/pg_visibility.c
+ *-------------------------------------------------------------------------
+ */
+#include "postgres.h"
+
+#include "access/heapam.h"
+#include "access/htup_details.h"
+#include "access/visibilitymap.h"
+#include "catalog/pg_type.h"
+#include "catalog/storage_xlog.h"
+#include "funcapi.h"
+#include "miscadmin.h"
+#include "storage/bufmgr.h"
+#include "storage/procarray.h"
+#include "storage/smgr.h"
+#include "utils/rel.h"
+#include "utils/snapmgr.h"
+
+PG_MODULE_MAGIC;
+
+typedef struct vbits
+{
+ BlockNumber next;
+ BlockNumber count;
+ uint8 bits[FLEXIBLE_ARRAY_MEMBER];
+} vbits;
+
+typedef struct corrupt_items
+{
+ BlockNumber next;
+ BlockNumber count;
+ ItemPointer tids;
+} corrupt_items;
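+
+/*
+ * Allocation sketch: because "bits" is a flexible array member, a vbits
+ * struct is carved out of a single palloc, one byte per heap block, as
+ * collect_visibility_data does below:
+ *
+ *		vbits *info = palloc0(offsetof(vbits, bits) + nblocks);
+ *		info->next = 0;				(iteration cursor for the SRF)
+ *		info->count = nblocks;		(number of valid bytes in bits[])
+ */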
+
+PG_FUNCTION_INFO_V1(pg_visibility_map);
+PG_FUNCTION_INFO_V1(pg_visibility_map_rel);
+PG_FUNCTION_INFO_V1(pg_visibility);
+PG_FUNCTION_INFO_V1(pg_visibility_rel);
+PG_FUNCTION_INFO_V1(pg_visibility_map_summary);
+PG_FUNCTION_INFO_V1(pg_check_frozen);
+PG_FUNCTION_INFO_V1(pg_check_visible);
+PG_FUNCTION_INFO_V1(pg_truncate_visibility_map);
+
+static TupleDesc pg_visibility_tupdesc(bool include_blkno, bool include_pd);
+static vbits *collect_visibility_data(Oid relid, bool include_pd);
+static corrupt_items *collect_corrupt_items(Oid relid, bool all_visible,
+ bool all_frozen);
+static void record_corrupt_item(corrupt_items *items, ItemPointer tid);
+static bool tuple_all_visible(HeapTuple tup, TransactionId OldestXmin,
+ Buffer buffer);
+static void check_relation_relkind(Relation rel);
+
+/*
+ * Visibility map information for a single block of a relation.
+ *
+ * Note: the VM code will silently return zeroes for pages past the end
+ * of the map, so we allow probes up to MaxBlockNumber regardless of the
+ * actual relation size.
+ */
+Datum
+pg_visibility_map(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 blkno = PG_GETARG_INT64(1);
+ int32 mapbits;
+ Relation rel;
+ Buffer vmbuffer = InvalidBuffer;
+ TupleDesc tupdesc;
+ Datum values[2];
+ bool nulls[2];
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /* Only some relkinds have a visibility map */
+ check_relation_relkind(rel);
+
+ if (blkno < 0 || blkno > MaxBlockNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid block number")));
+
+ tupdesc = pg_visibility_tupdesc(false, false);
+ MemSet(nulls, 0, sizeof(nulls));
+
+ mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
+ values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
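+
+/*
+ * For reference, the flag bits tested above are defined in
+ * access/visibilitymap.h; paraphrased from memory (verify against that
+ * header):
+ *
+ *		#define VISIBILITYMAP_ALL_VISIBLE	0x01
+ *		#define VISIBILITYMAP_ALL_FROZEN	0x02
+ *		#define VISIBILITYMAP_VALID_BITS	0x03	(OR of the valid bits)
+ */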
+
+/*
+ * Visibility map information for a single block of a relation, plus the
+ * page-level information for the same block.
+ */
+Datum
+pg_visibility(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ int64 blkno = PG_GETARG_INT64(1);
+ int32 mapbits;
+ Relation rel;
+ Buffer vmbuffer = InvalidBuffer;
+ Buffer buffer;
+ Page page;
+ TupleDesc tupdesc;
+ Datum values[3];
+ bool nulls[3];
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /* Only some relkinds have a visibility map */
+ check_relation_relkind(rel);
+
+ if (blkno < 0 || blkno > MaxBlockNumber)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("invalid block number")));
+
+ tupdesc = pg_visibility_tupdesc(false, true);
+ MemSet(nulls, 0, sizeof(nulls));
+
+ mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ values[0] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0);
+ values[1] = BoolGetDatum((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0);
+
+ /* Here we have to explicitly check rel size ... */
+ if (blkno < RelationGetNumberOfBlocks(rel))
+ {
+ buffer = ReadBuffer(rel, blkno);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+ page = BufferGetPage(buffer);
+ values[2] = BoolGetDatum(PageIsAllVisible(page));
+
+ UnlockReleaseBuffer(buffer);
+ }
+ else
+ {
+ /* As with the vismap, silently return 0 for pages past EOF */
+ values[2] = BoolGetDatum(false);
+ }
+
+ relation_close(rel, AccessShareLock);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Visibility map information for every block in a relation.
+ */
+Datum
+pg_visibility_map_rel(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ vbits *info;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ Oid relid = PG_GETARG_OID(0);
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ funcctx->tuple_desc = pg_visibility_tupdesc(true, false);
+ /* collect_visibility_data will verify the relkind */
+ funcctx->user_fctx = collect_visibility_data(relid, false);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ info = (vbits *) funcctx->user_fctx;
+
+ if (info->next < info->count)
+ {
+ Datum values[3];
+ bool nulls[3];
+ HeapTuple tuple;
+
+ MemSet(nulls, 0, sizeof(nulls));
+ values[0] = Int64GetDatum(info->next);
+ values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
+ values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
+ info->next++;
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Visibility map information for every block in a relation, plus the page
+ * level information for each block.
+ */
+Datum
+pg_visibility_rel(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ vbits *info;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ Oid relid = PG_GETARG_OID(0);
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ funcctx->tuple_desc = pg_visibility_tupdesc(true, true);
+ /* collect_visibility_data will verify the relkind */
+ funcctx->user_fctx = collect_visibility_data(relid, true);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ info = (vbits *) funcctx->user_fctx;
+
+ if (info->next < info->count)
+ {
+ Datum values[4];
+ bool nulls[4];
+ HeapTuple tuple;
+
+ MemSet(nulls, 0, sizeof(nulls));
+ values[0] = Int64GetDatum(info->next);
+ values[1] = BoolGetDatum((info->bits[info->next] & (1 << 0)) != 0);
+ values[2] = BoolGetDatum((info->bits[info->next] & (1 << 1)) != 0);
+ values[3] = BoolGetDatum((info->bits[info->next] & (1 << 2)) != 0);
+ info->next++;
+
+ tuple = heap_form_tuple(funcctx->tuple_desc, values, nulls);
+ SRF_RETURN_NEXT(funcctx, HeapTupleGetDatum(tuple));
+ }
+
+ SRF_RETURN_DONE(funcctx);
+}
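+
+/*
+ * Encoding sketch: each bits[] byte built by collect_visibility_data packs
+ * up to three flags per block, consumed by the two SRFs above:
+ *
+ *		bit 0 (1 << 0)	VM all-visible
+ *		bit 1 (1 << 1)	VM all-frozen
+ *		bit 2 (1 << 2)	page header's PD_ALL_VISIBLE (include_pd only)
+ */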
+
+/*
+ * Count the number of all-visible and all-frozen pages in the visibility
+ * map for a particular relation.
+ */
+Datum
+pg_visibility_map_summary(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Relation rel;
+ BlockNumber nblocks;
+ BlockNumber blkno;
+ Buffer vmbuffer = InvalidBuffer;
+ int64 all_visible = 0;
+ int64 all_frozen = 0;
+ TupleDesc tupdesc;
+ Datum values[2];
+ bool nulls[2];
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /* Only some relkinds have a visibility map */
+ check_relation_relkind(rel);
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ for (blkno = 0; blkno < nblocks; ++blkno)
+ {
+ int32 mapbits;
+
+ /* Make sure we are interruptible. */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Get map info. */
+ mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
+ if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
+ ++all_visible;
+ if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
+ ++all_frozen;
+ }
+
+ /* Clean up. */
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ relation_close(rel, AccessShareLock);
+
+ tupdesc = CreateTemplateTupleDesc(2);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 1, "all_visible", INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, (AttrNumber) 2, "all_frozen", INT8OID, -1, 0);
+ tupdesc = BlessTupleDesc(tupdesc);
+
+ MemSet(nulls, 0, sizeof(nulls));
+ values[0] = Int64GetDatum(all_visible);
+ values[1] = Int64GetDatum(all_frozen);
+
+ PG_RETURN_DATUM(HeapTupleGetDatum(heap_form_tuple(tupdesc, values, nulls)));
+}
+
+/*
+ * Return the TIDs of non-frozen tuples present in pages marked all-frozen
+ * in the visibility map. We hope no one will ever find any, but there could
+ * be bugs, database corruption, etc.
+ */
+Datum
+pg_check_frozen(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ corrupt_items *items;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ Oid relid = PG_GETARG_OID(0);
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ /* collect_corrupt_items will verify the relkind */
+ funcctx->user_fctx = collect_corrupt_items(relid, false, true);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ items = (corrupt_items *) funcctx->user_fctx;
+
+ if (items->next < items->count)
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
+
+ SRF_RETURN_DONE(funcctx);
+}
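+
+/*
+ * Returning a bare pointer above is safe: the SQL "tid" type is a
+ * fixed-length, pass-by-reference type, so the Datum is simply a pointer
+ * into items->tids[], and that array was palloc'd in the SRF's
+ * multi_call_memory_ctx, so it survives until SRF_RETURN_DONE:
+ *
+ *		Datum d = PointerGetDatum(&items->tids[items->next++]);
+ */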
+
+/*
+ * Return the TIDs of not-all-visible tuples in pages marked all-visible
+ * in the visibility map. We hope no one will ever find any, but there could
+ * be bugs, database corruption, etc.
+ */
+Datum
+pg_check_visible(PG_FUNCTION_ARGS)
+{
+ FuncCallContext *funcctx;
+ corrupt_items *items;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ Oid relid = PG_GETARG_OID(0);
+ MemoryContext oldcontext;
+
+ funcctx = SRF_FIRSTCALL_INIT();
+ oldcontext = MemoryContextSwitchTo(funcctx->multi_call_memory_ctx);
+ /* collect_corrupt_items will verify the relkind */
+ funcctx->user_fctx = collect_corrupt_items(relid, true, false);
+ MemoryContextSwitchTo(oldcontext);
+ }
+
+ funcctx = SRF_PERCALL_SETUP();
+ items = (corrupt_items *) funcctx->user_fctx;
+
+ if (items->next < items->count)
+ SRF_RETURN_NEXT(funcctx, PointerGetDatum(&items->tids[items->next++]));
+
+ SRF_RETURN_DONE(funcctx);
+}
+
+/*
+ * Remove the visibility map fork for a relation. If there turn out to be
+ * any bugs in the visibility map code that require rebuilding the VM, this
+ * provides users with a way to do it that is cleaner than shutting down the
+ * server and removing files by hand.
+ *
+ * This is a cut-down version of RelationTruncate.
+ */
+Datum
+pg_truncate_visibility_map(PG_FUNCTION_ARGS)
+{
+ Oid relid = PG_GETARG_OID(0);
+ Relation rel;
+ ForkNumber fork;
+ BlockNumber block;
+
+ rel = relation_open(relid, AccessExclusiveLock);
+
+ /* Only some relkinds have a visibility map */
+ check_relation_relkind(rel);
+
+ RelationOpenSmgr(rel);
+ rel->rd_smgr->smgr_vm_nblocks = InvalidBlockNumber;
+
+ block = visibilitymap_prepare_truncate(rel, 0);
+ if (BlockNumberIsValid(block))
+ {
+ fork = VISIBILITYMAP_FORKNUM;
+ smgrtruncate(rel->rd_smgr, &fork, 1, &block);
+ }
+
+ if (RelationNeedsWAL(rel))
+ {
+ xl_smgr_truncate xlrec;
+
+ xlrec.blkno = 0;
+ xlrec.rnode = rel->rd_node;
+ xlrec.flags = SMGR_TRUNCATE_VM;
+
+ XLogBeginInsert();
+ XLogRegisterData((char *) &xlrec, sizeof(xlrec));
+
+ XLogInsert(RM_SMGR_ID, XLOG_SMGR_TRUNCATE | XLR_SPECIAL_REL_UPDATE);
+ }
+
+ /*
+ * Release the lock right away, not at commit time.
+ *
+ * It would be a problem to release the lock prior to commit if this
+ * truncate operation sends any transactional invalidation messages. Other
+ * backends would potentially be able to lock the relation without
+ * processing them in the window of time between when we release the lock
+	 * here and when we send the messages at our eventual commit. However,
+ * we're currently only sending a non-transactional smgr invalidation,
+ * which will have been posted to shared memory immediately from within
+	 * smgrtruncate. Therefore, there should be no race here.
+ *
+	 * It's desirable to release the lock early because someone may need to
+	 * use this function to blow away many visibility map forks at once. If
+	 * we couldn't release the lock until commit time, the transaction doing
+	 * this would accumulate
+ * AccessExclusiveLocks on all of those relations at the same time, which
+ * is undesirable. However, if this turns out to be unsafe we may have no
+ * choice...
+ */
+ relation_close(rel, AccessExclusiveLock);
+
+ /* Nothing to return. */
+ PG_RETURN_VOID();
+}
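+
+/*
+ * SMGR_TRUNCATE_VM above restricts the truncation record to the visibility
+ * map fork. The fork-selection flags, paraphrased from memory from
+ * catalog/storage_xlog.h (verify against that header):
+ *
+ *		#define SMGR_TRUNCATE_HEAP	0x0001
+ *		#define SMGR_TRUNCATE_VM	0x0002
+ *		#define SMGR_TRUNCATE_FSM	0x0004
+ *		#define SMGR_TRUNCATE_ALL	(HEAP | VM | FSM)
+ */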
+
+/*
+ * Helper function to construct whichever TupleDesc we need for a particular
+ * call.
+ */
+static TupleDesc
+pg_visibility_tupdesc(bool include_blkno, bool include_pd)
+{
+ TupleDesc tupdesc;
+ AttrNumber maxattr = 2;
+ AttrNumber a = 0;
+
+ if (include_blkno)
+ ++maxattr;
+ if (include_pd)
+ ++maxattr;
+ tupdesc = CreateTemplateTupleDesc(maxattr);
+ if (include_blkno)
+ TupleDescInitEntry(tupdesc, ++a, "blkno", INT8OID, -1, 0);
+ TupleDescInitEntry(tupdesc, ++a, "all_visible", BOOLOID, -1, 0);
+ TupleDescInitEntry(tupdesc, ++a, "all_frozen", BOOLOID, -1, 0);
+ if (include_pd)
+ TupleDescInitEntry(tupdesc, ++a, "pd_all_visible", BOOLOID, -1, 0);
+ Assert(a == maxattr);
+
+ return BlessTupleDesc(tupdesc);
+}
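+
+/*
+ * Column layouts produced, by (include_blkno, include_pd), as used by the
+ * callers above:
+ *
+ *		(false, false)	(all_visible, all_frozen)							pg_visibility_map
+ *		(false, true)	(all_visible, all_frozen, pd_all_visible)			pg_visibility
+ *		(true, false)	(blkno, all_visible, all_frozen)					pg_visibility_map_rel
+ *		(true, true)	(blkno, all_visible, all_frozen, pd_all_visible)	pg_visibility_rel
+ */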
+
+/*
+ * Collect visibility data about a relation.
+ *
+ * Checks relkind of relid and will throw an error if the relation does not
+ * have a VM.
+ */
+static vbits *
+collect_visibility_data(Oid relid, bool include_pd)
+{
+ Relation rel;
+ BlockNumber nblocks;
+ vbits *info;
+ BlockNumber blkno;
+ Buffer vmbuffer = InvalidBuffer;
+ BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /* Only some relkinds have a visibility map */
+ check_relation_relkind(rel);
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+ info = palloc0(offsetof(vbits, bits) + nblocks);
+ info->next = 0;
+ info->count = nblocks;
+
+ for (blkno = 0; blkno < nblocks; ++blkno)
+ {
+ int32 mapbits;
+
+ /* Make sure we are interruptible. */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Get map info. */
+ mapbits = (int32) visibilitymap_get_status(rel, blkno, &vmbuffer);
+ if ((mapbits & VISIBILITYMAP_ALL_VISIBLE) != 0)
+ info->bits[blkno] |= (1 << 0);
+ if ((mapbits & VISIBILITYMAP_ALL_FROZEN) != 0)
+ info->bits[blkno] |= (1 << 1);
+
+ /*
+ * Page-level data requires reading every block, so only get it if the
+ * caller needs it. Use a buffer access strategy, too, to prevent
+		 * cache-thrashing.
+ */
+ if (include_pd)
+ {
+ Buffer buffer;
+ Page page;
+
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+ bstrategy);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+ page = BufferGetPage(buffer);
+ if (PageIsAllVisible(page))
+ info->bits[blkno] |= (1 << 2);
+
+ UnlockReleaseBuffer(buffer);
+ }
+ }
+
+ /* Clean up. */
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ relation_close(rel, AccessShareLock);
+
+ return info;
+}
+
+/*
+ * Returns a list of items whose visibility map information does not match
+ * the status of the tuples on the page.
+ *
+ * If all_visible is passed as true, this will include all items that are
+ * on pages marked as all-visible in the visibility map but that do not in
+ * fact appear to be all-visible.
+ *
+ * If all_frozen is passed as true, this will include all items that are on
+ * pages marked as all-frozen but that do not in fact appear to be frozen.
+ *
+ * Checks relkind of relid and will throw an error if the relation does not
+ * have a VM.
+ */
+static corrupt_items *
+collect_corrupt_items(Oid relid, bool all_visible, bool all_frozen)
+{
+ Relation rel;
+ BlockNumber nblocks;
+ corrupt_items *items;
+ BlockNumber blkno;
+ Buffer vmbuffer = InvalidBuffer;
+ BufferAccessStrategy bstrategy = GetAccessStrategy(BAS_BULKREAD);
+ TransactionId OldestXmin = InvalidTransactionId;
+
+ if (all_visible)
+ {
+ /* Don't pass rel; that will fail in recovery. */
+ OldestXmin = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
+ }
+
+ rel = relation_open(relid, AccessShareLock);
+
+ /* Only some relkinds have a visibility map */
+ check_relation_relkind(rel);
+
+ nblocks = RelationGetNumberOfBlocks(rel);
+
+ /*
+ * Guess an initial array size. We don't expect many corrupted tuples, so
+ * start with a small array. This function uses the "next" field to track
+ * the next offset where we can store an item (which is the same thing as
+ * the number of items found so far) and the "count" field to track the
+ * number of entries allocated. We'll repurpose these fields before
+ * returning.
+ */
+ items = palloc0(sizeof(corrupt_items));
+ items->next = 0;
+ items->count = 64;
+ items->tids = palloc(items->count * sizeof(ItemPointerData));
+
+ /* Loop over every block in the relation. */
+ for (blkno = 0; blkno < nblocks; ++blkno)
+ {
+ bool check_frozen = false;
+ bool check_visible = false;
+ Buffer buffer;
+ Page page;
+ OffsetNumber offnum,
+ maxoff;
+
+ /* Make sure we are interruptible. */
+ CHECK_FOR_INTERRUPTS();
+
+ /* Use the visibility map to decide whether to check this page. */
+ if (all_frozen && VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+ check_frozen = true;
+ if (all_visible && VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+ check_visible = true;
+ if (!check_visible && !check_frozen)
+ continue;
+
+ /* Read and lock the page. */
+ buffer = ReadBufferExtended(rel, MAIN_FORKNUM, blkno, RBM_NORMAL,
+ bstrategy);
+ LockBuffer(buffer, BUFFER_LOCK_SHARE);
+
+ page = BufferGetPage(buffer);
+ maxoff = PageGetMaxOffsetNumber(page);
+
+ /*
+ * The visibility map bits might have changed while we were acquiring
+ * the page lock. Recheck to avoid returning spurious results.
+ */
+ if (check_frozen && !VM_ALL_FROZEN(rel, blkno, &vmbuffer))
+ check_frozen = false;
+ if (check_visible && !VM_ALL_VISIBLE(rel, blkno, &vmbuffer))
+ check_visible = false;
+ if (!check_visible && !check_frozen)
+ {
+ UnlockReleaseBuffer(buffer);
+ continue;
+ }
+
+ /* Iterate over each tuple on the page. */
+ for (offnum = FirstOffsetNumber;
+ offnum <= maxoff;
+ offnum = OffsetNumberNext(offnum))
+ {
+ HeapTupleData tuple;
+ ItemId itemid;
+
+ itemid = PageGetItemId(page, offnum);
+
+ /* Unused or redirect line pointers are of no interest. */
+ if (!ItemIdIsUsed(itemid) || ItemIdIsRedirected(itemid))
+ continue;
+
+ /* Dead line pointers are neither all-visible nor frozen. */
+ if (ItemIdIsDead(itemid))
+ {
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+ record_corrupt_item(items, &tuple.t_self);
+ continue;
+ }
+
+ /* Initialize a HeapTupleData structure for checks below. */
+ ItemPointerSet(&(tuple.t_self), blkno, offnum);
+ tuple.t_data = (HeapTupleHeader) PageGetItem(page, itemid);
+ tuple.t_len = ItemIdGetLength(itemid);
+ tuple.t_tableOid = relid;
+
+ /*
+ * If we're checking whether the page is all-visible, we expect
+ * the tuple to be all-visible.
+ */
+ if (check_visible &&
+ !tuple_all_visible(&tuple, OldestXmin, buffer))
+ {
+ TransactionId RecomputedOldestXmin;
+
+ /*
+ * Time has passed since we computed OldestXmin, so it's
+ * possible that this tuple is all-visible in reality even
+ * though it doesn't appear so based on our
+ * previously-computed value. Let's compute a new value so we
+ * can be certain whether there is a problem.
+ *
+ * From a concurrency point of view, it sort of sucks to
+ * retake ProcArrayLock here while we're holding the buffer
+ * exclusively locked, but it should be safe against
+ * deadlocks, because surely GetOldestXmin() should never take
+ * a buffer lock. And this shouldn't happen often, so it's
+ * worth being careful so as to avoid false positives.
+ */
+ RecomputedOldestXmin = GetOldestXmin(NULL, PROCARRAY_FLAGS_VACUUM);
+
+ if (!TransactionIdPrecedes(OldestXmin, RecomputedOldestXmin))
+ record_corrupt_item(items, &tuple.t_self);
+ else
+ {
+ OldestXmin = RecomputedOldestXmin;
+ if (!tuple_all_visible(&tuple, OldestXmin, buffer))
+ record_corrupt_item(items, &tuple.t_self);
+ }
+ }
+
+ /*
+ * If we're checking whether the page is all-frozen, we expect the
+ * tuple to be in a state where it will never need freezing.
+ */
+ if (check_frozen)
+ {
+ if (heap_tuple_needs_eventual_freeze(tuple.t_data))
+ record_corrupt_item(items, &tuple.t_self);
+ }
+ }
+
+ UnlockReleaseBuffer(buffer);
+ }
+
+ /* Clean up. */
+ if (vmbuffer != InvalidBuffer)
+ ReleaseBuffer(vmbuffer);
+ relation_close(rel, AccessShareLock);
+
+ /*
+ * Before returning, repurpose the fields to match caller's expectations.
+ * next is now the next item that should be read (rather than written) and
+ * count is now the number of items we wrote (rather than the number we
+ * allocated).
+ */
+ items->count = items->next;
+ items->next = 0;
+
+ return items;
+}
+
+/*
+ * Remember one corrupt item.
+ */
+static void
+record_corrupt_item(corrupt_items *items, ItemPointer tid)
+{
+ /* enlarge output array if needed. */
+ if (items->next >= items->count)
+ {
+ items->count *= 2;
+ items->tids = repalloc(items->tids,
+ items->count * sizeof(ItemPointerData));
+ }
+ /* and add the new item */
+ items->tids[items->next++] = *tid;
+}
+
+/*
+ * Check whether a tuple is all-visible relative to a given OldestXmin value.
+ * The buffer should contain the tuple and should be locked and pinned.
+ */
+static bool
+tuple_all_visible(HeapTuple tup, TransactionId OldestXmin, Buffer buffer)
+{
+ HTSV_Result state;
+ TransactionId xmin;
+
+ state = HeapTupleSatisfiesVacuum(tup, OldestXmin, buffer);
+ if (state != HEAPTUPLE_LIVE)
+ return false; /* all-visible implies live */
+
+ /*
+ * Neither lazy_scan_heap nor heap_page_is_all_visible will mark a page
+ * all-visible unless every tuple is hinted committed. However, those hint
+ * bits could be lost after a crash, so we can't be certain that they'll
+ * be set here. So just check the xmin.
+ */
+
+ xmin = HeapTupleHeaderGetXmin(tup->t_data);
+ if (!TransactionIdPrecedes(xmin, OldestXmin))
+ return false; /* xmin not old enough for all to see */
+
+ return true;
+}
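+
+/*
+ * For context, HeapTupleSatisfiesVacuum classifies each tuple as one of
+ * the HTSV_Result values; paraphrased from memory from access/heapam.h
+ * (verify against that header):
+ *
+ *		HEAPTUPLE_DEAD					dead and removable
+ *		HEAPTUPLE_LIVE					live (inserter committed, no deleter)
+ *		HEAPTUPLE_RECENTLY_DEAD			dead, but not yet removable
+ *		HEAPTUPLE_INSERT_IN_PROGRESS	inserting xact still in progress
+ *		HEAPTUPLE_DELETE_IN_PROGRESS	deleting xact still in progress
+ */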
+
+/*
+ * check_relation_relkind - convenience routine to check that relation
+ * is of the relkind supported by the callers
+ */
+static void
+check_relation_relkind(Relation rel)
+{
+ if (rel->rd_rel->relkind != RELKIND_RELATION &&
+ rel->rd_rel->relkind != RELKIND_MATVIEW &&
+ rel->rd_rel->relkind != RELKIND_TOASTVALUE)
+ ereport(ERROR,
+ (errcode(ERRCODE_WRONG_OBJECT_TYPE),
+ errmsg("\"%s\" is not a table, materialized view, or TOAST table",
+ RelationGetRelationName(rel))));
+}