summaryrefslogtreecommitdiffstats
path: root/contrib/pageinspect/heapfuncs.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--contrib/pageinspect/heapfuncs.c618
1 files changed, 618 insertions, 0 deletions
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c
new file mode 100644
index 0000000..0f02525
--- /dev/null
+++ b/contrib/pageinspect/heapfuncs.c
@@ -0,0 +1,618 @@
+/*-------------------------------------------------------------------------
+ *
+ * heapfuncs.c
+ * Functions to investigate heap pages
+ *
+ * We check the input to these functions for corrupt pointers etc. that
+ * might cause crashes, but at the same time we try to print out as much
+ * information as possible, even if it's nonsense. That's because if a
+ * page is corrupt, we don't know why and how exactly it is corrupt, so we
+ * let the user judge it.
+ *
+ * These functions are restricted to superusers for the fear of introducing
+ * security holes if the input checking isn't as water-tight as it should be.
+ * You'd need to be superuser to obtain a raw page image anyway, so
+ * there's hardly any use case for using these without superuser-rights
+ * anyway.
+ *
+ * Copyright (c) 2007-2023, PostgreSQL Global Development Group
+ *
+ * IDENTIFICATION
+ * contrib/pageinspect/heapfuncs.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+#include "postgres.h"
+
+#include "access/htup_details.h"
+#include "access/relation.h"
+#include "catalog/pg_am_d.h"
+#include "catalog/pg_type.h"
+#include "funcapi.h"
+#include "mb/pg_wchar.h"
+#include "miscadmin.h"
+#include "pageinspect.h"
+#include "port/pg_bitutils.h"
+#include "utils/array.h"
+#include "utils/builtins.h"
+#include "utils/rel.h"
+
+/*
+ * It's not supported to create tuples with oids anymore, but when pg_upgrade
+ * was used to upgrade from an older version, tuples might still have an
+ * oid. Seems worthwhile to display that.
+ */
+#define HeapTupleHeaderGetOidOld(tup) \
+( \
+ ((tup)->t_infomask & HEAP_HASOID_OLD) ? \
+ *((Oid *) ((char *)(tup) + (tup)->t_hoff - sizeof(Oid))) \
+ : \
+ InvalidOid \
+)
+
+
+/*
+ * bits_to_text
+ *
+ * Converts a bits8-array of 'len' bits to a human-readable
+ * c-string representation.
+ */
+static char *
+bits_to_text(bits8 *bits, int len)
+{
+ int i;
+ char *str;
+
+ str = palloc(len + 1);
+
+ for (i = 0; i < len; i++)
+ str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0';
+
+ str[i] = '\0';
+
+ return str;
+}
+
+
+/*
+ * text_to_bits
+ *
+ * Converts a c-string representation of bits into a bits8-array. This is
+ * the reverse operation of previous routine.
+ */
+static bits8 *
+text_to_bits(char *str, int len)
+{
+ bits8 *bits;
+ int off = 0;
+ char byte = 0;
+
+ bits = palloc(len + 1);
+
+ while (off < len)
+ {
+ if (off % 8 == 0)
+ byte = 0;
+
+ if ((str[off] == '0') || (str[off] == '1'))
+ byte = byte | ((str[off] - '0') << off % 8);
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("invalid character \"%.*s\" in t_bits string",
+ pg_mblen(str + off), str + off)));
+
+ if (off % 8 == 7)
+ bits[off / 8] = byte;
+
+ off++;
+ }
+
+ return bits;
+}
+
+/*
+ * heap_page_items
+ *
+ * Allows inspection of line pointers and tuple headers of a heap page.
+ */
+PG_FUNCTION_INFO_V1(heap_page_items);
+
+typedef struct heap_page_items_state
+{
+ TupleDesc tupd;
+ Page page;
+ uint16 offset;
+} heap_page_items_state;
+
+Datum
+heap_page_items(PG_FUNCTION_ARGS)
+{
+ bytea *raw_page = PG_GETARG_BYTEA_P(0);
+ heap_page_items_state *inter_call_data = NULL;
+ FuncCallContext *fctx;
+ int raw_page_size;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use raw page functions")));
+
+ raw_page_size = VARSIZE(raw_page) - VARHDRSZ;
+
+ if (SRF_IS_FIRSTCALL())
+ {
+ TupleDesc tupdesc;
+ MemoryContext mctx;
+
+ if (raw_page_size < SizeOfPageHeaderData)
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
+ errmsg("input page too small (%d bytes)", raw_page_size)));
+
+ fctx = SRF_FIRSTCALL_INIT();
+ mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx);
+
+ inter_call_data = palloc(sizeof(heap_page_items_state));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ inter_call_data->tupd = tupdesc;
+
+ inter_call_data->offset = FirstOffsetNumber;
+ inter_call_data->page = VARDATA(raw_page);
+
+ fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page);
+ fctx->user_fctx = inter_call_data;
+
+ MemoryContextSwitchTo(mctx);
+ }
+
+ fctx = SRF_PERCALL_SETUP();
+ inter_call_data = fctx->user_fctx;
+
+ if (fctx->call_cntr < fctx->max_calls)
+ {
+ Page page = inter_call_data->page;
+ HeapTuple resultTuple;
+ Datum result;
+ ItemId id;
+ Datum values[14];
+ bool nulls[14];
+ uint16 lp_offset;
+ uint16 lp_flags;
+ uint16 lp_len;
+
+ memset(nulls, 0, sizeof(nulls));
+
+ /* Extract information from the line pointer */
+
+ id = PageGetItemId(page, inter_call_data->offset);
+
+ lp_offset = ItemIdGetOffset(id);
+ lp_flags = ItemIdGetFlags(id);
+ lp_len = ItemIdGetLength(id);
+
+ values[0] = UInt16GetDatum(inter_call_data->offset);
+ values[1] = UInt16GetDatum(lp_offset);
+ values[2] = UInt16GetDatum(lp_flags);
+ values[3] = UInt16GetDatum(lp_len);
+
+ /*
+ * We do just enough validity checking to make sure we don't reference
+ * data outside the page passed to us. The page could be corrupt in
+ * many other ways, but at least we won't crash.
+ */
+ if (ItemIdHasStorage(id) &&
+ lp_len >= MinHeapTupleSize &&
+ lp_offset == MAXALIGN(lp_offset) &&
+ lp_offset + lp_len <= raw_page_size)
+ {
+ HeapTupleHeader tuphdr;
+ bytea *tuple_data_bytea;
+ int tuple_data_len;
+
+ /* Extract information from the tuple header */
+
+ tuphdr = (HeapTupleHeader) PageGetItem(page, id);
+
+ values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr));
+ values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr));
+ /* shared with xvac */
+ values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr));
+ values[7] = PointerGetDatum(&tuphdr->t_ctid);
+ values[8] = UInt32GetDatum(tuphdr->t_infomask2);
+ values[9] = UInt32GetDatum(tuphdr->t_infomask);
+ values[10] = UInt8GetDatum(tuphdr->t_hoff);
+
+ /* Copy raw tuple data into bytea attribute */
+ tuple_data_len = lp_len - tuphdr->t_hoff;
+ tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ);
+ SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ);
+ memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff,
+ tuple_data_len);
+ values[13] = PointerGetDatum(tuple_data_bytea);
+
+ /*
+ * We already checked that the item is completely within the raw
+ * page passed to us, with the length given in the line pointer.
+ * Let's check that t_hoff doesn't point over lp_len, before using
+ * it to access t_bits and oid.
+ */
+ if (tuphdr->t_hoff >= SizeofHeapTupleHeader &&
+ tuphdr->t_hoff <= lp_len &&
+ tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff))
+ {
+ if (tuphdr->t_infomask & HEAP_HASNULL)
+ {
+ int bits_len;
+
+ bits_len =
+ BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE;
+ values[11] = CStringGetTextDatum(bits_to_text(tuphdr->t_bits, bits_len));
+ }
+ else
+ nulls[11] = true;
+
+ if (tuphdr->t_infomask & HEAP_HASOID_OLD)
+ values[12] = HeapTupleHeaderGetOidOld(tuphdr);
+ else
+ nulls[12] = true;
+ }
+ else
+ {
+ nulls[11] = true;
+ nulls[12] = true;
+ }
+ }
+ else
+ {
+ /*
+ * The line pointer is not used, or it's invalid. Set the rest of
+ * the fields to NULL
+ */
+ int i;
+
+ for (i = 4; i <= 13; i++)
+ nulls[i] = true;
+ }
+
+ /* Build and return the result tuple. */
+ resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls);
+ result = HeapTupleGetDatum(resultTuple);
+
+ inter_call_data->offset++;
+
+ SRF_RETURN_NEXT(fctx, result);
+ }
+ else
+ SRF_RETURN_DONE(fctx);
+}
+
+/*
+ * tuple_data_split_internal
+ *
+ * Split raw tuple data taken directly from a page into an array of bytea
+ * elements. This routine does a lookup on NULL values and creates array
+ * elements accordingly. This is a reimplementation of nocachegetattr()
+ * in heaptuple.c simplified for educational purposes.
+ */
+static Datum
+tuple_data_split_internal(Oid relid, char *tupdata,
+ uint16 tupdata_len, uint16 t_infomask,
+ uint16 t_infomask2, bits8 *t_bits,
+ bool do_detoast)
+{
+ ArrayBuildState *raw_attrs;
+ int nattrs;
+ int i;
+ int off = 0;
+ Relation rel;
+ TupleDesc tupdesc;
+
+ /* Get tuple descriptor from relation OID */
+ rel = relation_open(relid, AccessShareLock);
+ tupdesc = RelationGetDescr(rel);
+
+ raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false);
+ nattrs = tupdesc->natts;
+
+ if (rel->rd_rel->relam != HEAP_TABLE_AM_OID)
+ ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("only heap AM is supported")));
+
+ if (nattrs < (t_infomask2 & HEAP_NATTS_MASK))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor")));
+
+ for (i = 0; i < nattrs; i++)
+ {
+ Form_pg_attribute attr;
+ bool is_null;
+ bytea *attr_data = NULL;
+
+ attr = TupleDescAttr(tupdesc, i);
+
+ /*
+ * Tuple header can specify fewer attributes than tuple descriptor as
+ * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually
+ * change tuples in pages, so attributes with numbers greater than
+ * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL.
+ */
+ if (i >= (t_infomask2 & HEAP_NATTS_MASK))
+ is_null = true;
+ else
+ is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits);
+
+ if (!is_null)
+ {
+ int len;
+
+ if (attr->attlen == -1)
+ {
+ off = att_align_pointer(off, attr->attalign, -1,
+ tupdata + off);
+
+ /*
+ * As VARSIZE_ANY throws an exception if it can't properly
+ * detect the type of external storage in macros VARTAG_SIZE,
+ * this check is repeated to have a nicer error handling.
+ */
+ if (VARATT_IS_EXTERNAL(tupdata + off) &&
+ !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) &&
+ !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off))
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("first byte of varlena attribute is incorrect for attribute %d", i)));
+
+ len = VARSIZE_ANY(tupdata + off);
+ }
+ else
+ {
+ off = att_align_nominal(off, attr->attalign);
+ len = attr->attlen;
+ }
+
+ if (tupdata_len < off + len)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("unexpected end of tuple data")));
+
+ if (attr->attlen == -1 && do_detoast)
+ attr_data = pg_detoast_datum_copy((struct varlena *) (tupdata + off));
+ else
+ {
+ attr_data = (bytea *) palloc(len + VARHDRSZ);
+ SET_VARSIZE(attr_data, len + VARHDRSZ);
+ memcpy(VARDATA(attr_data), tupdata + off, len);
+ }
+
+ off = att_addlength_pointer(off, attr->attlen,
+ tupdata + off);
+ }
+
+ raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data),
+ is_null, BYTEAOID, CurrentMemoryContext);
+ if (attr_data)
+ pfree(attr_data);
+ }
+
+ if (tupdata_len != off)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("end of tuple reached without looking at all its data")));
+
+ relation_close(rel, AccessShareLock);
+
+ return makeArrayResult(raw_attrs, CurrentMemoryContext);
+}
+
+/*
+ * tuple_data_split
+ *
+ * Split raw tuple data taken directly from page into distinct elements
+ * taking into account null values.
+ */
+PG_FUNCTION_INFO_V1(tuple_data_split);
+
+Datum
+tuple_data_split(PG_FUNCTION_ARGS)
+{
+ Oid relid;
+ bytea *raw_data;
+ uint16 t_infomask;
+ uint16 t_infomask2;
+ char *t_bits_str;
+ bool do_detoast = false;
+ bits8 *t_bits = NULL;
+ Datum res;
+
+ relid = PG_GETARG_OID(0);
+ raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1);
+ t_infomask = PG_GETARG_INT16(2);
+ t_infomask2 = PG_GETARG_INT16(3);
+ t_bits_str = PG_ARGISNULL(4) ? NULL :
+ text_to_cstring(PG_GETARG_TEXT_PP(4));
+
+ if (PG_NARGS() >= 6)
+ do_detoast = PG_GETARG_BOOL(5);
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use raw page functions")));
+
+ if (!raw_data)
+ PG_RETURN_NULL();
+
+ /*
+ * Convert t_bits string back to the bits8 array as represented in the
+ * tuple header.
+ */
+ if (t_infomask & HEAP_HASNULL)
+ {
+ size_t bits_str_len;
+ size_t bits_len;
+
+ bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE;
+ if (!t_bits_str)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("t_bits string must not be NULL")));
+
+ bits_str_len = strlen(t_bits_str);
+ if (bits_len != bits_str_len)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("unexpected length of t_bits string: %zu, expected %zu",
+ bits_str_len, bits_len)));
+
+ /* do the conversion */
+ t_bits = text_to_bits(t_bits_str, bits_str_len);
+ }
+ else
+ {
+ if (t_bits_str)
+ ereport(ERROR,
+ (errcode(ERRCODE_DATA_CORRUPTED),
+ errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long",
+ strlen(t_bits_str))));
+ }
+
+ /* Split tuple data */
+ res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ,
+ VARSIZE(raw_data) - VARHDRSZ,
+ t_infomask, t_infomask2, t_bits,
+ do_detoast);
+
+ if (t_bits)
+ pfree(t_bits);
+
+ PG_RETURN_DATUM(res);
+}
+
+/*
+ * heap_tuple_infomask_flags
+ *
+ * Decode into a human-readable format t_infomask and t_infomask2 associated
+ * to a tuple. All the flags are described in access/htup_details.h.
+ */
+PG_FUNCTION_INFO_V1(heap_tuple_infomask_flags);
+
+Datum
+heap_tuple_infomask_flags(PG_FUNCTION_ARGS)
+{
+#define HEAP_TUPLE_INFOMASK_COLS 2
+ Datum values[HEAP_TUPLE_INFOMASK_COLS] = {0};
+ bool nulls[HEAP_TUPLE_INFOMASK_COLS] = {0};
+ uint16 t_infomask = PG_GETARG_INT16(0);
+ uint16 t_infomask2 = PG_GETARG_INT16(1);
+ int cnt = 0;
+ ArrayType *a;
+ int bitcnt;
+ Datum *flags;
+ TupleDesc tupdesc;
+ HeapTuple tuple;
+
+ if (!superuser())
+ ereport(ERROR,
+ (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
+ errmsg("must be superuser to use raw page functions")));
+
+ /* Build a tuple descriptor for our result type */
+ if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE)
+ elog(ERROR, "return type must be a row type");
+
+ bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) +
+ pg_popcount((const char *) &t_infomask2, sizeof(uint16));
+
+ /* If no flags, return a set of empty arrays */
+ if (bitcnt <= 0)
+ {
+ values[0] = PointerGetDatum(construct_empty_array(TEXTOID));
+ values[1] = PointerGetDatum(construct_empty_array(TEXTOID));
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+ }
+
+ /* build set of raw flags */
+ flags = (Datum *) palloc0(sizeof(Datum) * bitcnt);
+
+ /* decode t_infomask */
+ if ((t_infomask & HEAP_HASNULL) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_HASNULL");
+ if ((t_infomask & HEAP_HASVARWIDTH) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_HASVARWIDTH");
+ if ((t_infomask & HEAP_HASEXTERNAL) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_HASEXTERNAL");
+ if ((t_infomask & HEAP_HASOID_OLD) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_HASOID_OLD");
+ if ((t_infomask & HEAP_XMAX_KEYSHR_LOCK) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_KEYSHR_LOCK");
+ if ((t_infomask & HEAP_COMBOCID) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_COMBOCID");
+ if ((t_infomask & HEAP_XMAX_EXCL_LOCK) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_EXCL_LOCK");
+ if ((t_infomask & HEAP_XMAX_LOCK_ONLY) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_LOCK_ONLY");
+ if ((t_infomask & HEAP_XMIN_COMMITTED) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_COMMITTED");
+ if ((t_infomask & HEAP_XMIN_INVALID) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_INVALID");
+ if ((t_infomask & HEAP_XMAX_COMMITTED) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_COMMITTED");
+ if ((t_infomask & HEAP_XMAX_INVALID) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_INVALID");
+ if ((t_infomask & HEAP_XMAX_IS_MULTI) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_IS_MULTI");
+ if ((t_infomask & HEAP_UPDATED) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_UPDATED");
+ if ((t_infomask & HEAP_MOVED_OFF) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_OFF");
+ if ((t_infomask & HEAP_MOVED_IN) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_IN");
+
+ /* decode t_infomask2 */
+ if ((t_infomask2 & HEAP_KEYS_UPDATED) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_KEYS_UPDATED");
+ if ((t_infomask2 & HEAP_HOT_UPDATED) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_HOT_UPDATED");
+ if ((t_infomask2 & HEAP_ONLY_TUPLE) != 0)
+ flags[cnt++] = CStringGetTextDatum("HEAP_ONLY_TUPLE");
+
+ /* build value */
+ Assert(cnt <= bitcnt);
+ a = construct_array_builtin(flags, cnt, TEXTOID);
+ values[0] = PointerGetDatum(a);
+
+ /*
+ * Build set of combined flags. Use the same array as previously, this
+ * keeps the code simple.
+ */
+ cnt = 0;
+ MemSet(flags, 0, sizeof(Datum) * bitcnt);
+
+ /* decode combined masks of t_infomask */
+ if ((t_infomask & HEAP_XMAX_SHR_LOCK) == HEAP_XMAX_SHR_LOCK)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_SHR_LOCK");
+ if ((t_infomask & HEAP_XMIN_FROZEN) == HEAP_XMIN_FROZEN)
+ flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_FROZEN");
+ if ((t_infomask & HEAP_MOVED) == HEAP_MOVED)
+ flags[cnt++] = CStringGetTextDatum("HEAP_MOVED");
+
+ /* Build an empty array if there are no combined flags */
+ if (cnt == 0)
+ a = construct_empty_array(TEXTOID);
+ else
+ a = construct_array_builtin(flags, cnt, TEXTOID);
+ pfree(flags);
+ values[1] = PointerGetDatum(a);
+
+ /* Returns the record as Datum */
+ tuple = heap_form_tuple(tupdesc, values, nulls);
+ PG_RETURN_DATUM(HeapTupleGetDatum(tuple));
+}