diff options
Diffstat (limited to 'contrib/pageinspect/heapfuncs.c')
-rw-r--r-- | contrib/pageinspect/heapfuncs.c | 618 |
1 files changed, 618 insertions, 0 deletions
diff --git a/contrib/pageinspect/heapfuncs.c b/contrib/pageinspect/heapfuncs.c new file mode 100644 index 0000000..0f02525 --- /dev/null +++ b/contrib/pageinspect/heapfuncs.c @@ -0,0 +1,618 @@ +/*------------------------------------------------------------------------- + * + * heapfuncs.c + * Functions to investigate heap pages + * + * We check the input to these functions for corrupt pointers etc. that + * might cause crashes, but at the same time we try to print out as much + * information as possible, even if it's nonsense. That's because if a + * page is corrupt, we don't know why and how exactly it is corrupt, so we + * let the user judge it. + * + * These functions are restricted to superusers for the fear of introducing + * security holes if the input checking isn't as water-tight as it should be. + * You'd need to be superuser to obtain a raw page image anyway, so + * there's hardly any use case for using these without superuser-rights + * anyway. + * + * Copyright (c) 2007-2023, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pageinspect/heapfuncs.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/htup_details.h" +#include "access/relation.h" +#include "catalog/pg_am_d.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "mb/pg_wchar.h" +#include "miscadmin.h" +#include "pageinspect.h" +#include "port/pg_bitutils.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/rel.h" + +/* + * It's not supported to create tuples with oids anymore, but when pg_upgrade + * was used to upgrade from an older version, tuples might still have an + * oid. Seems worthwhile to display that. + */ +#define HeapTupleHeaderGetOidOld(tup) \ +( \ + ((tup)->t_infomask & HEAP_HASOID_OLD) ? \ + *((Oid *) ((char *)(tup) + (tup)->t_hoff - sizeof(Oid))) \ + : \ + InvalidOid \ +) + + +/* + * bits_to_text + * + * Converts a bits8-array of 'len' bits to a human-readable + * c-string representation. + */ +static char * +bits_to_text(bits8 *bits, int len) +{ + int i; + char *str; + + str = palloc(len + 1); + + for (i = 0; i < len; i++) + str[i] = (bits[(i / 8)] & (1 << (i % 8))) ? '1' : '0'; + + str[i] = '\0'; + + return str; +} + + +/* + * text_to_bits + * + * Converts a c-string representation of bits into a bits8-array. This is + * the reverse operation of previous routine. + */ +static bits8 * +text_to_bits(char *str, int len) +{ + bits8 *bits; + int off = 0; + char byte = 0; + + bits = palloc(len + 1); + + while (off < len) + { + if (off % 8 == 0) + byte = 0; + + if ((str[off] == '0') || (str[off] == '1')) + byte = byte | ((str[off] - '0') << off % 8); + else + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("invalid character \"%.*s\" in t_bits string", + pg_mblen(str + off), str + off))); + + if (off % 8 == 7) + bits[off / 8] = byte; + + off++; + } + + return bits; +} + +/* + * heap_page_items + * + * Allows inspection of line pointers and tuple headers of a heap page. + */ +PG_FUNCTION_INFO_V1(heap_page_items); + +typedef struct heap_page_items_state +{ + TupleDesc tupd; + Page page; + uint16 offset; +} heap_page_items_state; + +Datum +heap_page_items(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + heap_page_items_state *inter_call_data = NULL; + FuncCallContext *fctx; + int raw_page_size; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; + + if (SRF_IS_FIRSTCALL()) + { + TupleDesc tupdesc; + MemoryContext mctx; + + if (raw_page_size < SizeOfPageHeaderData) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page too small (%d bytes)", raw_page_size))); + + fctx = SRF_FIRSTCALL_INIT(); + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + inter_call_data = palloc(sizeof(heap_page_items_state)); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + inter_call_data->tupd = tupdesc; + + inter_call_data->offset = FirstOffsetNumber; + inter_call_data->page = VARDATA(raw_page); + + fctx->max_calls = PageGetMaxOffsetNumber(inter_call_data->page); + fctx->user_fctx = inter_call_data; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + inter_call_data = fctx->user_fctx; + + if (fctx->call_cntr < fctx->max_calls) + { + Page page = inter_call_data->page; + HeapTuple resultTuple; + Datum result; + ItemId id; + Datum values[14]; + bool nulls[14]; + uint16 lp_offset; + uint16 lp_flags; + uint16 lp_len; + + memset(nulls, 0, sizeof(nulls)); + + /* Extract information from the line pointer */ + + id = PageGetItemId(page, inter_call_data->offset); + + lp_offset = ItemIdGetOffset(id); + lp_flags = ItemIdGetFlags(id); + lp_len = ItemIdGetLength(id); + + values[0] = UInt16GetDatum(inter_call_data->offset); + values[1] = UInt16GetDatum(lp_offset); + values[2] = UInt16GetDatum(lp_flags); + values[3] = UInt16GetDatum(lp_len); + + /* + * We do just enough validity checking to make sure we don't reference + * data outside the page passed to us. The page could be corrupt in + * many other ways, but at least we won't crash. + */ + if (ItemIdHasStorage(id) && + lp_len >= MinHeapTupleSize && + lp_offset == MAXALIGN(lp_offset) && + lp_offset + lp_len <= raw_page_size) + { + HeapTupleHeader tuphdr; + bytea *tuple_data_bytea; + int tuple_data_len; + + /* Extract information from the tuple header */ + + tuphdr = (HeapTupleHeader) PageGetItem(page, id); + + values[4] = UInt32GetDatum(HeapTupleHeaderGetRawXmin(tuphdr)); + values[5] = UInt32GetDatum(HeapTupleHeaderGetRawXmax(tuphdr)); + /* shared with xvac */ + values[6] = UInt32GetDatum(HeapTupleHeaderGetRawCommandId(tuphdr)); + values[7] = PointerGetDatum(&tuphdr->t_ctid); + values[8] = UInt32GetDatum(tuphdr->t_infomask2); + values[9] = UInt32GetDatum(tuphdr->t_infomask); + values[10] = UInt8GetDatum(tuphdr->t_hoff); + + /* Copy raw tuple data into bytea attribute */ + tuple_data_len = lp_len - tuphdr->t_hoff; + tuple_data_bytea = (bytea *) palloc(tuple_data_len + VARHDRSZ); + SET_VARSIZE(tuple_data_bytea, tuple_data_len + VARHDRSZ); + memcpy(VARDATA(tuple_data_bytea), (char *) tuphdr + tuphdr->t_hoff, + tuple_data_len); + values[13] = PointerGetDatum(tuple_data_bytea); + + /* + * We already checked that the item is completely within the raw + * page passed to us, with the length given in the line pointer. + * Let's check that t_hoff doesn't point over lp_len, before using + * it to access t_bits and oid. + */ + if (tuphdr->t_hoff >= SizeofHeapTupleHeader && + tuphdr->t_hoff <= lp_len && + tuphdr->t_hoff == MAXALIGN(tuphdr->t_hoff)) + { + if (tuphdr->t_infomask & HEAP_HASNULL) + { + int bits_len; + + bits_len = + BITMAPLEN(HeapTupleHeaderGetNatts(tuphdr)) * BITS_PER_BYTE; + values[11] = CStringGetTextDatum(bits_to_text(tuphdr->t_bits, bits_len)); + } + else + nulls[11] = true; + + if (tuphdr->t_infomask & HEAP_HASOID_OLD) + values[12] = HeapTupleHeaderGetOidOld(tuphdr); + else + nulls[12] = true; + } + else + { + nulls[11] = true; + nulls[12] = true; + } + } + else + { + /* + * The line pointer is not used, or it's invalid. Set the rest of + * the fields to NULL + */ + int i; + + for (i = 4; i <= 13; i++) + nulls[i] = true; + } + + /* Build and return the result tuple. */ + resultTuple = heap_form_tuple(inter_call_data->tupd, values, nulls); + result = HeapTupleGetDatum(resultTuple); + + inter_call_data->offset++; + + SRF_RETURN_NEXT(fctx, result); + } + else + SRF_RETURN_DONE(fctx); +} + +/* + * tuple_data_split_internal + * + * Split raw tuple data taken directly from a page into an array of bytea + * elements. This routine does a lookup on NULL values and creates array + * elements accordingly. This is a reimplementation of nocachegetattr() + * in heaptuple.c simplified for educational purposes. + */ +static Datum +tuple_data_split_internal(Oid relid, char *tupdata, + uint16 tupdata_len, uint16 t_infomask, + uint16 t_infomask2, bits8 *t_bits, + bool do_detoast) +{ + ArrayBuildState *raw_attrs; + int nattrs; + int i; + int off = 0; + Relation rel; + TupleDesc tupdesc; + + /* Get tuple descriptor from relation OID */ + rel = relation_open(relid, AccessShareLock); + tupdesc = RelationGetDescr(rel); + + raw_attrs = initArrayResult(BYTEAOID, CurrentMemoryContext, false); + nattrs = tupdesc->natts; + + if (rel->rd_rel->relam != HEAP_TABLE_AM_OID) + ereport(ERROR, (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("only heap AM is supported"))); + + if (nattrs < (t_infomask2 & HEAP_NATTS_MASK)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("number of attributes in tuple header is greater than number of attributes in tuple descriptor"))); + + for (i = 0; i < nattrs; i++) + { + Form_pg_attribute attr; + bool is_null; + bytea *attr_data = NULL; + + attr = TupleDescAttr(tupdesc, i); + + /* + * Tuple header can specify fewer attributes than tuple descriptor as + * ALTER TABLE ADD COLUMN without DEFAULT keyword does not actually + * change tuples in pages, so attributes with numbers greater than + * (t_infomask2 & HEAP_NATTS_MASK) should be treated as NULL. + */ + if (i >= (t_infomask2 & HEAP_NATTS_MASK)) + is_null = true; + else + is_null = (t_infomask & HEAP_HASNULL) && att_isnull(i, t_bits); + + if (!is_null) + { + int len; + + if (attr->attlen == -1) + { + off = att_align_pointer(off, attr->attalign, -1, + tupdata + off); + + /* + * As VARSIZE_ANY throws an exception if it can't properly + * detect the type of external storage in macros VARTAG_SIZE, + * this check is repeated to have a nicer error handling. + */ + if (VARATT_IS_EXTERNAL(tupdata + off) && + !VARATT_IS_EXTERNAL_ONDISK(tupdata + off) && + !VARATT_IS_EXTERNAL_INDIRECT(tupdata + off)) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("first byte of varlena attribute is incorrect for attribute %d", i))); + + len = VARSIZE_ANY(tupdata + off); + } + else + { + off = att_align_nominal(off, attr->attalign); + len = attr->attlen; + } + + if (tupdata_len < off + len) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("unexpected end of tuple data"))); + + if (attr->attlen == -1 && do_detoast) + attr_data = pg_detoast_datum_copy((struct varlena *) (tupdata + off)); + else + { + attr_data = (bytea *) palloc(len + VARHDRSZ); + SET_VARSIZE(attr_data, len + VARHDRSZ); + memcpy(VARDATA(attr_data), tupdata + off, len); + } + + off = att_addlength_pointer(off, attr->attlen, + tupdata + off); + } + + raw_attrs = accumArrayResult(raw_attrs, PointerGetDatum(attr_data), + is_null, BYTEAOID, CurrentMemoryContext); + if (attr_data) + pfree(attr_data); + } + + if (tupdata_len != off) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("end of tuple reached without looking at all its data"))); + + relation_close(rel, AccessShareLock); + + return makeArrayResult(raw_attrs, CurrentMemoryContext); +} + +/* + * tuple_data_split + * + * Split raw tuple data taken directly from page into distinct elements + * taking into account null values. + */ +PG_FUNCTION_INFO_V1(tuple_data_split); + +Datum +tuple_data_split(PG_FUNCTION_ARGS) +{ + Oid relid; + bytea *raw_data; + uint16 t_infomask; + uint16 t_infomask2; + char *t_bits_str; + bool do_detoast = false; + bits8 *t_bits = NULL; + Datum res; + + relid = PG_GETARG_OID(0); + raw_data = PG_ARGISNULL(1) ? NULL : PG_GETARG_BYTEA_P(1); + t_infomask = PG_GETARG_INT16(2); + t_infomask2 = PG_GETARG_INT16(3); + t_bits_str = PG_ARGISNULL(4) ? NULL : + text_to_cstring(PG_GETARG_TEXT_PP(4)); + + if (PG_NARGS() >= 6) + do_detoast = PG_GETARG_BOOL(5); + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + if (!raw_data) + PG_RETURN_NULL(); + + /* + * Convert t_bits string back to the bits8 array as represented in the + * tuple header. + */ + if (t_infomask & HEAP_HASNULL) + { + size_t bits_str_len; + size_t bits_len; + + bits_len = BITMAPLEN(t_infomask2 & HEAP_NATTS_MASK) * BITS_PER_BYTE; + if (!t_bits_str) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("t_bits string must not be NULL"))); + + bits_str_len = strlen(t_bits_str); + if (bits_len != bits_str_len) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("unexpected length of t_bits string: %zu, expected %zu", + bits_str_len, bits_len))); + + /* do the conversion */ + t_bits = text_to_bits(t_bits_str, bits_str_len); + } + else + { + if (t_bits_str) + ereport(ERROR, + (errcode(ERRCODE_DATA_CORRUPTED), + errmsg("t_bits string is expected to be NULL, but instead it is %zu bytes long", + strlen(t_bits_str)))); + } + + /* Split tuple data */ + res = tuple_data_split_internal(relid, (char *) raw_data + VARHDRSZ, + VARSIZE(raw_data) - VARHDRSZ, + t_infomask, t_infomask2, t_bits, + do_detoast); + + if (t_bits) + pfree(t_bits); + + PG_RETURN_DATUM(res); +} + +/* + * heap_tuple_infomask_flags + * + * Decode into a human-readable format t_infomask and t_infomask2 associated + * to a tuple. All the flags are described in access/htup_details.h. + */ +PG_FUNCTION_INFO_V1(heap_tuple_infomask_flags); + +Datum +heap_tuple_infomask_flags(PG_FUNCTION_ARGS) +{ +#define HEAP_TUPLE_INFOMASK_COLS 2 + Datum values[HEAP_TUPLE_INFOMASK_COLS] = {0}; + bool nulls[HEAP_TUPLE_INFOMASK_COLS] = {0}; + uint16 t_infomask = PG_GETARG_INT16(0); + uint16 t_infomask2 = PG_GETARG_INT16(1); + int cnt = 0; + ArrayType *a; + int bitcnt; + Datum *flags; + TupleDesc tupdesc; + HeapTuple tuple; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + bitcnt = pg_popcount((const char *) &t_infomask, sizeof(uint16)) + + pg_popcount((const char *) &t_infomask2, sizeof(uint16)); + + /* If no flags, return a set of empty arrays */ + if (bitcnt <= 0) + { + values[0] = PointerGetDatum(construct_empty_array(TEXTOID)); + values[1] = PointerGetDatum(construct_empty_array(TEXTOID)); + tuple = heap_form_tuple(tupdesc, values, nulls); + PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); + } + + /* build set of raw flags */ + flags = (Datum *) palloc0(sizeof(Datum) * bitcnt); + + /* decode t_infomask */ + if ((t_infomask & HEAP_HASNULL) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_HASNULL"); + if ((t_infomask & HEAP_HASVARWIDTH) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_HASVARWIDTH"); + if ((t_infomask & HEAP_HASEXTERNAL) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_HASEXTERNAL"); + if ((t_infomask & HEAP_HASOID_OLD) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_HASOID_OLD"); + if ((t_infomask & HEAP_XMAX_KEYSHR_LOCK) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_KEYSHR_LOCK"); + if ((t_infomask & HEAP_COMBOCID) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_COMBOCID"); + if ((t_infomask & HEAP_XMAX_EXCL_LOCK) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_EXCL_LOCK"); + if ((t_infomask & HEAP_XMAX_LOCK_ONLY) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_LOCK_ONLY"); + if ((t_infomask & HEAP_XMIN_COMMITTED) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_COMMITTED"); + if ((t_infomask & HEAP_XMIN_INVALID) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_INVALID"); + if ((t_infomask & HEAP_XMAX_COMMITTED) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_COMMITTED"); + if ((t_infomask & HEAP_XMAX_INVALID) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_INVALID"); + if ((t_infomask & HEAP_XMAX_IS_MULTI) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_IS_MULTI"); + if ((t_infomask & HEAP_UPDATED) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_UPDATED"); + if ((t_infomask & HEAP_MOVED_OFF) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_OFF"); + if ((t_infomask & HEAP_MOVED_IN) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_MOVED_IN"); + + /* decode t_infomask2 */ + if ((t_infomask2 & HEAP_KEYS_UPDATED) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_KEYS_UPDATED"); + if ((t_infomask2 & HEAP_HOT_UPDATED) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_HOT_UPDATED"); + if ((t_infomask2 & HEAP_ONLY_TUPLE) != 0) + flags[cnt++] = CStringGetTextDatum("HEAP_ONLY_TUPLE"); + + /* build value */ + Assert(cnt <= bitcnt); + a = construct_array_builtin(flags, cnt, TEXTOID); + values[0] = PointerGetDatum(a); + + /* + * Build set of combined flags. Use the same array as previously, this + * keeps the code simple. + */ + cnt = 0; + MemSet(flags, 0, sizeof(Datum) * bitcnt); + + /* decode combined masks of t_infomask */ + if ((t_infomask & HEAP_XMAX_SHR_LOCK) == HEAP_XMAX_SHR_LOCK) + flags[cnt++] = CStringGetTextDatum("HEAP_XMAX_SHR_LOCK"); + if ((t_infomask & HEAP_XMIN_FROZEN) == HEAP_XMIN_FROZEN) + flags[cnt++] = CStringGetTextDatum("HEAP_XMIN_FROZEN"); + if ((t_infomask & HEAP_MOVED) == HEAP_MOVED) + flags[cnt++] = CStringGetTextDatum("HEAP_MOVED"); + + /* Build an empty array if there are no combined flags */ + if (cnt == 0) + a = construct_empty_array(TEXTOID); + else + a = construct_array_builtin(flags, cnt, TEXTOID); + pfree(flags); + values[1] = PointerGetDatum(a); + + /* Returns the record as Datum */ + tuple = heap_form_tuple(tupdesc, values, nulls); + PG_RETURN_DATUM(HeapTupleGetDatum(tuple)); +} |