diff options
Diffstat (limited to '')
-rw-r--r-- | contrib/pageinspect/brinfuncs.c | 410 |
1 files changed, 410 insertions, 0 deletions
diff --git a/contrib/pageinspect/brinfuncs.c b/contrib/pageinspect/brinfuncs.c new file mode 100644 index 0000000..fb32d74 --- /dev/null +++ b/contrib/pageinspect/brinfuncs.c @@ -0,0 +1,410 @@ +/* + * brinfuncs.c + * Functions to investigate BRIN indexes + * + * Copyright (c) 2014-2020, PostgreSQL Global Development Group + * + * IDENTIFICATION + * contrib/pageinspect/brinfuncs.c + */ +#include "postgres.h" + +#include "access/brin.h" +#include "access/brin_internal.h" +#include "access/brin_page.h" +#include "access/brin_revmap.h" +#include "access/brin_tuple.h" +#include "access/htup_details.h" +#include "catalog/index.h" +#include "catalog/pg_type.h" +#include "funcapi.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "pageinspect.h" +#include "utils/array.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" + +PG_FUNCTION_INFO_V1(brin_page_type); +PG_FUNCTION_INFO_V1(brin_page_items); +PG_FUNCTION_INFO_V1(brin_metapage_info); +PG_FUNCTION_INFO_V1(brin_revmap_data); + +typedef struct brin_column_state +{ + int nstored; + FmgrInfo outputFn[FLEXIBLE_ARRAY_MEMBER]; +} brin_column_state; + + +static Page verify_brin_page(bytea *raw_page, uint16 type, + const char *strtype); + +Datum +brin_page_type(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + Page page = VARDATA(raw_page); + int raw_page_size; + char *type; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; + + if (raw_page_size != BLCKSZ) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page too small"), + errdetail("Expected size %d, got %d", + BLCKSZ, raw_page_size))); + + switch (BrinPageType(page)) + { + case BRIN_PAGETYPE_META: + type = "meta"; + break; + case BRIN_PAGETYPE_REVMAP: + type = "revmap"; + break; + case BRIN_PAGETYPE_REGULAR: + type = "regular"; + break; + default: + type = psprintf("unknown (%02x)", BrinPageType(page)); + break; + } + + PG_RETURN_TEXT_P(cstring_to_text(type)); +} + +/* + * Verify that the given bytea contains a BRIN page of the indicated page + * type, or die in the attempt. A pointer to the page is returned. + */ +static Page +verify_brin_page(bytea *raw_page, uint16 type, const char *strtype) +{ + Page page; + int raw_page_size; + + raw_page_size = VARSIZE(raw_page) - VARHDRSZ; + + if (raw_page_size != BLCKSZ) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("input page too small"), + errdetail("Expected size %d, got %d", + BLCKSZ, raw_page_size))); + + page = VARDATA(raw_page); + + /* verify the special space says this page is what we want */ + if (BrinPageType(page) != type) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("page is not a BRIN page of type \"%s\"", strtype), + errdetail("Expected special type %08x, got %08x.", + type, BrinPageType(page)))); + + return page; +} + + +/* + * Extract all item values from a BRIN index page + * + * Usage: SELECT * FROM brin_page_items(get_raw_page('idx', 1), 'idx'::regclass); + */ +Datum +brin_page_items(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + Oid indexRelid = PG_GETARG_OID(1); + ReturnSetInfo *rsinfo = (ReturnSetInfo *) fcinfo->resultinfo; + TupleDesc tupdesc; + MemoryContext oldcontext; + Tuplestorestate *tupstore; + Relation indexRel; + brin_column_state **columns; + BrinDesc *bdesc; + BrinMemTuple *dtup; + Page page; + OffsetNumber offset; + AttrNumber attno; + bool unusedItem; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + /* check to see if caller supports us returning a tuplestore */ + if (rsinfo == NULL || !IsA(rsinfo, ReturnSetInfo)) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("set-valued function called in context that cannot accept a set"))); + if (!(rsinfo->allowedModes & SFRM_Materialize) || + rsinfo->expectedDesc == NULL) + ereport(ERROR, + (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), + errmsg("materialize mode required, but it is not allowed in this context"))); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + + /* Build tuplestore to hold the result rows */ + oldcontext = MemoryContextSwitchTo(rsinfo->econtext->ecxt_per_query_memory); + + tupstore = tuplestore_begin_heap(true, false, work_mem); + rsinfo->returnMode = SFRM_Materialize; + rsinfo->setResult = tupstore; + rsinfo->setDesc = tupdesc; + + MemoryContextSwitchTo(oldcontext); + + indexRel = index_open(indexRelid, AccessShareLock); + bdesc = brin_build_desc(indexRel); + + /* minimally verify the page we got */ + page = verify_brin_page(raw_page, BRIN_PAGETYPE_REGULAR, "regular"); + + /* + * Initialize output functions for all indexed datatypes; simplifies + * calling them later. + */ + columns = palloc(sizeof(brin_column_state *) * RelationGetDescr(indexRel)->natts); + for (attno = 1; attno <= bdesc->bd_tupdesc->natts; attno++) + { + Oid output; + bool isVarlena; + BrinOpcInfo *opcinfo; + int i; + brin_column_state *column; + + opcinfo = bdesc->bd_info[attno - 1]; + column = palloc(offsetof(brin_column_state, outputFn) + + sizeof(FmgrInfo) * opcinfo->oi_nstored); + + column->nstored = opcinfo->oi_nstored; + for (i = 0; i < opcinfo->oi_nstored; i++) + { + getTypeOutputInfo(opcinfo->oi_typcache[i]->type_id, &output, &isVarlena); + fmgr_info(output, &column->outputFn[i]); + } + + columns[attno - 1] = column; + } + + offset = FirstOffsetNumber; + unusedItem = false; + dtup = NULL; + for (;;) + { + Datum values[7]; + bool nulls[7]; + + /* + * This loop is called once for every attribute of every tuple in the + * page. At the start of a tuple, we get a NULL dtup; that's our + * signal for obtaining and decoding the next one. If that's not the + * case, we output the next attribute. + */ + if (dtup == NULL) + { + ItemId itemId; + + /* verify item status: if there's no data, we can't decode */ + itemId = PageGetItemId(page, offset); + if (ItemIdIsUsed(itemId)) + { + dtup = brin_deform_tuple(bdesc, + (BrinTuple *) PageGetItem(page, itemId), + NULL); + attno = 1; + unusedItem = false; + } + else + unusedItem = true; + } + else + attno++; + + MemSet(nulls, 0, sizeof(nulls)); + + if (unusedItem) + { + values[0] = UInt16GetDatum(offset); + nulls[1] = true; + nulls[2] = true; + nulls[3] = true; + nulls[4] = true; + nulls[5] = true; + nulls[6] = true; + } + else + { + int att = attno - 1; + + values[0] = UInt16GetDatum(offset); + values[1] = UInt32GetDatum(dtup->bt_blkno); + values[2] = UInt16GetDatum(attno); + values[3] = BoolGetDatum(dtup->bt_columns[att].bv_allnulls); + values[4] = BoolGetDatum(dtup->bt_columns[att].bv_hasnulls); + values[5] = BoolGetDatum(dtup->bt_placeholder); + if (!dtup->bt_columns[att].bv_allnulls) + { + BrinValues *bvalues = &dtup->bt_columns[att]; + StringInfoData s; + bool first; + int i; + + initStringInfo(&s); + appendStringInfoChar(&s, '{'); + + first = true; + for (i = 0; i < columns[att]->nstored; i++) + { + char *val; + + if (!first) + appendStringInfoString(&s, " .. "); + first = false; + val = OutputFunctionCall(&columns[att]->outputFn[i], + bvalues->bv_values[i]); + appendStringInfoString(&s, val); + pfree(val); + } + appendStringInfoChar(&s, '}'); + + values[6] = CStringGetTextDatum(s.data); + pfree(s.data); + } + else + { + nulls[6] = true; + } + } + + tuplestore_putvalues(tupstore, tupdesc, values, nulls); + + /* + * If the item was unused, jump straight to the next one; otherwise, + * the only cleanup needed here is to set our signal to go to the next + * tuple in the following iteration, by freeing the current one. + */ + if (unusedItem) + offset = OffsetNumberNext(offset); + else if (attno >= bdesc->bd_tupdesc->natts) + { + pfree(dtup); + dtup = NULL; + offset = OffsetNumberNext(offset); + } + + /* + * If we're beyond the end of the page, we're done. + */ + if (offset > PageGetMaxOffsetNumber(page)) + break; + } + + /* clean up and return the tuplestore */ + brin_free_desc(bdesc); + tuplestore_donestoring(tupstore); + index_close(indexRel, AccessShareLock); + + return (Datum) 0; +} + +Datum +brin_metapage_info(PG_FUNCTION_ARGS) +{ + bytea *raw_page = PG_GETARG_BYTEA_P(0); + Page page; + BrinMetaPageData *meta; + TupleDesc tupdesc; + Datum values[4]; + bool nulls[4]; + HeapTuple htup; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + page = verify_brin_page(raw_page, BRIN_PAGETYPE_META, "metapage"); + + /* Build a tuple descriptor for our result type */ + if (get_call_result_type(fcinfo, NULL, &tupdesc) != TYPEFUNC_COMPOSITE) + elog(ERROR, "return type must be a row type"); + tupdesc = BlessTupleDesc(tupdesc); + + /* Extract values from the metapage */ + meta = (BrinMetaPageData *) PageGetContents(page); + MemSet(nulls, 0, sizeof(nulls)); + values[0] = CStringGetTextDatum(psprintf("0x%08X", meta->brinMagic)); + values[1] = Int32GetDatum(meta->brinVersion); + values[2] = Int32GetDatum(meta->pagesPerRange); + values[3] = Int64GetDatum(meta->lastRevmapPage); + + htup = heap_form_tuple(tupdesc, values, nulls); + + PG_RETURN_DATUM(HeapTupleGetDatum(htup)); +} + +/* + * Return the TID array stored in a BRIN revmap page + */ +Datum +brin_revmap_data(PG_FUNCTION_ARGS) +{ + struct + { + ItemPointerData *tids; + int idx; + } *state; + FuncCallContext *fctx; + + if (!superuser()) + ereport(ERROR, + (errcode(ERRCODE_INSUFFICIENT_PRIVILEGE), + errmsg("must be superuser to use raw page functions"))); + + if (SRF_IS_FIRSTCALL()) + { + bytea *raw_page = PG_GETARG_BYTEA_P(0); + MemoryContext mctx; + Page page; + + /* minimally verify the page we got */ + page = verify_brin_page(raw_page, BRIN_PAGETYPE_REVMAP, "revmap"); + + /* create a function context for cross-call persistence */ + fctx = SRF_FIRSTCALL_INIT(); + + /* switch to memory context appropriate for multiple function calls */ + mctx = MemoryContextSwitchTo(fctx->multi_call_memory_ctx); + + state = palloc(sizeof(*state)); + state->tids = ((RevmapContents *) PageGetContents(page))->rm_tids; + state->idx = 0; + + fctx->user_fctx = state; + + MemoryContextSwitchTo(mctx); + } + + fctx = SRF_PERCALL_SETUP(); + state = fctx->user_fctx; + + if (state->idx < REVMAP_PAGE_MAXITEMS) + SRF_RETURN_NEXT(fctx, PointerGetDatum(&state->tids[state->idx++])); + + SRF_RETURN_DONE(fctx); +} |