diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:17:33 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-05-04 12:17:33 +0000 |
commit | 5e45211a64149b3c659b90ff2de6fa982a5a93ed (patch) | |
tree | 739caf8c461053357daa9f162bef34516c7bf452 /src/backend/access/spgist/spgutils.c | |
parent | Initial commit. (diff) | |
download | postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.tar.xz postgresql-15-5e45211a64149b3c659b90ff2de6fa982a5a93ed.zip |
Adding upstream version 15.5.upstream/15.5
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/backend/access/spgist/spgutils.c')
-rw-r--r-- | src/backend/access/spgist/spgutils.c | 1349 |
1 files changed, 1349 insertions, 0 deletions
diff --git a/src/backend/access/spgist/spgutils.c b/src/backend/access/spgist/spgutils.c new file mode 100644 index 0000000..f2da02e --- /dev/null +++ b/src/backend/access/spgist/spgutils.c @@ -0,0 +1,1349 @@ +/*------------------------------------------------------------------------- + * + * spgutils.c + * various support functions for SP-GiST + * + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * IDENTIFICATION + * src/backend/access/spgist/spgutils.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/amvalidate.h" +#include "access/htup_details.h" +#include "access/reloptions.h" +#include "access/spgist_private.h" +#include "access/toast_compression.h" +#include "access/transam.h" +#include "access/xact.h" +#include "catalog/pg_amop.h" +#include "commands/vacuum.h" +#include "nodes/nodeFuncs.h" +#include "parser/parse_coerce.h" +#include "storage/bufmgr.h" +#include "storage/indexfsm.h" +#include "storage/lmgr.h" +#include "utils/builtins.h" +#include "utils/catcache.h" +#include "utils/index_selfuncs.h" +#include "utils/lsyscache.h" +#include "utils/syscache.h" + + +/* + * SP-GiST handler function: return IndexAmRoutine with access method parameters + * and callbacks. + */ +Datum +spghandler(PG_FUNCTION_ARGS) +{ + IndexAmRoutine *amroutine = makeNode(IndexAmRoutine); + + amroutine->amstrategies = 0; + amroutine->amsupport = SPGISTNProc; + amroutine->amoptsprocnum = SPGIST_OPTIONS_PROC; + amroutine->amcanorder = false; + amroutine->amcanorderbyop = true; + amroutine->amcanbackward = false; + amroutine->amcanunique = false; + amroutine->amcanmulticol = false; + amroutine->amoptionalkey = true; + amroutine->amsearcharray = false; + amroutine->amsearchnulls = true; + amroutine->amstorage = true; + amroutine->amclusterable = false; + amroutine->ampredlocks = false; + amroutine->amcanparallel = false; + amroutine->amcaninclude = true; + amroutine->amusemaintenanceworkmem = false; + amroutine->amparallelvacuumoptions = + VACUUM_OPTION_PARALLEL_BULKDEL | VACUUM_OPTION_PARALLEL_COND_CLEANUP; + amroutine->amkeytype = InvalidOid; + + amroutine->ambuild = spgbuild; + amroutine->ambuildempty = spgbuildempty; + amroutine->aminsert = spginsert; + amroutine->ambulkdelete = spgbulkdelete; + amroutine->amvacuumcleanup = spgvacuumcleanup; + amroutine->amcanreturn = spgcanreturn; + amroutine->amcostestimate = spgcostestimate; + amroutine->amoptions = spgoptions; + amroutine->amproperty = spgproperty; + amroutine->ambuildphasename = NULL; + amroutine->amvalidate = spgvalidate; + amroutine->amadjustmembers = spgadjustmembers; + amroutine->ambeginscan = spgbeginscan; + amroutine->amrescan = spgrescan; + amroutine->amgettuple = spggettuple; + amroutine->amgetbitmap = spggetbitmap; + amroutine->amendscan = spgendscan; + amroutine->ammarkpos = NULL; + amroutine->amrestrpos = NULL; + amroutine->amestimateparallelscan = NULL; + amroutine->aminitparallelscan = NULL; + amroutine->amparallelrescan = NULL; + + PG_RETURN_POINTER(amroutine); +} + +/* + * GetIndexInputType + * Determine the nominal input data type for an index column + * + * We define the "nominal" input type as the associated opclass's opcintype, + * or if that is a polymorphic type, the base type of the heap column or + * expression that is the index's input. The reason for preferring the + * opcintype is that non-polymorphic opclasses probably don't want to hear + * about binary-compatible input types. For instance, if a text opclass + * is being used with a varchar heap column, we want to report "text" not + * "varchar". Likewise, opclasses don't want to hear about domain types, + * so if we do consult the actual input type, we make sure to flatten domains. + * + * At some point maybe this should go somewhere else, but it's not clear + * if any other index AMs have a use for it. + */ +static Oid +GetIndexInputType(Relation index, AttrNumber indexcol) +{ + Oid opcintype; + AttrNumber heapcol; + List *indexprs; + ListCell *indexpr_item; + + Assert(index->rd_index != NULL); + Assert(indexcol > 0 && indexcol <= index->rd_index->indnkeyatts); + opcintype = index->rd_opcintype[indexcol - 1]; + if (!IsPolymorphicType(opcintype)) + return opcintype; + heapcol = index->rd_index->indkey.values[indexcol - 1]; + if (heapcol != 0) /* Simple index column? */ + return getBaseType(get_atttype(index->rd_index->indrelid, heapcol)); + + /* + * If the index expressions are already cached, skip calling + * RelationGetIndexExpressions, as it will make a copy which is overkill. + * We're not going to modify the trees, and we're not going to do anything + * that would invalidate the relcache entry before we're done. + */ + if (index->rd_indexprs) + indexprs = index->rd_indexprs; + else + indexprs = RelationGetIndexExpressions(index); + indexpr_item = list_head(indexprs); + for (int i = 1; i <= index->rd_index->indnkeyatts; i++) + { + if (index->rd_index->indkey.values[i - 1] == 0) + { + /* expression column */ + if (indexpr_item == NULL) + elog(ERROR, "wrong number of index expressions"); + if (i == indexcol) + return getBaseType(exprType((Node *) lfirst(indexpr_item))); + indexpr_item = lnext(indexprs, indexpr_item); + } + } + elog(ERROR, "wrong number of index expressions"); + return InvalidOid; /* keep compiler quiet */ +} + +/* Fill in a SpGistTypeDesc struct with info about the specified data type */ +static void +fillTypeDesc(SpGistTypeDesc *desc, Oid type) +{ + HeapTuple tp; + Form_pg_type typtup; + + desc->type = type; + tp = SearchSysCache1(TYPEOID, ObjectIdGetDatum(type)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for type %u", type); + typtup = (Form_pg_type) GETSTRUCT(tp); + desc->attlen = typtup->typlen; + desc->attbyval = typtup->typbyval; + desc->attalign = typtup->typalign; + desc->attstorage = typtup->typstorage; + ReleaseSysCache(tp); +} + +/* + * Fetch local cache of AM-specific info about the index, initializing it + * if necessary + */ +SpGistCache * +spgGetCache(Relation index) +{ + SpGistCache *cache; + + if (index->rd_amcache == NULL) + { + Oid atttype; + spgConfigIn in; + FmgrInfo *procinfo; + Buffer metabuffer; + SpGistMetaPageData *metadata; + + cache = MemoryContextAllocZero(index->rd_indexcxt, + sizeof(SpGistCache)); + + /* SPGiST must have one key column and can also have INCLUDE columns */ + Assert(IndexRelationGetNumberOfKeyAttributes(index) == 1); + Assert(IndexRelationGetNumberOfAttributes(index) <= INDEX_MAX_KEYS); + + /* + * Get the actual (well, nominal) data type of the key column. We + * pass this to the opclass config function so that polymorphic + * opclasses are possible. + */ + atttype = GetIndexInputType(index, spgKeyColumn + 1); + + /* Call the config function to get config info for the opclass */ + in.attType = atttype; + + procinfo = index_getprocinfo(index, 1, SPGIST_CONFIG_PROC); + FunctionCall2Coll(procinfo, + index->rd_indcollation[spgKeyColumn], + PointerGetDatum(&in), + PointerGetDatum(&cache->config)); + + /* + * If leafType isn't specified, use the declared index column type, + * which index.c will have derived from the opclass's opcintype. + * (Although we now make spgvalidate.c warn if these aren't the same, + * old user-defined opclasses may not set the STORAGE parameter + * correctly, so believe leafType if it's given.) + */ + if (!OidIsValid(cache->config.leafType)) + { + cache->config.leafType = + TupleDescAttr(RelationGetDescr(index), spgKeyColumn)->atttypid; + + /* + * If index column type is binary-coercible to atttype (for + * example, it's a domain over atttype), treat it as plain atttype + * to avoid thinking we need to compress. + */ + if (cache->config.leafType != atttype && + IsBinaryCoercible(cache->config.leafType, atttype)) + cache->config.leafType = atttype; + } + + /* Get the information we need about each relevant datatype */ + fillTypeDesc(&cache->attType, atttype); + + if (cache->config.leafType != atttype) + { + if (!OidIsValid(index_getprocid(index, 1, SPGIST_COMPRESS_PROC))) + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("compress method must be defined when leaf type is different from input type"))); + + fillTypeDesc(&cache->attLeafType, cache->config.leafType); + } + else + { + /* Save lookups in this common case */ + cache->attLeafType = cache->attType; + } + + fillTypeDesc(&cache->attPrefixType, cache->config.prefixType); + fillTypeDesc(&cache->attLabelType, cache->config.labelType); + + /* Last, get the lastUsedPages data from the metapage */ + metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + LockBuffer(metabuffer, BUFFER_LOCK_SHARE); + + metadata = SpGistPageGetMeta(BufferGetPage(metabuffer)); + + if (metadata->magicNumber != SPGIST_MAGIC_NUMBER) + elog(ERROR, "index \"%s\" is not an SP-GiST index", + RelationGetRelationName(index)); + + cache->lastUsedPages = metadata->lastUsedPages; + + UnlockReleaseBuffer(metabuffer); + + index->rd_amcache = (void *) cache; + } + else + { + /* assume it's up to date */ + cache = (SpGistCache *) index->rd_amcache; + } + + return cache; +} + +/* + * Compute a tuple descriptor for leaf tuples or index-only-scan result tuples. + * + * We can use the relcache's tupdesc as-is in many cases, and it's always + * OK so far as any INCLUDE columns are concerned. However, the entry for + * the key column has to match leafType in the first case or attType in the + * second case. While the relcache's tupdesc *should* show leafType, this + * might not hold for legacy user-defined opclasses, since before v14 they + * were not allowed to declare their true storage type in CREATE OPCLASS. + * Also, attType can be different from what is in the relcache. + * + * This function gives back either a pointer to the relcache's tupdesc + * if that is suitable, or a palloc'd copy that's been adjusted to match + * the specified key column type. We can avoid doing any catalog lookups + * here by insisting that the caller pass an SpGistTypeDesc not just an OID. + */ +TupleDesc +getSpGistTupleDesc(Relation index, SpGistTypeDesc *keyType) +{ + TupleDesc outTupDesc; + Form_pg_attribute att; + + if (keyType->type == + TupleDescAttr(RelationGetDescr(index), spgKeyColumn)->atttypid) + outTupDesc = RelationGetDescr(index); + else + { + outTupDesc = CreateTupleDescCopy(RelationGetDescr(index)); + att = TupleDescAttr(outTupDesc, spgKeyColumn); + /* It's sufficient to update the type-dependent fields of the column */ + att->atttypid = keyType->type; + att->atttypmod = -1; + att->attlen = keyType->attlen; + att->attbyval = keyType->attbyval; + att->attalign = keyType->attalign; + att->attstorage = keyType->attstorage; + /* We shouldn't need to bother with making these valid: */ + att->attcompression = InvalidCompressionMethod; + att->attcollation = InvalidOid; + /* In case we changed typlen, we'd better reset following offsets */ + for (int i = spgFirstIncludeColumn; i < outTupDesc->natts; i++) + TupleDescAttr(outTupDesc, i)->attcacheoff = -1; + } + return outTupDesc; +} + +/* Initialize SpGistState for working with the given index */ +void +initSpGistState(SpGistState *state, Relation index) +{ + SpGistCache *cache; + + state->index = index; + + /* Get cached static information about index */ + cache = spgGetCache(index); + + state->config = cache->config; + state->attType = cache->attType; + state->attLeafType = cache->attLeafType; + state->attPrefixType = cache->attPrefixType; + state->attLabelType = cache->attLabelType; + + /* Ensure we have a valid descriptor for leaf tuples */ + state->leafTupDesc = getSpGistTupleDesc(state->index, &state->attLeafType); + + /* Make workspace for constructing dead tuples */ + state->deadTupleStorage = palloc0(SGDTSIZE); + + /* Set XID to use in redirection tuples */ + state->myXid = GetTopTransactionIdIfAny(); + + /* Assume we're not in an index build (spgbuild will override) */ + state->isBuild = false; +} + +/* + * Allocate a new page (either by recycling, or by extending the index file). + * + * The returned buffer is already pinned and exclusive-locked. + * Caller is responsible for initializing the page by calling SpGistInitBuffer. + */ +Buffer +SpGistNewBuffer(Relation index) +{ + Buffer buffer; + bool needLock; + + /* First, try to get a page from FSM */ + for (;;) + { + BlockNumber blkno = GetFreeIndexPage(index); + + if (blkno == InvalidBlockNumber) + break; /* nothing known to FSM */ + + /* + * The fixed pages shouldn't ever be listed in FSM, but just in case + * one is, ignore it. + */ + if (SpGistBlockIsFixed(blkno)) + continue; + + buffer = ReadBuffer(index, blkno); + + /* + * We have to guard against the possibility that someone else already + * recycled this page; the buffer may be locked if so. + */ + if (ConditionalLockBuffer(buffer)) + { + Page page = BufferGetPage(buffer); + + if (PageIsNew(page)) + return buffer; /* OK to use, if never initialized */ + + if (SpGistPageIsDeleted(page) || PageIsEmpty(page)) + return buffer; /* OK to use */ + + LockBuffer(buffer, BUFFER_LOCK_UNLOCK); + } + + /* Can't use it, so release buffer and try again */ + ReleaseBuffer(buffer); + } + + /* Must extend the file */ + needLock = !RELATION_IS_LOCAL(index); + if (needLock) + LockRelationForExtension(index, ExclusiveLock); + + buffer = ReadBuffer(index, P_NEW); + LockBuffer(buffer, BUFFER_LOCK_EXCLUSIVE); + + if (needLock) + UnlockRelationForExtension(index, ExclusiveLock); + + return buffer; +} + +/* + * Update index metapage's lastUsedPages info from local cache, if possible + * + * Updating meta page isn't critical for index working, so + * 1 use ConditionalLockBuffer to improve concurrency + * 2 don't WAL-log metabuffer changes to decrease WAL traffic + */ +void +SpGistUpdateMetaPage(Relation index) +{ + SpGistCache *cache = (SpGistCache *) index->rd_amcache; + + if (cache != NULL) + { + Buffer metabuffer; + + metabuffer = ReadBuffer(index, SPGIST_METAPAGE_BLKNO); + + if (ConditionalLockBuffer(metabuffer)) + { + Page metapage = BufferGetPage(metabuffer); + SpGistMetaPageData *metadata = SpGistPageGetMeta(metapage); + + metadata->lastUsedPages = cache->lastUsedPages; + + /* + * Set pd_lower just past the end of the metadata. This is + * essential, because without doing so, metadata will be lost if + * xlog.c compresses the page. (We must do this here because + * pre-v11 versions of PG did not set the metapage's pd_lower + * correctly, so a pg_upgraded index might contain the wrong + * value.) + */ + ((PageHeader) metapage)->pd_lower = + ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) metapage; + + MarkBufferDirty(metabuffer); + UnlockReleaseBuffer(metabuffer); + } + else + { + ReleaseBuffer(metabuffer); + } + } +} + +/* Macro to select proper element of lastUsedPages cache depending on flags */ +/* Masking flags with SPGIST_CACHED_PAGES is just for paranoia's sake */ +#define GET_LUP(c, f) (&(c)->lastUsedPages.cachedPage[((unsigned int) (f)) % SPGIST_CACHED_PAGES]) + +/* + * Allocate and initialize a new buffer of the type and parity specified by + * flags. The returned buffer is already pinned and exclusive-locked. + * + * When requesting an inner page, if we get one with the wrong parity, + * we just release the buffer and try again. We will get a different page + * because GetFreeIndexPage will have marked the page used in FSM. The page + * is entered in our local lastUsedPages cache, so there's some hope of + * making use of it later in this session, but otherwise we rely on VACUUM + * to eventually re-enter the page in FSM, making it available for recycling. + * Note that such a page does not get marked dirty here, so unless it's used + * fairly soon, the buffer will just get discarded and the page will remain + * as it was on disk. + * + * When we return a buffer to the caller, the page is *not* entered into + * the lastUsedPages cache; we expect the caller will do so after it's taken + * whatever space it will use. This is because after the caller has used up + * some space, the page might have less space than whatever was cached already + * so we'd rather not trash the old cache entry. + */ +static Buffer +allocNewBuffer(Relation index, int flags) +{ + SpGistCache *cache = spgGetCache(index); + uint16 pageflags = 0; + + if (GBUF_REQ_LEAF(flags)) + pageflags |= SPGIST_LEAF; + if (GBUF_REQ_NULLS(flags)) + pageflags |= SPGIST_NULLS; + + for (;;) + { + Buffer buffer; + + buffer = SpGistNewBuffer(index); + SpGistInitBuffer(buffer, pageflags); + + if (pageflags & SPGIST_LEAF) + { + /* Leaf pages have no parity concerns, so just use it */ + return buffer; + } + else + { + BlockNumber blkno = BufferGetBlockNumber(buffer); + int blkFlags = GBUF_INNER_PARITY(blkno); + + if ((flags & GBUF_PARITY_MASK) == blkFlags) + { + /* Page has right parity, use it */ + return buffer; + } + else + { + /* Page has wrong parity, record it in cache and try again */ + if (pageflags & SPGIST_NULLS) + blkFlags |= GBUF_NULLS; + cache->lastUsedPages.cachedPage[blkFlags].blkno = blkno; + cache->lastUsedPages.cachedPage[blkFlags].freeSpace = + PageGetExactFreeSpace(BufferGetPage(buffer)); + UnlockReleaseBuffer(buffer); + } + } + } +} + +/* + * Get a buffer of the type and parity specified by flags, having at least + * as much free space as indicated by needSpace. We use the lastUsedPages + * cache to assign the same buffer previously requested when possible. + * The returned buffer is already pinned and exclusive-locked. + * + * *isNew is set true if the page was initialized here, false if it was + * already valid. + */ +Buffer +SpGistGetBuffer(Relation index, int flags, int needSpace, bool *isNew) +{ + SpGistCache *cache = spgGetCache(index); + SpGistLastUsedPage *lup; + + /* Bail out if even an empty page wouldn't meet the demand */ + if (needSpace > SPGIST_PAGE_CAPACITY) + elog(ERROR, "desired SPGiST tuple size is too big"); + + /* + * If possible, increase the space request to include relation's + * fillfactor. This ensures that when we add unrelated tuples to a page, + * we try to keep 100-fillfactor% available for adding tuples that are + * related to the ones already on it. But fillfactor mustn't cause an + * error for requests that would otherwise be legal. + */ + needSpace += SpGistGetTargetPageFreeSpace(index); + needSpace = Min(needSpace, SPGIST_PAGE_CAPACITY); + + /* Get the cache entry for this flags setting */ + lup = GET_LUP(cache, flags); + + /* If we have nothing cached, just turn it over to allocNewBuffer */ + if (lup->blkno == InvalidBlockNumber) + { + *isNew = true; + return allocNewBuffer(index, flags); + } + + /* fixed pages should never be in cache */ + Assert(!SpGistBlockIsFixed(lup->blkno)); + + /* If cached freeSpace isn't enough, don't bother looking at the page */ + if (lup->freeSpace >= needSpace) + { + Buffer buffer; + Page page; + + buffer = ReadBuffer(index, lup->blkno); + + if (!ConditionalLockBuffer(buffer)) + { + /* + * buffer is locked by another process, so return a new buffer + */ + ReleaseBuffer(buffer); + *isNew = true; + return allocNewBuffer(index, flags); + } + + page = BufferGetPage(buffer); + + if (PageIsNew(page) || SpGistPageIsDeleted(page) || PageIsEmpty(page)) + { + /* OK to initialize the page */ + uint16 pageflags = 0; + + if (GBUF_REQ_LEAF(flags)) + pageflags |= SPGIST_LEAF; + if (GBUF_REQ_NULLS(flags)) + pageflags |= SPGIST_NULLS; + SpGistInitBuffer(buffer, pageflags); + lup->freeSpace = PageGetExactFreeSpace(page) - needSpace; + *isNew = true; + return buffer; + } + + /* + * Check that page is of right type and has enough space. We must + * recheck this since our cache isn't necessarily up to date. + */ + if ((GBUF_REQ_LEAF(flags) ? SpGistPageIsLeaf(page) : !SpGistPageIsLeaf(page)) && + (GBUF_REQ_NULLS(flags) ? SpGistPageStoresNulls(page) : !SpGistPageStoresNulls(page))) + { + int freeSpace = PageGetExactFreeSpace(page); + + if (freeSpace >= needSpace) + { + /* Success, update freespace info and return the buffer */ + lup->freeSpace = freeSpace - needSpace; + *isNew = false; + return buffer; + } + } + + /* + * fallback to allocation of new buffer + */ + UnlockReleaseBuffer(buffer); + } + + /* No success with cache, so return a new buffer */ + *isNew = true; + return allocNewBuffer(index, flags); +} + +/* + * Update lastUsedPages cache when done modifying a page. + * + * We update the appropriate cache entry if it already contained this page + * (its freeSpace is likely obsolete), or if this page has more space than + * whatever we had cached. + */ +void +SpGistSetLastUsedPage(Relation index, Buffer buffer) +{ + SpGistCache *cache = spgGetCache(index); + SpGistLastUsedPage *lup; + int freeSpace; + Page page = BufferGetPage(buffer); + BlockNumber blkno = BufferGetBlockNumber(buffer); + int flags; + + /* Never enter fixed pages (root pages) in cache, though */ + if (SpGistBlockIsFixed(blkno)) + return; + + if (SpGistPageIsLeaf(page)) + flags = GBUF_LEAF; + else + flags = GBUF_INNER_PARITY(blkno); + if (SpGistPageStoresNulls(page)) + flags |= GBUF_NULLS; + + lup = GET_LUP(cache, flags); + + freeSpace = PageGetExactFreeSpace(page); + if (lup->blkno == InvalidBlockNumber || lup->blkno == blkno || + lup->freeSpace < freeSpace) + { + lup->blkno = blkno; + lup->freeSpace = freeSpace; + } +} + +/* + * Initialize an SPGiST page to empty, with specified flags + */ +void +SpGistInitPage(Page page, uint16 f) +{ + SpGistPageOpaque opaque; + + PageInit(page, BLCKSZ, sizeof(SpGistPageOpaqueData)); + opaque = SpGistPageGetOpaque(page); + opaque->flags = f; + opaque->spgist_page_id = SPGIST_PAGE_ID; +} + +/* + * Initialize a buffer's page to empty, with specified flags + */ +void +SpGistInitBuffer(Buffer b, uint16 f) +{ + Assert(BufferGetPageSize(b) == BLCKSZ); + SpGistInitPage(BufferGetPage(b), f); +} + +/* + * Initialize metadata page + */ +void +SpGistInitMetapage(Page page) +{ + SpGistMetaPageData *metadata; + int i; + + SpGistInitPage(page, SPGIST_META); + metadata = SpGistPageGetMeta(page); + memset(metadata, 0, sizeof(SpGistMetaPageData)); + metadata->magicNumber = SPGIST_MAGIC_NUMBER; + + /* initialize last-used-page cache to empty */ + for (i = 0; i < SPGIST_CACHED_PAGES; i++) + metadata->lastUsedPages.cachedPage[i].blkno = InvalidBlockNumber; + + /* + * Set pd_lower just past the end of the metadata. This is essential, + * because without doing so, metadata will be lost if xlog.c compresses + * the page. + */ + ((PageHeader) page)->pd_lower = + ((char *) metadata + sizeof(SpGistMetaPageData)) - (char *) page; +} + +/* + * reloptions processing for SPGiST + */ +bytea * +spgoptions(Datum reloptions, bool validate) +{ + static const relopt_parse_elt tab[] = { + {"fillfactor", RELOPT_TYPE_INT, offsetof(SpGistOptions, fillfactor)}, + }; + + return (bytea *) build_reloptions(reloptions, validate, + RELOPT_KIND_SPGIST, + sizeof(SpGistOptions), + tab, lengthof(tab)); +} + +/* + * Get the space needed to store a non-null datum of the indicated type + * in an inner tuple (that is, as a prefix or node label). + * Note the result is already rounded up to a MAXALIGN boundary. + * Here we follow the convention that pass-by-val types are just stored + * in their Datum representation (compare memcpyInnerDatum). + */ +unsigned int +SpGistGetInnerTypeSize(SpGistTypeDesc *att, Datum datum) +{ + unsigned int size; + + if (att->attbyval) + size = sizeof(Datum); + else if (att->attlen > 0) + size = att->attlen; + else + size = VARSIZE_ANY(datum); + + return MAXALIGN(size); +} + +/* + * Copy the given non-null datum to *target, in the inner-tuple case + */ +static void +memcpyInnerDatum(void *target, SpGistTypeDesc *att, Datum datum) +{ + unsigned int size; + + if (att->attbyval) + { + memcpy(target, &datum, sizeof(Datum)); + } + else + { + size = (att->attlen > 0) ? att->attlen : VARSIZE_ANY(datum); + memcpy(target, DatumGetPointer(datum), size); + } +} + +/* + * Compute space required for a leaf tuple holding the given data. + * + * This must match the size-calculation portion of spgFormLeafTuple. + */ +Size +SpGistGetLeafTupleSize(TupleDesc tupleDescriptor, + Datum *datums, bool *isnulls) +{ + Size size; + Size data_size; + bool needs_null_mask = false; + int natts = tupleDescriptor->natts; + + /* + * Decide whether we need a nulls bitmask. + * + * If there is only a key attribute (natts == 1), never use a bitmask, for + * compatibility with the pre-v14 layout of leaf tuples. Otherwise, we + * need one if any attribute is null. + */ + if (natts > 1) + { + for (int i = 0; i < natts; i++) + { + if (isnulls[i]) + { + needs_null_mask = true; + break; + } + } + } + + /* + * Calculate size of the data part; same as for heap tuples. + */ + data_size = heap_compute_data_size(tupleDescriptor, datums, isnulls); + + /* + * Compute total size. + */ + size = SGLTHDRSZ(needs_null_mask); + size += data_size; + size = MAXALIGN(size); + + /* + * Ensure that we can replace the tuple with a dead tuple later. This test + * is unnecessary when there are any non-null attributes, but be safe. + */ + if (size < SGDTSIZE) + size = SGDTSIZE; + + return size; +} + +/* + * Construct a leaf tuple containing the given heap TID and datum values + */ +SpGistLeafTuple +spgFormLeafTuple(SpGistState *state, ItemPointer heapPtr, + Datum *datums, bool *isnulls) +{ + SpGistLeafTuple tup; + TupleDesc tupleDescriptor = state->leafTupDesc; + Size size; + Size hoff; + Size data_size; + bool needs_null_mask = false; + int natts = tupleDescriptor->natts; + char *tp; /* ptr to tuple data */ + uint16 tupmask = 0; /* unused heap_fill_tuple output */ + + /* + * Decide whether we need a nulls bitmask. + * + * If there is only a key attribute (natts == 1), never use a bitmask, for + * compatibility with the pre-v14 layout of leaf tuples. Otherwise, we + * need one if any attribute is null. + */ + if (natts > 1) + { + for (int i = 0; i < natts; i++) + { + if (isnulls[i]) + { + needs_null_mask = true; + break; + } + } + } + + /* + * Calculate size of the data part; same as for heap tuples. + */ + data_size = heap_compute_data_size(tupleDescriptor, datums, isnulls); + + /* + * Compute total size. + */ + hoff = SGLTHDRSZ(needs_null_mask); + size = hoff + data_size; + size = MAXALIGN(size); + + /* + * Ensure that we can replace the tuple with a dead tuple later. This test + * is unnecessary when there are any non-null attributes, but be safe. + */ + if (size < SGDTSIZE) + size = SGDTSIZE; + + /* OK, form the tuple */ + tup = (SpGistLeafTuple) palloc0(size); + + tup->size = size; + SGLT_SET_NEXTOFFSET(tup, InvalidOffsetNumber); + tup->heapPtr = *heapPtr; + + tp = (char *) tup + hoff; + + if (needs_null_mask) + { + bits8 *bp; /* ptr to null bitmap in tuple */ + + /* Set nullmask presence bit in SpGistLeafTuple header */ + SGLT_SET_HASNULLMASK(tup, true); + /* Fill the data area and null mask */ + bp = (bits8 *) ((char *) tup + sizeof(SpGistLeafTupleData)); + heap_fill_tuple(tupleDescriptor, datums, isnulls, tp, data_size, + &tupmask, bp); + } + else if (natts > 1 || !isnulls[spgKeyColumn]) + { + /* Fill data area only */ + heap_fill_tuple(tupleDescriptor, datums, isnulls, tp, data_size, + &tupmask, (bits8 *) NULL); + } + /* otherwise we have no data, nor a bitmap, to fill */ + + return tup; +} + +/* + * Construct a node (to go into an inner tuple) containing the given label + * + * Note that the node's downlink is just set invalid here. Caller will fill + * it in later. + */ +SpGistNodeTuple +spgFormNodeTuple(SpGistState *state, Datum label, bool isnull) +{ + SpGistNodeTuple tup; + unsigned int size; + unsigned short infomask = 0; + + /* compute space needed (note result is already maxaligned) */ + size = SGNTHDRSZ; + if (!isnull) + size += SpGistGetInnerTypeSize(&state->attLabelType, label); + + /* + * Here we make sure that the size will fit in the field reserved for it + * in t_info. + */ + if ((size & INDEX_SIZE_MASK) != size) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("index row requires %zu bytes, maximum size is %zu", + (Size) size, (Size) INDEX_SIZE_MASK))); + + tup = (SpGistNodeTuple) palloc0(size); + + if (isnull) + infomask |= INDEX_NULL_MASK; + /* we don't bother setting the INDEX_VAR_MASK bit */ + infomask |= size; + tup->t_info = infomask; + + /* The TID field will be filled in later */ + ItemPointerSetInvalid(&tup->t_tid); + + if (!isnull) + memcpyInnerDatum(SGNTDATAPTR(tup), &state->attLabelType, label); + + return tup; +} + +/* + * Construct an inner tuple containing the given prefix and node array + */ +SpGistInnerTuple +spgFormInnerTuple(SpGistState *state, bool hasPrefix, Datum prefix, + int nNodes, SpGistNodeTuple *nodes) +{ + SpGistInnerTuple tup; + unsigned int size; + unsigned int prefixSize; + int i; + char *ptr; + + /* Compute size needed */ + if (hasPrefix) + prefixSize = SpGistGetInnerTypeSize(&state->attPrefixType, prefix); + else + prefixSize = 0; + + size = SGITHDRSZ + prefixSize; + + /* Note: we rely on node tuple sizes to be maxaligned already */ + for (i = 0; i < nNodes; i++) + size += IndexTupleSize(nodes[i]); + + /* + * Ensure that we can replace the tuple with a dead tuple later. This + * test is unnecessary given current tuple layouts, but let's be safe. + */ + if (size < SGDTSIZE) + size = SGDTSIZE; + + /* + * Inner tuple should be small enough to fit on a page + */ + if (size > SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("SP-GiST inner tuple size %zu exceeds maximum %zu", + (Size) size, + SPGIST_PAGE_CAPACITY - sizeof(ItemIdData)), + errhint("Values larger than a buffer page cannot be indexed."))); + + /* + * Check for overflow of header fields --- probably can't fail if the + * above succeeded, but let's be paranoid + */ + if (size > SGITMAXSIZE || + prefixSize > SGITMAXPREFIXSIZE || + nNodes > SGITMAXNNODES) + elog(ERROR, "SPGiST inner tuple header field is too small"); + + /* OK, form the tuple */ + tup = (SpGistInnerTuple) palloc0(size); + + tup->nNodes = nNodes; + tup->prefixSize = prefixSize; + tup->size = size; + + if (hasPrefix) + memcpyInnerDatum(SGITDATAPTR(tup), &state->attPrefixType, prefix); + + ptr = (char *) SGITNODEPTR(tup); + + for (i = 0; i < nNodes; i++) + { + SpGistNodeTuple node = nodes[i]; + + memcpy(ptr, node, IndexTupleSize(node)); + ptr += IndexTupleSize(node); + } + + return tup; +} + +/* + * Construct a "dead" tuple to replace a tuple being deleted. + * + * The state can be SPGIST_REDIRECT, SPGIST_DEAD, or SPGIST_PLACEHOLDER. + * For a REDIRECT tuple, a pointer (blkno+offset) must be supplied, and + * the xid field is filled in automatically. + * + * This is called in critical sections, so we don't use palloc; the tuple + * is built in preallocated storage. It should be copied before another + * call with different parameters can occur. + */ +SpGistDeadTuple +spgFormDeadTuple(SpGistState *state, int tupstate, + BlockNumber blkno, OffsetNumber offnum) +{ + SpGistDeadTuple tuple = (SpGistDeadTuple) state->deadTupleStorage; + + tuple->tupstate = tupstate; + tuple->size = SGDTSIZE; + SGLT_SET_NEXTOFFSET(tuple, InvalidOffsetNumber); + + if (tupstate == SPGIST_REDIRECT) + { + ItemPointerSet(&tuple->pointer, blkno, offnum); + Assert(TransactionIdIsValid(state->myXid)); + tuple->xid = state->myXid; + } + else + { + ItemPointerSetInvalid(&tuple->pointer); + tuple->xid = InvalidTransactionId; + } + + return tuple; +} + +/* + * Convert an SPGiST leaf tuple into Datum/isnull arrays. + * + * The caller must allocate sufficient storage for the output arrays. + * (INDEX_MAX_KEYS entries should be enough.) + */ +void +spgDeformLeafTuple(SpGistLeafTuple tup, TupleDesc tupleDescriptor, + Datum *datums, bool *isnulls, bool keyColumnIsNull) +{ + bool hasNullsMask = SGLT_GET_HASNULLMASK(tup); + char *tp; /* ptr to tuple data */ + bits8 *bp; /* ptr to null bitmap in tuple */ + + if (keyColumnIsNull && tupleDescriptor->natts == 1) + { + /* + * Trivial case: there is only the key attribute and we're in a nulls + * tree. The hasNullsMask bit in the tuple header should not be set + * (and thus we can't use index_deform_tuple_internal), but + * nonetheless the result is NULL. + * + * Note: currently this is dead code, because noplace calls this when + * there is only the key attribute. But we should cover the case. + */ + Assert(!hasNullsMask); + + datums[spgKeyColumn] = (Datum) 0; + isnulls[spgKeyColumn] = true; + return; + } + + tp = (char *) tup + SGLTHDRSZ(hasNullsMask); + bp = (bits8 *) ((char *) tup + sizeof(SpGistLeafTupleData)); + + index_deform_tuple_internal(tupleDescriptor, + datums, isnulls, + tp, bp, hasNullsMask); + + /* + * Key column isnull value from the tuple should be consistent with + * keyColumnIsNull flag from the caller. + */ + Assert(keyColumnIsNull == isnulls[spgKeyColumn]); +} + +/* + * Extract the label datums of the nodes within innerTuple + * + * Returns NULL if label datums are NULLs + */ +Datum * +spgExtractNodeLabels(SpGistState *state, SpGistInnerTuple innerTuple) +{ + Datum *nodeLabels; + int i; + SpGistNodeTuple node; + + /* Either all the labels must be NULL, or none. */ + node = SGITNODEPTR(innerTuple); + if (IndexTupleHasNulls(node)) + { + SGITITERATE(innerTuple, i, node) + { + if (!IndexTupleHasNulls(node)) + elog(ERROR, "some but not all node labels are null in SPGiST inner tuple"); + } + /* They're all null, so just return NULL */ + return NULL; + } + else + { + nodeLabels = (Datum *) palloc(sizeof(Datum) * innerTuple->nNodes); + SGITITERATE(innerTuple, i, node) + { + if (IndexTupleHasNulls(node)) + elog(ERROR, "some but not all node labels are null in SPGiST inner tuple"); + nodeLabels[i] = SGNTDATUM(node, state); + } + return nodeLabels; + } +} + +/* + * Add a new item to the page, replacing a PLACEHOLDER item if possible. + * Return the location it's inserted at, or InvalidOffsetNumber on failure. + * + * If startOffset isn't NULL, we start searching for placeholders at + * *startOffset, and update that to the next place to search. This is just + * an optimization for repeated insertions. + * + * If errorOK is false, we throw error when there's not enough room, + * rather than returning InvalidOffsetNumber. + */ +OffsetNumber +SpGistPageAddNewItem(SpGistState *state, Page page, Item item, Size size, + OffsetNumber *startOffset, bool errorOK) +{ + SpGistPageOpaque opaque = SpGistPageGetOpaque(page); + OffsetNumber i, + maxoff, + offnum; + + if (opaque->nPlaceholder > 0 && + PageGetExactFreeSpace(page) + SGDTSIZE >= MAXALIGN(size)) + { + /* Try to replace a placeholder */ + maxoff = PageGetMaxOffsetNumber(page); + offnum = InvalidOffsetNumber; + + for (;;) + { + if (startOffset && *startOffset != InvalidOffsetNumber) + i = *startOffset; + else + i = FirstOffsetNumber; + for (; i <= maxoff; i++) + { + SpGistDeadTuple it = (SpGistDeadTuple) PageGetItem(page, + PageGetItemId(page, i)); + + if (it->tupstate == SPGIST_PLACEHOLDER) + { + offnum = i; + break; + } + } + + /* Done if we found a placeholder */ + if (offnum != InvalidOffsetNumber) + break; + + if (startOffset && *startOffset != InvalidOffsetNumber) + { + /* Hint was no good, re-search from beginning */ + *startOffset = InvalidOffsetNumber; + continue; + } + + /* Hmm, no placeholder found? */ + opaque->nPlaceholder = 0; + break; + } + + if (offnum != InvalidOffsetNumber) + { + /* Replace the placeholder tuple */ + PageIndexTupleDelete(page, offnum); + + offnum = PageAddItem(page, item, size, offnum, false, false); + + /* + * We should not have failed given the size check at the top of + * the function, but test anyway. If we did fail, we must PANIC + * because we've already deleted the placeholder tuple, and + * there's no other way to keep the damage from getting to disk. + */ + if (offnum != InvalidOffsetNumber) + { + Assert(opaque->nPlaceholder > 0); + opaque->nPlaceholder--; + if (startOffset) + *startOffset = offnum + 1; + } + else + elog(PANIC, "failed to add item of size %zu to SPGiST index page", + size); + + return offnum; + } + } + + /* No luck in replacing a placeholder, so just add it to the page */ + offnum = PageAddItem(page, item, size, + InvalidOffsetNumber, false, false); + + if (offnum == InvalidOffsetNumber && !errorOK) + elog(ERROR, "failed to add item of size %zu to SPGiST index page", + size); + + return offnum; +} + +/* + * spgproperty() -- Check boolean properties of indexes. + * + * This is optional for most AMs, but is required for SP-GiST because the core + * property code doesn't support AMPROP_DISTANCE_ORDERABLE. + */ +bool +spgproperty(Oid index_oid, int attno, + IndexAMProperty prop, const char *propname, + bool *res, bool *isnull) +{ + Oid opclass, + opfamily, + opcintype; + CatCList *catlist; + int i; + + /* Only answer column-level inquiries */ + if (attno == 0) + return false; + + switch (prop) + { + case AMPROP_DISTANCE_ORDERABLE: + break; + default: + return false; + } + + /* + * Currently, SP-GiST distance-ordered scans require that there be a + * distance operator in the opclass with the default types. So we assume + * that if such an operator exists, then there's a reason for it. + */ + + /* First we need to know the column's opclass. */ + opclass = get_index_column_opclass(index_oid, attno); + if (!OidIsValid(opclass)) + { + *isnull = true; + return true; + } + + /* Now look up the opclass family and input datatype. */ + if (!get_opclass_opfamily_and_input_type(opclass, &opfamily, &opcintype)) + { + *isnull = true; + return true; + } + + /* And now we can check whether the operator is provided. */ + catlist = SearchSysCacheList1(AMOPSTRATEGY, + ObjectIdGetDatum(opfamily)); + + *res = false; + + for (i = 0; i < catlist->n_members; i++) + { + HeapTuple amoptup = &catlist->members[i]->tuple; + Form_pg_amop amopform = (Form_pg_amop) GETSTRUCT(amoptup); + + if (amopform->amoppurpose == AMOP_ORDER && + (amopform->amoplefttype == opcintype || + amopform->amoprighttype == opcintype) && + opfamily_can_sort_type(amopform->amopsortfamily, + get_op_rettype(amopform->amopopr))) + { + *res = true; + break; + } + } + + ReleaseSysCacheList(catlist); + + *isnull = false; + + return true; +} |