From 5e45211a64149b3c659b90ff2de6fa982a5a93ed Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 4 May 2024 14:17:33 +0200 Subject: Adding upstream version 15.5. Signed-off-by: Daniel Baumann --- src/backend/access/index/Makefile | 21 + src/backend/access/index/amapi.c | 143 +++++ src/backend/access/index/amvalidate.c | 276 ++++++++++ src/backend/access/index/genam.c | 748 ++++++++++++++++++++++++++ src/backend/access/index/indexam.c | 984 ++++++++++++++++++++++++++++++++++ 5 files changed, 2172 insertions(+) create mode 100644 src/backend/access/index/Makefile create mode 100644 src/backend/access/index/amapi.c create mode 100644 src/backend/access/index/amvalidate.c create mode 100644 src/backend/access/index/genam.c create mode 100644 src/backend/access/index/indexam.c (limited to 'src/backend/access/index') diff --git a/src/backend/access/index/Makefile b/src/backend/access/index/Makefile new file mode 100644 index 0000000..6f2e306 --- /dev/null +++ b/src/backend/access/index/Makefile @@ -0,0 +1,21 @@ +#------------------------------------------------------------------------- +# +# Makefile-- +# Makefile for access/index +# +# IDENTIFICATION +# src/backend/access/index/Makefile +# +#------------------------------------------------------------------------- + +subdir = src/backend/access/index +top_builddir = ../../../.. +include $(top_builddir)/src/Makefile.global + +OBJS = \ + amapi.o \ + amvalidate.o \ + genam.o \ + indexam.o + +include $(top_srcdir)/src/backend/common.mk diff --git a/src/backend/access/index/amapi.c b/src/backend/access/index/amapi.c new file mode 100644 index 0000000..2b028e1 --- /dev/null +++ b/src/backend/access/index/amapi.c @@ -0,0 +1,143 @@ +/*------------------------------------------------------------------------- + * + * amapi.c + * Support routines for API for Postgres index access methods. + * + * Copyright (c) 2015-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/access/index/amapi.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amapi.h" +#include "access/htup_details.h" +#include "catalog/pg_am.h" +#include "catalog/pg_opclass.h" +#include "utils/builtins.h" +#include "utils/syscache.h" + + +/* + * GetIndexAmRoutine - call the specified access method handler routine to get + * its IndexAmRoutine struct, which will be palloc'd in the caller's context. + * + * Note that if the amhandler function is built-in, this will not involve + * any catalog access. It's therefore safe to use this while bootstrapping + * indexes for the system catalogs. relcache.c relies on that. + */ +IndexAmRoutine * +GetIndexAmRoutine(Oid amhandler) +{ + Datum datum; + IndexAmRoutine *routine; + + datum = OidFunctionCall0(amhandler); + routine = (IndexAmRoutine *) DatumGetPointer(datum); + + if (routine == NULL || !IsA(routine, IndexAmRoutine)) + elog(ERROR, "index access method handler function %u did not return an IndexAmRoutine struct", + amhandler); + + return routine; +} + +/* + * GetIndexAmRoutineByAmId - look up the handler of the index access method + * with the given OID, and get its IndexAmRoutine struct. + * + * If the given OID isn't a valid index access method, returns NULL if + * noerror is true, else throws error. + */ +IndexAmRoutine * +GetIndexAmRoutineByAmId(Oid amoid, bool noerror) +{ + HeapTuple tuple; + Form_pg_am amform; + regproc amhandler; + + /* Get handler function OID for the access method */ + tuple = SearchSysCache1(AMOID, ObjectIdGetDatum(amoid)); + if (!HeapTupleIsValid(tuple)) + { + if (noerror) + return NULL; + elog(ERROR, "cache lookup failed for access method %u", + amoid); + } + amform = (Form_pg_am) GETSTRUCT(tuple); + + /* Check if it's an index access method as opposed to some other AM */ + if (amform->amtype != AMTYPE_INDEX) + { + if (noerror) + { + ReleaseSysCache(tuple); + return NULL; + } + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("access method \"%s\" is not of type %s", + NameStr(amform->amname), "INDEX"))); + } + + amhandler = amform->amhandler; + + /* Complain if handler OID is invalid */ + if (!RegProcedureIsValid(amhandler)) + { + if (noerror) + { + ReleaseSysCache(tuple); + return NULL; + } + ereport(ERROR, + (errcode(ERRCODE_OBJECT_NOT_IN_PREREQUISITE_STATE), + errmsg("index access method \"%s\" does not have a handler", + NameStr(amform->amname)))); + } + + ReleaseSysCache(tuple); + + /* And finally, call the handler function to get the API struct. */ + return GetIndexAmRoutine(amhandler); +} + + +/* + * Ask appropriate access method to validate the specified opclass. + */ +Datum +amvalidate(PG_FUNCTION_ARGS) +{ + Oid opclassoid = PG_GETARG_OID(0); + bool result; + HeapTuple classtup; + Form_pg_opclass classform; + Oid amoid; + IndexAmRoutine *amroutine; + + classtup = SearchSysCache1(CLAOID, ObjectIdGetDatum(opclassoid)); + if (!HeapTupleIsValid(classtup)) + elog(ERROR, "cache lookup failed for operator class %u", opclassoid); + classform = (Form_pg_opclass) GETSTRUCT(classtup); + + amoid = classform->opcmethod; + + ReleaseSysCache(classtup); + + amroutine = GetIndexAmRoutineByAmId(amoid, false); + + if (amroutine->amvalidate == NULL) + elog(ERROR, "function amvalidate is not defined for index access method %u", + amoid); + + result = amroutine->amvalidate(opclassoid); + + pfree(amroutine); + + PG_RETURN_BOOL(result); +} diff --git a/src/backend/access/index/amvalidate.c b/src/backend/access/index/amvalidate.c new file mode 100644 index 0000000..d13054e --- /dev/null +++ b/src/backend/access/index/amvalidate.c @@ -0,0 +1,276 @@ +/*------------------------------------------------------------------------- + * + * amvalidate.c + * Support routines for index access methods' amvalidate and + * amadjustmembers functions. + * + * Copyright (c) 2016-2022, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/access/index/amvalidate.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "access/amvalidate.h" +#include "access/htup_details.h" +#include "catalog/pg_am.h" +#include "catalog/pg_amop.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_opclass.h" +#include "catalog/pg_operator.h" +#include "catalog/pg_proc.h" +#include "catalog/pg_type.h" +#include "parser/parse_coerce.h" +#include "utils/syscache.h" + + +/* + * identify_opfamily_groups() returns a List of OpFamilyOpFuncGroup structs, + * one for each combination of lefttype/righttype present in the family's + * operator and support function lists. If amopstrategy K is present for + * this datatype combination, we set bit 1 << K in operatorset, and similarly + * for the support functions. With uint64 fields we can handle operator and + * function numbers up to 63, which is plenty for the foreseeable future. + * + * The given CatCLists are expected to represent a single opfamily fetched + * from the AMOPSTRATEGY and AMPROCNUM caches, so that they will be in + * order by those caches' second and third cache keys, namely the datatypes. + */ +List * +identify_opfamily_groups(CatCList *oprlist, CatCList *proclist) +{ + List *result = NIL; + OpFamilyOpFuncGroup *thisgroup; + Form_pg_amop oprform; + Form_pg_amproc procform; + int io, + ip; + + /* We need the lists to be ordered; should be true in normal operation */ + if (!oprlist->ordered || !proclist->ordered) + elog(ERROR, "cannot validate operator family without ordered data"); + + /* + * Advance through the lists concurrently. Thanks to the ordering, we + * should see all operators and functions of a given datatype pair + * consecutively. + */ + thisgroup = NULL; + io = ip = 0; + if (io < oprlist->n_members) + { + oprform = (Form_pg_amop) GETSTRUCT(&oprlist->members[io]->tuple); + io++; + } + else + oprform = NULL; + if (ip < proclist->n_members) + { + procform = (Form_pg_amproc) GETSTRUCT(&proclist->members[ip]->tuple); + ip++; + } + else + procform = NULL; + + while (oprform || procform) + { + if (oprform && thisgroup && + oprform->amoplefttype == thisgroup->lefttype && + oprform->amoprighttype == thisgroup->righttype) + { + /* Operator belongs to current group; include it and advance */ + + /* Ignore strategy numbers outside supported range */ + if (oprform->amopstrategy > 0 && oprform->amopstrategy < 64) + thisgroup->operatorset |= ((uint64) 1) << oprform->amopstrategy; + + if (io < oprlist->n_members) + { + oprform = (Form_pg_amop) GETSTRUCT(&oprlist->members[io]->tuple); + io++; + } + else + oprform = NULL; + continue; + } + + if (procform && thisgroup && + procform->amproclefttype == thisgroup->lefttype && + procform->amprocrighttype == thisgroup->righttype) + { + /* Procedure belongs to current group; include it and advance */ + + /* Ignore function numbers outside supported range */ + if (procform->amprocnum > 0 && procform->amprocnum < 64) + thisgroup->functionset |= ((uint64) 1) << procform->amprocnum; + + if (ip < proclist->n_members) + { + procform = (Form_pg_amproc) GETSTRUCT(&proclist->members[ip]->tuple); + ip++; + } + else + procform = NULL; + continue; + } + + /* Time for a new group */ + thisgroup = (OpFamilyOpFuncGroup *) palloc(sizeof(OpFamilyOpFuncGroup)); + if (oprform && + (!procform || + (oprform->amoplefttype < procform->amproclefttype || + (oprform->amoplefttype == procform->amproclefttype && + oprform->amoprighttype < procform->amprocrighttype)))) + { + thisgroup->lefttype = oprform->amoplefttype; + thisgroup->righttype = oprform->amoprighttype; + } + else + { + thisgroup->lefttype = procform->amproclefttype; + thisgroup->righttype = procform->amprocrighttype; + } + thisgroup->operatorset = thisgroup->functionset = 0; + result = lappend(result, thisgroup); + } + + return result; +} + +/* + * Validate the signature (argument and result types) of an opclass support + * function. Return true if OK, false if not. + * + * The "..." represents maxargs argument-type OIDs. If "exact" is true, they + * must match the function arg types exactly, else only binary-coercibly. + * In any case the function result type must match restype exactly. + */ +bool +check_amproc_signature(Oid funcid, Oid restype, bool exact, + int minargs, int maxargs,...) +{ + bool result = true; + HeapTuple tp; + Form_pg_proc procform; + va_list ap; + int i; + + tp = SearchSysCache1(PROCOID, ObjectIdGetDatum(funcid)); + if (!HeapTupleIsValid(tp)) + elog(ERROR, "cache lookup failed for function %u", funcid); + procform = (Form_pg_proc) GETSTRUCT(tp); + + if (procform->prorettype != restype || procform->proretset || + procform->pronargs < minargs || procform->pronargs > maxargs) + result = false; + + va_start(ap, maxargs); + for (i = 0; i < maxargs; i++) + { + Oid argtype = va_arg(ap, Oid); + + if (i >= procform->pronargs) + continue; + if (exact ? (argtype != procform->proargtypes.values[i]) : + !IsBinaryCoercible(argtype, procform->proargtypes.values[i])) + result = false; + } + va_end(ap); + + ReleaseSysCache(tp); + return result; +} + +/* + * Validate the signature of an opclass options support function, that should + * be 'void(internal)'. + */ +bool +check_amoptsproc_signature(Oid funcid) +{ + return check_amproc_signature(funcid, VOIDOID, true, 1, 1, INTERNALOID); +} + +/* + * Validate the signature (argument and result types) of an opclass operator. + * Return true if OK, false if not. + * + * Currently, we can hard-wire this as accepting only binary operators. Also, + * we can insist on exact type matches, since the given lefttype/righttype + * come from pg_amop and should always match the operator exactly. + */ +bool +check_amop_signature(Oid opno, Oid restype, Oid lefttype, Oid righttype) +{ + bool result = true; + HeapTuple tp; + Form_pg_operator opform; + + tp = SearchSysCache1(OPEROID, ObjectIdGetDatum(opno)); + if (!HeapTupleIsValid(tp)) /* shouldn't happen */ + elog(ERROR, "cache lookup failed for operator %u", opno); + opform = (Form_pg_operator) GETSTRUCT(tp); + + if (opform->oprresult != restype || opform->oprkind != 'b' || + opform->oprleft != lefttype || opform->oprright != righttype) + result = false; + + ReleaseSysCache(tp); + return result; +} + +/* + * Get the OID of the opclass belonging to an opfamily and accepting + * the specified type as input type. Returns InvalidOid if no such opclass. + * + * If there is more than one such opclass, you get a random one of them. + * Since that shouldn't happen, we don't waste cycles checking. + * + * We could look up the AM's OID from the opfamily, but all existing callers + * know that or can get it without an extra lookup, so we make them pass it. + */ +Oid +opclass_for_family_datatype(Oid amoid, Oid opfamilyoid, Oid datatypeoid) +{ + Oid result = InvalidOid; + CatCList *opclist; + int i; + + /* + * We search through all the AM's opclasses to see if one matches. This + * is a bit inefficient but there is no better index available. It also + * saves making an explicit check that the opfamily belongs to the AM. + */ + opclist = SearchSysCacheList1(CLAAMNAMENSP, ObjectIdGetDatum(amoid)); + + for (i = 0; i < opclist->n_members; i++) + { + HeapTuple classtup = &opclist->members[i]->tuple; + Form_pg_opclass classform = (Form_pg_opclass) GETSTRUCT(classtup); + + if (classform->opcfamily == opfamilyoid && + classform->opcintype == datatypeoid) + { + result = classform->oid; + break; + } + } + + ReleaseCatCacheList(opclist); + + return result; +} + +/* + * Is the datatype a legitimate input type for the btree opfamily? + */ +bool +opfamily_can_sort_type(Oid opfamilyoid, Oid datatypeoid) +{ + return OidIsValid(opclass_for_family_datatype(BTREE_AM_OID, + opfamilyoid, + datatypeoid)); +} diff --git a/src/backend/access/index/genam.c b/src/backend/access/index/genam.c new file mode 100644 index 0000000..98af534 --- /dev/null +++ b/src/backend/access/index/genam.c @@ -0,0 +1,748 @@ +/*------------------------------------------------------------------------- + * + * genam.c + * general index access method routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/index/genam.c + * + * NOTES + * many of the old access method routines have been turned into + * macros and moved to genam.h -cim 4/30/91 + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/genam.h" +#include "access/heapam.h" +#include "access/relscan.h" +#include "access/tableam.h" +#include "access/transam.h" +#include "catalog/index.h" +#include "lib/stringinfo.h" +#include "miscadmin.h" +#include "storage/bufmgr.h" +#include "storage/procarray.h" +#include "utils/acl.h" +#include "utils/builtins.h" +#include "utils/lsyscache.h" +#include "utils/rel.h" +#include "utils/rls.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* ---------------------------------------------------------------- + * general access method routines + * + * All indexed access methods use an identical scan structure. + * We don't know how the various AMs do locking, however, so we don't + * do anything about that here. + * + * The intent is that an AM implementor will define a beginscan routine + * that calls RelationGetIndexScan, to fill in the scan, and then does + * whatever kind of locking he wants. + * + * At the end of a scan, the AM's endscan routine undoes the locking, + * but does *not* call IndexScanEnd --- the higher-level index_endscan + * routine does that. (We can't do it in the AM because index_endscan + * still needs to touch the IndexScanDesc after calling the AM.) + * + * Because of this, the AM does not have a choice whether to call + * RelationGetIndexScan or not; its beginscan routine must return an + * object made by RelationGetIndexScan. This is kinda ugly but not + * worth cleaning up now. + * ---------------------------------------------------------------- + */ + +/* ---------------- + * RelationGetIndexScan -- Create and fill an IndexScanDesc. + * + * This routine creates an index scan structure and sets up initial + * contents for it. + * + * Parameters: + * indexRelation -- index relation for scan. + * nkeys -- count of scan keys (index qual conditions). + * norderbys -- count of index order-by operators. + * + * Returns: + * An initialized IndexScanDesc. + * ---------------- + */ +IndexScanDesc +RelationGetIndexScan(Relation indexRelation, int nkeys, int norderbys) +{ + IndexScanDesc scan; + + scan = (IndexScanDesc) palloc(sizeof(IndexScanDescData)); + + scan->heapRelation = NULL; /* may be set later */ + scan->xs_heapfetch = NULL; + scan->indexRelation = indexRelation; + scan->xs_snapshot = InvalidSnapshot; /* caller must initialize this */ + scan->numberOfKeys = nkeys; + scan->numberOfOrderBys = norderbys; + + /* + * We allocate key workspace here, but it won't get filled until amrescan. + */ + if (nkeys > 0) + scan->keyData = (ScanKey) palloc(sizeof(ScanKeyData) * nkeys); + else + scan->keyData = NULL; + if (norderbys > 0) + scan->orderByData = (ScanKey) palloc(sizeof(ScanKeyData) * norderbys); + else + scan->orderByData = NULL; + + scan->xs_want_itup = false; /* may be set later */ + + /* + * During recovery we ignore killed tuples and don't bother to kill them + * either. We do this because the xmin on the primary node could easily be + * later than the xmin on the standby node, so that what the primary + * thinks is killed is supposed to be visible on standby. So for correct + * MVCC for queries during recovery we must ignore these hints and check + * all tuples. Do *not* set ignore_killed_tuples to true when running in a + * transaction that was started during recovery. xactStartedInRecovery + * should not be altered by index AMs. + */ + scan->kill_prior_tuple = false; + scan->xactStartedInRecovery = TransactionStartedDuringRecovery(); + scan->ignore_killed_tuples = !scan->xactStartedInRecovery; + + scan->opaque = NULL; + + scan->xs_itup = NULL; + scan->xs_itupdesc = NULL; + scan->xs_hitup = NULL; + scan->xs_hitupdesc = NULL; + + return scan; +} + +/* ---------------- + * IndexScanEnd -- End an index scan. + * + * This routine just releases the storage acquired by + * RelationGetIndexScan(). Any AM-level resources are + * assumed to already have been released by the AM's + * endscan routine. + * + * Returns: + * None. + * ---------------- + */ +void +IndexScanEnd(IndexScanDesc scan) +{ + if (scan->keyData != NULL) + pfree(scan->keyData); + if (scan->orderByData != NULL) + pfree(scan->orderByData); + + pfree(scan); +} + +/* + * BuildIndexValueDescription + * + * Construct a string describing the contents of an index entry, in the + * form "(key_name, ...)=(key_value, ...)". This is currently used + * for building unique-constraint and exclusion-constraint error messages, + * so only key columns of the index are checked and printed. + * + * Note that if the user does not have permissions to view all of the + * columns involved then a NULL is returned. Returning a partial key seems + * unlikely to be useful and we have no way to know which of the columns the + * user provided (unlike in ExecBuildSlotValueDescription). + * + * The passed-in values/nulls arrays are the "raw" input to the index AM, + * e.g. results of FormIndexDatum --- this is not necessarily what is stored + * in the index, but it's what the user perceives to be stored. + * + * Note: if you change anything here, check whether + * ExecBuildSlotPartitionKeyDescription() in execMain.c needs a similar + * change. + */ +char * +BuildIndexValueDescription(Relation indexRelation, + Datum *values, bool *isnull) +{ + StringInfoData buf; + Form_pg_index idxrec; + int indnkeyatts; + int i; + int keyno; + Oid indexrelid = RelationGetRelid(indexRelation); + Oid indrelid; + AclResult aclresult; + + indnkeyatts = IndexRelationGetNumberOfKeyAttributes(indexRelation); + + /* + * Check permissions- if the user does not have access to view all of the + * key columns then return NULL to avoid leaking data. + * + * First check if RLS is enabled for the relation. If so, return NULL to + * avoid leaking data. + * + * Next we need to check table-level SELECT access and then, if there is + * no access there, check column-level permissions. + */ + idxrec = indexRelation->rd_index; + indrelid = idxrec->indrelid; + Assert(indexrelid == idxrec->indexrelid); + + /* RLS check- if RLS is enabled then we don't return anything. */ + if (check_enable_rls(indrelid, InvalidOid, true) == RLS_ENABLED) + return NULL; + + /* Table-level SELECT is enough, if the user has it */ + aclresult = pg_class_aclcheck(indrelid, GetUserId(), ACL_SELECT); + if (aclresult != ACLCHECK_OK) + { + /* + * No table-level access, so step through the columns in the index and + * make sure the user has SELECT rights on all of them. + */ + for (keyno = 0; keyno < indnkeyatts; keyno++) + { + AttrNumber attnum = idxrec->indkey.values[keyno]; + + /* + * Note that if attnum == InvalidAttrNumber, then this is an index + * based on an expression and we return no detail rather than try + * to figure out what column(s) the expression includes and if the + * user has SELECT rights on them. + */ + if (attnum == InvalidAttrNumber || + pg_attribute_aclcheck(indrelid, attnum, GetUserId(), + ACL_SELECT) != ACLCHECK_OK) + { + /* No access, so clean up and return */ + return NULL; + } + } + } + + initStringInfo(&buf); + appendStringInfo(&buf, "(%s)=(", + pg_get_indexdef_columns(indexrelid, true)); + + for (i = 0; i < indnkeyatts; i++) + { + char *val; + + if (isnull[i]) + val = "null"; + else + { + Oid foutoid; + bool typisvarlena; + + /* + * The provided data is not necessarily of the type stored in the + * index; rather it is of the index opclass's input type. So look + * at rd_opcintype not the index tupdesc. + * + * Note: this is a bit shaky for opclasses that have pseudotype + * input types such as ANYARRAY or RECORD. Currently, the + * typoutput functions associated with the pseudotypes will work + * okay, but we might have to try harder in future. + */ + getTypeOutputInfo(indexRelation->rd_opcintype[i], + &foutoid, &typisvarlena); + val = OidOutputFunctionCall(foutoid, values[i]); + } + + if (i > 0) + appendStringInfoString(&buf, ", "); + appendStringInfoString(&buf, val); + } + + appendStringInfoChar(&buf, ')'); + + return buf.data; +} + +/* + * Get the latestRemovedXid from the table entries pointed at by the index + * tuples being deleted using an AM-generic approach. + * + * This is a table_index_delete_tuples() shim used by index AMs that have + * simple requirements. These callers only need to consult the tableam to get + * a latestRemovedXid value, and only expect to delete tuples that are already + * known deletable. When a latestRemovedXid value isn't needed in index AM's + * deletion WAL record, it is safe for it to skip calling here entirely. + * + * We assume that caller index AM uses the standard IndexTuple representation, + * with table TIDs stored in the t_tid field. We also expect (and assert) + * that the line pointers on page for 'itemnos' offsets are already marked + * LP_DEAD. + */ +TransactionId +index_compute_xid_horizon_for_tuples(Relation irel, + Relation hrel, + Buffer ibuf, + OffsetNumber *itemnos, + int nitems) +{ + TM_IndexDeleteOp delstate; + TransactionId latestRemovedXid = InvalidTransactionId; + Page ipage = BufferGetPage(ibuf); + IndexTuple itup; + + Assert(nitems > 0); + + delstate.irel = irel; + delstate.iblknum = BufferGetBlockNumber(ibuf); + delstate.bottomup = false; + delstate.bottomupfreespace = 0; + delstate.ndeltids = 0; + delstate.deltids = palloc(nitems * sizeof(TM_IndexDelete)); + delstate.status = palloc(nitems * sizeof(TM_IndexStatus)); + + /* identify what the index tuples about to be deleted point to */ + for (int i = 0; i < nitems; i++) + { + OffsetNumber offnum = itemnos[i]; + ItemId iitemid; + + iitemid = PageGetItemId(ipage, offnum); + itup = (IndexTuple) PageGetItem(ipage, iitemid); + + Assert(ItemIdIsDead(iitemid)); + + ItemPointerCopy(&itup->t_tid, &delstate.deltids[i].tid); + delstate.deltids[i].id = delstate.ndeltids; + delstate.status[i].idxoffnum = offnum; + delstate.status[i].knowndeletable = true; /* LP_DEAD-marked */ + delstate.status[i].promising = false; /* unused */ + delstate.status[i].freespace = 0; /* unused */ + + delstate.ndeltids++; + } + + /* determine the actual xid horizon */ + latestRemovedXid = table_index_delete_tuples(hrel, &delstate); + + /* assert tableam agrees that all items are deletable */ + Assert(delstate.ndeltids == nitems); + + pfree(delstate.deltids); + pfree(delstate.status); + + return latestRemovedXid; +} + + +/* ---------------------------------------------------------------- + * heap-or-index-scan access to system catalogs + * + * These functions support system catalog accesses that normally use + * an index but need to be capable of being switched to heap scans + * if the system indexes are unavailable. + * + * The specified scan keys must be compatible with the named index. + * Generally this means that they must constrain either all columns + * of the index, or the first K columns of an N-column index. + * + * These routines could work with non-system tables, actually, + * but they're only useful when there is a known index to use with + * the given scan keys; so in practice they're only good for + * predetermined types of scans of system catalogs. + * ---------------------------------------------------------------- + */ + +/* + * systable_beginscan --- set up for heap-or-index scan + * + * rel: catalog to scan, already opened and suitably locked + * indexId: OID of index to conditionally use + * indexOK: if false, forces a heap scan (see notes below) + * snapshot: time qual to use (NULL for a recent catalog snapshot) + * nkeys, key: scan keys + * + * The attribute numbers in the scan key should be set for the heap case. + * If we choose to index, we reset them to 1..n to reference the index + * columns. Note this means there must be one scankey qualification per + * index column! This is checked by the Asserts in the normal, index-using + * case, but won't be checked if the heapscan path is taken. + * + * The routine checks the normal cases for whether an indexscan is safe, + * but caller can make additional checks and pass indexOK=false if needed. + * In standard case indexOK can simply be constant TRUE. + */ +SysScanDesc +systable_beginscan(Relation heapRelation, + Oid indexId, + bool indexOK, + Snapshot snapshot, + int nkeys, ScanKey key) +{ + SysScanDesc sysscan; + Relation irel; + + if (indexOK && + !IgnoreSystemIndexes && + !ReindexIsProcessingIndex(indexId)) + irel = index_open(indexId, AccessShareLock); + else + irel = NULL; + + sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); + + sysscan->heap_rel = heapRelation; + sysscan->irel = irel; + sysscan->slot = table_slot_create(heapRelation, NULL); + + if (snapshot == NULL) + { + Oid relid = RelationGetRelid(heapRelation); + + snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + sysscan->snapshot = snapshot; + } + else + { + /* Caller is responsible for any snapshot. */ + sysscan->snapshot = NULL; + } + + if (irel) + { + int i; + + /* Change attribute numbers to be index column numbers. */ + for (i = 0; i < nkeys; i++) + { + int j; + + for (j = 0; j < IndexRelationGetNumberOfAttributes(irel); j++) + { + if (key[i].sk_attno == irel->rd_index->indkey.values[j]) + { + key[i].sk_attno = j + 1; + break; + } + } + if (j == IndexRelationGetNumberOfAttributes(irel)) + elog(ERROR, "column is not in index"); + } + + sysscan->iscan = index_beginscan(heapRelation, irel, + snapshot, nkeys, 0); + index_rescan(sysscan->iscan, key, nkeys, NULL, 0); + sysscan->scan = NULL; + } + else + { + /* + * We disallow synchronized scans when forced to use a heapscan on a + * catalog. In most cases the desired rows are near the front, so + * that the unpredictable start point of a syncscan is a serious + * disadvantage; and there are no compensating advantages, because + * it's unlikely that such scans will occur in parallel. + */ + sysscan->scan = table_beginscan_strat(heapRelation, snapshot, + nkeys, key, + true, false); + sysscan->iscan = NULL; + } + + /* + * If CheckXidAlive is set then set a flag to indicate that system table + * scan is in-progress. See detailed comments in xact.c where these + * variables are declared. + */ + if (TransactionIdIsValid(CheckXidAlive)) + bsysscan = true; + + return sysscan; +} + +/* + * HandleConcurrentAbort - Handle concurrent abort of the CheckXidAlive. + * + * Error out, if CheckXidAlive is aborted. We can't directly use + * TransactionIdDidAbort as after crash such transaction might not have been + * marked as aborted. See detailed comments in xact.c where the variable + * is declared. + */ +static inline void +HandleConcurrentAbort() +{ + if (TransactionIdIsValid(CheckXidAlive) && + !TransactionIdIsInProgress(CheckXidAlive) && + !TransactionIdDidCommit(CheckXidAlive)) + ereport(ERROR, + (errcode(ERRCODE_TRANSACTION_ROLLBACK), + errmsg("transaction aborted during system catalog scan"))); +} + +/* + * systable_getnext --- get next tuple in a heap-or-index scan + * + * Returns NULL if no more tuples available. + * + * Note that returned tuple is a reference to data in a disk buffer; + * it must not be modified, and should be presumed inaccessible after + * next getnext() or endscan() call. + * + * XXX: It'd probably make sense to offer a slot based interface, at least + * optionally. + */ +HeapTuple +systable_getnext(SysScanDesc sysscan) +{ + HeapTuple htup = NULL; + + if (sysscan->irel) + { + if (index_getnext_slot(sysscan->iscan, ForwardScanDirection, sysscan->slot)) + { + bool shouldFree; + + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree); + Assert(!shouldFree); + + /* + * We currently don't need to support lossy index operators for + * any system catalog scan. It could be done here, using the scan + * keys to drive the operator calls, if we arranged to save the + * heap attnums during systable_beginscan(); this is practical + * because we still wouldn't need to support indexes on + * expressions. + */ + if (sysscan->iscan->xs_recheck) + elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); + } + } + else + { + if (table_scan_getnextslot(sysscan->scan, ForwardScanDirection, sysscan->slot)) + { + bool shouldFree; + + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, &shouldFree); + Assert(!shouldFree); + } + } + + /* + * Handle the concurrent abort while fetching the catalog tuple during + * logical streaming of a transaction. + */ + HandleConcurrentAbort(); + + return htup; +} + +/* + * systable_recheck_tuple --- recheck visibility of most-recently-fetched tuple + * + * In particular, determine if this tuple would be visible to a catalog scan + * that started now. We don't handle the case of a non-MVCC scan snapshot, + * because no caller needs that yet. + * + * This is useful to test whether an object was deleted while we waited to + * acquire lock on it. + * + * Note: we don't actually *need* the tuple to be passed in, but it's a + * good crosscheck that the caller is interested in the right tuple. + */ +bool +systable_recheck_tuple(SysScanDesc sysscan, HeapTuple tup) +{ + Snapshot freshsnap; + bool result; + + Assert(tup == ExecFetchSlotHeapTuple(sysscan->slot, false, NULL)); + + /* + * Trust that table_tuple_satisfies_snapshot() and its subsidiaries + * (commonly LockBuffer() and HeapTupleSatisfiesMVCC()) do not themselves + * acquire snapshots, so we need not register the snapshot. Those + * facilities are too low-level to have any business scanning tables. + */ + freshsnap = GetCatalogSnapshot(RelationGetRelid(sysscan->heap_rel)); + + result = table_tuple_satisfies_snapshot(sysscan->heap_rel, + sysscan->slot, + freshsnap); + + /* + * Handle the concurrent abort while fetching the catalog tuple during + * logical streaming of a transaction. + */ + HandleConcurrentAbort(); + + return result; +} + +/* + * systable_endscan --- close scan, release resources + * + * Note that it's still up to the caller to close the heap relation. + */ +void +systable_endscan(SysScanDesc sysscan) +{ + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + + if (sysscan->irel) + { + index_endscan(sysscan->iscan); + index_close(sysscan->irel, AccessShareLock); + } + else + table_endscan(sysscan->scan); + + if (sysscan->snapshot) + UnregisterSnapshot(sysscan->snapshot); + + /* + * Reset the bsysscan flag at the end of the systable scan. See detailed + * comments in xact.c where these variables are declared. + */ + if (TransactionIdIsValid(CheckXidAlive)) + bsysscan = false; + + pfree(sysscan); +} + + +/* + * systable_beginscan_ordered --- set up for ordered catalog scan + * + * These routines have essentially the same API as systable_beginscan etc, + * except that they guarantee to return multiple matching tuples in + * index order. Also, for largely historical reasons, the index to use + * is opened and locked by the caller, not here. + * + * Currently we do not support non-index-based scans here. (In principle + * we could do a heapscan and sort, but the uses are in places that + * probably don't need to still work with corrupted catalog indexes.) + * For the moment, therefore, these functions are merely the thinest of + * wrappers around index_beginscan/index_getnext_slot. The main reason for + * their existence is to centralize possible future support of lossy operators + * in catalog scans. + */ +SysScanDesc +systable_beginscan_ordered(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, ScanKey key) +{ + SysScanDesc sysscan; + int i; + + /* REINDEX can probably be a hard error here ... */ + if (ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) + elog(ERROR, "cannot do ordered scan on index \"%s\", because it is being reindexed", + RelationGetRelationName(indexRelation)); + /* ... but we only throw a warning about violating IgnoreSystemIndexes */ + if (IgnoreSystemIndexes) + elog(WARNING, "using index \"%s\" despite IgnoreSystemIndexes", + RelationGetRelationName(indexRelation)); + + sysscan = (SysScanDesc) palloc(sizeof(SysScanDescData)); + + sysscan->heap_rel = heapRelation; + sysscan->irel = indexRelation; + sysscan->slot = table_slot_create(heapRelation, NULL); + + if (snapshot == NULL) + { + Oid relid = RelationGetRelid(heapRelation); + + snapshot = RegisterSnapshot(GetCatalogSnapshot(relid)); + sysscan->snapshot = snapshot; + } + else + { + /* Caller is responsible for any snapshot. */ + sysscan->snapshot = NULL; + } + + /* Change attribute numbers to be index column numbers. */ + for (i = 0; i < nkeys; i++) + { + int j; + + for (j = 0; j < IndexRelationGetNumberOfAttributes(indexRelation); j++) + { + if (key[i].sk_attno == indexRelation->rd_index->indkey.values[j]) + { + key[i].sk_attno = j + 1; + break; + } + } + if (j == IndexRelationGetNumberOfAttributes(indexRelation)) + elog(ERROR, "column is not in index"); + } + + sysscan->iscan = index_beginscan(heapRelation, indexRelation, + snapshot, nkeys, 0); + index_rescan(sysscan->iscan, key, nkeys, NULL, 0); + sysscan->scan = NULL; + + return sysscan; +} + +/* + * systable_getnext_ordered --- get next tuple in an ordered catalog scan + */ +HeapTuple +systable_getnext_ordered(SysScanDesc sysscan, ScanDirection direction) +{ + HeapTuple htup = NULL; + + Assert(sysscan->irel); + if (index_getnext_slot(sysscan->iscan, direction, sysscan->slot)) + htup = ExecFetchSlotHeapTuple(sysscan->slot, false, NULL); + + /* See notes in systable_getnext */ + if (htup && sysscan->iscan->xs_recheck) + elog(ERROR, "system catalog scans with lossy index conditions are not implemented"); + + /* + * Handle the concurrent abort while fetching the catalog tuple during + * logical streaming of a transaction. + */ + HandleConcurrentAbort(); + + return htup; +} + +/* + * systable_endscan_ordered --- close scan, release resources + */ +void +systable_endscan_ordered(SysScanDesc sysscan) +{ + if (sysscan->slot) + { + ExecDropSingleTupleTableSlot(sysscan->slot); + sysscan->slot = NULL; + } + + Assert(sysscan->irel); + index_endscan(sysscan->iscan); + if (sysscan->snapshot) + UnregisterSnapshot(sysscan->snapshot); + pfree(sysscan); +} diff --git a/src/backend/access/index/indexam.c b/src/backend/access/index/indexam.c new file mode 100644 index 0000000..fe80b8b --- /dev/null +++ b/src/backend/access/index/indexam.c @@ -0,0 +1,984 @@ +/*------------------------------------------------------------------------- + * + * indexam.c + * general index access method routines + * + * Portions Copyright (c) 1996-2022, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/access/index/indexam.c + * + * INTERFACE ROUTINES + * index_open - open an index relation by relation OID + * index_close - close an index relation + * index_beginscan - start a scan of an index with amgettuple + * index_beginscan_bitmap - start a scan of an index with amgetbitmap + * index_rescan - restart a scan of an index + * index_endscan - end a scan + * index_insert - insert an index tuple into a relation + * index_markpos - mark a scan position + * index_restrpos - restore a scan position + * index_parallelscan_estimate - estimate shared memory for parallel scan + * index_parallelscan_initialize - initialize parallel scan + * index_parallelrescan - (re)start a parallel scan of an index + * index_beginscan_parallel - join parallel index scan + * index_getnext_tid - get the next TID from a scan + * index_fetch_heap - get the scan's next heap tuple + * index_getnext_slot - get the next tuple from a scan + * index_getbitmap - get all tuples from a scan + * index_bulk_delete - bulk deletion of index tuples + * index_vacuum_cleanup - post-deletion cleanup of an index + * index_can_return - does index support index-only scans? + * index_getprocid - get a support procedure OID + * index_getprocinfo - get a support procedure's lookup info + * + * NOTES + * This file contains the index_ routines which used + * to be a scattered collection of stuff in access/genam. + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/amapi.h" +#include "access/heapam.h" +#include "access/reloptions.h" +#include "access/relscan.h" +#include "access/tableam.h" +#include "access/transam.h" +#include "access/xlog.h" +#include "catalog/index.h" +#include "catalog/pg_amproc.h" +#include "catalog/pg_type.h" +#include "commands/defrem.h" +#include "nodes/makefuncs.h" +#include "pgstat.h" +#include "storage/bufmgr.h" +#include "storage/lmgr.h" +#include "storage/predicate.h" +#include "utils/ruleutils.h" +#include "utils/snapmgr.h" +#include "utils/syscache.h" + + +/* ---------------------------------------------------------------- + * macros used in index_ routines + * + * Note: the ReindexIsProcessingIndex() check in RELATION_CHECKS is there + * to check that we don't try to scan or do retail insertions into an index + * that is currently being rebuilt or pending rebuild. This helps to catch + * things that don't work when reindexing system catalogs. The assertion + * doesn't prevent the actual rebuild because we don't use RELATION_CHECKS + * when calling the index AM's ambuild routine, and there is no reason for + * ambuild to call its subsidiary routines through this file. + * ---------------------------------------------------------------- + */ +#define RELATION_CHECKS \ +( \ + AssertMacro(RelationIsValid(indexRelation)), \ + AssertMacro(PointerIsValid(indexRelation->rd_indam)), \ + AssertMacro(!ReindexIsProcessingIndex(RelationGetRelid(indexRelation))) \ +) + +#define SCAN_CHECKS \ +( \ + AssertMacro(IndexScanIsValid(scan)), \ + AssertMacro(RelationIsValid(scan->indexRelation)), \ + AssertMacro(PointerIsValid(scan->indexRelation->rd_indam)) \ +) + +#define CHECK_REL_PROCEDURE(pname) \ +do { \ + if (indexRelation->rd_indam->pname == NULL) \ + elog(ERROR, "function \"%s\" is not defined for index \"%s\"", \ + CppAsString(pname), RelationGetRelationName(indexRelation)); \ +} while(0) + +#define CHECK_SCAN_PROCEDURE(pname) \ +do { \ + if (scan->indexRelation->rd_indam->pname == NULL) \ + elog(ERROR, "function \"%s\" is not defined for index \"%s\"", \ + CppAsString(pname), RelationGetRelationName(scan->indexRelation)); \ +} while(0) + +static IndexScanDesc index_beginscan_internal(Relation indexRelation, + int nkeys, int norderbys, Snapshot snapshot, + ParallelIndexScanDesc pscan, bool temp_snap); + + +/* ---------------------------------------------------------------- + * index_ interface functions + * ---------------------------------------------------------------- + */ + +/* ---------------- + * index_open - open an index relation by relation OID + * + * If lockmode is not "NoLock", the specified kind of lock is + * obtained on the index. (Generally, NoLock should only be + * used if the caller knows it has some appropriate lock on the + * index already.) + * + * An error is raised if the index does not exist. + * + * This is a convenience routine adapted for indexscan use. + * Some callers may prefer to use relation_open directly. + * ---------------- + */ +Relation +index_open(Oid relationId, LOCKMODE lockmode) +{ + Relation r; + + r = relation_open(relationId, lockmode); + + if (r->rd_rel->relkind != RELKIND_INDEX && + r->rd_rel->relkind != RELKIND_PARTITIONED_INDEX) + ereport(ERROR, + (errcode(ERRCODE_WRONG_OBJECT_TYPE), + errmsg("\"%s\" is not an index", + RelationGetRelationName(r)))); + + return r; +} + +/* ---------------- + * index_close - close an index relation + * + * If lockmode is not "NoLock", we then release the specified lock. + * + * Note that it is often sensible to hold a lock beyond index_close; + * in that case, the lock is released automatically at xact end. + * ---------------- + */ +void +index_close(Relation relation, LOCKMODE lockmode) +{ + LockRelId relid = relation->rd_lockInfo.lockRelId; + + Assert(lockmode >= NoLock && lockmode < MAX_LOCKMODES); + + /* The relcache does the real work... */ + RelationClose(relation); + + if (lockmode != NoLock) + UnlockRelationId(&relid, lockmode); +} + +/* ---------------- + * index_insert - insert an index tuple into a relation + * ---------------- + */ +bool +index_insert(Relation indexRelation, + Datum *values, + bool *isnull, + ItemPointer heap_t_ctid, + Relation heapRelation, + IndexUniqueCheck checkUnique, + bool indexUnchanged, + IndexInfo *indexInfo) +{ + RELATION_CHECKS; + CHECK_REL_PROCEDURE(aminsert); + + if (!(indexRelation->rd_indam->ampredlocks)) + CheckForSerializableConflictIn(indexRelation, + (ItemPointer) NULL, + InvalidBlockNumber); + + return indexRelation->rd_indam->aminsert(indexRelation, values, isnull, + heap_t_ctid, heapRelation, + checkUnique, indexUnchanged, + indexInfo); +} + +/* + * index_beginscan - start a scan of an index with amgettuple + * + * Caller must be holding suitable locks on the heap and the index. + */ +IndexScanDesc +index_beginscan(Relation heapRelation, + Relation indexRelation, + Snapshot snapshot, + int nkeys, int norderbys) +{ + IndexScanDesc scan; + + scan = index_beginscan_internal(indexRelation, nkeys, norderbys, snapshot, NULL, false); + + /* + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. + */ + scan->heapRelation = heapRelation; + scan->xs_snapshot = snapshot; + + /* prepare to fetch index matches from table */ + scan->xs_heapfetch = table_index_fetch_begin(heapRelation); + + return scan; +} + +/* + * index_beginscan_bitmap - start a scan of an index with amgetbitmap + * + * As above, caller had better be holding some lock on the parent heap + * relation, even though it's not explicitly mentioned here. + */ +IndexScanDesc +index_beginscan_bitmap(Relation indexRelation, + Snapshot snapshot, + int nkeys) +{ + IndexScanDesc scan; + + scan = index_beginscan_internal(indexRelation, nkeys, 0, snapshot, NULL, false); + + /* + * Save additional parameters into the scandesc. Everything else was set + * up by RelationGetIndexScan. + */ + scan->xs_snapshot = snapshot; + + return scan; +} + +/* + * index_beginscan_internal --- common code for index_beginscan variants + */ +static IndexScanDesc +index_beginscan_internal(Relation indexRelation, + int nkeys, int norderbys, Snapshot snapshot, + ParallelIndexScanDesc pscan, bool temp_snap) +{ + IndexScanDesc scan; + + RELATION_CHECKS; + CHECK_REL_PROCEDURE(ambeginscan); + + if (!(indexRelation->rd_indam->ampredlocks)) + PredicateLockRelation(indexRelation, snapshot); + + /* + * We hold a reference count to the relcache entry throughout the scan. + */ + RelationIncrementReferenceCount(indexRelation); + + /* + * Tell the AM to open a scan. + */ + scan = indexRelation->rd_indam->ambeginscan(indexRelation, nkeys, + norderbys); + /* Initialize information for parallel scan. */ + scan->parallel_scan = pscan; + scan->xs_temp_snap = temp_snap; + + return scan; +} + +/* ---------------- + * index_rescan - (re)start a scan of an index + * + * During a restart, the caller may specify a new set of scankeys and/or + * orderbykeys; but the number of keys cannot differ from what index_beginscan + * was told. (Later we might relax that to "must not exceed", but currently + * the index AMs tend to assume that scan->numberOfKeys is what to believe.) + * To restart the scan without changing keys, pass NULL for the key arrays. + * (Of course, keys *must* be passed on the first call, unless + * scan->numberOfKeys is zero.) + * ---------------- + */ +void +index_rescan(IndexScanDesc scan, + ScanKey keys, int nkeys, + ScanKey orderbys, int norderbys) +{ + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amrescan); + + Assert(nkeys == scan->numberOfKeys); + Assert(norderbys == scan->numberOfOrderBys); + + /* Release resources (like buffer pins) from table accesses */ + if (scan->xs_heapfetch) + table_index_fetch_reset(scan->xs_heapfetch); + + scan->kill_prior_tuple = false; /* for safety */ + scan->xs_heap_continue = false; + + scan->indexRelation->rd_indam->amrescan(scan, keys, nkeys, + orderbys, norderbys); +} + +/* ---------------- + * index_endscan - end a scan + * ---------------- + */ +void +index_endscan(IndexScanDesc scan) +{ + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amendscan); + + /* Release resources (like buffer pins) from table accesses */ + if (scan->xs_heapfetch) + { + table_index_fetch_end(scan->xs_heapfetch); + scan->xs_heapfetch = NULL; + } + + /* End the AM's scan */ + scan->indexRelation->rd_indam->amendscan(scan); + + /* Release index refcount acquired by index_beginscan */ + RelationDecrementReferenceCount(scan->indexRelation); + + if (scan->xs_temp_snap) + UnregisterSnapshot(scan->xs_snapshot); + + /* Release the scan data structure itself */ + IndexScanEnd(scan); +} + +/* ---------------- + * index_markpos - mark a scan position + * ---------------- + */ +void +index_markpos(IndexScanDesc scan) +{ + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(ammarkpos); + + scan->indexRelation->rd_indam->ammarkpos(scan); +} + +/* ---------------- + * index_restrpos - restore a scan position + * + * NOTE: this only restores the internal scan state of the index AM. See + * comments for ExecRestrPos(). + * + * NOTE: For heap, in the presence of HOT chains, mark/restore only works + * correctly if the scan's snapshot is MVCC-safe; that ensures that there's at + * most one returnable tuple in each HOT chain, and so restoring the prior + * state at the granularity of the index AM is sufficient. Since the only + * current user of mark/restore functionality is nodeMergejoin.c, this + * effectively means that merge-join plans only work for MVCC snapshots. This + * could be fixed if necessary, but for now it seems unimportant. + * ---------------- + */ +void +index_restrpos(IndexScanDesc scan) +{ + Assert(IsMVCCSnapshot(scan->xs_snapshot)); + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amrestrpos); + + /* release resources (like buffer pins) from table accesses */ + if (scan->xs_heapfetch) + table_index_fetch_reset(scan->xs_heapfetch); + + scan->kill_prior_tuple = false; /* for safety */ + scan->xs_heap_continue = false; + + scan->indexRelation->rd_indam->amrestrpos(scan); +} + +/* + * index_parallelscan_estimate - estimate shared memory for parallel scan + * + * Currently, we don't pass any information to the AM-specific estimator, + * so it can probably only return a constant. In the future, we might need + * to pass more information. + */ +Size +index_parallelscan_estimate(Relation indexRelation, Snapshot snapshot) +{ + Size nbytes; + + RELATION_CHECKS; + + nbytes = offsetof(ParallelIndexScanDescData, ps_snapshot_data); + nbytes = add_size(nbytes, EstimateSnapshotSpace(snapshot)); + nbytes = MAXALIGN(nbytes); + + /* + * If amestimateparallelscan is not provided, assume there is no + * AM-specific data needed. (It's hard to believe that could work, but + * it's easy enough to cater to it here.) + */ + if (indexRelation->rd_indam->amestimateparallelscan != NULL) + nbytes = add_size(nbytes, + indexRelation->rd_indam->amestimateparallelscan()); + + return nbytes; +} + +/* + * index_parallelscan_initialize - initialize parallel scan + * + * We initialize both the ParallelIndexScanDesc proper and the AM-specific + * information which follows it. + * + * This function calls access method specific initialization routine to + * initialize am specific information. Call this just once in the leader + * process; then, individual workers attach via index_beginscan_parallel. + */ +void +index_parallelscan_initialize(Relation heapRelation, Relation indexRelation, + Snapshot snapshot, ParallelIndexScanDesc target) +{ + Size offset; + + RELATION_CHECKS; + + offset = add_size(offsetof(ParallelIndexScanDescData, ps_snapshot_data), + EstimateSnapshotSpace(snapshot)); + offset = MAXALIGN(offset); + + target->ps_relid = RelationGetRelid(heapRelation); + target->ps_indexid = RelationGetRelid(indexRelation); + target->ps_offset = offset; + SerializeSnapshot(snapshot, target->ps_snapshot_data); + + /* aminitparallelscan is optional; assume no-op if not provided by AM */ + if (indexRelation->rd_indam->aminitparallelscan != NULL) + { + void *amtarget; + + amtarget = OffsetToPointer(target, offset); + indexRelation->rd_indam->aminitparallelscan(amtarget); + } +} + +/* ---------------- + * index_parallelrescan - (re)start a parallel scan of an index + * ---------------- + */ +void +index_parallelrescan(IndexScanDesc scan) +{ + SCAN_CHECKS; + + if (scan->xs_heapfetch) + table_index_fetch_reset(scan->xs_heapfetch); + + /* amparallelrescan is optional; assume no-op if not provided by AM */ + if (scan->indexRelation->rd_indam->amparallelrescan != NULL) + scan->indexRelation->rd_indam->amparallelrescan(scan); +} + +/* + * index_beginscan_parallel - join parallel index scan + * + * Caller must be holding suitable locks on the heap and the index. + */ +IndexScanDesc +index_beginscan_parallel(Relation heaprel, Relation indexrel, int nkeys, + int norderbys, ParallelIndexScanDesc pscan) +{ + Snapshot snapshot; + IndexScanDesc scan; + + Assert(RelationGetRelid(heaprel) == pscan->ps_relid); + snapshot = RestoreSnapshot(pscan->ps_snapshot_data); + RegisterSnapshot(snapshot); + scan = index_beginscan_internal(indexrel, nkeys, norderbys, snapshot, + pscan, true); + + /* + * Save additional parameters into the scandesc. Everything else was set + * up by index_beginscan_internal. + */ + scan->heapRelation = heaprel; + scan->xs_snapshot = snapshot; + + /* prepare to fetch index matches from table */ + scan->xs_heapfetch = table_index_fetch_begin(heaprel); + + return scan; +} + +/* ---------------- + * index_getnext_tid - get the next TID from a scan + * + * The result is the next TID satisfying the scan keys, + * or NULL if no more matching tuples exist. + * ---------------- + */ +ItemPointer +index_getnext_tid(IndexScanDesc scan, ScanDirection direction) +{ + bool found; + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgettuple); + + /* XXX: we should assert that a snapshot is pushed or registered */ + Assert(TransactionIdIsValid(RecentXmin)); + + /* + * The AM's amgettuple proc finds the next index entry matching the scan + * keys, and puts the TID into scan->xs_heaptid. It should also set + * scan->xs_recheck and possibly scan->xs_itup/scan->xs_hitup, though we + * pay no attention to those fields here. + */ + found = scan->indexRelation->rd_indam->amgettuple(scan, direction); + + /* Reset kill flag immediately for safety */ + scan->kill_prior_tuple = false; + scan->xs_heap_continue = false; + + /* If we're out of index entries, we're done */ + if (!found) + { + /* release resources (like buffer pins) from table accesses */ + if (scan->xs_heapfetch) + table_index_fetch_reset(scan->xs_heapfetch); + + return NULL; + } + Assert(ItemPointerIsValid(&scan->xs_heaptid)); + + pgstat_count_index_tuples(scan->indexRelation, 1); + + /* Return the TID of the tuple we found. */ + return &scan->xs_heaptid; +} + +/* ---------------- + * index_fetch_heap - get the scan's next heap tuple + * + * The result is a visible heap tuple associated with the index TID most + * recently fetched by index_getnext_tid, or NULL if no more matching tuples + * exist. (There can be more than one matching tuple because of HOT chains, + * although when using an MVCC snapshot it should be impossible for more than + * one such tuple to exist.) + * + * On success, the buffer containing the heap tup is pinned (the pin will be + * dropped in a future index_getnext_tid, index_fetch_heap or index_endscan + * call). + * + * Note: caller must check scan->xs_recheck, and perform rechecking of the + * scan keys if required. We do not do that here because we don't have + * enough information to do it efficiently in the general case. + * ---------------- + */ +bool +index_fetch_heap(IndexScanDesc scan, TupleTableSlot *slot) +{ + bool all_dead = false; + bool found; + + found = table_index_fetch_tuple(scan->xs_heapfetch, &scan->xs_heaptid, + scan->xs_snapshot, slot, + &scan->xs_heap_continue, &all_dead); + + if (found) + pgstat_count_heap_fetch(scan->indexRelation); + + /* + * If we scanned a whole HOT chain and found only dead tuples, tell index + * AM to kill its entry for that TID (this will take effect in the next + * amgettuple call, in index_getnext_tid). We do not do this when in + * recovery because it may violate MVCC to do so. See comments in + * RelationGetIndexScan(). + */ + if (!scan->xactStartedInRecovery) + scan->kill_prior_tuple = all_dead; + + return found; +} + +/* ---------------- + * index_getnext_slot - get the next tuple from a scan + * + * The result is true if a tuple satisfying the scan keys and the snapshot was + * found, false otherwise. The tuple is stored in the specified slot. + * + * On success, resources (like buffer pins) are likely to be held, and will be + * dropped by a future index_getnext_tid, index_fetch_heap or index_endscan + * call). + * + * Note: caller must check scan->xs_recheck, and perform rechecking of the + * scan keys if required. We do not do that here because we don't have + * enough information to do it efficiently in the general case. + * ---------------- + */ +bool +index_getnext_slot(IndexScanDesc scan, ScanDirection direction, TupleTableSlot *slot) +{ + for (;;) + { + if (!scan->xs_heap_continue) + { + ItemPointer tid; + + /* Time to fetch the next TID from the index */ + tid = index_getnext_tid(scan, direction); + + /* If we're out of index entries, we're done */ + if (tid == NULL) + break; + + Assert(ItemPointerEquals(tid, &scan->xs_heaptid)); + } + + /* + * Fetch the next (or only) visible heap tuple for this index entry. + * If we don't find anything, loop around and grab the next TID from + * the index. + */ + Assert(ItemPointerIsValid(&scan->xs_heaptid)); + if (index_fetch_heap(scan, slot)) + return true; + } + + return false; +} + +/* ---------------- + * index_getbitmap - get all tuples at once from an index scan + * + * Adds the TIDs of all heap tuples satisfying the scan keys to a bitmap. + * Since there's no interlock between the index scan and the eventual heap + * access, this is only safe to use with MVCC-based snapshots: the heap + * item slot could have been replaced by a newer tuple by the time we get + * to it. + * + * Returns the number of matching tuples found. (Note: this might be only + * approximate, so it should only be used for statistical purposes.) + * ---------------- + */ +int64 +index_getbitmap(IndexScanDesc scan, TIDBitmap *bitmap) +{ + int64 ntids; + + SCAN_CHECKS; + CHECK_SCAN_PROCEDURE(amgetbitmap); + + /* just make sure this is false... */ + scan->kill_prior_tuple = false; + + /* + * have the am's getbitmap proc do all the work. + */ + ntids = scan->indexRelation->rd_indam->amgetbitmap(scan, bitmap); + + pgstat_count_index_tuples(scan->indexRelation, ntids); + + return ntids; +} + +/* ---------------- + * index_bulk_delete - do mass deletion of index entries + * + * callback routine tells whether a given main-heap tuple is + * to be deleted + * + * return value is an optional palloc'd struct of statistics + * ---------------- + */ +IndexBulkDeleteResult * +index_bulk_delete(IndexVacuumInfo *info, + IndexBulkDeleteResult *istat, + IndexBulkDeleteCallback callback, + void *callback_state) +{ + Relation indexRelation = info->index; + + RELATION_CHECKS; + CHECK_REL_PROCEDURE(ambulkdelete); + + return indexRelation->rd_indam->ambulkdelete(info, istat, + callback, callback_state); +} + +/* ---------------- + * index_vacuum_cleanup - do post-deletion cleanup of an index + * + * return value is an optional palloc'd struct of statistics + * ---------------- + */ +IndexBulkDeleteResult * +index_vacuum_cleanup(IndexVacuumInfo *info, + IndexBulkDeleteResult *istat) +{ + Relation indexRelation = info->index; + + RELATION_CHECKS; + CHECK_REL_PROCEDURE(amvacuumcleanup); + + return indexRelation->rd_indam->amvacuumcleanup(info, istat); +} + +/* ---------------- + * index_can_return + * + * Does the index access method support index-only scans for the given + * column? + * ---------------- + */ +bool +index_can_return(Relation indexRelation, int attno) +{ + RELATION_CHECKS; + + /* amcanreturn is optional; assume false if not provided by AM */ + if (indexRelation->rd_indam->amcanreturn == NULL) + return false; + + return indexRelation->rd_indam->amcanreturn(indexRelation, attno); +} + +/* ---------------- + * index_getprocid + * + * Index access methods typically require support routines that are + * not directly the implementation of any WHERE-clause query operator + * and so cannot be kept in pg_amop. Instead, such routines are kept + * in pg_amproc. These registered procedure OIDs are assigned numbers + * according to a convention established by the access method. + * The general index code doesn't know anything about the routines + * involved; it just builds an ordered list of them for + * each attribute on which an index is defined. + * + * As of Postgres 8.3, support routines within an operator family + * are further subdivided by the "left type" and "right type" of the + * query operator(s) that they support. The "default" functions for a + * particular indexed attribute are those with both types equal to + * the index opclass' opcintype (note that this is subtly different + * from the indexed attribute's own type: it may be a binary-compatible + * type instead). Only the default functions are stored in relcache + * entries --- access methods can use the syscache to look up non-default + * functions. + * + * This routine returns the requested default procedure OID for a + * particular indexed attribute. + * ---------------- + */ +RegProcedure +index_getprocid(Relation irel, + AttrNumber attnum, + uint16 procnum) +{ + RegProcedure *loc; + int nproc; + int procindex; + + nproc = irel->rd_indam->amsupport; + + Assert(procnum > 0 && procnum <= (uint16) nproc); + + procindex = (nproc * (attnum - 1)) + (procnum - 1); + + loc = irel->rd_support; + + Assert(loc != NULL); + + return loc[procindex]; +} + +/* ---------------- + * index_getprocinfo + * + * This routine allows index AMs to keep fmgr lookup info for + * support procs in the relcache. As above, only the "default" + * functions for any particular indexed attribute are cached. + * + * Note: the return value points into cached data that will be lost during + * any relcache rebuild! Therefore, either use the callinfo right away, + * or save it only after having acquired some type of lock on the index rel. + * ---------------- + */ +FmgrInfo * +index_getprocinfo(Relation irel, + AttrNumber attnum, + uint16 procnum) +{ + FmgrInfo *locinfo; + int nproc; + int optsproc; + int procindex; + + nproc = irel->rd_indam->amsupport; + optsproc = irel->rd_indam->amoptsprocnum; + + Assert(procnum > 0 && procnum <= (uint16) nproc); + + procindex = (nproc * (attnum - 1)) + (procnum - 1); + + locinfo = irel->rd_supportinfo; + + Assert(locinfo != NULL); + + locinfo += procindex; + + /* Initialize the lookup info if first time through */ + if (locinfo->fn_oid == InvalidOid) + { + RegProcedure *loc = irel->rd_support; + RegProcedure procId; + + Assert(loc != NULL); + + procId = loc[procindex]; + + /* + * Complain if function was not found during IndexSupportInitialize. + * This should not happen unless the system tables contain bogus + * entries for the index opclass. (If an AM wants to allow a support + * function to be optional, it can use index_getprocid.) + */ + if (!RegProcedureIsValid(procId)) + elog(ERROR, "missing support function %d for attribute %d of index \"%s\"", + procnum, attnum, RelationGetRelationName(irel)); + + fmgr_info_cxt(procId, locinfo, irel->rd_indexcxt); + + if (procnum != optsproc) + { + /* Initialize locinfo->fn_expr with opclass options Const */ + bytea **attoptions = RelationGetIndexAttOptions(irel, false); + MemoryContext oldcxt = MemoryContextSwitchTo(irel->rd_indexcxt); + + set_fn_opclass_options(locinfo, attoptions[attnum - 1]); + + MemoryContextSwitchTo(oldcxt); + } + } + + return locinfo; +} + +/* ---------------- + * index_store_float8_orderby_distances + * + * Convert AM distance function's results (that can be inexact) + * to ORDER BY types and save them into xs_orderbyvals/xs_orderbynulls + * for a possible recheck. + * ---------------- + */ +void +index_store_float8_orderby_distances(IndexScanDesc scan, Oid *orderByTypes, + IndexOrderByDistance *distances, + bool recheckOrderBy) +{ + int i; + + Assert(distances || !recheckOrderBy); + + scan->xs_recheckorderby = recheckOrderBy; + + for (i = 0; i < scan->numberOfOrderBys; i++) + { + if (orderByTypes[i] == FLOAT8OID) + { +#ifndef USE_FLOAT8_BYVAL + /* must free any old value to avoid memory leakage */ + if (!scan->xs_orderbynulls[i]) + pfree(DatumGetPointer(scan->xs_orderbyvals[i])); +#endif + if (distances && !distances[i].isnull) + { + scan->xs_orderbyvals[i] = Float8GetDatum(distances[i].value); + scan->xs_orderbynulls[i] = false; + } + else + { + scan->xs_orderbyvals[i] = (Datum) 0; + scan->xs_orderbynulls[i] = true; + } + } + else if (orderByTypes[i] == FLOAT4OID) + { + /* convert distance function's result to ORDER BY type */ + if (distances && !distances[i].isnull) + { + scan->xs_orderbyvals[i] = Float4GetDatum((float4) distances[i].value); + scan->xs_orderbynulls[i] = false; + } + else + { + scan->xs_orderbyvals[i] = (Datum) 0; + scan->xs_orderbynulls[i] = true; + } + } + else + { + /* + * If the ordering operator's return value is anything else, we + * don't know how to convert the float8 bound calculated by the + * distance function to that. The executor won't actually need + * the order by values we return here, if there are no lossy + * results, so only insist on converting if the *recheck flag is + * set. + */ + if (scan->xs_recheckorderby) + elog(ERROR, "ORDER BY operator must return float8 or float4 if the distance function is lossy"); + scan->xs_orderbynulls[i] = true; + } + } +} + +/* ---------------- + * index_opclass_options + * + * Parse opclass-specific options for index column. + * ---------------- + */ +bytea * +index_opclass_options(Relation indrel, AttrNumber attnum, Datum attoptions, + bool validate) +{ + int amoptsprocnum = indrel->rd_indam->amoptsprocnum; + Oid procid = InvalidOid; + FmgrInfo *procinfo; + local_relopts relopts; + + /* fetch options support procedure if specified */ + if (amoptsprocnum != 0) + procid = index_getprocid(indrel, attnum, amoptsprocnum); + + if (!OidIsValid(procid)) + { + Oid opclass; + Datum indclassDatum; + oidvector *indclass; + bool isnull; + + if (!DatumGetPointer(attoptions)) + return NULL; /* ok, no options, no procedure */ + + /* + * Report an error if the opclass's options-parsing procedure does not + * exist but the opclass options are specified. + */ + indclassDatum = SysCacheGetAttr(INDEXRELID, indrel->rd_indextuple, + Anum_pg_index_indclass, &isnull); + Assert(!isnull); + indclass = (oidvector *) DatumGetPointer(indclassDatum); + opclass = indclass->values[attnum - 1]; + + ereport(ERROR, + (errcode(ERRCODE_INVALID_PARAMETER_VALUE), + errmsg("operator class %s has no options", + generate_opclass_name(opclass)))); + } + + init_local_reloptions(&relopts, 0); + + procinfo = index_getprocinfo(indrel, attnum, amoptsprocnum); + + (void) FunctionCall1(procinfo, PointerGetDatum(&relopts)); + + return build_local_reloptions(&relopts, attoptions, validate); +} -- cgit v1.2.3