/*------------------------------------------------------------------------- * * indextuple.c * This file contains index tuple accessor and mutator routines, * as well as various tuple utilities. * * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group * Portions Copyright (c) 1994, Regents of the University of California * * * IDENTIFICATION * src/backend/access/common/indextuple.c * *------------------------------------------------------------------------- */ #include "postgres.h" #include "access/detoast.h" #include "access/heaptoast.h" #include "access/htup_details.h" #include "access/itup.h" #include "access/toast_internals.h" /* * This enables de-toasting of index entries. Needed until VACUUM is * smart enough to rebuild indexes from scratch. */ #define TOAST_INDEX_HACK /* ---------------------------------------------------------------- * index_ tuple interface routines * ---------------------------------------------------------------- */ /* ---------------- * index_form_tuple * * This shouldn't leak any memory; otherwise, callers such as * tuplesort_putindextuplevalues() will be very unhappy. * * This shouldn't perform external table access provided caller * does not pass values that are stored EXTERNAL. * ---------------- */ IndexTuple index_form_tuple(TupleDesc tupleDescriptor, Datum *values, bool *isnull) { char *tp; /* tuple pointer */ IndexTuple tuple; /* return tuple */ Size size, data_size, hoff; int i; unsigned short infomask = 0; bool hasnull = false; uint16 tupmask = 0; int numberOfAttributes = tupleDescriptor->natts; #ifdef TOAST_INDEX_HACK Datum untoasted_values[INDEX_MAX_KEYS]; bool untoasted_free[INDEX_MAX_KEYS]; #endif if (numberOfAttributes > INDEX_MAX_KEYS) ereport(ERROR, (errcode(ERRCODE_TOO_MANY_COLUMNS), errmsg("number of index columns (%d) exceeds limit (%d)", numberOfAttributes, INDEX_MAX_KEYS))); #ifdef TOAST_INDEX_HACK for (i = 0; i < numberOfAttributes; i++) { Form_pg_attribute att = TupleDescAttr(tupleDescriptor, i); untoasted_values[i] = values[i]; untoasted_free[i] = false; /* Do nothing if value is NULL or not of varlena type */ if (isnull[i] || att->attlen != -1) continue; /* * If value is stored EXTERNAL, must fetch it so we are not depending * on outside storage. This should be improved someday. */ if (VARATT_IS_EXTERNAL(DatumGetPointer(values[i]))) { untoasted_values[i] = PointerGetDatum(detoast_external_attr((struct varlena *) DatumGetPointer(values[i]))); untoasted_free[i] = true; } /* * If value is above size target, and is of a compressible datatype, * try to compress it in-line. */ if (!VARATT_IS_EXTENDED(DatumGetPointer(untoasted_values[i])) && VARSIZE(DatumGetPointer(untoasted_values[i])) > TOAST_INDEX_TARGET && (att->attstorage == TYPSTORAGE_EXTENDED || att->attstorage == TYPSTORAGE_MAIN)) { Datum cvalue; cvalue = toast_compress_datum(untoasted_values[i], att->attcompression); if (DatumGetPointer(cvalue) != NULL) { /* successful compression */ if (untoasted_free[i]) pfree(DatumGetPointer(untoasted_values[i])); untoasted_values[i] = cvalue; untoasted_free[i] = true; } } } #endif for (i = 0; i < numberOfAttributes; i++) { if (isnull[i]) { hasnull = true; break; } } if (hasnull) infomask |= INDEX_NULL_MASK; hoff = IndexInfoFindDataOffset(infomask); #ifdef TOAST_INDEX_HACK data_size = heap_compute_data_size(tupleDescriptor, untoasted_values, isnull); #else data_size = heap_compute_data_size(tupleDescriptor, values, isnull); #endif size = hoff + data_size; size = MAXALIGN(size); /* be conservative */ tp = (char *) palloc0(size); tuple = (IndexTuple) tp; heap_fill_tuple(tupleDescriptor, #ifdef TOAST_INDEX_HACK untoasted_values, #else values, #endif isnull, (char *) tp + hoff, data_size, &tupmask, (hasnull ? (bits8 *) tp + sizeof(IndexTupleData) : NULL)); #ifdef TOAST_INDEX_HACK for (i = 0; i < numberOfAttributes; i++) { if (untoasted_free[i]) pfree(DatumGetPointer(untoasted_values[i])); } #endif /* * We do this because heap_fill_tuple wants to initialize a "tupmask" * which is used for HeapTuples, but we want an indextuple infomask. The * only relevant info is the "has variable attributes" field. We have * already set the hasnull bit above. */ if (tupmask & HEAP_HASVARWIDTH) infomask |= INDEX_VAR_MASK; /* Also assert we got rid of external attributes */ #ifdef TOAST_INDEX_HACK Assert((tupmask & HEAP_HASEXTERNAL) == 0); #endif /* * Here we make sure that the size will fit in the field reserved for it * in t_info. */ if ((size & INDEX_SIZE_MASK) != size) ereport(ERROR, (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), errmsg("index row requires %zu bytes, maximum size is %zu", size, (Size) INDEX_SIZE_MASK))); infomask |= size; /* * initialize metadata */ tuple->t_info = infomask; return tuple; } /* ---------------- * nocache_index_getattr * * This gets called from index_getattr() macro, and only in cases * where we can't use cacheoffset and the value is not null. * * This caches attribute offsets in the attribute descriptor. * * An alternative way to speed things up would be to cache offsets * with the tuple, but that seems more difficult unless you take * the storage hit of actually putting those offsets into the * tuple you send to disk. Yuck. * * This scheme will be slightly slower than that, but should * perform well for queries which hit large #'s of tuples. After * you cache the offsets once, examining all the other tuples using * the same attribute descriptor will go much quicker. -cim 5/4/91 * ---------------- */ Datum nocache_index_getattr(IndexTuple tup, int attnum, TupleDesc tupleDesc) { char *tp; /* ptr to data part of tuple */ bits8 *bp = NULL; /* ptr to null bitmap in tuple */ bool slow = false; /* do we have to walk attrs? */ int data_off; /* tuple data offset */ int off; /* current offset within data */ /* ---------------- * Three cases: * * 1: No nulls and no variable-width attributes. * 2: Has a null or a var-width AFTER att. * 3: Has nulls or var-widths BEFORE att. * ---------------- */ data_off = IndexInfoFindDataOffset(tup->t_info); attnum--; if (IndexTupleHasNulls(tup)) { /* * there's a null somewhere in the tuple * * check to see if desired att is null */ /* XXX "knows" t_bits are just after fixed tuple header! */ bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData)); /* * Now check to see if any preceding bits are null... */ { int byte = attnum >> 3; int finalbit = attnum & 0x07; /* check for nulls "before" final bit of last byte */ if ((~bp[byte]) & ((1 << finalbit) - 1)) slow = true; else { /* check for nulls in any "earlier" bytes */ int i; for (i = 0; i < byte; i++) { if (bp[i] != 0xFF) { slow = true; break; } } } } } tp = (char *) tup + data_off; if (!slow) { Form_pg_attribute att; /* * If we get here, there are no nulls up to and including the target * attribute. If we have a cached offset, we can use it. */ att = TupleDescAttr(tupleDesc, attnum); if (att->attcacheoff >= 0) return fetchatt(att, tp + att->attcacheoff); /* * Otherwise, check for non-fixed-length attrs up to and including * target. If there aren't any, it's safe to cheaply initialize the * cached offsets for these attrs. */ if (IndexTupleHasVarwidths(tup)) { int j; for (j = 0; j <= attnum; j++) { if (TupleDescAttr(tupleDesc, j)->attlen <= 0) { slow = true; break; } } } } if (!slow) { int natts = tupleDesc->natts; int j = 1; /* * If we get here, we have a tuple with no nulls or var-widths up to * and including the target attribute, so we can use the cached offset * ... only we don't have it yet, or we'd not have got here. Since * it's cheap to compute offsets for fixed-width columns, we take the * opportunity to initialize the cached offsets for *all* the leading * fixed-width columns, in hope of avoiding future visits to this * routine. */ TupleDescAttr(tupleDesc, 0)->attcacheoff = 0; /* we might have set some offsets in the slow path previously */ while (j < natts && TupleDescAttr(tupleDesc, j)->attcacheoff > 0) j++; off = TupleDescAttr(tupleDesc, j - 1)->attcacheoff + TupleDescAttr(tupleDesc, j - 1)->attlen; for (; j < natts; j++) { Form_pg_attribute att = TupleDescAttr(tupleDesc, j); if (att->attlen <= 0) break; off = att_align_nominal(off, att->attalign); att->attcacheoff = off; off += att->attlen; } Assert(j > attnum); off = TupleDescAttr(tupleDesc, attnum)->attcacheoff; } else { bool usecache = true; int i; /* * Now we know that we have to walk the tuple CAREFULLY. But we still * might be able to cache some offsets for next time. * * Note - This loop is a little tricky. For each non-null attribute, * we have to first account for alignment padding before the attr, * then advance over the attr based on its length. Nulls have no * storage and no alignment padding either. We can use/set * attcacheoff until we reach either a null or a var-width attribute. */ off = 0; for (i = 0;; i++) /* loop exit is at "break" */ { Form_pg_attribute att = TupleDescAttr(tupleDesc, i); if (IndexTupleHasNulls(tup) && att_isnull(i, bp)) { usecache = false; continue; /* this cannot be the target att */ } /* If we know the next offset, we can skip the rest */ if (usecache && att->attcacheoff >= 0) off = att->attcacheoff; else if (att->attlen == -1) { /* * We can only cache the offset for a varlena attribute if the * offset is already suitably aligned, so that there would be * no pad bytes in any case: then the offset will be valid for * either an aligned or unaligned value. */ if (usecache && off == att_align_nominal(off, att->attalign)) att->attcacheoff = off; else { off = att_align_pointer(off, att->attalign, -1, tp + off); usecache = false; } } else { /* not varlena, so safe to use att_align_nominal */ off = att_align_nominal(off, att->attalign); if (usecache) att->attcacheoff = off; } if (i == attnum) break; off = att_addlength_pointer(off, att->attlen, tp + off); if (usecache && att->attlen <= 0) usecache = false; } } return fetchatt(TupleDescAttr(tupleDesc, attnum), tp + off); } /* * Convert an index tuple into Datum/isnull arrays. * * The caller must allocate sufficient storage for the output arrays. * (INDEX_MAX_KEYS entries should be enough.) * * This is nearly the same as heap_deform_tuple(), but for IndexTuples. * One difference is that the tuple should never have any missing columns. */ void index_deform_tuple(IndexTuple tup, TupleDesc tupleDescriptor, Datum *values, bool *isnull) { char *tp; /* ptr to tuple data */ bits8 *bp; /* ptr to null bitmap in tuple */ /* XXX "knows" t_bits are just after fixed tuple header! */ bp = (bits8 *) ((char *) tup + sizeof(IndexTupleData)); tp = (char *) tup + IndexInfoFindDataOffset(tup->t_info); index_deform_tuple_internal(tupleDescriptor, values, isnull, tp, bp, IndexTupleHasNulls(tup)); } /* * Convert an index tuple into Datum/isnull arrays, * without assuming any specific layout of the index tuple header. * * Caller must supply pointer to data area, pointer to nulls bitmap * (which can be NULL if !hasnulls), and hasnulls flag. */ void index_deform_tuple_internal(TupleDesc tupleDescriptor, Datum *values, bool *isnull, char *tp, bits8 *bp, int hasnulls) { int natts = tupleDescriptor->natts; /* number of atts to extract */ int attnum; int off = 0; /* offset in tuple data */ bool slow = false; /* can we use/set attcacheoff? */ /* Assert to protect callers who allocate fixed-size arrays */ Assert(natts <= INDEX_MAX_KEYS); for (attnum = 0; attnum < natts; attnum++) { Form_pg_attribute thisatt = TupleDescAttr(tupleDescriptor, attnum); if (hasnulls && att_isnull(attnum, bp)) { values[attnum] = (Datum) 0; isnull[attnum] = true; slow = true; /* can't use attcacheoff anymore */ continue; } isnull[attnum] = false; if (!slow && thisatt->attcacheoff >= 0) off = thisatt->attcacheoff; else if (thisatt->attlen == -1) { /* * We can only cache the offset for a varlena attribute if the * offset is already suitably aligned, so that there would be no * pad bytes in any case: then the offset will be valid for either * an aligned or unaligned value. */ if (!slow && off == att_align_nominal(off, thisatt->attalign)) thisatt->attcacheoff = off; else { off = att_align_pointer(off, thisatt->attalign, -1, tp + off); slow = true; } } else { /* not varlena, so safe to use att_align_nominal */ off = att_align_nominal(off, thisatt->attalign); if (!slow) thisatt->attcacheoff = off; } values[attnum] = fetchatt(thisatt, tp + off); off = att_addlength_pointer(off, thisatt->attlen, tp + off); if (thisatt->attlen <= 0) slow = true; /* can't use attcacheoff anymore */ } } /* * Create a palloc'd copy of an index tuple. */ IndexTuple CopyIndexTuple(IndexTuple source) { IndexTuple result; Size size; size = IndexTupleSize(source); result = (IndexTuple) palloc(size); memcpy(result, source, size); return result; } /* * Create a palloc'd copy of an index tuple, leaving only the first * leavenatts attributes remaining. * * Truncation is guaranteed to result in an index tuple that is no * larger than the original. It is safe to use the IndexTuple with * the original tuple descriptor, but caller must avoid actually * accessing truncated attributes from returned tuple! In practice * this means that index_getattr() must be called with special care, * and that the truncated tuple should only ever be accessed by code * under caller's direct control. * * It's safe to call this function with a buffer lock held, since it * never performs external table access. If it ever became possible * for index tuples to contain EXTERNAL TOAST values, then this would * have to be revisited. */ IndexTuple index_truncate_tuple(TupleDesc sourceDescriptor, IndexTuple source, int leavenatts) { TupleDesc truncdesc; Datum values[INDEX_MAX_KEYS]; bool isnull[INDEX_MAX_KEYS]; IndexTuple truncated; Assert(leavenatts <= sourceDescriptor->natts); /* Easy case: no truncation actually required */ if (leavenatts == sourceDescriptor->natts) return CopyIndexTuple(source); /* Create temporary descriptor to scribble on */ truncdesc = palloc(TupleDescSize(sourceDescriptor)); TupleDescCopy(truncdesc, sourceDescriptor); truncdesc->natts = leavenatts; /* Deform, form copy of tuple with fewer attributes */ index_deform_tuple(source, truncdesc, values, isnull); truncated = index_form_tuple(truncdesc, values, isnull); truncated->t_tid = source->t_tid; Assert(IndexTupleSize(truncated) <= IndexTupleSize(source)); /* * Cannot leak memory here, TupleDescCopy() doesn't allocate any inner * structure, so, plain pfree() should clean all allocated memory */ pfree(truncdesc); return truncated; }