diff options
Diffstat (limited to 'src/backend/utils/adt/datum.c')
-rw-r--r-- | src/backend/utils/adt/datum.c | 556 |
1 files changed, 556 insertions, 0 deletions
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c new file mode 100644 index 0000000..2f22939 --- /dev/null +++ b/src/backend/utils/adt/datum.c @@ -0,0 +1,556 @@ +/*------------------------------------------------------------------------- + * + * datum.c + * POSTGRES Datum (abstract data type) manipulation routines. + * + * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group + * Portions Copyright (c) 1994, Regents of the University of California + * + * + * IDENTIFICATION + * src/backend/utils/adt/datum.c + * + *------------------------------------------------------------------------- + */ + +/* + * In the implementation of these routines we assume the following: + * + * A) if a type is "byVal" then all the information is stored in the + * Datum itself (i.e. no pointers involved!). In this case the + * length of the type is always greater than zero and not more than + * "sizeof(Datum)" + * + * B) if a type is not "byVal" and it has a fixed length (typlen > 0), + * then the "Datum" always contains a pointer to a stream of bytes. + * The number of significant bytes are always equal to the typlen. + * + * C) if a type is not "byVal" and has typlen == -1, + * then the "Datum" always points to a "struct varlena". + * This varlena structure has information about the actual length of this + * particular instance of the type and about its value. + * + * D) if a type is not "byVal" and has typlen == -2, + * then the "Datum" always points to a null-terminated C string. + * + * Note that we do not treat "toasted" datums specially; therefore what + * will be copied or compared is the compressed data or toast reference. + * An exception is made for datumCopy() of an expanded object, however, + * because most callers expect to get a simple contiguous (and pfree'able) + * result from datumCopy(). See also datumTransfer(). + */ + +#include "postgres.h" + +#include "access/detoast.h" +#include "common/hashfn.h" +#include "fmgr.h" +#include "utils/builtins.h" +#include "utils/datum.h" +#include "utils/expandeddatum.h" + + +/*------------------------------------------------------------------------- + * datumGetSize + * + * Find the "real" size of a datum, given the datum value, + * whether it is a "by value", and the declared type length. + * (For TOAST pointer datums, this is the size of the pointer datum.) + * + * This is essentially an out-of-line version of the att_addlength_datum() + * macro in access/tupmacs.h. We do a tad more error checking though. + *------------------------------------------------------------------------- + */ +Size +datumGetSize(Datum value, bool typByVal, int typLen) +{ + Size size; + + if (typByVal) + { + /* Pass-by-value types are always fixed-length */ + Assert(typLen > 0 && typLen <= sizeof(Datum)); + size = (Size) typLen; + } + else + { + if (typLen > 0) + { + /* Fixed-length pass-by-ref type */ + size = (Size) typLen; + } + else if (typLen == -1) + { + /* It is a varlena datatype */ + struct varlena *s = (struct varlena *) DatumGetPointer(value); + + if (!PointerIsValid(s)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid Datum pointer"))); + + size = (Size) VARSIZE_ANY(s); + } + else if (typLen == -2) + { + /* It is a cstring datatype */ + char *s = (char *) DatumGetPointer(value); + + if (!PointerIsValid(s)) + ereport(ERROR, + (errcode(ERRCODE_DATA_EXCEPTION), + errmsg("invalid Datum pointer"))); + + size = (Size) (strlen(s) + 1); + } + else + { + elog(ERROR, "invalid typLen: %d", typLen); + size = 0; /* keep compiler quiet */ + } + } + + return size; +} + +/*------------------------------------------------------------------------- + * datumCopy + * + * Make a copy of a non-NULL datum. + * + * If the datatype is pass-by-reference, memory is obtained with palloc(). + * + * If the value is a reference to an expanded object, we flatten into memory + * obtained with palloc(). We need to copy because one of the main uses of + * this function is to copy a datum out of a transient memory context that's + * about to be destroyed, and the expanded object is probably in a child + * context that will also go away. Moreover, many callers assume that the + * result is a single pfree-able chunk. + *------------------------------------------------------------------------- + */ +Datum +datumCopy(Datum value, bool typByVal, int typLen) +{ + Datum res; + + if (typByVal) + res = value; + else if (typLen == -1) + { + /* It is a varlena datatype */ + struct varlena *vl = (struct varlena *) DatumGetPointer(value); + + if (VARATT_IS_EXTERNAL_EXPANDED(vl)) + { + /* Flatten into the caller's memory context */ + ExpandedObjectHeader *eoh = DatumGetEOHP(value); + Size resultsize; + char *resultptr; + + resultsize = EOH_get_flat_size(eoh); + resultptr = (char *) palloc(resultsize); + EOH_flatten_into(eoh, (void *) resultptr, resultsize); + res = PointerGetDatum(resultptr); + } + else + { + /* Otherwise, just copy the varlena datum verbatim */ + Size realSize; + char *resultptr; + + realSize = (Size) VARSIZE_ANY(vl); + resultptr = (char *) palloc(realSize); + memcpy(resultptr, vl, realSize); + res = PointerGetDatum(resultptr); + } + } + else + { + /* Pass by reference, but not varlena, so not toasted */ + Size realSize; + char *resultptr; + + realSize = datumGetSize(value, typByVal, typLen); + + resultptr = (char *) palloc(realSize); + memcpy(resultptr, DatumGetPointer(value), realSize); + res = PointerGetDatum(resultptr); + } + return res; +} + +/*------------------------------------------------------------------------- + * datumTransfer + * + * Transfer a non-NULL datum into the current memory context. + * + * This is equivalent to datumCopy() except when the datum is a read-write + * pointer to an expanded object. In that case we merely reparent the object + * into the current context, and return its standard R/W pointer (in case the + * given one is a transient pointer of shorter lifespan). + *------------------------------------------------------------------------- + */ +Datum +datumTransfer(Datum value, bool typByVal, int typLen) +{ + if (!typByVal && typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value))) + value = TransferExpandedObject(value, CurrentMemoryContext); + else + value = datumCopy(value, typByVal, typLen); + return value; +} + +/*------------------------------------------------------------------------- + * datumIsEqual + * + * Return true if two datums are equal, false otherwise + * + * NOTE: XXX! + * We just compare the bytes of the two values, one by one. + * This routine will return false if there are 2 different + * representations of the same value (something along the lines + * of say the representation of zero in one's complement arithmetic). + * Also, it will probably not give the answer you want if either + * datum has been "toasted". + * + * Do not try to make this any smarter than it currently is with respect + * to "toasted" datums, because some of the callers could be working in the + * context of an aborted transaction. + *------------------------------------------------------------------------- + */ +bool +datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen) +{ + bool res; + + if (typByVal) + { + /* + * just compare the two datums. NOTE: just comparing "len" bytes will + * not do the work, because we do not know how these bytes are aligned + * inside the "Datum". We assume instead that any given datatype is + * consistent about how it fills extraneous bits in the Datum. + */ + res = (value1 == value2); + } + else + { + Size size1, + size2; + char *s1, + *s2; + + /* + * Compare the bytes pointed by the pointers stored in the datums. + */ + size1 = datumGetSize(value1, typByVal, typLen); + size2 = datumGetSize(value2, typByVal, typLen); + if (size1 != size2) + return false; + s1 = (char *) DatumGetPointer(value1); + s2 = (char *) DatumGetPointer(value2); + res = (memcmp(s1, s2, size1) == 0); + } + return res; +} + +/*------------------------------------------------------------------------- + * datum_image_eq + * + * Compares two datums for identical contents, based on byte images. Return + * true if the two datums are equal, false otherwise. + *------------------------------------------------------------------------- + */ +bool +datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen) +{ + Size len1, + len2; + bool result = true; + + if (typByVal) + { + result = (value1 == value2); + } + else if (typLen > 0) + { + result = (memcmp(DatumGetPointer(value1), + DatumGetPointer(value2), + typLen) == 0); + } + else if (typLen == -1) + { + len1 = toast_raw_datum_size(value1); + len2 = toast_raw_datum_size(value2); + /* No need to de-toast if lengths don't match. */ + if (len1 != len2) + result = false; + else + { + struct varlena *arg1val; + struct varlena *arg2val; + + arg1val = PG_DETOAST_DATUM_PACKED(value1); + arg2val = PG_DETOAST_DATUM_PACKED(value2); + + result = (memcmp(VARDATA_ANY(arg1val), + VARDATA_ANY(arg2val), + len1 - VARHDRSZ) == 0); + + /* Only free memory if it's a copy made here. */ + if ((Pointer) arg1val != (Pointer) value1) + pfree(arg1val); + if ((Pointer) arg2val != (Pointer) value2) + pfree(arg2val); + } + } + else if (typLen == -2) + { + char *s1, + *s2; + + /* Compare cstring datums */ + s1 = DatumGetCString(value1); + s2 = DatumGetCString(value2); + len1 = strlen(s1) + 1; + len2 = strlen(s2) + 1; + if (len1 != len2) + return false; + result = (memcmp(s1, s2, len1) == 0); + } + else + elog(ERROR, "unexpected typLen: %d", typLen); + + return result; +} + +/*------------------------------------------------------------------------- + * datum_image_hash + * + * Generate a hash value based on the binary representation of 'value'. Most + * use cases will want to use the hash function specific to the Datum's type, + * however, some corner cases require generating a hash value based on the + * actual bits rather than the logical value. + *------------------------------------------------------------------------- + */ +uint32 +datum_image_hash(Datum value, bool typByVal, int typLen) +{ + Size len; + uint32 result; + + if (typByVal) + result = hash_bytes((unsigned char *) &value, sizeof(Datum)); + else if (typLen > 0) + result = hash_bytes((unsigned char *) DatumGetPointer(value), typLen); + else if (typLen == -1) + { + struct varlena *val; + + len = toast_raw_datum_size(value); + + val = PG_DETOAST_DATUM_PACKED(value); + + result = hash_bytes((unsigned char *) VARDATA_ANY(val), len - VARHDRSZ); + + /* Only free memory if it's a copy made here. */ + if ((Pointer) val != (Pointer) value) + pfree(val); + } + else if (typLen == -2) + { + char *s; + + s = DatumGetCString(value); + len = strlen(s) + 1; + + result = hash_bytes((unsigned char *) s, len); + } + else + { + elog(ERROR, "unexpected typLen: %d", typLen); + result = 0; /* keep compiler quiet */ + } + + return result; +} + +/*------------------------------------------------------------------------- + * btequalimage + * + * Generic "equalimage" support function. + * + * B-Tree operator classes whose equality function could safely be replaced by + * datum_image_eq() in all cases can use this as their "equalimage" support + * function. + * + * Currently, we unconditionally assume that any B-Tree operator class that + * registers btequalimage as its support function 4 must be able to safely use + * optimizations like deduplication (i.e. we return true unconditionally). If + * it ever proved necessary to rescind support for an operator class, we could + * do that in a targeted fashion by doing something with the opcintype + * argument. + *------------------------------------------------------------------------- + */ +Datum +btequalimage(PG_FUNCTION_ARGS) +{ + /* Oid opcintype = PG_GETARG_OID(0); */ + + PG_RETURN_BOOL(true); +} + +/*------------------------------------------------------------------------- + * datumEstimateSpace + * + * Compute the amount of space that datumSerialize will require for a + * particular Datum. + *------------------------------------------------------------------------- + */ +Size +datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen) +{ + Size sz = sizeof(int); + + if (!isnull) + { + /* no need to use add_size, can't overflow */ + if (typByVal) + sz += sizeof(Datum); + else if (typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) + { + /* Expanded objects need to be flattened, see comment below */ + sz += EOH_get_flat_size(DatumGetEOHP(value)); + } + else + sz += datumGetSize(value, typByVal, typLen); + } + + return sz; +} + +/*------------------------------------------------------------------------- + * datumSerialize + * + * Serialize a possibly-NULL datum into caller-provided storage. + * + * Note: "expanded" objects are flattened so as to produce a self-contained + * representation, but other sorts of toast pointers are transferred as-is. + * This is because the intended use of this function is to pass the value + * to another process within the same database server. The other process + * could not access an "expanded" object within this process's memory, but + * we assume it can dereference the same TOAST pointers this one can. + * + * The format is as follows: first, we write a 4-byte header word, which + * is either the length of a pass-by-reference datum, -1 for a + * pass-by-value datum, or -2 for a NULL. If the value is NULL, nothing + * further is written. If it is pass-by-value, sizeof(Datum) bytes + * follow. Otherwise, the number of bytes indicated by the header word + * follow. The caller is responsible for ensuring that there is enough + * storage to store the number of bytes that will be written; use + * datumEstimateSpace() to find out how many will be needed. + * *start_address is updated to point to the byte immediately following + * those written. + *------------------------------------------------------------------------- + */ +void +datumSerialize(Datum value, bool isnull, bool typByVal, int typLen, + char **start_address) +{ + ExpandedObjectHeader *eoh = NULL; + int header; + + /* Write header word. */ + if (isnull) + header = -2; + else if (typByVal) + header = -1; + else if (typLen == -1 && + VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value))) + { + eoh = DatumGetEOHP(value); + header = EOH_get_flat_size(eoh); + } + else + header = datumGetSize(value, typByVal, typLen); + memcpy(*start_address, &header, sizeof(int)); + *start_address += sizeof(int); + + /* If not null, write payload bytes. */ + if (!isnull) + { + if (typByVal) + { + memcpy(*start_address, &value, sizeof(Datum)); + *start_address += sizeof(Datum); + } + else if (eoh) + { + char *tmp; + + /* + * EOH_flatten_into expects the target address to be maxaligned, + * so we can't store directly to *start_address. + */ + tmp = (char *) palloc(header); + EOH_flatten_into(eoh, (void *) tmp, header); + memcpy(*start_address, tmp, header); + *start_address += header; + + /* be tidy. */ + pfree(tmp); + } + else + { + memcpy(*start_address, DatumGetPointer(value), header); + *start_address += header; + } + } +} + +/*------------------------------------------------------------------------- + * datumRestore + * + * Restore a possibly-NULL datum previously serialized by datumSerialize. + * *start_address is updated according to the number of bytes consumed. + *------------------------------------------------------------------------- + */ +Datum +datumRestore(char **start_address, bool *isnull) +{ + int header; + void *d; + + /* Read header word. */ + memcpy(&header, *start_address, sizeof(int)); + *start_address += sizeof(int); + + /* If this datum is NULL, we can stop here. */ + if (header == -2) + { + *isnull = true; + return (Datum) 0; + } + + /* OK, datum is not null. */ + *isnull = false; + + /* If this datum is pass-by-value, sizeof(Datum) bytes follow. */ + if (header == -1) + { + Datum val; + + memcpy(&val, *start_address, sizeof(Datum)); + *start_address += sizeof(Datum); + return val; + } + + /* Pass-by-reference case; copy indicated number of bytes. */ + Assert(header > 0); + d = palloc(header); + memcpy(d, *start_address, header); + *start_address += header; + return PointerGetDatum(d); +} |