summaryrefslogtreecommitdiffstats
path: root/src/backend/utils/adt/datum.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/backend/utils/adt/datum.c')
-rw-r--r--src/backend/utils/adt/datum.c556
1 files changed, 556 insertions, 0 deletions
diff --git a/src/backend/utils/adt/datum.c b/src/backend/utils/adt/datum.c
new file mode 100644
index 0000000..2f22939
--- /dev/null
+++ b/src/backend/utils/adt/datum.c
@@ -0,0 +1,556 @@
+/*-------------------------------------------------------------------------
+ *
+ * datum.c
+ * POSTGRES Datum (abstract data type) manipulation routines.
+ *
+ * Portions Copyright (c) 1996-2021, PostgreSQL Global Development Group
+ * Portions Copyright (c) 1994, Regents of the University of California
+ *
+ *
+ * IDENTIFICATION
+ * src/backend/utils/adt/datum.c
+ *
+ *-------------------------------------------------------------------------
+ */
+
+/*
+ * In the implementation of these routines we assume the following:
+ *
+ * A) if a type is "byVal" then all the information is stored in the
+ * Datum itself (i.e. no pointers involved!). In this case the
+ * length of the type is always greater than zero and not more than
+ * "sizeof(Datum)"
+ *
+ * B) if a type is not "byVal" and it has a fixed length (typlen > 0),
+ * then the "Datum" always contains a pointer to a stream of bytes.
+ * The number of significant bytes is always equal to the typlen.
+ *
+ * C) if a type is not "byVal" and has typlen == -1,
+ * then the "Datum" always points to a "struct varlena".
+ * This varlena structure has information about the actual length of this
+ * particular instance of the type and about its value.
+ *
+ * D) if a type is not "byVal" and has typlen == -2,
+ * then the "Datum" always points to a null-terminated C string.
+ *
+ * Note that we do not treat "toasted" datums specially; therefore what
+ * will be copied or compared is the compressed data or toast reference.
+ * An exception is made for datumCopy() of an expanded object, however,
+ * because most callers expect to get a simple contiguous (and pfree'able)
+ * result from datumCopy(). See also datumTransfer().
+ */
+
+#include "postgres.h"
+
+#include "access/detoast.h"
+#include "common/hashfn.h"
+#include "fmgr.h"
+#include "utils/builtins.h"
+#include "utils/datum.h"
+#include "utils/expandeddatum.h"
+
+
+/*-------------------------------------------------------------------------
+ * datumGetSize
+ *
+ * Report the number of bytes occupied by a datum, given the datum value,
+ * whether its type is pass-by-value, and the declared type length.
+ *
+ * The answer depends on the storage class:
+ *    pass-by-value    typLen (asserted to lie in 1 .. sizeof(Datum))
+ *    typLen > 0       typLen
+ *    typLen == -1     VARSIZE_ANY() of the varlena the datum points to
+ *    typLen == -2     strlen() of the C string, plus one for the terminator
+ * Any other typLen is rejected with elog(ERROR), and an invalid pointer in
+ * either variable-length case is rejected with ereport(ERROR).
+ *
+ * For a TOAST pointer datum, the result is the size of the pointer datum.
+ *
+ * This is essentially an out-of-line version of the att_addlength_datum()
+ * macro in access/tupmacs.h, with a tad more error checking.
+ *-------------------------------------------------------------------------
+ */
+Size
+datumGetSize(Datum value, bool typByVal, int typLen)
+{
+	if (typByVal)
+	{
+		/* By-value types must be fixed-length and fit inside a Datum */
+		Assert(typLen > 0 && typLen <= sizeof(Datum));
+		return (Size) typLen;
+	}
+
+	/* Everything below is pass-by-reference */
+	if (typLen > 0)
+		return (Size) typLen;	/* fixed-length: declared length is it */
+
+	if (typLen == -1)
+	{
+		/* varlena: the length is recorded in the value's own header */
+		struct varlena *vl = (struct varlena *) DatumGetPointer(value);
+
+		if (!PointerIsValid(vl))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_EXCEPTION),
+					 errmsg("invalid Datum pointer")));
+
+		return (Size) VARSIZE_ANY(vl);
+	}
+
+	if (typLen == -2)
+	{
+		/* cstring: count the characters and the trailing NUL */
+		char	   *str = (char *) DatumGetPointer(value);
+
+		if (!PointerIsValid(str))
+			ereport(ERROR,
+					(errcode(ERRCODE_DATA_EXCEPTION),
+					 errmsg("invalid Datum pointer")));
+
+		return (Size) (strlen(str) + 1);
+	}
+
+	elog(ERROR, "invalid typLen: %d", typLen);
+	return 0;					/* keep compiler quiet */
+}
+
+/*-------------------------------------------------------------------------
+ * datumCopy
+ *
+ * Produce a copy of a non-NULL datum.
+ *
+ * A pass-by-value datum is simply returned as-is.  For a pass-by-reference
+ * datum the bytes are duplicated into a single chunk obtained with palloc(),
+ * and the result points at that chunk.
+ *
+ * A reference to an expanded object is not copied verbatim; instead the
+ * object is flattened into the palloc'd chunk.  One of the main uses of
+ * this function is to rescue a datum from a transient memory context that
+ * is about to be destroyed, and the expanded object probably lives in a
+ * child context that will disappear too.  Besides, many callers rely on
+ * the result being one pfree-able chunk.
+ *-------------------------------------------------------------------------
+ */
+Datum
+datumCopy(Datum value, bool typByVal, int typLen)
+{
+	Size		nbytes;
+	char	   *copy;
+
+	/* Nothing to duplicate when the Datum holds the whole value */
+	if (typByVal)
+		return value;
+
+	if (typLen == -1)
+	{
+		struct varlena *src = (struct varlena *) DatumGetPointer(value);
+
+		if (VARATT_IS_EXTERNAL_EXPANDED(src))
+		{
+			/* Expanded object: flatten it into the caller's context */
+			ExpandedObjectHeader *expanded = DatumGetEOHP(value);
+
+			nbytes = EOH_get_flat_size(expanded);
+			copy = (char *) palloc(nbytes);
+			EOH_flatten_into(expanded, (void *) copy, nbytes);
+		}
+		else
+		{
+			/* Any other varlena is duplicated byte for byte */
+			nbytes = (Size) VARSIZE_ANY(src);
+			copy = (char *) palloc(nbytes);
+			memcpy(copy, src, nbytes);
+		}
+	}
+	else
+	{
+		/* Pass-by-reference but not varlena, hence never toasted */
+		nbytes = datumGetSize(value, typByVal, typLen);
+		copy = (char *) palloc(nbytes);
+		memcpy(copy, DatumGetPointer(value), nbytes);
+	}
+
+	return PointerGetDatum(copy);
+}
+
+/*-------------------------------------------------------------------------
+ * datumTransfer
+ *
+ * Move a non-NULL datum into the current memory context.
+ *
+ * The result is the same as datumCopy() would give, with one exception:
+ * when the datum is a read-write pointer to an expanded object, the object
+ * is merely reparented into CurrentMemoryContext and its standard R/W
+ * pointer is returned (the pointer passed in might be a transient one with
+ * a shorter lifespan).
+ *-------------------------------------------------------------------------
+ */
+Datum
+datumTransfer(Datum value, bool typByVal, int typLen)
+{
+	/* Only a by-reference varlena can possibly be an expanded object */
+	bool		is_varlena = (!typByVal && typLen == -1);
+
+	if (is_varlena &&
+		VARATT_IS_EXTERNAL_EXPANDED_RW(DatumGetPointer(value)))
+		return TransferExpandedObject(value, CurrentMemoryContext);
+
+	return datumCopy(value, typByVal, typLen);
+}
+
+/*-------------------------------------------------------------------------
+ * datumIsEqual
+ *
+ * Return true if the two datums have identical stored bytes, else false.
+ *
+ * NOTE: XXX!
+ * This is a plain byte-wise comparison.  It reports "not equal" whenever
+ * one logical value has two different representations (think of the two
+ * zeroes of one's complement arithmetic).  It will also probably not give
+ * the answer you want if either datum has been "toasted".
+ *
+ * Do not try to make this any smarter with respect to "toasted" datums,
+ * because some of the callers could be working in the context of an
+ * aborted transaction.
+ *-------------------------------------------------------------------------
+ */
+bool
+datumIsEqual(Datum value1, Datum value2, bool typByVal, int typLen)
+{
+	Size		len1;
+	Size		len2;
+
+	/*
+	 * For by-value types compare the Datums as a whole.  Comparing just
+	 * "len" bytes would not work, since we do not know how those bytes are
+	 * aligned inside the Datum; instead we assume that any given datatype
+	 * is consistent about how it fills the extraneous bits.
+	 */
+	if (typByVal)
+		return value1 == value2;
+
+	/* By-reference: values of different sizes cannot match */
+	len1 = datumGetSize(value1, typByVal, typLen);
+	len2 = datumGetSize(value2, typByVal, typLen);
+	if (len1 != len2)
+		return false;
+
+	/* Same size, so compare the bytes the two datums point to */
+	return memcmp((char *) DatumGetPointer(value1),
+				  (char *) DatumGetPointer(value2),
+				  len1) == 0;
+}
+
+/*-------------------------------------------------------------------------
+ * datum_image_eq
+ *
+ * Decide whether two datums have identical contents, judged purely by
+ * their byte images.  Returns true when they match, false otherwise.
+ *
+ * Unlike datumIsEqual(), varlena values are detoasted before their
+ * payloads are compared.  An unrecognized typLen raises an error.
+ *-------------------------------------------------------------------------
+ */
+bool
+datum_image_eq(Datum value1, Datum value2, bool typByVal, int typLen)
+{
+	bool		same = true;
+
+	if (typByVal)
+		same = (value1 == value2);
+	else if (typLen > 0)
+	{
+		/* Fixed-length by-reference: compare exactly typLen bytes */
+		same = (memcmp(DatumGetPointer(value1),
+					   DatumGetPointer(value2),
+					   typLen) == 0);
+	}
+	else if (typLen == -1)
+	{
+		Size		rawlen1 = toast_raw_datum_size(value1);
+		Size		rawlen2 = toast_raw_datum_size(value2);
+
+		if (rawlen1 != rawlen2)
+		{
+			/* Lengths differ, so there is no point in detoasting */
+			same = false;
+		}
+		else
+		{
+			struct varlena *flat1 = PG_DETOAST_DATUM_PACKED(value1);
+			struct varlena *flat2 = PG_DETOAST_DATUM_PACKED(value2);
+
+			same = (memcmp(VARDATA_ANY(flat1),
+						   VARDATA_ANY(flat2),
+						   rawlen1 - VARHDRSZ) == 0);
+
+			/* Release only what detoasting allocated on our behalf */
+			if ((Pointer) flat1 != (Pointer) value1)
+				pfree(flat1);
+			if ((Pointer) flat2 != (Pointer) value2)
+				pfree(flat2);
+		}
+	}
+	else if (typLen == -2)
+	{
+		/* cstrings: equal when length (with NUL) and bytes both agree */
+		char	   *str1 = DatumGetCString(value1);
+		char	   *str2 = DatumGetCString(value2);
+		Size		n1 = strlen(str1) + 1;
+		Size		n2 = strlen(str2) + 1;
+
+		same = (n1 == n2 && memcmp(str1, str2, n1) == 0);
+	}
+	else
+		elog(ERROR, "unexpected typLen: %d", typLen);
+
+	return same;
+}
+
+/*-------------------------------------------------------------------------
+ * datum_image_hash
+ *
+ * Compute a hash from the binary representation of 'value'.  Most callers
+ * should use the hash function belonging to the Datum's type; this routine
+ * serves the corner cases that need a hash of the actual bits rather than
+ * of the logical value.
+ *
+ * What gets fed to hash_bytes():
+ *    pass-by-value    all sizeof(Datum) bytes of the Datum itself
+ *    typLen > 0       typLen bytes at the pointer
+ *    typLen == -1     the payload of the detoasted varlena
+ *    typLen == -2     the C string including its terminator
+ * Any other typLen raises an error.
+ *-------------------------------------------------------------------------
+ */
+uint32
+datum_image_hash(Datum value, bool typByVal, int typLen)
+{
+	if (typByVal)
+		return hash_bytes((unsigned char *) &value, sizeof(Datum));
+
+	if (typLen > 0)
+		return hash_bytes((unsigned char *) DatumGetPointer(value), typLen);
+
+	if (typLen == -1)
+	{
+		Size		rawlen = toast_raw_datum_size(value);
+		struct varlena *flat = PG_DETOAST_DATUM_PACKED(value);
+		uint32		hash;
+
+		hash = hash_bytes((unsigned char *) VARDATA_ANY(flat),
+						  rawlen - VARHDRSZ);
+
+		/* Release only what detoasting allocated on our behalf */
+		if ((Pointer) flat != (Pointer) value)
+			pfree(flat);
+
+		return hash;
+	}
+
+	if (typLen == -2)
+	{
+		char	   *str = DatumGetCString(value);
+		Size		nbytes = strlen(str) + 1;
+
+		return hash_bytes((unsigned char *) str, nbytes);
+	}
+
+	elog(ERROR, "unexpected typLen: %d", typLen);
+	return 0;					/* keep compiler quiet */
+}
+
+/*-------------------------------------------------------------------------
+ * btequalimage
+ *
+ * Generic "equalimage" support function.
+ *
+ * A B-Tree operator class may register this as its "equalimage" support
+ * function (support function 4) when its equality function could safely be
+ * replaced by datum_image_eq() in all cases.
+ *
+ * The answer is always true: any operator class that registers this
+ * function is assumed to be able to safely use optimizations such as
+ * deduplication.  Should it ever become necessary to rescind support for
+ * a particular operator class, that could be done in a targeted fashion by
+ * inspecting the opcintype argument.
+ *-------------------------------------------------------------------------
+ */
+Datum
+btequalimage(PG_FUNCTION_ARGS)
+{
+	/*
+	 * Argument 0 (the opcintype Oid) is deliberately not fetched; the
+	 * result does not depend on it at present.
+	 */
+	PG_RETURN_BOOL(true);
+}
+
+/*-------------------------------------------------------------------------
+ * datumEstimateSpace
+ *
+ * Work out how many bytes datumSerialize will need for a given Datum:
+ * sizeof(int) for the header word, plus the payload (nothing for a NULL).
+ *-------------------------------------------------------------------------
+ */
+Size
+datumEstimateSpace(Datum value, bool isnull, bool typByVal, int typLen)
+{
+	Size		payload;
+
+	if (isnull)
+		payload = 0;			/* a NULL is header word only */
+	else if (typByVal)
+		payload = sizeof(Datum);
+	else if (typLen == -1 &&
+			 VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
+	{
+		/* datumSerialize flattens expanded objects, so size the flat form */
+		payload = EOH_get_flat_size(DatumGetEOHP(value));
+	}
+	else
+		payload = datumGetSize(value, typByVal, typLen);
+
+	/* no need to use add_size, can't overflow */
+	return sizeof(int) + payload;
+}
+
+/*-------------------------------------------------------------------------
+ * datumSerialize
+ *
+ * Write a possibly-NULL datum into storage supplied by the caller.
+ *
+ * Note: "expanded" objects are flattened so that the output is
+ * self-contained, whereas other sorts of toast pointers are transferred
+ * as-is.  The intended use is to hand the value to another process within
+ * the same database server: that process could not reach an "expanded"
+ * object in this process's memory, but we assume it can dereference the
+ * same TOAST pointers this one can.
+ *
+ * Output format: first an int header word (sizeof(int) bytes), which is
+ *    -2             for a NULL; nothing further is written
+ *    -1             for a pass-by-value datum; sizeof(Datum) bytes follow
+ *    the length     for a pass-by-reference datum; that many bytes follow
+ *
+ * The caller must make sure the storage is big enough for everything that
+ * will be written; datumEstimateSpace() reports how much is needed.
+ * *start_address is advanced to the byte immediately following those
+ * written.
+ *-------------------------------------------------------------------------
+ */
+void
+datumSerialize(Datum value, bool isnull, bool typByVal, int typLen,
+			   char **start_address)
+{
+	ExpandedObjectHeader *expanded = NULL;
+	int			hdr;
+
+	/* Choose the header word; 'expanded' is set only for expanded objects */
+	if (isnull)
+		hdr = -2;
+	else if (typByVal)
+		hdr = -1;
+	else if (typLen != -1 ||
+			 !VARATT_IS_EXTERNAL_EXPANDED(DatumGetPointer(value)))
+		hdr = datumGetSize(value, typByVal, typLen);
+	else
+	{
+		expanded = DatumGetEOHP(value);
+		hdr = EOH_get_flat_size(expanded);
+	}
+
+	/* Emit the header word */
+	memcpy(*start_address, &hdr, sizeof(int));
+	*start_address += sizeof(int);
+
+	/* A NULL has no payload */
+	if (isnull)
+		return;
+
+	if (typByVal)
+	{
+		memcpy(*start_address, &value, sizeof(Datum));
+		*start_address += sizeof(Datum);
+	}
+	else if (expanded == NULL)
+	{
+		/* Ordinary pass-by-reference value: copy its bytes verbatim */
+		memcpy(*start_address, DatumGetPointer(value), hdr);
+		*start_address += hdr;
+	}
+	else
+	{
+		/*
+		 * EOH_flatten_into expects the target address to be maxaligned, so
+		 * flatten into a palloc'd scratch buffer and copy from there
+		 * rather than storing directly to *start_address.
+		 */
+		char	   *scratch = (char *) palloc(hdr);
+
+		EOH_flatten_into(expanded, (void *) scratch, hdr);
+		memcpy(*start_address, scratch, hdr);
+		*start_address += hdr;
+
+		/* be tidy. */
+		pfree(scratch);
+	}
+}
+
+/*-------------------------------------------------------------------------
+ * datumRestore
+ *
+ * Rebuild a possibly-NULL datum that datumSerialize wrote earlier.
+ *
+ * *isnull is set to say whether the datum was NULL; for a NULL the result
+ * is (Datum) 0.  A pass-by-reference value is returned in freshly
+ * palloc'd memory.  *start_address is advanced past the bytes consumed.
+ *-------------------------------------------------------------------------
+ */
+Datum
+datumRestore(char **start_address, bool *isnull)
+{
+	int			hdr;
+	Datum		byval;
+
+	/* Fetch the header word */
+	memcpy(&hdr, *start_address, sizeof(int));
+	*start_address += sizeof(int);
+
+	/* Header -2 marks a NULL, which has no payload */
+	if (hdr == -2)
+	{
+		*isnull = true;
+		return (Datum) 0;
+	}
+
+	/* From here on the datum is known to be non-null */
+	*isnull = false;
+
+	if (hdr != -1)
+	{
+		/* Pass-by-reference: the header is the payload length */
+		void	   *payload;
+
+		Assert(hdr > 0);
+		payload = palloc(hdr);
+		memcpy(payload, *start_address, hdr);
+		*start_address += hdr;
+		return PointerGetDatum(payload);
+	}
+
+	/* Header -1: a pass-by-value datum, stored as sizeof(Datum) bytes */
+	memcpy(&byval, *start_address, sizeof(Datum));
+	*start_address += sizeof(Datum);
+	return byval;
+}