summaryrefslogtreecommitdiffstats
path: root/src/include/utils/jsonb.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/include/utils/jsonb.h')
-rw-r--r--src/include/utils/jsonb.h411
1 files changed, 411 insertions, 0 deletions
diff --git a/src/include/utils/jsonb.h b/src/include/utils/jsonb.h
new file mode 100644
index 0000000..5860011
--- /dev/null
+++ b/src/include/utils/jsonb.h
@@ -0,0 +1,411 @@
+/*-------------------------------------------------------------------------
+ *
+ * jsonb.h
+ * Declarations for jsonb data type support.
+ *
+ * Copyright (c) 1996-2020, PostgreSQL Global Development Group
+ *
+ * src/include/utils/jsonb.h
+ *
+ *-------------------------------------------------------------------------
+ */
+#ifndef __JSONB_H__
+#define __JSONB_H__
+
+#include "lib/stringinfo.h"
+#include "utils/array.h"
+#include "utils/numeric.h"
+
+/* Tokens for sequentially processing a jsonb value: JsonbIteratorNext() returns one, pushJsonbValue() takes one as "seq" */
+typedef enum
+{
+ WJB_DONE,
+ WJB_KEY,
+ WJB_VALUE,
+ WJB_ELEM,
+ WJB_BEGIN_ARRAY,
+ WJB_END_ARRAY,
+ WJB_BEGIN_OBJECT,
+ WJB_END_OBJECT
+} JsonbIteratorToken;
+
+/* Strategy numbers for GIN index opclasses */
+#define JsonbContainsStrategyNumber 7
+#define JsonbExistsStrategyNumber 9
+#define JsonbExistsAnyStrategyNumber 10
+#define JsonbExistsAllStrategyNumber 11
+#define JsonbJsonpathExistsStrategyNumber 15
+#define JsonbJsonpathPredicateStrategyNumber 16
+
+
+/*
+ * In the standard jsonb_ops GIN opclass for jsonb, we choose to index both
+ * keys and values. The storage format is text. The first byte of the text
+ * string distinguishes whether this is a key (always a string), null value,
+ * boolean value, numeric value, or string value. However, array elements
+ * that are strings are marked as though they were keys; this imprecision
+ * supports the definition of the "exists" operator, which treats array
+ * elements like keys. The remainder of the text string is empty for a null
+ * value, "t" or "f" for a boolean value, a normalized print representation of
+ * a numeric value, or the text of a string value. However, if the length of
+ * this text representation would exceed JGIN_MAXLENGTH bytes, we instead hash
+ * the text representation and store an 8-hex-digit representation of the
+ * uint32 hash value, marking the prefix byte with an additional bit to
+ * distinguish that this has happened. Hashing long strings saves space and
+ * ensures that we won't overrun the maximum entry length for a GIN index.
+ * (But JGIN_MAXLENGTH is quite a bit shorter than GIN's limit. It's chosen
+ * to ensure that the on-disk text datum will have a short varlena header.)
+ * Note that when any hashed item appears in a query, we must recheck index
+ * matches against the heap tuple; currently, this costs nothing because we
+ * must always recheck for other reasons.
+ */
+#define JGINFLAG_KEY 0x01 /* key (or string array element) */
+#define JGINFLAG_NULL 0x02 /* null value */
+#define JGINFLAG_BOOL 0x03 /* boolean value */
+#define JGINFLAG_NUM 0x04 /* numeric value */
+#define JGINFLAG_STR 0x05 /* string value (if not an array element) */
+#define JGINFLAG_HASHED 0x10 /* OR'd into flag if value was hashed */
+#define JGIN_MAXLENGTH 125 /* max length of text part before hashing */
+
+/* Convenience macros: Datum <-> Jsonb * conversion (via PG_DETOAST_DATUM[_COPY]) and PG_GETARG_DATUM / PG_RETURN_POINTER wrappers */
+#define DatumGetJsonbP(d) ((Jsonb *) PG_DETOAST_DATUM(d))
+#define DatumGetJsonbPCopy(d) ((Jsonb *) PG_DETOAST_DATUM_COPY(d))
+#define JsonbPGetDatum(p) PointerGetDatum(p)
+#define PG_GETARG_JSONB_P(x) DatumGetJsonbP(PG_GETARG_DATUM(x))
+#define PG_GETARG_JSONB_P_COPY(x) DatumGetJsonbPCopy(PG_GETARG_DATUM(x))
+#define PG_RETURN_JSONB_P(x) PG_RETURN_POINTER(x)
+
+typedef struct JsonbPair JsonbPair;
+typedef struct JsonbValue JsonbValue;
+
+/*
+ * Jsonbs are varlena objects, so must meet the varlena convention that the
+ * first int32 of the object contains the total object size in bytes. Be sure
+ * to use VARSIZE() and SET_VARSIZE() to access it, though!
+ *
+ * Jsonb is the on-disk representation, in contrast to the in-memory JsonbValue
+ * representation. Often, JsonbValues are just shims through which a Jsonb
+ * buffer is accessed, but they can also be deep copied and passed around.
+ *
+ * Jsonb is a tree structure. Each node in the tree consists of a JEntry
+ * header and a variable-length content (possibly of zero size). The JEntry
+ * header indicates what kind of a node it is, e.g. a string or an array,
+ * and provides the length of its variable-length portion.
+ *
+ * The JEntry and the content of a node are not stored physically together.
+ * Instead, the container array or object has an array that holds the JEntrys
+ * of all the child nodes, followed by their variable-length portions.
+ *
+ * The root node is an exception; it has no parent array or object that could
+ * hold its JEntry. Hence, no JEntry header is stored for the root node. It
+ * is implicitly known that the root node must be an array or an object,
+ * so we can get away without the type indicator as long as we can distinguish
+ * the two. For that purpose, both an array and an object begin with a uint32
+ * header field, which contains a JB_FOBJECT or JB_FARRAY flag. When a naked
+ * scalar value needs to be stored as a Jsonb value, what we actually store is
+ * an array with one element, with the flags in the array's header field set
+ * to JB_FSCALAR | JB_FARRAY.
+ *
+ * Overall, the Jsonb struct requires 4-byte alignment. Within the struct,
+ * the variable-length portion of some node types is aligned to a 4-byte
+ * boundary, while others are not. When alignment is needed, the padding is
+ * in the beginning of the node that requires it. For example, if a numeric
+ * node is stored after a string node, so that the numeric node begins at
+ * offset 3, the variable-length portion of the numeric node will begin with
+ * one padding byte so that the actual numeric data is 4-byte aligned.
+ */
+
+/*
+ * JEntry format.
+ *
+ * The least significant 28 bits store either the data length of the entry,
+ * or its end+1 offset from the start of the variable-length portion of the
+ * containing object. The next three bits store the type of the entry, and
+ * the high-order bit tells whether the least significant bits store a length
+ * or an offset.
+ *
+ * The reason for the offset-or-length complication is to compromise between
+ * access speed and data compressibility. In the initial design each JEntry
+ * always stored an offset, but this resulted in JEntry arrays with horrible
+ * compressibility properties, so that TOAST compression of a JSONB did not
+ * work well. Storing only lengths would greatly improve compressibility,
+ * but it makes random access into large arrays expensive (O(N) not O(1)).
+ * So what we do is store an offset in every JB_OFFSET_STRIDE'th JEntry and
+ * a length in the rest. This results in reasonably compressible data (as
+ * long as the stride isn't too small). We may have to examine as many as
+ * JB_OFFSET_STRIDE JEntrys in order to find out the offset or length of any
+ * given item, but that's still O(1) no matter how large the container is.
+ *
+ * We could avoid eating a flag bit for this purpose if we were to store
+ * the stride in the container header, or if we were willing to treat the
+ * stride as an unchangeable constant. Neither of those options is very
+ * attractive though.
+ */
+typedef uint32 JEntry;
+
+#define JENTRY_OFFLENMASK 0x0FFFFFFF /* low 28 bits: data length or end+1 offset */
+#define JENTRY_TYPEMASK 0x70000000 /* next 3 bits: type of the entry */
+#define JENTRY_HAS_OFF 0x80000000 /* high bit: set when the low bits hold an offset */
+
+/* values stored in the type bits (JENTRY_TYPEMASK); 0x60000000 and 0x70000000 are not assigned here */
+#define JENTRY_ISSTRING 0x00000000
+#define JENTRY_ISNUMERIC 0x10000000
+#define JENTRY_ISBOOL_FALSE 0x20000000
+#define JENTRY_ISBOOL_TRUE 0x30000000
+#define JENTRY_ISNULL 0x40000000
+#define JENTRY_ISCONTAINER 0x50000000 /* array or object */
+
+/* Access macros. Each evaluates je_ once, except JBE_ISBOOL, which may evaluate it twice */
+#define JBE_OFFLENFLD(je_) ((je_) & JENTRY_OFFLENMASK)
+#define JBE_HAS_OFF(je_) (((je_) & JENTRY_HAS_OFF) != 0)
+#define JBE_ISSTRING(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISSTRING)
+#define JBE_ISNUMERIC(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNUMERIC)
+#define JBE_ISCONTAINER(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISCONTAINER)
+#define JBE_ISNULL(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISNULL)
+#define JBE_ISBOOL_TRUE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_TRUE)
+#define JBE_ISBOOL_FALSE(je_) (((je_) & JENTRY_TYPEMASK) == JENTRY_ISBOOL_FALSE)
+#define JBE_ISBOOL(je_) (JBE_ISBOOL_TRUE(je_) || JBE_ISBOOL_FALSE(je_))
+
+/* Advance lvalue "offset" past JEntry "je" (evaluated once): a stored offset replaces it, a stored length is added to it */
+#define JBE_ADVANCE_OFFSET(offset, je) \
+ do { \
+ JEntry je_ = (je); \
+ if (JBE_HAS_OFF(je_)) \
+ (offset) = JBE_OFFLENFLD(je_); \
+ else \
+ (offset) += JBE_OFFLENFLD(je_); \
+ } while(0)
+
+/*
+ * We store an offset, not a length, every JB_OFFSET_STRIDE children.
+ * Caution: this macro should only be referenced when creating a JSONB
+ * value. When examining an existing value, pay attention to the HAS_OFF
+ * bits instead. This allows changes in the offset-placement heuristic
+ * without breaking on-disk compatibility.
+ */
+#define JB_OFFSET_STRIDE 32
+
+/*
+ * A jsonb array or object node, within a Jsonb Datum.
+ *
+ * An array has one child for each element, stored in array order.
+ *
+ * An object has two children for each key/value pair. The keys all appear
+ * first, in key sort order; then the values appear, in an order matching the
+ * key order. This arrangement keeps the keys compact in memory, making a
+ * search for a particular key more cache-friendly.
+ */
+typedef struct JsonbContainer
+{
+ uint32 header; /* number of elements or key/value pairs, and
+ * flags */
+ JEntry children[FLEXIBLE_ARRAY_MEMBER];
+
+ /* the data for each child node follows. */
+} JsonbContainer;
+
+/* flags for the header-field in JsonbContainer */
+#define JB_CMASK 0x0FFFFFFF /* mask for count field */
+#define JB_FSCALAR 0x10000000 /* flag bits: raw scalar, set together with JB_FARRAY */
+#define JB_FOBJECT 0x20000000 /* container is an object */
+#define JB_FARRAY 0x40000000 /* container is an array */
+
+/* convenience macros for accessing a JsonbContainer struct */
+#define JsonContainerSize(jc) ((jc)->header & JB_CMASK)
+#define JsonContainerIsScalar(jc) (((jc)->header & JB_FSCALAR) != 0)
+#define JsonContainerIsObject(jc) (((jc)->header & JB_FOBJECT) != 0)
+#define JsonContainerIsArray(jc) (((jc)->header & JB_FARRAY) != 0)
+
+/* The top-level on-disk format for a jsonb datum. */
+typedef struct
+{
+ int32 vl_len_; /* varlena header (do not touch directly!) */
+ JsonbContainer root;
+} Jsonb;
+
+/* convenience macros for the root container in a Jsonb datum; each reads the uint32 at VARDATA(jbp_) and applies a header mask */
+#define JB_ROOT_COUNT(jbp_) (*(uint32 *) VARDATA(jbp_) & JB_CMASK)
+#define JB_ROOT_IS_SCALAR(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FSCALAR) != 0)
+#define JB_ROOT_IS_OBJECT(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FOBJECT) != 0)
+#define JB_ROOT_IS_ARRAY(jbp_) ((*(uint32 *) VARDATA(jbp_) & JB_FARRAY) != 0)
+
+
+enum jbvType
+{
+ /* Scalar types */
+ jbvNull = 0x0,
+ jbvString,
+ jbvNumeric,
+ jbvBool,
+ /* Composite types */
+ jbvArray = 0x10,
+ jbvObject,
+ /* Binary (i.e. struct Jsonb) jbvArray/jbvObject */
+ jbvBinary,
+
+ /*
+ * Virtual types.
+ *
+ * These types are used only for in-memory JSON processing and are
+ * serialized into JSON strings when output to json/jsonb.
+ */
+ jbvDatetime = 0x20,
+};
+
+/*
+ * JsonbValue: In-memory representation of Jsonb. This is a convenient
+ * deserialized representation, that can easily support using the "val"
+ * union across underlying types during manipulation. The Jsonb on-disk
+ * representation has various alignment considerations.
+ */
+struct JsonbValue
+{
+ enum jbvType type; /* Influences sort order */
+
+ union
+ {
+ Numeric numeric;
+ bool boolean;
+ struct
+ {
+ int len;
+ char *val; /* Not necessarily null-terminated */
+ } string; /* String primitive type */
+
+ struct
+ {
+ int nElems;
+ JsonbValue *elems;
+ bool rawScalar; /* Top-level "raw scalar" array? */
+ } array; /* Array container type */
+
+ struct
+ {
+ int nPairs; /* 1 pair, 2 elements */
+ JsonbPair *pairs;
+ } object; /* Associative container type */
+
+ struct
+ {
+ int len;
+ JsonbContainer *data;
+ } binary; /* Array or object, in on-disk format */
+
+ struct
+ {
+ Datum value;
+ Oid typid;
+ int32 typmod;
+ int tz; /* Numeric time zone, in seconds, for
+ * TimestampTz data type */
+ } datetime;
+ } val;
+};
+
+#define IsAJsonbScalar(jsonbval) (((jsonbval)->type >= jbvNull && \
+ (jsonbval)->type <= jbvBool) || \
+ (jsonbval)->type == jbvDatetime) /* NB: jsonbval may be evaluated up to three times */
+
+/*
+ * Key/value pair within an Object.
+ *
+ * This struct type is only used briefly while constructing a Jsonb; it is
+ * *not* the on-disk representation.
+ *
+ * Pairs with duplicate keys are de-duplicated. We store the originally
+ * observed pair ordering for the purpose of removing duplicates in a
+ * well-defined way (which is "last observed wins").
+ */
+struct JsonbPair
+{
+ JsonbValue key; /* Must be a jbvString */
+ JsonbValue value; /* May be of any type */
+ uint32 order; /* Pair's index in original sequence */
+};
+
+/* Conversion state used when parsing Jsonb from text, or for type coercion */
+typedef struct JsonbParseState
+{
+ JsonbValue contVal; /* NOTE(review): presumably the container value being built -- confirm */
+ Size size; /* NOTE(review): meaning not evident from this header -- confirm in the implementation */
+ struct JsonbParseState *next; /* link to another parse state; presumably the enclosing level -- confirm */
+} JsonbParseState;
+
+/*
+ * JsonbIterator holds details of the type for each iteration. It also stores a
+ * Jsonb varlena buffer, which can be directly accessed in some contexts.
+ */
+typedef enum
+{
+ JBI_ARRAY_START,
+ JBI_ARRAY_ELEM,
+ JBI_OBJECT_START,
+ JBI_OBJECT_KEY,
+ JBI_OBJECT_VALUE
+} JsonbIterState;
+
+typedef struct JsonbIterator
+{
+ /* Container being iterated */
+ JsonbContainer *container;
+ uint32 nElems; /* Number of elements in children array (will
+ * be nPairs for objects) */
+ bool isScalar; /* Pseudo-array scalar value? */
+ JEntry *children; /* JEntrys for child nodes */
+ /* Data proper. This points to the beginning of the variable-length data */
+ char *dataProper;
+
+ /* Current item in buffer (up to nElems) */
+ int curIndex;
+
+ /* Data offset corresponding to current item */
+ uint32 curDataOffset;
+
+ /*
+ * If the container is an object, we want to return keys and values
+ * alternately; so curDataOffset points to the current key, and
+ * curValueOffset points to the current value.
+ */
+ uint32 curValueOffset;
+
+ /* Private state */
+ JsonbIterState state;
+
+ struct JsonbIterator *parent;
+} JsonbIterator;
+
+
+/* Support functions */
+extern uint32 getJsonbOffset(const JsonbContainer *jc, int index);
+extern uint32 getJsonbLength(const JsonbContainer *jc, int index);
+extern int compareJsonbContainers(JsonbContainer *a, JsonbContainer *b);
+extern JsonbValue *findJsonbValueFromContainer(JsonbContainer *sheader,
+ uint32 flags,
+ JsonbValue *key);
+extern JsonbValue *getKeyJsonValueFromContainer(JsonbContainer *container,
+ const char *keyVal, int keyLen,
+ JsonbValue *res);
+extern JsonbValue *getIthJsonbValueFromContainer(JsonbContainer *sheader,
+ uint32 i);
+extern JsonbValue *pushJsonbValue(JsonbParseState **pstate,
+ JsonbIteratorToken seq, JsonbValue *jbval);
+extern JsonbIterator *JsonbIteratorInit(JsonbContainer *container);
+extern JsonbIteratorToken JsonbIteratorNext(JsonbIterator **it, JsonbValue *val,
+ bool skipNested);
+extern Jsonb *JsonbValueToJsonb(JsonbValue *val);
+extern bool JsonbDeepContains(JsonbIterator **val,
+ JsonbIterator **mContained);
+extern void JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash);
+extern void JsonbHashScalarValueExtended(const JsonbValue *scalarVal,
+ uint64 *hash, uint64 seed);
+
+/* jsonb.c support functions */
+extern char *JsonbToCString(StringInfo out, JsonbContainer *in,
+ int estimated_len);
+extern char *JsonbToCStringIndent(StringInfo out, JsonbContainer *in,
+ int estimated_len);
+extern bool JsonbExtractScalar(JsonbContainer *jbc, JsonbValue *res);
+extern const char *JsonbTypeName(JsonbValue *jb);
+
+
+#endif /* __JSONB_H__ */