diff options
Diffstat (limited to 'src/backend/utils/adt/jsonb_util.c')
-rw-r--r-- | src/backend/utils/adt/jsonb_util.c | 1967 |
1 files changed, 1967 insertions, 0 deletions
diff --git a/src/backend/utils/adt/jsonb_util.c b/src/backend/utils/adt/jsonb_util.c new file mode 100644 index 0000000..5711187 --- /dev/null +++ b/src/backend/utils/adt/jsonb_util.c @@ -0,0 +1,1967 @@ +/*------------------------------------------------------------------------- + * + * jsonb_util.c + * converting between Jsonb and JsonbValues, and iterating. + * + * Copyright (c) 2014-2021, PostgreSQL Global Development Group + * + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_util.c + * + *------------------------------------------------------------------------- + */ +#include "postgres.h" + +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "common/jsonapi.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/datetime.h" +#include "utils/json.h" +#include "utils/jsonb.h" +#include "utils/memutils.h" +#include "utils/varlena.h" + +/* + * Maximum number of elements in an array (or key/value pairs in an object). + * This is limited by two things: the size of the JEntry array must fit + * in MaxAllocSize, and the number of elements (or pairs) must fit in the bits + * reserved for that in the JsonbContainer.header field. + * + * (The total size of an array's or object's elements is also limited by + * JENTRY_OFFLENMASK, but we're not concerned about that here.) + */ +#define JSONB_MAX_ELEMS (Min(MaxAllocSize / sizeof(JsonbValue), JB_CMASK)) +#define JSONB_MAX_PAIRS (Min(MaxAllocSize / sizeof(JsonbPair), JB_CMASK)) + +static void fillJsonbValue(JsonbContainer *container, int index, + char *base_addr, uint32 offset, + JsonbValue *result); +static bool equalsJsonbScalarValue(JsonbValue *a, JsonbValue *b); +static int compareJsonbScalarValue(JsonbValue *a, JsonbValue *b); +static Jsonb *convertToJsonb(JsonbValue *val); +static void convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level); +static void convertJsonbArray(StringInfo buffer, JEntry *header, JsonbValue *val, int level); +static void convertJsonbObject(StringInfo buffer, JEntry *header, JsonbValue *val, int level); +static void convertJsonbScalar(StringInfo buffer, JEntry *header, JsonbValue *scalarVal); + +static int reserveFromBuffer(StringInfo buffer, int len); +static void appendToBuffer(StringInfo buffer, const char *data, int len); +static void copyToBuffer(StringInfo buffer, int offset, const char *data, int len); +static short padBufferToInt(StringInfo buffer); + +static JsonbIterator *iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent); +static JsonbIterator *freeAndGetParent(JsonbIterator *it); +static JsonbParseState *pushState(JsonbParseState **pstate); +static void appendKey(JsonbParseState *pstate, JsonbValue *scalarVal); +static void appendValue(JsonbParseState *pstate, JsonbValue *scalarVal); +static void appendElement(JsonbParseState *pstate, JsonbValue *scalarVal); +static int lengthCompareJsonbStringValue(const void *a, const void *b); +static int lengthCompareJsonbString(const char *val1, int len1, + const char *val2, int len2); +static int lengthCompareJsonbPair(const void *a, const void *b, void *arg); +static void uniqueifyJsonbObject(JsonbValue *object); +static JsonbValue *pushJsonbValueScalar(JsonbParseState **pstate, + JsonbIteratorToken seq, + JsonbValue *scalarVal); + +void +JsonbToJsonbValue(Jsonb *jsonb, JsonbValue *val) +{ + val->type = jbvBinary; + val->val.binary.data = &jsonb->root; + val->val.binary.len = VARSIZE(jsonb) - VARHDRSZ; +} + +/* + * Turn an in-memory JsonbValue into a Jsonb for on-disk storage. + * + * Generally we find it more convenient to directly iterate through the Jsonb + * representation and only really convert nested scalar values. + * JsonbIteratorNext() does this, so that clients of the iteration code don't + * have to directly deal with the binary representation (JsonbDeepContains() is + * a notable exception, although all exceptions are internal to this module). + * In general, functions that accept a JsonbValue argument are concerned with + * the manipulation of scalar values, or simple containers of scalar values, + * where it would be inconvenient to deal with a great amount of other state. + */ +Jsonb * +JsonbValueToJsonb(JsonbValue *val) +{ + Jsonb *out; + + if (IsAJsonbScalar(val)) + { + /* Scalar value */ + JsonbParseState *pstate = NULL; + JsonbValue *res; + JsonbValue scalarArray; + + scalarArray.type = jbvArray; + scalarArray.val.array.rawScalar = true; + scalarArray.val.array.nElems = 1; + + pushJsonbValue(&pstate, WJB_BEGIN_ARRAY, &scalarArray); + pushJsonbValue(&pstate, WJB_ELEM, val); + res = pushJsonbValue(&pstate, WJB_END_ARRAY, NULL); + + out = convertToJsonb(res); + } + else if (val->type == jbvObject || val->type == jbvArray) + { + out = convertToJsonb(val); + } + else + { + Assert(val->type == jbvBinary); + out = palloc(VARHDRSZ + val->val.binary.len); + SET_VARSIZE(out, VARHDRSZ + val->val.binary.len); + memcpy(VARDATA(out), val->val.binary.data, val->val.binary.len); + } + + return out; +} + +/* + * Get the offset of the variable-length portion of a Jsonb node within + * the variable-length-data part of its container. The node is identified + * by index within the container's JEntry array. + */ +uint32 +getJsonbOffset(const JsonbContainer *jc, int index) +{ + uint32 offset = 0; + int i; + + /* + * Start offset of this entry is equal to the end offset of the previous + * entry. Walk backwards to the most recent entry stored as an end + * offset, returning that offset plus any lengths in between. + */ + for (i = index - 1; i >= 0; i--) + { + offset += JBE_OFFLENFLD(jc->children[i]); + if (JBE_HAS_OFF(jc->children[i])) + break; + } + + return offset; +} + +/* + * Get the length of the variable-length portion of a Jsonb node. + * The node is identified by index within the container's JEntry array. + */ +uint32 +getJsonbLength(const JsonbContainer *jc, int index) +{ + uint32 off; + uint32 len; + + /* + * If the length is stored directly in the JEntry, just return it. + * Otherwise, get the begin offset of the entry, and subtract that from + * the stored end+1 offset. + */ + if (JBE_HAS_OFF(jc->children[index])) + { + off = getJsonbOffset(jc, index); + len = JBE_OFFLENFLD(jc->children[index]) - off; + } + else + len = JBE_OFFLENFLD(jc->children[index]); + + return len; +} + +/* + * BT comparator worker function. Returns an integer less than, equal to, or + * greater than zero, indicating whether a is less than, equal to, or greater + * than b. Consistent with the requirements for a B-Tree operator class + * + * Strings are compared lexically, in contrast with other places where we use a + * much simpler comparator logic for searching through Strings. Since this is + * called from B-Tree support function 1, we're careful about not leaking + * memory here. + */ +int +compareJsonbContainers(JsonbContainer *a, JsonbContainer *b) +{ + JsonbIterator *ita, + *itb; + int res = 0; + + ita = JsonbIteratorInit(a); + itb = JsonbIteratorInit(b); + + do + { + JsonbValue va, + vb; + JsonbIteratorToken ra, + rb; + + ra = JsonbIteratorNext(&ita, &va, false); + rb = JsonbIteratorNext(&itb, &vb, false); + + if (ra == rb) + { + if (ra == WJB_DONE) + { + /* Decisively equal */ + break; + } + + if (ra == WJB_END_ARRAY || ra == WJB_END_OBJECT) + { + /* + * There is no array or object to compare at this stage of + * processing. jbvArray/jbvObject values are compared + * initially, at the WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT + * tokens. + */ + continue; + } + + if (va.type == vb.type) + { + switch (va.type) + { + case jbvString: + case jbvNull: + case jbvNumeric: + case jbvBool: + res = compareJsonbScalarValue(&va, &vb); + break; + case jbvArray: + + /* + * This could be a "raw scalar" pseudo array. That's + * a special case here though, since we still want the + * general type-based comparisons to apply, and as far + * as we're concerned a pseudo array is just a scalar. + */ + if (va.val.array.rawScalar != vb.val.array.rawScalar) + res = (va.val.array.rawScalar) ? -1 : 1; + if (va.val.array.nElems != vb.val.array.nElems) + res = (va.val.array.nElems > vb.val.array.nElems) ? 1 : -1; + break; + case jbvObject: + if (va.val.object.nPairs != vb.val.object.nPairs) + res = (va.val.object.nPairs > vb.val.object.nPairs) ? 1 : -1; + break; + case jbvBinary: + elog(ERROR, "unexpected jbvBinary value"); + break; + case jbvDatetime: + elog(ERROR, "unexpected jbvDatetime value"); + break; + } + } + else + { + /* Type-defined order */ + res = (va.type > vb.type) ? 1 : -1; + } + } + else + { + /* + * It's safe to assume that the types differed, and that the va + * and vb values passed were set. + * + * If the two values were of the same container type, then there'd + * have been a chance to observe the variation in the number of + * elements/pairs (when processing WJB_BEGIN_OBJECT, say). They're + * either two heterogeneously-typed containers, or a container and + * some scalar type. + * + * We don't have to consider the WJB_END_ARRAY and WJB_END_OBJECT + * cases here, because we would have seen the corresponding + * WJB_BEGIN_ARRAY and WJB_BEGIN_OBJECT tokens first, and + * concluded that they don't match. + */ + Assert(ra != WJB_END_ARRAY && ra != WJB_END_OBJECT); + Assert(rb != WJB_END_ARRAY && rb != WJB_END_OBJECT); + + Assert(va.type != vb.type); + Assert(va.type != jbvBinary); + Assert(vb.type != jbvBinary); + /* Type-defined order */ + res = (va.type > vb.type) ? 1 : -1; + } + } + while (res == 0); + + while (ita != NULL) + { + JsonbIterator *i = ita->parent; + + pfree(ita); + ita = i; + } + while (itb != NULL) + { + JsonbIterator *i = itb->parent; + + pfree(itb); + itb = i; + } + + return res; +} + +/* + * Find value in object (i.e. the "value" part of some key/value pair in an + * object), or find a matching element if we're looking through an array. Do + * so on the basis of equality of the object keys only, or alternatively + * element values only, with a caller-supplied value "key". The "flags" + * argument allows the caller to specify which container types are of interest. + * + * This exported utility function exists to facilitate various cases concerned + * with "containment". If asked to look through an object, the caller had + * better pass a Jsonb String, because their keys can only be strings. + * Otherwise, for an array, any type of JsonbValue will do. + * + * In order to proceed with the search, it is necessary for callers to have + * both specified an interest in exactly one particular container type with an + * appropriate flag, as well as having the pointed-to Jsonb container be of + * one of those same container types at the top level. (Actually, we just do + * whichever makes sense to save callers the trouble of figuring it out - at + * most one can make sense, because the container either points to an array + * (possibly a "raw scalar" pseudo array) or an object.) + * + * Note that we can return a jbvBinary JsonbValue if this is called on an + * object, but we never do so on an array. If the caller asks to look through + * a container type that is not of the type pointed to by the container, + * immediately fall through and return NULL. If we cannot find the value, + * return NULL. Otherwise, return palloc()'d copy of value. + */ +JsonbValue * +findJsonbValueFromContainer(JsonbContainer *container, uint32 flags, + JsonbValue *key) +{ + JEntry *children = container->children; + int count = JsonContainerSize(container); + + Assert((flags & ~(JB_FARRAY | JB_FOBJECT)) == 0); + + /* Quick out without a palloc cycle if object/array is empty */ + if (count <= 0) + return NULL; + + if ((flags & JB_FARRAY) && JsonContainerIsArray(container)) + { + JsonbValue *result = palloc(sizeof(JsonbValue)); + char *base_addr = (char *) (children + count); + uint32 offset = 0; + int i; + + for (i = 0; i < count; i++) + { + fillJsonbValue(container, i, base_addr, offset, result); + + if (key->type == result->type) + { + if (equalsJsonbScalarValue(key, result)) + return result; + } + + JBE_ADVANCE_OFFSET(offset, children[i]); + } + + pfree(result); + } + else if ((flags & JB_FOBJECT) && JsonContainerIsObject(container)) + { + /* Object key passed by caller must be a string */ + Assert(key->type == jbvString); + + return getKeyJsonValueFromContainer(container, key->val.string.val, + key->val.string.len, NULL); + } + + /* Not found */ + return NULL; +} + +/* + * Find value by key in Jsonb object and fetch it into 'res', which is also + * returned. + * + * 'res' can be passed in as NULL, in which case it's newly palloc'ed here. + */ +JsonbValue * +getKeyJsonValueFromContainer(JsonbContainer *container, + const char *keyVal, int keyLen, JsonbValue *res) +{ + JEntry *children = container->children; + int count = JsonContainerSize(container); + char *baseAddr; + uint32 stopLow, + stopHigh; + + Assert(JsonContainerIsObject(container)); + + /* Quick out without a palloc cycle if object is empty */ + if (count <= 0) + return NULL; + + /* + * Binary search the container. Since we know this is an object, account + * for *Pairs* of Jentrys + */ + baseAddr = (char *) (children + count * 2); + stopLow = 0; + stopHigh = count; + while (stopLow < stopHigh) + { + uint32 stopMiddle; + int difference; + const char *candidateVal; + int candidateLen; + + stopMiddle = stopLow + (stopHigh - stopLow) / 2; + + candidateVal = baseAddr + getJsonbOffset(container, stopMiddle); + candidateLen = getJsonbLength(container, stopMiddle); + + difference = lengthCompareJsonbString(candidateVal, candidateLen, + keyVal, keyLen); + + if (difference == 0) + { + /* Found our key, return corresponding value */ + int index = stopMiddle + count; + + if (!res) + res = palloc(sizeof(JsonbValue)); + + fillJsonbValue(container, index, baseAddr, + getJsonbOffset(container, index), + res); + + return res; + } + else + { + if (difference < 0) + stopLow = stopMiddle + 1; + else + stopHigh = stopMiddle; + } + } + + /* Not found */ + return NULL; +} + +/* + * Get i-th value of a Jsonb array. + * + * Returns palloc()'d copy of the value, or NULL if it does not exist. + */ +JsonbValue * +getIthJsonbValueFromContainer(JsonbContainer *container, uint32 i) +{ + JsonbValue *result; + char *base_addr; + uint32 nelements; + + if (!JsonContainerIsArray(container)) + elog(ERROR, "not a jsonb array"); + + nelements = JsonContainerSize(container); + base_addr = (char *) &container->children[nelements]; + + if (i >= nelements) + return NULL; + + result = palloc(sizeof(JsonbValue)); + + fillJsonbValue(container, i, base_addr, + getJsonbOffset(container, i), + result); + + return result; +} + +/* + * A helper function to fill in a JsonbValue to represent an element of an + * array, or a key or value of an object. + * + * The node's JEntry is at container->children[index], and its variable-length + * data is at base_addr + offset. We make the caller determine the offset + * since in many cases the caller can amortize that work across multiple + * children. When it can't, it can just call getJsonbOffset(). + * + * A nested array or object will be returned as jbvBinary, ie. it won't be + * expanded. + */ +static void +fillJsonbValue(JsonbContainer *container, int index, + char *base_addr, uint32 offset, + JsonbValue *result) +{ + JEntry entry = container->children[index]; + + if (JBE_ISNULL(entry)) + { + result->type = jbvNull; + } + else if (JBE_ISSTRING(entry)) + { + result->type = jbvString; + result->val.string.val = base_addr + offset; + result->val.string.len = getJsonbLength(container, index); + Assert(result->val.string.len >= 0); + } + else if (JBE_ISNUMERIC(entry)) + { + result->type = jbvNumeric; + result->val.numeric = (Numeric) (base_addr + INTALIGN(offset)); + } + else if (JBE_ISBOOL_TRUE(entry)) + { + result->type = jbvBool; + result->val.boolean = true; + } + else if (JBE_ISBOOL_FALSE(entry)) + { + result->type = jbvBool; + result->val.boolean = false; + } + else + { + Assert(JBE_ISCONTAINER(entry)); + result->type = jbvBinary; + /* Remove alignment padding from data pointer and length */ + result->val.binary.data = (JsonbContainer *) (base_addr + INTALIGN(offset)); + result->val.binary.len = getJsonbLength(container, index) - + (INTALIGN(offset) - offset); + } +} + +/* + * Push JsonbValue into JsonbParseState. + * + * Used when parsing JSON tokens to form Jsonb, or when converting an in-memory + * JsonbValue to a Jsonb. + * + * Initial state of *JsonbParseState is NULL, since it'll be allocated here + * originally (caller will get JsonbParseState back by reference). + * + * Only sequential tokens pertaining to non-container types should pass a + * JsonbValue. There is one exception -- WJB_BEGIN_ARRAY callers may pass a + * "raw scalar" pseudo array to append it - the actual scalar should be passed + * next and it will be added as the only member of the array. + * + * Values of type jbvBinary, which are rolled up arrays and objects, + * are unpacked before being added to the result. + */ +JsonbValue * +pushJsonbValue(JsonbParseState **pstate, JsonbIteratorToken seq, + JsonbValue *jbval) +{ + JsonbIterator *it; + JsonbValue *res = NULL; + JsonbValue v; + JsonbIteratorToken tok; + int i; + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvObject) + { + pushJsonbValue(pstate, WJB_BEGIN_OBJECT, NULL); + for (i = 0; i < jbval->val.object.nPairs; i++) + { + pushJsonbValue(pstate, WJB_KEY, &jbval->val.object.pairs[i].key); + pushJsonbValue(pstate, WJB_VALUE, &jbval->val.object.pairs[i].value); + } + + return pushJsonbValue(pstate, WJB_END_OBJECT, NULL); + } + + if (jbval && (seq == WJB_ELEM || seq == WJB_VALUE) && jbval->type == jbvArray) + { + pushJsonbValue(pstate, WJB_BEGIN_ARRAY, NULL); + for (i = 0; i < jbval->val.array.nElems; i++) + { + pushJsonbValue(pstate, WJB_ELEM, &jbval->val.array.elems[i]); + } + + return pushJsonbValue(pstate, WJB_END_ARRAY, NULL); + } + + if (!jbval || (seq != WJB_ELEM && seq != WJB_VALUE) || + jbval->type != jbvBinary) + { + /* drop through */ + return pushJsonbValueScalar(pstate, seq, jbval); + } + + /* unpack the binary and add each piece to the pstate */ + it = JsonbIteratorInit(jbval->val.binary.data); + + if ((jbval->val.binary.data->header & JB_FSCALAR) && *pstate) + { + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_BEGIN_ARRAY); + Assert(v.type == jbvArray && v.val.array.rawScalar); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_ELEM); + + res = pushJsonbValueScalar(pstate, seq, &v); + + tok = JsonbIteratorNext(&it, &v, true); + Assert(tok == WJB_END_ARRAY); + Assert(it == NULL); + + return res; + } + + while ((tok = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + res = pushJsonbValueScalar(pstate, tok, + tok < WJB_BEGIN_ARRAY || + (tok == WJB_BEGIN_ARRAY && + v.val.array.rawScalar) ? &v : NULL); + + return res; +} + +/* + * Do the actual pushing, with only scalar or pseudo-scalar-array values + * accepted. + */ +static JsonbValue * +pushJsonbValueScalar(JsonbParseState **pstate, JsonbIteratorToken seq, + JsonbValue *scalarVal) +{ + JsonbValue *result = NULL; + + switch (seq) + { + case WJB_BEGIN_ARRAY: + Assert(!scalarVal || scalarVal->val.array.rawScalar); + *pstate = pushState(pstate); + result = &(*pstate)->contVal; + (*pstate)->contVal.type = jbvArray; + (*pstate)->contVal.val.array.nElems = 0; + (*pstate)->contVal.val.array.rawScalar = (scalarVal && + scalarVal->val.array.rawScalar); + if (scalarVal && scalarVal->val.array.nElems > 0) + { + /* Assume that this array is still really a scalar */ + Assert(scalarVal->type == jbvArray); + (*pstate)->size = scalarVal->val.array.nElems; + } + else + { + (*pstate)->size = 4; + } + (*pstate)->contVal.val.array.elems = palloc(sizeof(JsonbValue) * + (*pstate)->size); + break; + case WJB_BEGIN_OBJECT: + Assert(!scalarVal); + *pstate = pushState(pstate); + result = &(*pstate)->contVal; + (*pstate)->contVal.type = jbvObject; + (*pstate)->contVal.val.object.nPairs = 0; + (*pstate)->size = 4; + (*pstate)->contVal.val.object.pairs = palloc(sizeof(JsonbPair) * + (*pstate)->size); + break; + case WJB_KEY: + Assert(scalarVal->type == jbvString); + appendKey(*pstate, scalarVal); + break; + case WJB_VALUE: + Assert(IsAJsonbScalar(scalarVal)); + appendValue(*pstate, scalarVal); + break; + case WJB_ELEM: + Assert(IsAJsonbScalar(scalarVal)); + appendElement(*pstate, scalarVal); + break; + case WJB_END_OBJECT: + uniqueifyJsonbObject(&(*pstate)->contVal); + /* fall through! */ + case WJB_END_ARRAY: + /* Steps here common to WJB_END_OBJECT case */ + Assert(!scalarVal); + result = &(*pstate)->contVal; + + /* + * Pop stack and push current array/object as value in parent + * array/object + */ + *pstate = (*pstate)->next; + if (*pstate) + { + switch ((*pstate)->contVal.type) + { + case jbvArray: + appendElement(*pstate, result); + break; + case jbvObject: + appendValue(*pstate, result); + break; + default: + elog(ERROR, "invalid jsonb container type"); + } + } + break; + default: + elog(ERROR, "unrecognized jsonb sequential processing token"); + } + + return result; +} + +/* + * pushJsonbValue() worker: Iteration-like forming of Jsonb + */ +static JsonbParseState * +pushState(JsonbParseState **pstate) +{ + JsonbParseState *ns = palloc(sizeof(JsonbParseState)); + + ns->next = *pstate; + return ns; +} + +/* + * pushJsonbValue() worker: Append a pair key to state when generating a Jsonb + */ +static void +appendKey(JsonbParseState *pstate, JsonbValue *string) +{ + JsonbValue *object = &pstate->contVal; + + Assert(object->type == jbvObject); + Assert(string->type == jbvString); + + if (object->val.object.nPairs >= JSONB_MAX_PAIRS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of jsonb object pairs exceeds the maximum allowed (%zu)", + JSONB_MAX_PAIRS))); + + if (object->val.object.nPairs >= pstate->size) + { + pstate->size *= 2; + object->val.object.pairs = repalloc(object->val.object.pairs, + sizeof(JsonbPair) * pstate->size); + } + + object->val.object.pairs[object->val.object.nPairs].key = *string; + object->val.object.pairs[object->val.object.nPairs].order = object->val.object.nPairs; +} + +/* + * pushJsonbValue() worker: Append a pair value to state when generating a + * Jsonb + */ +static void +appendValue(JsonbParseState *pstate, JsonbValue *scalarVal) +{ + JsonbValue *object = &pstate->contVal; + + Assert(object->type == jbvObject); + + object->val.object.pairs[object->val.object.nPairs++].value = *scalarVal; +} + +/* + * pushJsonbValue() worker: Append an element to state when generating a Jsonb + */ +static void +appendElement(JsonbParseState *pstate, JsonbValue *scalarVal) +{ + JsonbValue *array = &pstate->contVal; + + Assert(array->type == jbvArray); + + if (array->val.array.nElems >= JSONB_MAX_ELEMS) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("number of jsonb array elements exceeds the maximum allowed (%zu)", + JSONB_MAX_ELEMS))); + + if (array->val.array.nElems >= pstate->size) + { + pstate->size *= 2; + array->val.array.elems = repalloc(array->val.array.elems, + sizeof(JsonbValue) * pstate->size); + } + + array->val.array.elems[array->val.array.nElems++] = *scalarVal; +} + +/* + * Given a JsonbContainer, expand to JsonbIterator to iterate over items + * fully expanded to in-memory representation for manipulation. + * + * See JsonbIteratorNext() for notes on memory management. + */ +JsonbIterator * +JsonbIteratorInit(JsonbContainer *container) +{ + return iteratorFromContainer(container, NULL); +} + +/* + * Get next JsonbValue while iterating + * + * Caller should initially pass their own, original iterator. They may get + * back a child iterator palloc()'d here instead. The function can be relied + * on to free those child iterators, lest the memory allocated for highly + * nested objects become unreasonable, but only if callers don't end iteration + * early (by breaking upon having found something in a search, for example). + * + * Callers in such a scenario, that are particularly sensitive to leaking + * memory in a long-lived context may walk the ancestral tree from the final + * iterator we left them with to its oldest ancestor, pfree()ing as they go. + * They do not have to free any other memory previously allocated for iterators + * but not accessible as direct ancestors of the iterator they're last passed + * back. + * + * Returns "Jsonb sequential processing" token value. Iterator "state" + * reflects the current stage of the process in a less granular fashion, and is + * mostly used here to track things internally with respect to particular + * iterators. + * + * Clients of this function should not have to handle any jbvBinary values + * (since recursive calls will deal with this), provided skipNested is false. + * It is our job to expand the jbvBinary representation without bothering them + * with it. However, clients should not take it upon themselves to touch array + * or Object element/pair buffers, since their element/pair pointers are + * garbage. Also, *val will not be set when returning WJB_END_ARRAY or + * WJB_END_OBJECT, on the assumption that it's only useful to access values + * when recursing in. + */ +JsonbIteratorToken +JsonbIteratorNext(JsonbIterator **it, JsonbValue *val, bool skipNested) +{ + if (*it == NULL) + return WJB_DONE; + + /* + * When stepping into a nested container, we jump back here to start + * processing the child. We will not recurse further in one call, because + * processing the child will always begin in JBI_ARRAY_START or + * JBI_OBJECT_START state. + */ +recurse: + switch ((*it)->state) + { + case JBI_ARRAY_START: + /* Set v to array on first array call */ + val->type = jbvArray; + val->val.array.nElems = (*it)->nElems; + + /* + * v->val.array.elems is not actually set, because we aren't doing + * a full conversion + */ + val->val.array.rawScalar = (*it)->isScalar; + (*it)->curIndex = 0; + (*it)->curDataOffset = 0; + (*it)->curValueOffset = 0; /* not actually used */ + /* Set state for next call */ + (*it)->state = JBI_ARRAY_ELEM; + return WJB_BEGIN_ARRAY; + + case JBI_ARRAY_ELEM: + if ((*it)->curIndex >= (*it)->nElems) + { + /* + * All elements within array already processed. Report this + * to caller, and give it back original parent iterator (which + * independently tracks iteration progress at its level of + * nesting). + */ + *it = freeAndGetParent(*it); + return WJB_END_ARRAY; + } + + fillJsonbValue((*it)->container, (*it)->curIndex, + (*it)->dataProper, (*it)->curDataOffset, + val); + + JBE_ADVANCE_OFFSET((*it)->curDataOffset, + (*it)->children[(*it)->curIndex]); + (*it)->curIndex++; + + if (!IsAJsonbScalar(val) && !skipNested) + { + /* Recurse into container. */ + *it = iteratorFromContainer(val->val.binary.data, *it); + goto recurse; + } + else + { + /* + * Scalar item in array, or a container and caller didn't want + * us to recurse into it. + */ + return WJB_ELEM; + } + + case JBI_OBJECT_START: + /* Set v to object on first object call */ + val->type = jbvObject; + val->val.object.nPairs = (*it)->nElems; + + /* + * v->val.object.pairs is not actually set, because we aren't + * doing a full conversion + */ + (*it)->curIndex = 0; + (*it)->curDataOffset = 0; + (*it)->curValueOffset = getJsonbOffset((*it)->container, + (*it)->nElems); + /* Set state for next call */ + (*it)->state = JBI_OBJECT_KEY; + return WJB_BEGIN_OBJECT; + + case JBI_OBJECT_KEY: + if ((*it)->curIndex >= (*it)->nElems) + { + /* + * All pairs within object already processed. Report this to + * caller, and give it back original containing iterator + * (which independently tracks iteration progress at its level + * of nesting). + */ + *it = freeAndGetParent(*it); + return WJB_END_OBJECT; + } + else + { + /* Return key of a key/value pair. */ + fillJsonbValue((*it)->container, (*it)->curIndex, + (*it)->dataProper, (*it)->curDataOffset, + val); + if (val->type != jbvString) + elog(ERROR, "unexpected jsonb type as object key"); + + /* Set state for next call */ + (*it)->state = JBI_OBJECT_VALUE; + return WJB_KEY; + } + + case JBI_OBJECT_VALUE: + /* Set state for next call */ + (*it)->state = JBI_OBJECT_KEY; + + fillJsonbValue((*it)->container, (*it)->curIndex + (*it)->nElems, + (*it)->dataProper, (*it)->curValueOffset, + val); + + JBE_ADVANCE_OFFSET((*it)->curDataOffset, + (*it)->children[(*it)->curIndex]); + JBE_ADVANCE_OFFSET((*it)->curValueOffset, + (*it)->children[(*it)->curIndex + (*it)->nElems]); + (*it)->curIndex++; + + /* + * Value may be a container, in which case we recurse with new, + * child iterator (unless the caller asked not to, by passing + * skipNested). + */ + if (!IsAJsonbScalar(val) && !skipNested) + { + *it = iteratorFromContainer(val->val.binary.data, *it); + goto recurse; + } + else + return WJB_VALUE; + } + + elog(ERROR, "invalid iterator state"); + return -1; +} + +/* + * Initialize an iterator for iterating all elements in a container. + */ +static JsonbIterator * +iteratorFromContainer(JsonbContainer *container, JsonbIterator *parent) +{ + JsonbIterator *it; + + it = palloc0(sizeof(JsonbIterator)); + it->container = container; + it->parent = parent; + it->nElems = JsonContainerSize(container); + + /* Array starts just after header */ + it->children = container->children; + + switch (container->header & (JB_FARRAY | JB_FOBJECT)) + { + case JB_FARRAY: + it->dataProper = + (char *) it->children + it->nElems * sizeof(JEntry); + it->isScalar = JsonContainerIsScalar(container); + /* This is either a "raw scalar", or an array */ + Assert(!it->isScalar || it->nElems == 1); + + it->state = JBI_ARRAY_START; + break; + + case JB_FOBJECT: + it->dataProper = + (char *) it->children + it->nElems * sizeof(JEntry) * 2; + it->state = JBI_OBJECT_START; + break; + + default: + elog(ERROR, "unknown type of jsonb container"); + } + + return it; +} + +/* + * JsonbIteratorNext() worker: Return parent, while freeing memory for current + * iterator + */ +static JsonbIterator * +freeAndGetParent(JsonbIterator *it) +{ + JsonbIterator *v = it->parent; + + pfree(it); + return v; +} + +/* + * Worker for "contains" operator's function + * + * Formally speaking, containment is top-down, unordered subtree isomorphism. + * + * Takes iterators that belong to some container type. These iterators + * "belong" to those values in the sense that they've just been initialized in + * respect of them by the caller (perhaps in a nested fashion). + * + * "val" is lhs Jsonb, and mContained is rhs Jsonb when called from top level. + * We determine if mContained is contained within val. + */ +bool +JsonbDeepContains(JsonbIterator **val, JsonbIterator **mContained) +{ + JsonbValue vval, + vcontained; + JsonbIteratorToken rval, + rcont; + + /* + * Guard against stack overflow due to overly complex Jsonb. + * + * Functions called here independently take this precaution, but that + * might not be sufficient since this is also a recursive function. + */ + check_stack_depth(); + + rval = JsonbIteratorNext(val, &vval, false); + rcont = JsonbIteratorNext(mContained, &vcontained, false); + + if (rval != rcont) + { + /* + * The differing return values can immediately be taken as indicating + * two differing container types at this nesting level, which is + * sufficient reason to give up entirely (but it should be the case + * that they're both some container type). + */ + Assert(rval == WJB_BEGIN_OBJECT || rval == WJB_BEGIN_ARRAY); + Assert(rcont == WJB_BEGIN_OBJECT || rcont == WJB_BEGIN_ARRAY); + return false; + } + else if (rcont == WJB_BEGIN_OBJECT) + { + Assert(vval.type == jbvObject); + Assert(vcontained.type == jbvObject); + + /* + * If the lhs has fewer pairs than the rhs, it can't possibly contain + * the rhs. (This conclusion is safe only because we de-duplicate + * keys in all Jsonb objects; thus there can be no corresponding + * optimization in the array case.) The case probably won't arise + * often, but since it's such a cheap check we may as well make it. + */ + if (vval.val.object.nPairs < vcontained.val.object.nPairs) + return false; + + /* Work through rhs "is it contained within?" object */ + for (;;) + { + JsonbValue *lhsVal; /* lhsVal is from pair in lhs object */ + JsonbValue lhsValBuf; + + rcont = JsonbIteratorNext(mContained, &vcontained, false); + + /* + * When we get through caller's rhs "is it contained within?" + * object without failing to find one of its values, it's + * contained. + */ + if (rcont == WJB_END_OBJECT) + return true; + + Assert(rcont == WJB_KEY); + Assert(vcontained.type == jbvString); + + /* First, find value by key... */ + lhsVal = + getKeyJsonValueFromContainer((*val)->container, + vcontained.val.string.val, + vcontained.val.string.len, + &lhsValBuf); + if (!lhsVal) + return false; + + /* + * ...at this stage it is apparent that there is at least a key + * match for this rhs pair. + */ + rcont = JsonbIteratorNext(mContained, &vcontained, true); + + Assert(rcont == WJB_VALUE); + + /* + * Compare rhs pair's value with lhs pair's value just found using + * key + */ + if (lhsVal->type != vcontained.type) + { + return false; + } + else if (IsAJsonbScalar(lhsVal)) + { + if (!equalsJsonbScalarValue(lhsVal, &vcontained)) + return false; + } + else + { + /* Nested container value (object or array) */ + JsonbIterator *nestval, + *nestContained; + + Assert(lhsVal->type == jbvBinary); + Assert(vcontained.type == jbvBinary); + + nestval = JsonbIteratorInit(lhsVal->val.binary.data); + nestContained = JsonbIteratorInit(vcontained.val.binary.data); + + /* + * Match "value" side of rhs datum object's pair recursively. + * It's a nested structure. + * + * Note that nesting still has to "match up" at the right + * nesting sub-levels. However, there need only be zero or + * more matching pairs (or elements) at each nesting level + * (provided the *rhs* pairs/elements *all* match on each + * level), which enables searching nested structures for a + * single String or other primitive type sub-datum quite + * effectively (provided the user constructed the rhs nested + * structure such that we "know where to look"). + * + * In other words, the mapping of container nodes in the rhs + * "vcontained" Jsonb to internal nodes on the lhs is + * injective, and parent-child edges on the rhs must be mapped + * to parent-child edges on the lhs to satisfy the condition + * of containment (plus of course the mapped nodes must be + * equal). + */ + if (!JsonbDeepContains(&nestval, &nestContained)) + return false; + } + } + } + else if (rcont == WJB_BEGIN_ARRAY) + { + JsonbValue *lhsConts = NULL; + uint32 nLhsElems = vval.val.array.nElems; + + Assert(vval.type == jbvArray); + Assert(vcontained.type == jbvArray); + + /* + * Handle distinction between "raw scalar" pseudo arrays, and real + * arrays. + * + * A raw scalar may contain another raw scalar, and an array may + * contain a raw scalar, but a raw scalar may not contain an array. We + * don't do something like this for the object case, since objects can + * only contain pairs, never raw scalars (a pair is represented by an + * rhs object argument with a single contained pair). + */ + if (vval.val.array.rawScalar && !vcontained.val.array.rawScalar) + return false; + + /* Work through rhs "is it contained within?" array */ + for (;;) + { + rcont = JsonbIteratorNext(mContained, &vcontained, true); + + /* + * When we get through caller's rhs "is it contained within?" + * array without failing to find one of its values, it's + * contained. + */ + if (rcont == WJB_END_ARRAY) + return true; + + Assert(rcont == WJB_ELEM); + + if (IsAJsonbScalar(&vcontained)) + { + if (!findJsonbValueFromContainer((*val)->container, + JB_FARRAY, + &vcontained)) + return false; + } + else + { + uint32 i; + + /* + * If this is first container found in rhs array (at this + * depth), initialize temp lhs array of containers + */ + if (lhsConts == NULL) + { + uint32 j = 0; + + /* Make room for all possible values */ + lhsConts = palloc(sizeof(JsonbValue) * nLhsElems); + + for (i = 0; i < nLhsElems; i++) + { + /* Store all lhs elements in temp array */ + rcont = JsonbIteratorNext(val, &vval, true); + Assert(rcont == WJB_ELEM); + + if (vval.type == jbvBinary) + lhsConts[j++] = vval; + } + + /* No container elements in temp array, so give up now */ + if (j == 0) + return false; + + /* We may have only partially filled array */ + nLhsElems = j; + } + + /* XXX: Nested array containment is O(N^2) */ + for (i = 0; i < nLhsElems; i++) + { + /* Nested container value (object or array) */ + JsonbIterator *nestval, + *nestContained; + bool contains; + + nestval = JsonbIteratorInit(lhsConts[i].val.binary.data); + nestContained = JsonbIteratorInit(vcontained.val.binary.data); + + contains = JsonbDeepContains(&nestval, &nestContained); + + if (nestval) + pfree(nestval); + if (nestContained) + pfree(nestContained); + if (contains) + break; + } + + /* + * Report rhs container value is not contained if couldn't + * match rhs container to *some* lhs cont + */ + if (i == nLhsElems) + return false; + } + } + } + else + { + elog(ERROR, "invalid jsonb container type"); + } + + elog(ERROR, "unexpectedly fell off end of jsonb container"); + return false; +} + +/* + * Hash a JsonbValue scalar value, mixing the hash value into an existing + * hash provided by the caller. + * + * Some callers may wish to independently XOR in JB_FOBJECT and JB_FARRAY + * flags. + */ +void +JsonbHashScalarValue(const JsonbValue *scalarVal, uint32 *hash) +{ + uint32 tmp; + + /* Compute hash value for scalarVal */ + switch (scalarVal->type) + { + case jbvNull: + tmp = 0x01; + break; + case jbvString: + tmp = DatumGetUInt32(hash_any((const unsigned char *) scalarVal->val.string.val, + scalarVal->val.string.len)); + break; + case jbvNumeric: + /* Must hash equal numerics to equal hash codes */ + tmp = DatumGetUInt32(DirectFunctionCall1(hash_numeric, + NumericGetDatum(scalarVal->val.numeric))); + break; + case jbvBool: + tmp = scalarVal->val.boolean ? 0x02 : 0x04; + + break; + default: + elog(ERROR, "invalid jsonb scalar type"); + tmp = 0; /* keep compiler quiet */ + break; + } + + /* + * Combine hash values of successive keys, values and elements by rotating + * the previous value left 1 bit, then XOR'ing in the new + * key/value/element's hash value. + */ + *hash = (*hash << 1) | (*hash >> 31); + *hash ^= tmp; +} + +/* + * Hash a value to a 64-bit value, with a seed. Otherwise, similar to + * JsonbHashScalarValue. + */ +void +JsonbHashScalarValueExtended(const JsonbValue *scalarVal, uint64 *hash, + uint64 seed) +{ + uint64 tmp; + + switch (scalarVal->type) + { + case jbvNull: + tmp = seed + 0x01; + break; + case jbvString: + tmp = DatumGetUInt64(hash_any_extended((const unsigned char *) scalarVal->val.string.val, + scalarVal->val.string.len, + seed)); + break; + case jbvNumeric: + tmp = DatumGetUInt64(DirectFunctionCall2(hash_numeric_extended, + NumericGetDatum(scalarVal->val.numeric), + UInt64GetDatum(seed))); + break; + case jbvBool: + if (seed) + tmp = DatumGetUInt64(DirectFunctionCall2(hashcharextended, + BoolGetDatum(scalarVal->val.boolean), + UInt64GetDatum(seed))); + else + tmp = scalarVal->val.boolean ? 0x02 : 0x04; + + break; + default: + elog(ERROR, "invalid jsonb scalar type"); + break; + } + + *hash = ROTATE_HIGH_AND_LOW_32BITS(*hash); + *hash ^= tmp; +} + +/* + * Are two scalar JsonbValues of the same type a and b equal? + */ +static bool +equalsJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar) +{ + if (aScalar->type == bScalar->type) + { + switch (aScalar->type) + { + case jbvNull: + return true; + case jbvString: + return lengthCompareJsonbStringValue(aScalar, bScalar) == 0; + case jbvNumeric: + return DatumGetBool(DirectFunctionCall2(numeric_eq, + PointerGetDatum(aScalar->val.numeric), + PointerGetDatum(bScalar->val.numeric))); + case jbvBool: + return aScalar->val.boolean == bScalar->val.boolean; + + default: + elog(ERROR, "invalid jsonb scalar type"); + } + } + elog(ERROR, "jsonb scalar type mismatch"); + return false; +} + +/* + * Compare two scalar JsonbValues, returning -1, 0, or 1. + * + * Strings are compared using the default collation. Used by B-tree + * operators, where a lexical sort order is generally expected. + */ +static int +compareJsonbScalarValue(JsonbValue *aScalar, JsonbValue *bScalar) +{ + if (aScalar->type == bScalar->type) + { + switch (aScalar->type) + { + case jbvNull: + return 0; + case jbvString: + return varstr_cmp(aScalar->val.string.val, + aScalar->val.string.len, + bScalar->val.string.val, + bScalar->val.string.len, + DEFAULT_COLLATION_OID); + case jbvNumeric: + return DatumGetInt32(DirectFunctionCall2(numeric_cmp, + PointerGetDatum(aScalar->val.numeric), + PointerGetDatum(bScalar->val.numeric))); + case jbvBool: + if (aScalar->val.boolean == bScalar->val.boolean) + return 0; + else if (aScalar->val.boolean > bScalar->val.boolean) + return 1; + else + return -1; + default: + elog(ERROR, "invalid jsonb scalar type"); + } + } + elog(ERROR, "jsonb scalar type mismatch"); + return -1; +} + + +/* + * Functions for manipulating the resizable buffer used by convertJsonb and + * its subroutines. + */ + +/* + * Reserve 'len' bytes, at the end of the buffer, enlarging it if necessary. + * Returns the offset to the reserved area. The caller is expected to fill + * the reserved area later with copyToBuffer(). + */ +static int +reserveFromBuffer(StringInfo buffer, int len) +{ + int offset; + + /* Make more room if needed */ + enlargeStringInfo(buffer, len); + + /* remember current offset */ + offset = buffer->len; + + /* reserve the space */ + buffer->len += len; + + /* + * Keep a trailing null in place, even though it's not useful for us; it + * seems best to preserve the invariants of StringInfos. + */ + buffer->data[buffer->len] = '\0'; + + return offset; +} + +/* + * Copy 'len' bytes to a previously reserved area in buffer. + */ +static void +copyToBuffer(StringInfo buffer, int offset, const char *data, int len) +{ + memcpy(buffer->data + offset, data, len); +} + +/* + * A shorthand for reserveFromBuffer + copyToBuffer. + */ +static void +appendToBuffer(StringInfo buffer, const char *data, int len) +{ + int offset; + + offset = reserveFromBuffer(buffer, len); + copyToBuffer(buffer, offset, data, len); +} + + +/* + * Append padding, so that the length of the StringInfo is int-aligned. + * Returns the number of padding bytes appended. + */ +static short +padBufferToInt(StringInfo buffer) +{ + int padlen, + p, + offset; + + padlen = INTALIGN(buffer->len) - buffer->len; + + offset = reserveFromBuffer(buffer, padlen); + + /* padlen must be small, so this is probably faster than a memset */ + for (p = 0; p < padlen; p++) + buffer->data[offset + p] = '\0'; + + return padlen; +} + +/* + * Given a JsonbValue, convert to Jsonb. The result is palloc'd. + */ +static Jsonb * +convertToJsonb(JsonbValue *val) +{ + StringInfoData buffer; + JEntry jentry; + Jsonb *res; + + /* Should not already have binary representation */ + Assert(val->type != jbvBinary); + + /* Allocate an output buffer. It will be enlarged as needed */ + initStringInfo(&buffer); + + /* Make room for the varlena header */ + reserveFromBuffer(&buffer, VARHDRSZ); + + convertJsonbValue(&buffer, &jentry, val, 0); + + /* + * Note: the JEntry of the root is discarded. Therefore the root + * JsonbContainer struct must contain enough information to tell what kind + * of value it is. + */ + + res = (Jsonb *) buffer.data; + + SET_VARSIZE(res, buffer.len); + + return res; +} + +/* + * Subroutine of convertJsonb: serialize a single JsonbValue into buffer. + * + * The JEntry header for this node is returned in *header. It is filled in + * with the length of this value and appropriate type bits. If we wish to + * store an end offset rather than a length, it is the caller's responsibility + * to adjust for that. + * + * If the value is an array or an object, this recurses. 'level' is only used + * for debugging purposes. + */ +static void +convertJsonbValue(StringInfo buffer, JEntry *header, JsonbValue *val, int level) +{ + check_stack_depth(); + + if (!val) + return; + + /* + * A JsonbValue passed as val should never have a type of jbvBinary, and + * neither should any of its sub-components. Those values will be produced + * by convertJsonbArray and convertJsonbObject, the results of which will + * not be passed back to this function as an argument. + */ + + if (IsAJsonbScalar(val)) + convertJsonbScalar(buffer, header, val); + else if (val->type == jbvArray) + convertJsonbArray(buffer, header, val, level); + else if (val->type == jbvObject) + convertJsonbObject(buffer, header, val, level); + else + elog(ERROR, "unknown type of jsonb container to convert"); +} + +static void +convertJsonbArray(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) +{ + int base_offset; + int jentry_offset; + int i; + int totallen; + uint32 header; + int nElems = val->val.array.nElems; + + /* Remember where in the buffer this array starts. */ + base_offset = buffer->len; + + /* Align to 4-byte boundary (any padding counts as part of my data) */ + padBufferToInt(buffer); + + /* + * Construct the header Jentry and store it in the beginning of the + * variable-length payload. + */ + header = nElems | JB_FARRAY; + if (val->val.array.rawScalar) + { + Assert(nElems == 1); + Assert(level == 0); + header |= JB_FSCALAR; + } + + appendToBuffer(buffer, (char *) &header, sizeof(uint32)); + + /* Reserve space for the JEntries of the elements. */ + jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nElems); + + totallen = 0; + for (i = 0; i < nElems; i++) + { + JsonbValue *elem = &val->val.array.elems[i]; + int len; + JEntry meta; + + /* + * Convert element, producing a JEntry and appending its + * variable-length data to buffer + */ + convertJsonbValue(buffer, &meta, elem, level + 1); + + len = JBE_OFFLENFLD(meta); + totallen += len; + + /* + * Bail out if total variable-length data exceeds what will fit in a + * JEntry length field. We check this in each iteration, not just + * once at the end, to forestall possible integer overflow. + */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", + JENTRY_OFFLENMASK))); + + /* + * Convert each JB_OFFSET_STRIDE'th length to an offset. + */ + if ((i % JB_OFFSET_STRIDE) == 0) + meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; + + copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); + jentry_offset += sizeof(JEntry); + } + + /* Total data size is everything we've appended to buffer */ + totallen = buffer->len - base_offset; + + /* Check length again, since we didn't include the metadata above */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb array elements exceeds the maximum of %u bytes", + JENTRY_OFFLENMASK))); + + /* Initialize the header of this node in the container's JEntry array */ + *pheader = JENTRY_ISCONTAINER | totallen; +} + +static void +convertJsonbObject(StringInfo buffer, JEntry *pheader, JsonbValue *val, int level) +{ + int base_offset; + int jentry_offset; + int i; + int totallen; + uint32 header; + int nPairs = val->val.object.nPairs; + + /* Remember where in the buffer this object starts. */ + base_offset = buffer->len; + + /* Align to 4-byte boundary (any padding counts as part of my data) */ + padBufferToInt(buffer); + + /* + * Construct the header Jentry and store it in the beginning of the + * variable-length payload. + */ + header = nPairs | JB_FOBJECT; + appendToBuffer(buffer, (char *) &header, sizeof(uint32)); + + /* Reserve space for the JEntries of the keys and values. */ + jentry_offset = reserveFromBuffer(buffer, sizeof(JEntry) * nPairs * 2); + + /* + * Iterate over the keys, then over the values, since that is the ordering + * we want in the on-disk representation. + */ + totallen = 0; + for (i = 0; i < nPairs; i++) + { + JsonbPair *pair = &val->val.object.pairs[i]; + int len; + JEntry meta; + + /* + * Convert key, producing a JEntry and appending its variable-length + * data to buffer + */ + convertJsonbScalar(buffer, &meta, &pair->key); + + len = JBE_OFFLENFLD(meta); + totallen += len; + + /* + * Bail out if total variable-length data exceeds what will fit in a + * JEntry length field. We check this in each iteration, not just + * once at the end, to forestall possible integer overflow. + */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", + JENTRY_OFFLENMASK))); + + /* + * Convert each JB_OFFSET_STRIDE'th length to an offset. + */ + if ((i % JB_OFFSET_STRIDE) == 0) + meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; + + copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); + jentry_offset += sizeof(JEntry); + } + for (i = 0; i < nPairs; i++) + { + JsonbPair *pair = &val->val.object.pairs[i]; + int len; + JEntry meta; + + /* + * Convert value, producing a JEntry and appending its variable-length + * data to buffer + */ + convertJsonbValue(buffer, &meta, &pair->value, level + 1); + + len = JBE_OFFLENFLD(meta); + totallen += len; + + /* + * Bail out if total variable-length data exceeds what will fit in a + * JEntry length field. We check this in each iteration, not just + * once at the end, to forestall possible integer overflow. + */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", + JENTRY_OFFLENMASK))); + + /* + * Convert each JB_OFFSET_STRIDE'th length to an offset. + */ + if (((i + nPairs) % JB_OFFSET_STRIDE) == 0) + meta = (meta & JENTRY_TYPEMASK) | totallen | JENTRY_HAS_OFF; + + copyToBuffer(buffer, jentry_offset, (char *) &meta, sizeof(JEntry)); + jentry_offset += sizeof(JEntry); + } + + /* Total data size is everything we've appended to buffer */ + totallen = buffer->len - base_offset; + + /* Check length again, since we didn't include the metadata above */ + if (totallen > JENTRY_OFFLENMASK) + ereport(ERROR, + (errcode(ERRCODE_PROGRAM_LIMIT_EXCEEDED), + errmsg("total size of jsonb object elements exceeds the maximum of %u bytes", + JENTRY_OFFLENMASK))); + + /* Initialize the header of this node in the container's JEntry array */ + *pheader = JENTRY_ISCONTAINER | totallen; +} + +static void +convertJsonbScalar(StringInfo buffer, JEntry *jentry, JsonbValue *scalarVal) +{ + int numlen; + short padlen; + + switch (scalarVal->type) + { + case jbvNull: + *jentry = JENTRY_ISNULL; + break; + + case jbvString: + appendToBuffer(buffer, scalarVal->val.string.val, scalarVal->val.string.len); + + *jentry = scalarVal->val.string.len; + break; + + case jbvNumeric: + numlen = VARSIZE_ANY(scalarVal->val.numeric); + padlen = padBufferToInt(buffer); + + appendToBuffer(buffer, (char *) scalarVal->val.numeric, numlen); + + *jentry = JENTRY_ISNUMERIC | (padlen + numlen); + break; + + case jbvBool: + *jentry = (scalarVal->val.boolean) ? + JENTRY_ISBOOL_TRUE : JENTRY_ISBOOL_FALSE; + break; + + case jbvDatetime: + { + char buf[MAXDATELEN + 1]; + size_t len; + + JsonEncodeDateTime(buf, + scalarVal->val.datetime.value, + scalarVal->val.datetime.typid, + &scalarVal->val.datetime.tz); + len = strlen(buf); + appendToBuffer(buffer, buf, len); + + *jentry = len; + } + break; + + default: + elog(ERROR, "invalid jsonb scalar type"); + } +} + +/* + * Compare two jbvString JsonbValue values, a and b. + * + * This is a special qsort() comparator used to sort strings in certain + * internal contexts where it is sufficient to have a well-defined sort order. + * In particular, object pair keys are sorted according to this criteria to + * facilitate cheap binary searches where we don't care about lexical sort + * order. + * + * a and b are first sorted based on their length. If a tie-breaker is + * required, only then do we consider string binary equality. + */ +static int +lengthCompareJsonbStringValue(const void *a, const void *b) +{ + const JsonbValue *va = (const JsonbValue *) a; + const JsonbValue *vb = (const JsonbValue *) b; + + Assert(va->type == jbvString); + Assert(vb->type == jbvString); + + return lengthCompareJsonbString(va->val.string.val, va->val.string.len, + vb->val.string.val, vb->val.string.len); +} + +/* + * Subroutine for lengthCompareJsonbStringValue + * + * This is also useful separately to implement binary search on + * JsonbContainers. + */ +static int +lengthCompareJsonbString(const char *val1, int len1, const char *val2, int len2) +{ + if (len1 == len2) + return memcmp(val1, val2, len1); + else + return len1 > len2 ? 1 : -1; +} + +/* + * qsort_arg() comparator to compare JsonbPair values. + * + * Third argument 'binequal' may point to a bool. If it's set, *binequal is set + * to true iff a and b have full binary equality, since some callers have an + * interest in whether the two values are equal or merely equivalent. + * + * N.B: String comparisons here are "length-wise" + * + * Pairs with equals keys are ordered such that the order field is respected. + */ +static int +lengthCompareJsonbPair(const void *a, const void *b, void *binequal) +{ + const JsonbPair *pa = (const JsonbPair *) a; + const JsonbPair *pb = (const JsonbPair *) b; + int res; + + res = lengthCompareJsonbStringValue(&pa->key, &pb->key); + if (res == 0 && binequal) + *((bool *) binequal) = true; + + /* + * Guarantee keeping order of equal pair. Unique algorithm will prefer + * first element as value. + */ + if (res == 0) + res = (pa->order > pb->order) ? -1 : 1; + + return res; +} + +/* + * Sort and unique-ify pairs in JsonbValue object + */ +static void +uniqueifyJsonbObject(JsonbValue *object) +{ + bool hasNonUniq = false; + + Assert(object->type == jbvObject); + + if (object->val.object.nPairs > 1) + qsort_arg(object->val.object.pairs, object->val.object.nPairs, sizeof(JsonbPair), + lengthCompareJsonbPair, &hasNonUniq); + + if (hasNonUniq) + { + JsonbPair *ptr = object->val.object.pairs + 1, + *res = object->val.object.pairs; + + while (ptr - object->val.object.pairs < object->val.object.nPairs) + { + /* Avoid copying over duplicate */ + if (lengthCompareJsonbStringValue(ptr, res) != 0) + { + res++; + if (ptr != res) + memcpy(res, ptr, sizeof(JsonbPair)); + } + ptr++; + } + + object->val.object.nPairs = res + 1 - object->val.object.pairs; + } +} |