diff options
Diffstat (limited to 'src/backend/utils/adt/jsonb_gin.c')
-rw-r--r-- | src/backend/utils/adt/jsonb_gin.c | 1409 |
1 files changed, 1409 insertions, 0 deletions
diff --git a/src/backend/utils/adt/jsonb_gin.c b/src/backend/utils/adt/jsonb_gin.c new file mode 100644 index 0000000..e941439 --- /dev/null +++ b/src/backend/utils/adt/jsonb_gin.c @@ -0,0 +1,1409 @@ +/*------------------------------------------------------------------------- + * + * jsonb_gin.c + * GIN support functions for jsonb + * + * Copyright (c) 2014-2023, PostgreSQL Global Development Group + * + * We provide two opclasses for jsonb indexing: jsonb_ops and jsonb_path_ops. + * For their description see json.sgml and comments in jsonb.h. + * + * The operators support, among the others, "jsonb @? jsonpath" and + * "jsonb @@ jsonpath". Expressions containing these operators are easily + * expressed through each other. + * + * jb @? 'path' <=> jb @@ 'EXISTS(path)' + * jb @@ 'expr' <=> jb @? '$ ? (expr)' + * + * Thus, we're going to consider only @@ operator, while regarding @? operator + * the same is true for jb @@ 'EXISTS(path)'. + * + * Result of jsonpath query extraction is a tree, which leaf nodes are index + * entries and non-leaf nodes are AND/OR logical expressions. Basically we + * extract following statements out of jsonpath: + * + * 1) "accessors_chain = const", + * 2) "EXISTS(accessors_chain)". + * + * Accessors chain may consist of .key, [*] and [index] accessors. jsonb_ops + * additionally supports .* and .**. + * + * For now, both jsonb_ops and jsonb_path_ops supports only statements of + * the 1st find. jsonb_ops might also support statements of the 2nd kind, + * but given we have no statistics keys extracted from accessors chain + * are likely non-selective. Therefore, we choose to not confuse optimizer + * and skip statements of the 2nd kind altogether. In future versions that + * might be changed. + * + * In jsonb_ops statement of the 1st kind is split into expression of AND'ed + * keys and const. Sometimes const might be interpreted as both value or key + * in jsonb_ops. Then statement of 1st kind is decomposed into the expression + * below. + * + * key1 AND key2 AND ... AND keyN AND (const_as_value OR const_as_key) + * + * jsonb_path_ops transforms each statement of the 1st kind into single hash + * entry below. + * + * HASH(key1, key2, ... , keyN, const) + * + * Despite statements of the 2nd kind are not supported by both jsonb_ops and + * jsonb_path_ops, EXISTS(path) expressions might be still supported, + * when statements of 1st kind could be extracted out of their filters. + * + * IDENTIFICATION + * src/backend/utils/adt/jsonb_gin.c + * + *------------------------------------------------------------------------- + */ + +#include "postgres.h" + +#include "access/gin.h" +#include "access/stratnum.h" +#include "catalog/pg_collation.h" +#include "catalog/pg_type.h" +#include "common/hashfn.h" +#include "miscadmin.h" +#include "utils/builtins.h" +#include "utils/jsonb.h" +#include "utils/jsonpath.h" +#include "utils/varlena.h" + +typedef struct PathHashStack +{ + uint32 hash; + struct PathHashStack *parent; +} PathHashStack; + +/* Buffer for GIN entries */ +typedef struct GinEntries +{ + Datum *buf; + int count; + int allocated; +} GinEntries; + +typedef enum JsonPathGinNodeType +{ + JSP_GIN_OR, + JSP_GIN_AND, + JSP_GIN_ENTRY +} JsonPathGinNodeType; + +typedef struct JsonPathGinNode JsonPathGinNode; + +/* Node in jsonpath expression tree */ +struct JsonPathGinNode +{ + JsonPathGinNodeType type; + union + { + int nargs; /* valid for OR and AND nodes */ + int entryIndex; /* index in GinEntries array, valid for ENTRY + * nodes after entries output */ + Datum entryDatum; /* path hash or key name/scalar, valid for + * ENTRY nodes before entries output */ + } val; + JsonPathGinNode *args[FLEXIBLE_ARRAY_MEMBER]; /* valid for OR and AND + * nodes */ +}; + +/* + * jsonb_ops entry extracted from jsonpath item. Corresponding path item + * may be: '.key', '.*', '.**', '[index]' or '[*]'. + * Entry type is stored in 'type' field. + */ +typedef struct JsonPathGinPathItem +{ + struct JsonPathGinPathItem *parent; + Datum keyName; /* key name (for '.key' path item) or NULL */ + JsonPathItemType type; /* type of jsonpath item */ +} JsonPathGinPathItem; + +/* GIN representation of the extracted json path */ +typedef union JsonPathGinPath +{ + JsonPathGinPathItem *items; /* list of path items (jsonb_ops) */ + uint32 hash; /* hash of the path (jsonb_path_ops) */ +} JsonPathGinPath; + +typedef struct JsonPathGinContext JsonPathGinContext; + +/* Callback, which stores information about path item into JsonPathGinPath */ +typedef bool (*JsonPathGinAddPathItemFunc) (JsonPathGinPath *path, + JsonPathItem *jsp); + +/* + * Callback, which extracts set of nodes from statement of 1st kind + * (scalar != NULL) or statement of 2nd kind (scalar == NULL). + */ +typedef List *(*JsonPathGinExtractNodesFunc) (JsonPathGinContext *cxt, + JsonPathGinPath path, + JsonbValue *scalar, + List *nodes); + +/* Context for jsonpath entries extraction */ +struct JsonPathGinContext +{ + JsonPathGinAddPathItemFunc add_path_item; + JsonPathGinExtractNodesFunc extract_nodes; + bool lax; +}; + +static Datum make_text_key(char flag, const char *str, int len); +static Datum make_scalar_key(const JsonbValue *scalarVal, bool is_key); + +static JsonPathGinNode *extract_jsp_bool_expr(JsonPathGinContext *cxt, + JsonPathGinPath path, JsonPathItem *jsp, bool not); + + +/* Initialize GinEntries struct */ +static void +init_gin_entries(GinEntries *entries, int preallocated) +{ + entries->allocated = preallocated; + entries->buf = preallocated ? palloc(sizeof(Datum) * preallocated) : NULL; + entries->count = 0; +} + +/* Add new entry to GinEntries */ +static int +add_gin_entry(GinEntries *entries, Datum entry) +{ + int id = entries->count; + + if (entries->count >= entries->allocated) + { + if (entries->allocated) + { + entries->allocated *= 2; + entries->buf = repalloc(entries->buf, + sizeof(Datum) * entries->allocated); + } + else + { + entries->allocated = 8; + entries->buf = palloc(sizeof(Datum) * entries->allocated); + } + } + + entries->buf[entries->count++] = entry; + + return id; +} + +/* + * + * jsonb_ops GIN opclass support functions + * + */ + +Datum +gin_compare_jsonb(PG_FUNCTION_ARGS) +{ + text *arg1 = PG_GETARG_TEXT_PP(0); + text *arg2 = PG_GETARG_TEXT_PP(1); + int32 result; + char *a1p, + *a2p; + int len1, + len2; + + a1p = VARDATA_ANY(arg1); + a2p = VARDATA_ANY(arg2); + + len1 = VARSIZE_ANY_EXHDR(arg1); + len2 = VARSIZE_ANY_EXHDR(arg2); + + /* Compare text as bttextcmp does, but always using C collation */ + result = varstr_cmp(a1p, len1, a2p, len2, C_COLLATION_OID); + + PG_FREE_IF_COPY(arg1, 0); + PG_FREE_IF_COPY(arg2, 1); + + PG_RETURN_INT32(result); +} + +Datum +gin_extract_jsonb(PG_FUNCTION_ARGS) +{ + Jsonb *jb = (Jsonb *) PG_GETARG_JSONB_P(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + int total = JB_ROOT_COUNT(jb); + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + GinEntries entries; + + /* If the root level is empty, we certainly have no keys */ + if (total == 0) + { + *nentries = 0; + PG_RETURN_POINTER(NULL); + } + + /* Otherwise, use 2 * root count as initial estimate of result size */ + init_gin_entries(&entries, 2 * total); + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + switch (r) + { + case WJB_KEY: + add_gin_entry(&entries, make_scalar_key(&v, true)); + break; + case WJB_ELEM: + /* Pretend string array elements are keys, see jsonb.h */ + add_gin_entry(&entries, make_scalar_key(&v, v.type == jbvString)); + break; + case WJB_VALUE: + add_gin_entry(&entries, make_scalar_key(&v, false)); + break; + default: + /* we can ignore structural items */ + break; + } + } + + *nentries = entries.count; + + PG_RETURN_POINTER(entries.buf); +} + +/* Append JsonPathGinPathItem to JsonPathGinPath (jsonb_ops) */ +static bool +jsonb_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) +{ + JsonPathGinPathItem *pentry; + Datum keyName; + + switch (jsp->type) + { + case jpiRoot: + path->items = NULL; /* reset path */ + return true; + + case jpiKey: + { + int len; + char *key = jspGetString(jsp, &len); + + keyName = make_text_key(JGINFLAG_KEY, key, len); + break; + } + + case jpiAny: + case jpiAnyKey: + case jpiAnyArray: + case jpiIndexArray: + keyName = PointerGetDatum(NULL); + break; + + default: + /* other path items like item methods are not supported */ + return false; + } + + pentry = palloc(sizeof(*pentry)); + + pentry->type = jsp->type; + pentry->keyName = keyName; + pentry->parent = path->items; + + path->items = pentry; + + return true; +} + +/* Combine existing path hash with next key hash (jsonb_path_ops) */ +static bool +jsonb_path_ops__add_path_item(JsonPathGinPath *path, JsonPathItem *jsp) +{ + switch (jsp->type) + { + case jpiRoot: + path->hash = 0; /* reset path hash */ + return true; + + case jpiKey: + { + JsonbValue jbv; + + jbv.type = jbvString; + jbv.val.string.val = jspGetString(jsp, &jbv.val.string.len); + + JsonbHashScalarValue(&jbv, &path->hash); + return true; + } + + case jpiIndexArray: + case jpiAnyArray: + return true; /* path hash is unchanged */ + + default: + /* other items (wildcard paths, item methods) are not supported */ + return false; + } +} + +static JsonPathGinNode * +make_jsp_entry_node(Datum entry) +{ + JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args)); + + node->type = JSP_GIN_ENTRY; + node->val.entryDatum = entry; + + return node; +} + +static JsonPathGinNode * +make_jsp_entry_node_scalar(JsonbValue *scalar, bool iskey) +{ + return make_jsp_entry_node(make_scalar_key(scalar, iskey)); +} + +static JsonPathGinNode * +make_jsp_expr_node(JsonPathGinNodeType type, int nargs) +{ + JsonPathGinNode *node = palloc(offsetof(JsonPathGinNode, args) + + sizeof(node->args[0]) * nargs); + + node->type = type; + node->val.nargs = nargs; + + return node; +} + +static JsonPathGinNode * +make_jsp_expr_node_args(JsonPathGinNodeType type, List *args) +{ + JsonPathGinNode *node = make_jsp_expr_node(type, list_length(args)); + ListCell *lc; + int i = 0; + + foreach(lc, args) + node->args[i++] = lfirst(lc); + + return node; +} + +static JsonPathGinNode * +make_jsp_expr_node_binary(JsonPathGinNodeType type, + JsonPathGinNode *arg1, JsonPathGinNode *arg2) +{ + JsonPathGinNode *node = make_jsp_expr_node(type, 2); + + node->args[0] = arg1; + node->args[1] = arg2; + + return node; +} + +/* Append a list of nodes from the jsonpath (jsonb_ops). */ +static List * +jsonb_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonbValue *scalar, List *nodes) +{ + JsonPathGinPathItem *pentry; + + if (scalar) + { + JsonPathGinNode *node; + + /* + * Append path entry nodes only if scalar is provided. See header + * comment for details. + */ + for (pentry = path.items; pentry; pentry = pentry->parent) + { + if (pentry->type == jpiKey) /* only keys are indexed */ + nodes = lappend(nodes, make_jsp_entry_node(pentry->keyName)); + } + + /* Append scalar node for equality queries. */ + if (scalar->type == jbvString) + { + JsonPathGinPathItem *last = path.items; + GinTernaryValue key_entry; + + /* + * Assuming that jsonb_ops interprets string array elements as + * keys, we may extract key or non-key entry or even both. In the + * latter case we create OR-node. It is possible in lax mode + * where arrays are automatically unwrapped, or in strict mode for + * jpiAny items. + */ + + if (cxt->lax) + key_entry = GIN_MAYBE; + else if (!last) /* root ($) */ + key_entry = GIN_FALSE; + else if (last->type == jpiAnyArray || last->type == jpiIndexArray) + key_entry = GIN_TRUE; + else if (last->type == jpiAny) + key_entry = GIN_MAYBE; + else + key_entry = GIN_FALSE; + + if (key_entry == GIN_MAYBE) + { + JsonPathGinNode *n1 = make_jsp_entry_node_scalar(scalar, true); + JsonPathGinNode *n2 = make_jsp_entry_node_scalar(scalar, false); + + node = make_jsp_expr_node_binary(JSP_GIN_OR, n1, n2); + } + else + { + node = make_jsp_entry_node_scalar(scalar, + key_entry == GIN_TRUE); + } + } + else + { + node = make_jsp_entry_node_scalar(scalar, false); + } + + nodes = lappend(nodes, node); + } + + return nodes; +} + +/* Append a list of nodes from the jsonpath (jsonb_path_ops). */ +static List * +jsonb_path_ops__extract_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonbValue *scalar, List *nodes) +{ + if (scalar) + { + /* append path hash node for equality queries */ + uint32 hash = path.hash; + + JsonbHashScalarValue(scalar, &hash); + + return lappend(nodes, + make_jsp_entry_node(UInt32GetDatum(hash))); + } + else + { + /* jsonb_path_ops doesn't support EXISTS queries => nothing to append */ + return nodes; + } +} + +/* + * Extract a list of expression nodes that need to be AND-ed by the caller. + * Extracted expression is 'path == scalar' if 'scalar' is non-NULL, and + * 'EXISTS(path)' otherwise. + */ +static List * +extract_jsp_path_expr_nodes(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonPathItem *jsp, JsonbValue *scalar) +{ + JsonPathItem next; + List *nodes = NIL; + + for (;;) + { + switch (jsp->type) + { + case jpiCurrent: + break; + + case jpiFilter: + { + JsonPathItem arg; + JsonPathGinNode *filter; + + jspGetArg(jsp, &arg); + + filter = extract_jsp_bool_expr(cxt, path, &arg, false); + + if (filter) + nodes = lappend(nodes, filter); + + break; + } + + default: + if (!cxt->add_path_item(&path, jsp)) + + /* + * Path is not supported by the index opclass, return only + * the extracted filter nodes. + */ + return nodes; + break; + } + + if (!jspGetNext(jsp, &next)) + break; + + jsp = &next; + } + + /* + * Append nodes from the path expression itself to the already extracted + * list of filter nodes. + */ + return cxt->extract_nodes(cxt, path, scalar, nodes); +} + +/* + * Extract an expression node from one of following jsonpath path expressions: + * EXISTS(jsp) (when 'scalar' is NULL) + * jsp == scalar (when 'scalar' is not NULL). + * + * The current path (@) is passed in 'path'. + */ +static JsonPathGinNode * +extract_jsp_path_expr(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonPathItem *jsp, JsonbValue *scalar) +{ + /* extract a list of nodes to be AND-ed */ + List *nodes = extract_jsp_path_expr_nodes(cxt, path, jsp, scalar); + + if (nodes == NIL) + /* no nodes were extracted => full scan is needed for this path */ + return NULL; + + if (list_length(nodes) == 1) + return linitial(nodes); /* avoid extra AND-node */ + + /* construct AND-node for path with filters */ + return make_jsp_expr_node_args(JSP_GIN_AND, nodes); +} + +/* Recursively extract nodes from the boolean jsonpath expression. */ +static JsonPathGinNode * +extract_jsp_bool_expr(JsonPathGinContext *cxt, JsonPathGinPath path, + JsonPathItem *jsp, bool not) +{ + check_stack_depth(); + + switch (jsp->type) + { + case jpiAnd: /* expr && expr */ + case jpiOr: /* expr || expr */ + { + JsonPathItem arg; + JsonPathGinNode *larg; + JsonPathGinNode *rarg; + JsonPathGinNodeType type; + + jspGetLeftArg(jsp, &arg); + larg = extract_jsp_bool_expr(cxt, path, &arg, not); + + jspGetRightArg(jsp, &arg); + rarg = extract_jsp_bool_expr(cxt, path, &arg, not); + + if (!larg || !rarg) + { + if (jsp->type == jpiOr) + return NULL; + + return larg ? larg : rarg; + } + + type = not ^ (jsp->type == jpiAnd) ? JSP_GIN_AND : JSP_GIN_OR; + + return make_jsp_expr_node_binary(type, larg, rarg); + } + + case jpiNot: /* !expr */ + { + JsonPathItem arg; + + jspGetArg(jsp, &arg); + + /* extract child expression inverting 'not' flag */ + return extract_jsp_bool_expr(cxt, path, &arg, !not); + } + + case jpiExists: /* EXISTS(path) */ + { + JsonPathItem arg; + + if (not) + return NULL; /* NOT EXISTS is not supported */ + + jspGetArg(jsp, &arg); + + return extract_jsp_path_expr(cxt, path, &arg, NULL); + } + + case jpiNotEqual: + + /* + * 'not' == true case is not supported here because '!(path != + * scalar)' is not equivalent to 'path == scalar' in the general + * case because of sequence comparison semantics: 'path == scalar' + * === 'EXISTS (path, @ == scalar)', '!(path != scalar)' === + * 'FOR_ALL(path, @ == scalar)'. So, we should translate '!(path + * != scalar)' into GIN query 'path == scalar || EMPTY(path)', but + * 'EMPTY(path)' queries are not supported by the both jsonb + * opclasses. However in strict mode we could omit 'EMPTY(path)' + * part if the path can return exactly one item (it does not + * contain wildcard accessors or item methods like .keyvalue() + * etc.). + */ + return NULL; + + case jpiEqual: /* path == scalar */ + { + JsonPathItem left_item; + JsonPathItem right_item; + JsonPathItem *path_item; + JsonPathItem *scalar_item; + JsonbValue scalar; + + if (not) + return NULL; + + jspGetLeftArg(jsp, &left_item); + jspGetRightArg(jsp, &right_item); + + if (jspIsScalar(left_item.type)) + { + scalar_item = &left_item; + path_item = &right_item; + } + else if (jspIsScalar(right_item.type)) + { + scalar_item = &right_item; + path_item = &left_item; + } + else + return NULL; /* at least one operand should be a scalar */ + + switch (scalar_item->type) + { + case jpiNull: + scalar.type = jbvNull; + break; + case jpiBool: + scalar.type = jbvBool; + scalar.val.boolean = !!*scalar_item->content.value.data; + break; + case jpiNumeric: + scalar.type = jbvNumeric; + scalar.val.numeric = + (Numeric) scalar_item->content.value.data; + break; + case jpiString: + scalar.type = jbvString; + scalar.val.string.val = scalar_item->content.value.data; + scalar.val.string.len = + scalar_item->content.value.datalen; + break; + default: + elog(ERROR, "invalid scalar jsonpath item type: %d", + scalar_item->type); + return NULL; + } + + return extract_jsp_path_expr(cxt, path, path_item, &scalar); + } + + default: + return NULL; /* not a boolean expression */ + } +} + +/* Recursively emit all GIN entries found in the node tree */ +static void +emit_jsp_gin_entries(JsonPathGinNode *node, GinEntries *entries) +{ + check_stack_depth(); + + switch (node->type) + { + case JSP_GIN_ENTRY: + /* replace datum with its index in the array */ + node->val.entryIndex = add_gin_entry(entries, node->val.entryDatum); + break; + + case JSP_GIN_OR: + case JSP_GIN_AND: + { + int i; + + for (i = 0; i < node->val.nargs; i++) + emit_jsp_gin_entries(node->args[i], entries); + + break; + } + } +} + +/* + * Recursively extract GIN entries from jsonpath query. + * Root expression node is put into (*extra_data)[0]. + */ +static Datum * +extract_jsp_query(JsonPath *jp, StrategyNumber strat, bool pathOps, + int32 *nentries, Pointer **extra_data) +{ + JsonPathGinContext cxt; + JsonPathItem root; + JsonPathGinNode *node; + JsonPathGinPath path = {0}; + GinEntries entries = {0}; + + cxt.lax = (jp->header & JSONPATH_LAX) != 0; + + if (pathOps) + { + cxt.add_path_item = jsonb_path_ops__add_path_item; + cxt.extract_nodes = jsonb_path_ops__extract_nodes; + } + else + { + cxt.add_path_item = jsonb_ops__add_path_item; + cxt.extract_nodes = jsonb_ops__extract_nodes; + } + + jspInit(&root, jp); + + node = strat == JsonbJsonpathExistsStrategyNumber + ? extract_jsp_path_expr(&cxt, path, &root, NULL) + : extract_jsp_bool_expr(&cxt, path, &root, false); + + if (!node) + { + *nentries = 0; + return NULL; + } + + emit_jsp_gin_entries(node, &entries); + + *nentries = entries.count; + if (!*nentries) + return NULL; + + *extra_data = palloc0(sizeof(**extra_data) * entries.count); + **extra_data = (Pointer) node; + + return entries.buf; +} + +/* + * Recursively execute jsonpath expression. + * 'check' is a bool[] or a GinTernaryValue[] depending on 'ternary' flag. + */ +static GinTernaryValue +execute_jsp_gin_node(JsonPathGinNode *node, void *check, bool ternary) +{ + GinTernaryValue res; + GinTernaryValue v; + int i; + + switch (node->type) + { + case JSP_GIN_AND: + res = GIN_TRUE; + for (i = 0; i < node->val.nargs; i++) + { + v = execute_jsp_gin_node(node->args[i], check, ternary); + if (v == GIN_FALSE) + return GIN_FALSE; + else if (v == GIN_MAYBE) + res = GIN_MAYBE; + } + return res; + + case JSP_GIN_OR: + res = GIN_FALSE; + for (i = 0; i < node->val.nargs; i++) + { + v = execute_jsp_gin_node(node->args[i], check, ternary); + if (v == GIN_TRUE) + return GIN_TRUE; + else if (v == GIN_MAYBE) + res = GIN_MAYBE; + } + return res; + + case JSP_GIN_ENTRY: + { + int index = node->val.entryIndex; + + if (ternary) + return ((GinTernaryValue *) check)[index]; + else + return ((bool *) check)[index] ? GIN_TRUE : GIN_FALSE; + } + + default: + elog(ERROR, "invalid jsonpath gin node type: %d", node->type); + return GIN_FALSE; /* keep compiler quiet */ + } +} + +Datum +gin_extract_jsonb_query(PG_FUNCTION_ARGS) +{ + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries; + + if (strategy == JsonbContainsStrategyNumber) + { + /* Query is a jsonb, so just apply gin_extract_jsonb... */ + entries = (Datum *) + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb, + PG_GETARG_DATUM(0), + PointerGetDatum(nentries))); + /* ...although "contains {}" requires a full index scan */ + if (*nentries == 0) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbExistsStrategyNumber) + { + /* Query is a text string, which we treat as a key */ + text *query = PG_GETARG_TEXT_PP(0); + + *nentries = 1; + entries = (Datum *) palloc(sizeof(Datum)); + entries[0] = make_text_key(JGINFLAG_KEY, + VARDATA_ANY(query), + VARSIZE_ANY_EXHDR(query)); + } + else if (strategy == JsonbExistsAnyStrategyNumber || + strategy == JsonbExistsAllStrategyNumber) + { + /* Query is a text array; each element is treated as a key */ + ArrayType *query = PG_GETARG_ARRAYTYPE_P(0); + Datum *key_datums; + bool *key_nulls; + int key_count; + int i, + j; + + deconstruct_array_builtin(query, TEXTOID, &key_datums, &key_nulls, &key_count); + + entries = (Datum *) palloc(sizeof(Datum) * key_count); + + for (i = 0, j = 0; i < key_count; i++) + { + /* Nulls in the array are ignored */ + if (key_nulls[i]) + continue; + /* We rely on the array elements not being toasted */ + entries[j++] = make_text_key(JGINFLAG_KEY, + VARDATA_ANY(key_datums[i]), + VARSIZE_ANY_EXHDR(key_datums[i])); + } + + *nentries = j; + /* ExistsAll with no keys should match everything */ + if (j == 0 && strategy == JsonbExistsAllStrategyNumber) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + JsonPath *jp = PG_GETARG_JSONPATH_P(0); + Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + + entries = extract_jsp_query(jp, strategy, false, nentries, extra_data); + + if (!entries) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else + { + elog(ERROR, "unrecognized strategy number: %d", strategy); + entries = NULL; /* keep compiler quiet */ + } + + PG_RETURN_POINTER(entries); +} + +Datum +gin_consistent_jsonb(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = true; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * We must always recheck, since we can't tell from the index whether + * the positions of the matched items match the structure of the query + * object. (Even if we could, we'd also have to worry about hashed + * keys and the index's failure to distinguish keys from string array + * elements.) However, the tuple certainly doesn't match unless it + * contains all the query keys. + */ + *recheck = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbExistsStrategyNumber) + { + /* + * Although the key is certainly present in the index, we must recheck + * because (1) the key might be hashed, and (2) the index match might + * be for a key that's not at top level of the JSON object. For (1), + * we could look at the query key to see if it's hashed and not + * recheck if not, but the index lacks enough info to tell about (2). + */ + *recheck = true; + res = true; + } + else if (strategy == JsonbExistsAnyStrategyNumber) + { + /* As for plain exists, we must recheck */ + *recheck = true; + res = true; + } + else if (strategy == JsonbExistsAllStrategyNumber) + { + /* As for plain exists, we must recheck */ + *recheck = true; + /* ... but unless all the keys are present, we can say "false" */ + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + *recheck = true; + + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + false) != GIN_FALSE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_BOOL(res); +} + +Datum +gin_triconsistent_jsonb(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i; + + /* + * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; this + * corresponds to always forcing recheck in the regular consistent + * function, for the reasons listed there. + */ + if (strategy == JsonbContainsStrategyNumber || + strategy == JsonbExistsAllStrategyNumber) + { + /* All extracted keys must be present */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + } + else if (strategy == JsonbExistsStrategyNumber || + strategy == JsonbExistsAnyStrategyNumber) + { + /* At least one extracted key must be present */ + res = GIN_FALSE; + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_TRUE || + check[i] == GIN_MAYBE) + { + res = GIN_MAYBE; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + true); + + /* Should always recheck the result */ + if (res == GIN_TRUE) + res = GIN_MAYBE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_GIN_TERNARY_VALUE(res); +} + +/* + * + * jsonb_path_ops GIN opclass support functions + * + * In a jsonb_path_ops index, the GIN keys are uint32 hashes, one per JSON + * value; but the JSON key(s) leading to each value are also included in its + * hash computation. This means we can only support containment queries, + * but the index can distinguish, for example, {"foo": 42} from {"bar": 42} + * since different hashes will be generated. + * + */ + +Datum +gin_extract_jsonb_path(PG_FUNCTION_ARGS) +{ + Jsonb *jb = PG_GETARG_JSONB_P(0); + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + int total = JB_ROOT_COUNT(jb); + JsonbIterator *it; + JsonbValue v; + JsonbIteratorToken r; + PathHashStack tail; + PathHashStack *stack; + GinEntries entries; + + /* If the root level is empty, we certainly have no keys */ + if (total == 0) + { + *nentries = 0; + PG_RETURN_POINTER(NULL); + } + + /* Otherwise, use 2 * root count as initial estimate of result size */ + init_gin_entries(&entries, 2 * total); + + /* We keep a stack of partial hashes corresponding to parent key levels */ + tail.parent = NULL; + tail.hash = 0; + stack = &tail; + + it = JsonbIteratorInit(&jb->root); + + while ((r = JsonbIteratorNext(&it, &v, false)) != WJB_DONE) + { + PathHashStack *parent; + + switch (r) + { + case WJB_BEGIN_ARRAY: + case WJB_BEGIN_OBJECT: + /* Push a stack level for this object */ + parent = stack; + stack = (PathHashStack *) palloc(sizeof(PathHashStack)); + + /* + * We pass forward hashes from outer nesting levels so that + * the hashes for nested values will include outer keys as + * well as their own keys. + * + * Nesting an array within another array will not alter + * innermost scalar element hash values, but that seems + * inconsequential. + */ + stack->hash = parent->hash; + stack->parent = parent; + break; + case WJB_KEY: + /* mix this key into the current outer hash */ + JsonbHashScalarValue(&v, &stack->hash); + /* hash is now ready to incorporate the value */ + break; + case WJB_ELEM: + case WJB_VALUE: + /* mix the element or value's hash into the prepared hash */ + JsonbHashScalarValue(&v, &stack->hash); + /* and emit an index entry */ + add_gin_entry(&entries, UInt32GetDatum(stack->hash)); + /* reset hash for next key, value, or sub-object */ + stack->hash = stack->parent->hash; + break; + case WJB_END_ARRAY: + case WJB_END_OBJECT: + /* Pop the stack */ + parent = stack->parent; + pfree(stack); + stack = parent; + /* reset hash for next key, value, or sub-object */ + if (stack->parent) + stack->hash = stack->parent->hash; + else + stack->hash = 0; + break; + default: + elog(ERROR, "invalid JsonbIteratorNext rc: %d", (int) r); + } + } + + *nentries = entries.count; + + PG_RETURN_POINTER(entries.buf); +} + +Datum +gin_extract_jsonb_query_path(PG_FUNCTION_ARGS) +{ + int32 *nentries = (int32 *) PG_GETARG_POINTER(1); + StrategyNumber strategy = PG_GETARG_UINT16(2); + int32 *searchMode = (int32 *) PG_GETARG_POINTER(6); + Datum *entries; + + if (strategy == JsonbContainsStrategyNumber) + { + /* Query is a jsonb, so just apply gin_extract_jsonb_path ... */ + entries = (Datum *) + DatumGetPointer(DirectFunctionCall2(gin_extract_jsonb_path, + PG_GETARG_DATUM(0), + PointerGetDatum(nentries))); + + /* ... although "contains {}" requires a full index scan */ + if (*nentries == 0) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + JsonPath *jp = PG_GETARG_JSONPATH_P(0); + Pointer **extra_data = (Pointer **) PG_GETARG_POINTER(4); + + entries = extract_jsp_query(jp, strategy, true, nentries, extra_data); + + if (!entries) + *searchMode = GIN_SEARCH_MODE_ALL; + } + else + { + elog(ERROR, "unrecognized strategy number: %d", strategy); + entries = NULL; + } + + PG_RETURN_POINTER(entries); +} + +Datum +gin_consistent_jsonb_path(PG_FUNCTION_ARGS) +{ + bool *check = (bool *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + bool *recheck = (bool *) PG_GETARG_POINTER(5); + bool res = true; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * jsonb_path_ops is necessarily lossy, not only because of hash + * collisions but also because it doesn't preserve complete + * information about the structure of the JSON object. Besides, there + * are some special rules around the containment of raw scalars in + * arrays that are not handled here. So we must always recheck a + * match. However, if not all of the keys are present, the tuple + * certainly doesn't match. + */ + *recheck = true; + for (i = 0; i < nkeys; i++) + { + if (!check[i]) + { + res = false; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + *recheck = true; + + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + false) != GIN_FALSE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_BOOL(res); +} + +Datum +gin_triconsistent_jsonb_path(PG_FUNCTION_ARGS) +{ + GinTernaryValue *check = (GinTernaryValue *) PG_GETARG_POINTER(0); + StrategyNumber strategy = PG_GETARG_UINT16(1); + + /* Jsonb *query = PG_GETARG_JSONB_P(2); */ + int32 nkeys = PG_GETARG_INT32(3); + Pointer *extra_data = (Pointer *) PG_GETARG_POINTER(4); + GinTernaryValue res = GIN_MAYBE; + int32 i; + + if (strategy == JsonbContainsStrategyNumber) + { + /* + * Note that we never return GIN_TRUE, only GIN_MAYBE or GIN_FALSE; + * this corresponds to always forcing recheck in the regular + * consistent function, for the reasons listed there. + */ + for (i = 0; i < nkeys; i++) + { + if (check[i] == GIN_FALSE) + { + res = GIN_FALSE; + break; + } + } + } + else if (strategy == JsonbJsonpathPredicateStrategyNumber || + strategy == JsonbJsonpathExistsStrategyNumber) + { + if (nkeys > 0) + { + Assert(extra_data && extra_data[0]); + res = execute_jsp_gin_node((JsonPathGinNode *) extra_data[0], check, + true); + + /* Should always recheck the result */ + if (res == GIN_TRUE) + res = GIN_MAYBE; + } + } + else + elog(ERROR, "unrecognized strategy number: %d", strategy); + + PG_RETURN_GIN_TERNARY_VALUE(res); +} + +/* + * Construct a jsonb_ops GIN key from a flag byte and a textual representation + * (which need not be null-terminated). This function is responsible + * for hashing overlength text representations; it will add the + * JGINFLAG_HASHED bit to the flag value if it does that. + */ +static Datum +make_text_key(char flag, const char *str, int len) +{ + text *item; + char hashbuf[10]; + + if (len > JGIN_MAXLENGTH) + { + uint32 hashval; + + hashval = DatumGetUInt32(hash_any((const unsigned char *) str, len)); + snprintf(hashbuf, sizeof(hashbuf), "%08x", hashval); + str = hashbuf; + len = 8; + flag |= JGINFLAG_HASHED; + } + + /* + * Now build the text Datum. For simplicity we build a 4-byte-header + * varlena text Datum here, but we expect it will get converted to short + * header format when stored in the index. + */ + item = (text *) palloc(VARHDRSZ + len + 1); + SET_VARSIZE(item, VARHDRSZ + len + 1); + + *VARDATA(item) = flag; + + memcpy(VARDATA(item) + 1, str, len); + + return PointerGetDatum(item); +} + +/* + * Create a textual representation of a JsonbValue that will serve as a GIN + * key in a jsonb_ops index. is_key is true if the JsonbValue is a key, + * or if it is a string array element (since we pretend those are keys, + * see jsonb.h). + */ +static Datum +make_scalar_key(const JsonbValue *scalarVal, bool is_key) +{ + Datum item; + char *cstr; + + switch (scalarVal->type) + { + case jbvNull: + Assert(!is_key); + item = make_text_key(JGINFLAG_NULL, "", 0); + break; + case jbvBool: + Assert(!is_key); + item = make_text_key(JGINFLAG_BOOL, + scalarVal->val.boolean ? "t" : "f", 1); + break; + case jbvNumeric: + Assert(!is_key); + + /* + * A normalized textual representation, free of trailing zeroes, + * is required so that numerically equal values will produce equal + * strings. + * + * It isn't ideal that numerics are stored in a relatively bulky + * textual format. However, it's a notationally convenient way of + * storing a "union" type in the GIN B-Tree, and indexing Jsonb + * strings takes precedence. + */ + cstr = numeric_normalize(scalarVal->val.numeric); + item = make_text_key(JGINFLAG_NUM, cstr, strlen(cstr)); + pfree(cstr); + break; + case jbvString: + item = make_text_key(is_key ? JGINFLAG_KEY : JGINFLAG_STR, + scalarVal->val.string.val, + scalarVal->val.string.len); + break; + default: + elog(ERROR, "unrecognized jsonb scalar type: %d", scalarVal->type); + item = 0; /* keep compiler quiet */ + break; + } + + return item; +} |