Adding upstream version 1:10.11.6.upstream/1%10.11.6

Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-13 12:24:36 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-13 12:24:36 +0000
commit: 06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch)
tree: e2c7b5777f728320e5b5542b6213fd3591ba51e2 /include/json_lib.h
parent: Initial commit. (diff)
download: mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz
mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip
1 files changed, 451 insertions, 0 deletions
diff --git a/include/json_lib.h b/include/json_lib.h
new file mode 100644
index 00000000..6342e36a
--- /dev/null
+++ b/include/json_lib.h
@@ -0,0 +1,451 @@
+#ifndef JSON_LIB_INCLUDED
+#define JSON_LIB_INCLUDED
+
+#include <my_sys.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#define JSON_DEPTH_LIMIT 32
+
+/*
+  When an error happens, the c_next of the JSON engine contains the
+  character that caused the error, and the c_str is the position
+  in the string where the error occurred.
+*/
+enum json_errors {
+  JE_BAD_CHR= -1,      /* Invalid character, charset handler cannot read it. */
+
+  JE_NOT_JSON_CHR= -2, /* Met a character not used in JSON. */
+                       /* ASCII 00-08 for instance.         */
+
+  JE_EOS= -3,          /* Unexpected end of string. */
+
+  JE_SYN= -4,          /* The next character breaks the JSON syntax. */
+
+  JE_STRING_CONST= -5, /* Character disallowed in string constant. */
+
+  JE_ESCAPING= -6,     /* Error in the escaping. */
+
+  JE_DEPTH= -7,        /* The limit on the JSON depth was overrun. */
+};
+
+
+typedef struct st_json_string_t
+{
+  const uchar *c_str;    /* Current position in JSON string */
+  const uchar *str_end;  /* The end of the string. */
+  my_wc_t c_next;        /* UNICODE of the last read character */
+  int c_next_len;        /* Length of the last read character. */
+  int error;             /* error code. */
+
+  CHARSET_INFO *cs;      /* Character set of the JSON string. */
+
+  my_charset_conv_mb_wc wc; /* UNICODE conversion function. */
+                            /* It's taken out of the cs just to speed calls. */
+} json_string_t;
+
+
+void json_string_set_cs(json_string_t *s, CHARSET_INFO *i_cs);
+void json_string_set_str(json_string_t *s,
+                         const uchar *str, const uchar *end);
+#define json_next_char(j) \
+  ((j)->c_next_len= (j)->wc((j)->cs, &(j)->c_next, (j)->c_str, (j)->str_end))
+#define json_eos(j) ((j)->c_str >= (j)->str_end)
+/*
+  json_read_string_const_chr() reads the next character of the string
+  constant and saves it to the js->c_next.
+  It takes into account possible escapings, so if for instance
+  the string is '\b', json_read_string_const_chr() sets c_next to 8.
+*/
+int json_read_string_const_chr(json_string_t *js);
+
+
+/*
+  Various JSON-related operations expect JSON path as a parameter.
+  The path is a string like this "$.keyA[2].*"
+  The path itself is a number of steps specifying either a key or a position
+  in an array. Some of them can be wildcards.
+  So the representation of the JSON path is the json_path_t structure
+  containing an array of json_path_step_t objects.
+*/
+
+
+/* Path step types - actually bitmasks, to allow '&' and '|' operations. */
+enum json_path_step_types
+{
+  JSON_PATH_KEY_NULL=0,
+  JSON_PATH_KEY=1,   /* Must be equal to JSON_VALUE_OBJECT. */
+  JSON_PATH_ARRAY=2, /* Must be equal to JSON_VALUE_ARRAY. */
+  JSON_PATH_KEY_OR_ARRAY=3, /* JSON_PATH_KEY | JSON_PATH_ARRAY */
+  JSON_PATH_WILD=4, /* Step like .* or [*] */
+  JSON_PATH_DOUBLE_WILD=8, /* Step like **.k or **[1] */
+  JSON_PATH_KEY_WILD= 1+4, /* JSON_PATH_KEY | JSON_PATH_WILD */
+  JSON_PATH_KEY_DOUBLEWILD= 1+8, /* JSON_PATH_KEY | JSON_PATH_DOUBLE_WILD */
+  JSON_PATH_ARRAY_WILD= 2+4, /* JSON_PATH_ARRAY | JSON_PATH_WILD */
+  JSON_PATH_ARRAY_DOUBLEWILD= 2+8, /* JSON_PATH_ARRAY | JSON_PATH_DOUBLE_WILD */
+  JSON_PATH_NEGATIVE_INDEX= 16,
+  JSON_PATH_ARRAY_RANGE= 32
+};
+
+
+typedef struct st_json_path_step_t
+{
+  enum json_path_step_types type;  /* The type of the step -   */
+                                   /* see json_path_step_types */
+  const uchar *key; /* Pointer to the beginning of the key. */
+  const uchar *key_end;  /* Pointer to the end of the key. */
+  int n_item;  /* Item number in an array. No meaning for the key step. */
+  int n_item_end; /* Last index of the range. */
+} json_path_step_t;
+
+
+typedef struct st_json_path_t
+{
+  json_string_t s;  /* The string to be parsed. */
+  json_path_step_t steps[JSON_DEPTH_LIMIT]; /* Steps of the path. */
+  json_path_step_t *last_step; /* Points to the last step. */
+
+  int mode_strict; /* TRUE if the path is specified as 'strict' */
+  enum json_path_step_types types_used; /* The '|' of the 'type' of every step */
+} json_path_t;
+
+
+int json_path_setup(json_path_t *p,
+                    CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
+
+
+/*
+  The set of functions and structures below provides interface
+  to the JSON text parser.
+  Running the parser normally goes like this:
+
+    json_engine_t j_eng;   // structure keeps parser's data
+    json_scan_start(&j_eng, cs, str, end); // begin the parsing
+
+    do
+    {
+      // The parser has read next piece of JSON
+      // and set fields of j_eng structure accordingly.
+      // So let's see what we have:
+      switch (j_eng.state)
+      {
+        case JST_KEY:
+           // Handle key name. See the json_read_keyname_chr()
+           // Probably compare it with the keyname we're looking for
+        case JST_VALUE:
+           // Handle value. It is either value of the key or an array item.
+           // see the json_read_value()
+        case JST_OBJ_START:
+          // parser found an object (the '{' in JSON)
+        case JST_OBJ_END:
+          // parser found the end of the object (the '}' in JSON)
+        case JST_ARRAY_START:
+          // parser found an array (the '[' in JSON)
+        case JST_ARRAY_END:
+          // parser found the end of the array (the ']' in JSON)
+
+      };
+    } while (json_scan_next(&j_eng) == 0);  // parse next structure
+
+    
+    if (j_eng.s.error)  // we need to check why the loop ended.
+                        // Did we get to the end of JSON, or came upon error.
+    {
+       signal_error_in_JSON()
+    }
+
+
+  Parts of JSON can be quickly skipped. If we are not interested
+  in a particular key, we can just skip it with json_skip_key() call.
+  Similarly json_skip_level() goes right to the end of an object
+  or an array.
+*/
+
+
+/* These are JSON parser states that user can expect and handle.  */
+enum json_states {
+  JST_VALUE,       /* value found      */
+  JST_KEY,         /* key found        */
+  JST_OBJ_START,   /* object           */
+  JST_OBJ_END,     /* object ended     */
+  JST_ARRAY_START, /* array            */
+  JST_ARRAY_END,   /* array ended      */
+  NR_JSON_USER_STATES
+};
+
+
+enum json_value_types
+{
+  JSON_VALUE_UNINITIALIZED=0,
+  JSON_VALUE_OBJECT=1,
+  JSON_VALUE_ARRAY=2,
+  JSON_VALUE_STRING=3,
+  JSON_VALUE_NUMBER=4,
+  JSON_VALUE_TRUE=5,
+  JSON_VALUE_FALSE=6,
+  JSON_VALUE_NULL=7
+};
+
+
+enum json_num_flags
+{
+  JSON_NUM_NEG=1,        /* Number is negative. */
+  JSON_NUM_FRAC_PART=2,  /* The fractional part is not empty. */
+  JSON_NUM_EXP=4,        /* The number has the 'e' part. */
+};
+
+
+typedef struct st_json_engine_t
+{
+  json_string_t s;  /* String to parse. */
+  int sav_c_len;    /* Length of the current character.
+                       Can be more than 1 for multibyte charsets */
+
+  int state; /* The state of the parser. One of 'enum json_states'.
+                It tells us what construction of JSON we've just read. */
+
+  /* These values are only set after the json_read_value() call. */
+  enum json_value_types value_type; /* type of the value.*/
+  const uchar *value;      /* Points to the value. */
+  const uchar *value_begin;/* Points to where the value starts in the JSON. */
+  int value_escaped;       /* Flag telling if the string value has escaping.*/
+  uint num_flags;  /* The details of the JSON_VALUE_NUMBER: whether it is
+                      negative, has a fractional part or an 'e' part.
+                      See the enum json_num_flags. */
+
+  /*
+    In most cases the 'value' and 'value_begin' are equal.
+    They only differ if the value is a string constant. Then 'value_begin'
+    points to the starting quotation mark, while the 'value' - to
+    the first character of the string.
+  */
+
+  const uchar *value_end; /* Points to the next character after the value. */
+  int value_len; /* The length of the value. Does not count quotations for */
+                 /* string constants. */
+
+  int stack[JSON_DEPTH_LIMIT]; /* Keeps the stack of nested JSON structures. */
+  int stack_p;                 /* The 'stack' pointer (see json_get_level()). */
+  volatile uchar *killed_ptr;  /* NOTE(review): presumably an external kill flag - confirm. */
+} json_engine_t;
+
+
+int json_scan_start(json_engine_t *je,
+                        CHARSET_INFO *i_cs, const uchar *str, const uchar *end);
+int json_scan_next(json_engine_t *j);
+
+
+/*
+  json_read_keyname_chr() function assists parsing the name of a JSON key.
+  It can only be called when the json_engine is in JST_KEY.
+  The json_read_keyname_chr() reads one character of the name of the key,
+  and puts it in j_eng.s.c_next.
+  Typical usage is like this:
+
+  if (j_eng.state == JST_KEY)
+  {
+    while (json_read_keyname_chr(&j_eng) == 0)
+    {
+      //handle next character i.e. match it against the pattern
+    }
+  }
+*/
+
+int json_read_keyname_chr(json_engine_t *j);
+
+
+/*
+  Check if the name of the current JSON key matches
+  the step of the path.
+*/
+int json_key_matches(json_engine_t *je, json_string_t *k);
+
+
+/*
+  json_read_value() function parses the JSON value syntax,
+  so that we can handle the value of a key or an array item.
+  It only returns meaningful result when the engine is in
+  the JST_VALUE state.
+
+  Typical usage is like this:
+
+  if (j_eng.state ==  JST_VALUE)
+  {
+    json_read_value(&j_eng);
+    switch(j_eng.value_type)
+    {
+      case JSON_VALUE_STRING:
+        // get the string
+        str= j_eng.value;
+        str_length= j_eng.value_len;
+      case JSON_VALUE_NUMBER:
+        // get the number
+      ... etc
+    }
+*/
+int json_read_value(json_engine_t *j);
+
+
+/*
+  json_skip_key() makes parser skip the content of the current
+  JSON key quickly.
+  It can be called only when the json_engine state is JST_KEY.
+  Typical usage is:
+
+  if (j_eng.state == JST_KEY)
+  {
+    if (key_does_not_match(j_eng))
+      json_skip_key(&j_eng);
+  }
+*/
+
+int json_skip_key(json_engine_t *j);
+
+
+typedef const int *json_level_t;
+
+/*
+  json_skip_to_level() makes parser quickly get out of nested
+  objects and arrays. It is used when we're not interested in what is
+  there in the rest of these structures.
+  The 'level' (the engine's stack_p) should be remembered in advance.
+        int level= json_get_level(j);
+        .... // getting into the nested JSON structures
+        json_skip_to_level(j, level);
+*/
+#define json_get_level(j) ((j)->stack_p)
+
+int json_skip_to_level(json_engine_t *j, int level);
+
+/*
+  json_skip_level() works as above with just current structure.
+  So it gets to the end of the current JSON array or object.
+*/
+#define json_skip_level(json_engine) \
+  json_skip_to_level((json_engine), (json_engine)->stack_p)
+
+
+/*
+  works as json_skip_level() but also counts items on the current
+  level skipped.
+*/
+int json_skip_level_and_count(json_engine_t *j, int *n_items_skipped);
+
+#define json_skip_array_item json_skip_key
+
+/*
+  Checks if the current value is of scalar type -
+  neither an OBJECT nor an ARRAY (JSON_VALUE_UNINITIALIZED is not scalar).
+*/
+#define json_value_scalar(je)  ((je)->value_type > JSON_VALUE_ARRAY)
+
+
+/*
+  Look for the JSON PATH in the json string.
+  Function can be called several times with same JSON/PATH to
+  find multiple matches.
+  On the first call, the json_engine_t parameter should be
+  initialized with the JSON string, and the json_path_t with the JSON path
+  appropriately. The 'p_cur_step' should point at the first
+  step of the path.
+  The 'array_counters' is the array of JSON_DEPTH_LIMIT size.
+  It stores the array counters of the parsed JSON.
+  If function returns 0, it means it found the match. The position of
+  the match is je->s.c_str. Then we can call the json_find_path()
+  with same engine/path/p_cur_step to get the next match.
+  Non-zero return means no matches found.
+  Check je->s.error to see if there was an error in JSON.
+*/
+int json_find_path(json_engine_t *je,
+                   json_path_t *p, json_path_step_t **p_cur_step,
+                   int *array_counters);
+
+
+typedef struct st_json_find_paths_t
+{
+  uint n_paths;
+  json_path_t *paths;
+  uint cur_depth;
+  uint *path_depths;
+  int array_counters[JSON_DEPTH_LIMIT];
+} json_find_paths_t;
+
+
+int json_find_paths_first(json_engine_t *je, json_find_paths_t *state,
+                          uint n_paths, json_path_t *paths, uint *path_depths);
+int json_find_paths_next(json_engine_t *je, json_find_paths_t *state);
+
+
+#define JSON_ERROR_OUT_OF_SPACE  (-1)
+#define JSON_ERROR_ILLEGAL_SYMBOL (-2)
+
+/*
+  Convert JSON string constant into ordinary string constant
+  which can involve unpacking json escapes and changing character set.
+  Returns negative integer in the case of an error,
+  the length of the result otherwise.
+*/
+int json_unescape(CHARSET_INFO *json_cs,
+                  const uchar *json_str, const uchar *json_end,
+                  CHARSET_INFO *res_cs,
+                  uchar *res, uchar *res_end);
+
+/*
+  Convert a string constant into JSON string constant.
+  This can involve appropriate escaping and changing the character set.
+  Returns the length of the result on success,
+  on error returns a negative error code.
+  Some error codes:
+    JSON_ERROR_OUT_OF_SPACE    Not enough space in the provided buffer
+    JSON_ERROR_ILLEGAL_SYMBOL  Source symbol cannot be represented in JSON
+*/
+int json_escape(CHARSET_INFO *str_cs, const uchar *str, const uchar *str_end,
+                CHARSET_INFO *json_cs, uchar *json, uchar *json_end);
+
+
+/*
+  Appends the ASCII string to the json with the charset conversion.
+*/
+int json_append_ascii(CHARSET_INFO *json_cs,
+                      uchar *json, uchar *json_end,
+                      const uchar *ascii, const uchar *ascii_end);
+
+
+/*
+  Scan the JSON and return paths met one-by-one.
+     json_get_path_start(&p)
+     while (json_get_path_next(&p))
+     {
+       handle_the_next_path();
+     }
+*/
+
+int json_get_path_start(json_engine_t *je, CHARSET_INFO *i_cs,
+                        const uchar *str, const uchar *end,
+                        json_path_t *p);
+
+
+int json_get_path_next(json_engine_t *je, json_path_t *p);
+
+int json_path_compare(const json_path_t *a, const json_path_t *b,
+                      enum json_value_types vt, const int* array_size_counter);
+
+int json_valid(const char *js, size_t js_len, CHARSET_INFO *cs);
+
+int json_locate_key(const char *js, const char *js_end,
+                    const char *kname,
+                    const char **key_start, const char **key_end,
+                    int *comma_pos);
+
+int json_normalize(DYNAMIC_STRING *result,
+                   const char *s, size_t size, CHARSET_INFO *cs);
+
+int json_skip_array_and_count(json_engine_t *j, int* n_item);
+
+#ifdef  __cplusplus
+}
+#endif
+
+#endif /* JSON_LIB_INCLUDED */
author	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-13 12:24:36 +0000
committer	Daniel Baumann <daniel.baumann@progress-linux.org>	2024-04-13 12:24:36 +0000
commit	06eaf7232e9a920468c0f8d74dcf2fe8b555501c (patch)
tree	e2c7b5777f728320e5b5542b6213fd3591ba51e2 /include/json_lib.h
parent	Initial commit. (diff)
download	mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.tar.xz mariadb-06eaf7232e9a920468c0f8d74dcf2fe8b555501c.zip