summary | refs | log | tree | commit | diff | stats
path: root/comm/third_party/json-c/json_tokener.c
diff options
context:
space:
mode:
Diffstat (limited to 'comm/third_party/json-c/json_tokener.c')
-rw-r--r-- comm/third_party/json-c/json_tokener.c 1300
1 files changed, 1300 insertions, 0 deletions
diff --git a/comm/third_party/json-c/json_tokener.c b/comm/third_party/json-c/json_tokener.c
new file mode 100644
index 0000000000..0c09b66e8d
--- /dev/null
+++ b/comm/third_party/json-c/json_tokener.c
@@ -0,0 +1,1300 @@
+/*
+ * $Id: json_tokener.c,v 1.20 2006/07/25 03:24:50 mclark Exp $
+ *
+ * Copyright (c) 2004, 2005 Metaparadigm Pte. Ltd.
+ * Michael Clark <michael@metaparadigm.com>
+ *
+ * This library is free software; you can redistribute it and/or modify
+ * it under the terms of the MIT license. See COPYING for details.
+ *
+ *
+ * Copyright (c) 2008-2009 Yahoo! Inc. All rights reserved.
+ * The copyrights to the contents of this file are licensed under the MIT License
+ * (https://www.opensource.org/licenses/mit-license.php)
+ */
+
+#include "config.h"
+
+#include "math_compat.h"
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stddef.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "debug.h"
+#include "json_inttypes.h"
+#include "json_object.h"
+#include "json_object_private.h"
+#include "json_tokener.h"
+#include "json_util.h"
+#include "printbuf.h"
+#include "strdup_compat.h"
+
+#ifdef HAVE_LOCALE_H
+#include <locale.h>
+#endif /* HAVE_LOCALE_H */
+#ifdef HAVE_XLOCALE_H
+#include <xlocale.h>
+#endif
+#ifdef HAVE_STRINGS_H
+#include <strings.h>
+#endif /* HAVE_STRINGS_H */
+
+/* Convert an ASCII hex digit character to its numeric value (0-15).
+ * '0'-'9' are mapped by subtracting '0'; for letters, the low three bits of
+ * 'a'-'f' and 'A'-'F' are 1-6, so adding 9 yields 10-15.
+ * The argument appears more than once in the expansion and is not validated:
+ * the tokener checks is_hex_char() before using this macro.
+ */
+#define jt_hexdigit(x) (((x) <= '9') ? (x) - '0' : ((x)&7) + 9)
+
+#if !HAVE_STRNCASECMP && defined(_MSC_VER)
+/* MSC has the version as _strnicmp */
+#define strncasecmp _strnicmp
+#elif !HAVE_STRNCASECMP
+#error You do not have strncasecmp on your system.
+#endif /* HAVE_STRNCASECMP */
+
+#if defined(_MSC_VER) && (_MSC_VER <= 1800)
+/* VS2013 doesn't know about "inline" */
+#define inline __inline
+#elif defined(AIX_CC)
+/* With AIX_CC defined, "inline" expands to nothing. */
+#define inline
+#endif
+
+/* The following helper functions are used to speed up parsing. They
+ * are faster than their ctype counterparts because they assume that
+ * the input is in ASCII and that the locale is set to "C". The
+ * compiler will also inline these functions, providing an additional
+ * speedup by saving on function calls.
+ */
+/* Return 1 if c is one of the four whitespace characters the tokener skips
+ * (space, horizontal tab, line feed, carriage return), otherwise 0.
+ */
+static inline int is_ws_char(char c)
+{
+    switch (c)
+    {
+    case ' ':
+    case '\t':
+    case '\n':
+    case '\r':
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+/* Return 1 if c is an ASCII hexadecimal digit (0-9, a-f, A-F), otherwise 0. */
+static inline int is_hex_char(char c)
+{
+    if (c >= '0' && c <= '9')
+        return 1;
+    if (c >= 'a' && c <= 'f')
+        return 1;
+    return c >= 'A' && c <= 'F';
+}
+
+/* Use C99 NAN by default; if not available, nan("") should work too. */
+#ifndef NAN
+#define NAN nan("")
+#endif /* !NAN */
+
+/* Spellings of the literals recognized by the tokener. Each *_len constant is
+ * the length of the matching string without its terminating NUL (sizeof - 1).
+ */
+static const char json_null_str[] = "null";
+static const int json_null_str_len = sizeof(json_null_str) - 1;
+static const char json_inf_str[] = "Infinity";
+/* Swapped case "Infinity" to avoid need to call tolower() on input chars: */
+static const char json_inf_str_invert[] = "iNFINITY";
+/* Note: unsigned, unlike the other *_len constants. */
+static const unsigned int json_inf_str_len = sizeof(json_inf_str) - 1;
+static const char json_nan_str[] = "NaN";
+static const int json_nan_str_len = sizeof(json_nan_str) - 1;
+static const char json_true_str[] = "true";
+static const int json_true_str_len = sizeof(json_true_str) - 1;
+static const char json_false_str[] = "false";
+static const int json_false_str_len = sizeof(json_false_str) - 1;
+
+/* Human-readable messages, indexed by the numeric value of
+ * enum json_tokener_error (see json_tokener_error_desc()).
+ * NOTE(review): the order presumably has to match the enum declaration in
+ * json_tokener.h -- confirm when adding or reordering entries.
+ */
+/* clang-format off */
+static const char *json_tokener_errors[] = {
+ "success",
+ "continue",
+ "nesting too deep",
+ "unexpected end of data",
+ "unexpected character",
+ "null expected",
+ "boolean expected",
+ "number expected",
+ "array value separator ',' expected",
+ "quoted object property name expected",
+ "object property name separator ':' expected",
+ "object value separator ',' expected",
+ "invalid string sequence",
+ "expected comment",
+ "invalid utf-8 string",
+ "buffer size overflow"
+};
+/* clang-format on */
+
+/**
+ * Validate one byte of a UTF-8 sequence; applied to every input character
+ * when the JSON_TOKENER_VALIDATE_UTF8 flag is set.
+ * nBytes carries the number of continuation bytes still expected between calls.
+ * Returns 0 if the byte is not valid at this position, 1 otherwise.
+ */
+static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes);
+
+/* Parse the len-byte text in buf as a double; returns 0 on success, 1 otherwise. */
+static int json_tokener_parse_double(const char *buf, int len, double *retval);
+
+/**
+ * Map a tokener error code to a static, human-readable message.
+ *
+ * @param jerr error code, e.g. as returned by json_tokener_get_error()
+ * @return the matching entry of json_tokener_errors, or a fixed
+ *         "Unknown error" message when jerr is outside the table.
+ */
+const char *json_tokener_error_desc(enum json_tokener_error jerr)
+{
+    const int n_messages =
+        (int)(sizeof(json_tokener_errors) / sizeof(json_tokener_errors[0]));
+    const int idx = (int)jerr;
+
+    if (idx >= 0 && idx < n_messages)
+        return json_tokener_errors[idx];
+
+    return "Unknown error, "
+           "invalid json_tokener_error value passed to json_tokener_error_desc()";
+}
+
+/**
+ * Return the error code currently stored in the tokener (tok->err).
+ * tok is dereferenced without a NULL check.
+ */
+enum json_tokener_error json_tokener_get_error(struct json_tokener *tok)
+{
+ return tok->err;
+}
+
+/* Stuff for decoding unicode sequences */
+/* A UTF-16 code unit is a high surrogate in 0xD800-0xDBFF and a low surrogate
+ * in 0xDC00-0xDFFF; masking with 0xFC00 tests exactly those ranges.
+ */
+#define IS_HIGH_SURROGATE(uc) (((uc)&0xFC00) == 0xD800)
+#define IS_LOW_SURROGATE(uc) (((uc)&0xFC00) == 0xDC00)
+/* Combine the low 10 bits of each half and add 0x10000 to get the code point. */
+#define DECODE_SURROGATE_PAIR(hi, lo) ((((hi)&0x3FF) << 10) + ((lo)&0x3FF) + 0x10000)
+/* UTF-8 encoding of U+FFFD, appended in place of unpaired surrogates. */
+static unsigned char utf8_replacement_char[3] = {0xEF, 0xBF, 0xBD};
+
+/**
+ * Allocate and initialize a tokener that can track up to `depth` nesting
+ * levels.
+ *
+ * @param depth number of entries to allocate in tok->stack; must be at least 1
+ *              because level 0 is always used, even for a scalar document.
+ * @return a new tokener, to be released with json_tokener_free(), or NULL if
+ *         depth is less than 1 or an allocation fails.
+ */
+struct json_tokener *json_tokener_new_ex(int depth)
+{
+    struct json_tokener *tok;
+
+    /* json_tokener_reset() below always touches stack[0]. With depth == 0,
+     * calloc() may hand back a zero-sized block and that access would be an
+     * out-of-bounds write; a negative depth would be converted to a huge
+     * size_t. Reject both up front.
+     */
+    if (depth < 1)
+        return NULL;
+
+    tok = calloc(1, sizeof(*tok));
+    if (!tok)
+        return NULL;
+    tok->stack = calloc((size_t)depth, sizeof(*tok->stack));
+    if (!tok->stack)
+    {
+        free(tok);
+        return NULL;
+    }
+    tok->pb = printbuf_new();
+    if (!tok->pb)
+    {
+        free(tok->stack);
+        free(tok);
+        return NULL;
+    }
+    tok->max_depth = depth;
+    /* tok->depth is 0 thanks to calloc(), so this initializes level 0 only. */
+    json_tokener_reset(tok);
+    return tok;
+}
+
+/**
+ * Create a tokener with the default maximum nesting depth
+ * (JSON_TOKENER_DEFAULT_DEPTH). Returns whatever json_tokener_new_ex()
+ * returns, i.e. NULL when it fails.
+ */
+struct json_tokener *json_tokener_new(void)
+{
+ return json_tokener_new_ex(JSON_TOKENER_DEFAULT_DEPTH);
+}
+
+/**
+ * Release a tokener and everything it owns: any partially parsed objects and
+ * field names held in the stack, the scratch printbuf, the stack itself and
+ * the tokener structure.
+ *
+ * @param tok tokener to destroy; NULL is accepted and ignored, matching
+ *            json_tokener_reset().
+ */
+void json_tokener_free(struct json_tokener *tok)
+{
+    /* json_tokener_reset() already tolerates NULL, but the member accesses
+     * below would not.
+     */
+    if (!tok)
+        return;
+
+    json_tokener_reset(tok);
+    if (tok->pb)
+        printbuf_free(tok->pb);
+    free(tok->stack);
+    free(tok);
+}
+
+/* Return one stack level to its initial condition: skip whitespace first,
+ * then expect the start of a value. The reference to the level's current
+ * object is dropped and its pending field name is freed.
+ */
+static void json_tokener_reset_level(struct json_tokener *tok, int depth)
+{
+    struct json_tokener_srec *level = &tok->stack[depth];
+
+    level->state = json_tokener_state_eatws;
+    level->saved_state = json_tokener_state_start;
+
+    json_object_put(level->current);
+    level->current = NULL;
+
+    free(level->obj_field_name);
+    level->obj_field_name = NULL;
+}
+
+/**
+ * Discard any parse in progress so the tokener can be reused.
+ * Every stack level from the current depth down to 0 is reset, the depth is
+ * set back to 0 and the error code to json_tokener_success.
+ *
+ * @param tok tokener to reset; NULL is ignored.
+ */
+void json_tokener_reset(struct json_tokener *tok)
+{
+    int level;
+
+    if (tok == NULL)
+        return;
+
+    level = tok->depth;
+    while (level >= 0)
+    {
+        json_tokener_reset_level(tok, level);
+        level--;
+    }
+
+    tok->depth = 0;
+    tok->err = json_tokener_success;
+}
+
+/**
+ * Parse a NUL-terminated JSON string, discarding the error code.
+ *
+ * @param str text to parse
+ * @return whatever json_tokener_parse_verbose() returns for str.
+ */
+struct json_object *json_tokener_parse(const char *str)
+{
+    enum json_tokener_error unused_err;
+
+    return json_tokener_parse_verbose(str, &unused_err);
+}
+
+/**
+ * Parse a NUL-terminated JSON string with a temporary tokener and report the
+ * resulting error code.
+ *
+ * @param str   text to parse (handed to json_tokener_parse_ex() with len -1)
+ * @param error receives the tokener's final error code; it is left untouched
+ *              when the temporary tokener cannot be allocated
+ * @return the parsed object, or NULL if the tokener could not be created or
+ *         parsing did not end with json_tokener_success.
+ */
+struct json_object *json_tokener_parse_verbose(const char *str, enum json_tokener_error *error)
+{
+    struct json_object *result;
+    struct json_tokener *parser = json_tokener_new();
+
+    if (parser == NULL)
+        return NULL;
+
+    result = json_tokener_parse_ex(parser, str, -1);
+    *error = parser->err;
+
+    /* Trailing input is deliberately not treated as an error here. Also
+     * failing when json_tokener_get_parse_end(tok) != strlen(str) would be a
+     * more sensible default, and cause parsing things like "null123" to fail
+     * when the caller can't know where the parsing left off, but starting to
+     * fail would be a notable behaviour change. Save for a 1.0 release.
+     */
+    if (parser->err != json_tokener_success)
+    {
+        if (result != NULL)
+            json_object_put(result);
+        result = NULL;
+    }
+
+    json_tokener_free(parser);
+    return result;
+}
+
+/* Shorthand for the fields of the stack entry at the current depth.
+ * From here on these plain identifiers are macros, so they cannot be used as
+ * ordinary variable names in the code below; each expansion re-reads
+ * tok->depth.
+ */
+#define state tok->stack[tok->depth].state
+#define saved_state tok->stack[tok->depth].saved_state
+#define current tok->stack[tok->depth].current
+#define obj_field_name tok->stack[tok->depth].obj_field_name
+
+/* Optimization:
+ * json_tokener_parse_ex() consumed a lot of CPU in its main loop,
+ * iterating character by character. A large performance boost is
+ * achieved by using tighter loops to locally handle units such as
+ * comments and strings. Loops that handle an entire token within
+ * their scope also gather entire strings and pass them to
+ * printbuf_memappend() in a single call, rather than calling
+ * printbuf_memappend() one char at a time.
+ *
+ * PEEK_CHAR() and ADVANCE_CHAR() macros are used for code that is
+ * common to both the main loop and the tighter loops.
+ */
+
+/* PEEK_CHAR(dest, tok) macro:
+ * Peeks at the current char and stores it in dest.
+ * Returns 1 on success, sets tok->err and returns 0 if no more chars.
+ * When input runs out, tok->err becomes json_tokener_success only if a
+ * complete top-level value has been parsed, otherwise json_tokener_continue.
+ * With JSON_TOKENER_VALIDATE_UTF8 set, an invalid byte sets
+ * json_tokener_error_parse_utf8_string and also returns 0.
+ * Implicit inputs: str, len, nBytesp vars
+ */
+#define PEEK_CHAR(dest, tok) \
+ (((tok)->char_offset == len) \
+ ? (((tok)->depth == 0 && state == json_tokener_state_eatws && \
+ saved_state == json_tokener_state_finish) \
+ ? (((tok)->err = json_tokener_success), 0) \
+ : (((tok)->err = json_tokener_continue), 0)) \
+ : (((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && \
+ (!json_tokener_validate_utf8(*str, nBytesp))) \
+ ? ((tok->err = json_tokener_error_parse_utf8_string), 0) \
+ : (((dest) = *str), 1)))
+
+/* ADVANCE_CHAR() macro:
+ * Increments str & tok->char_offset.
+ * For convenience of existing conditionals, returns the old value of c (0 on eof)
+ * Implicit inputs: c var
+ */
+#define ADVANCE_CHAR(str, tok) (++(str), ((tok)->char_offset)++, c)
+
+/* End optimization macro defs */
+
+/* End optimization macro defs */
+
+/**
+ * Incremental JSON parser: consume characters from str and drive the
+ * per-level state machine kept in tok->stack.
+ *
+ * tok->char_offset and tok->err are reset at the start of every call. The
+ * stack levels and the scratch buffer tok->pb are not, so a value can be fed
+ * in several pieces: a call that runs out of input in the middle of a value
+ * ends with tok->err set to json_tokener_continue.
+ *
+ * @param tok tokener holding the parser state, flags and scratch printbuf
+ * @param str input characters to parse
+ * @param len number of bytes available in str, or -1 to read until a '\0'
+ * @return the result of json_object_get() on the top-level value when
+ *         tok->err ends up as json_tokener_success, otherwise NULL.
+ *         A parsed "null" literal leaves the value pointer NULL, so callers
+ *         presumably need tok->err to tell that apart from a failure.
+ *
+ * While the state machine runs, LC_NUMERIC is switched to "C" (when built
+ * with uselocale or setlocale support) and restored at the out: label.
+ */
+struct json_object *json_tokener_parse_ex(struct json_tokener *tok, const char *str, int len)
+{
+ struct json_object *obj = NULL;
+ char c = '\1';
+ unsigned int nBytes = 0;
+ unsigned int *nBytesp = &nBytes;
+
+#ifdef HAVE_USELOCALE
+ locale_t oldlocale = uselocale(NULL);
+ locale_t newloc;
+#elif defined(HAVE_SETLOCALE)
+ char *oldlocale = NULL;
+#endif
+
+ tok->char_offset = 0;
+ tok->err = json_tokener_success;
+
+ /* this interface is presently not 64-bit clean due to the int len argument
+ * and the internal printbuf interface that takes 32-bit int len arguments
+ * so the function limits the maximum string size to INT32_MAX (2GB).
+ * If the function is called with len == -1 then strlen is called to check
+ * the string length is less than INT32_MAX (2GB)
+ */
+ if ((len < -1) || (len == -1 && strlen(str) > INT32_MAX))
+ {
+ tok->err = json_tokener_error_size;
+ return NULL;
+ }
+
+#ifdef HAVE_USELOCALE
+ {
+ locale_t duploc = duplocale(oldlocale);
+ newloc = newlocale(LC_NUMERIC_MASK, "C", duploc);
+ if (newloc == NULL)
+ {
+ freelocale(duploc);
+ /* NOTE(review): tok->err is still json_tokener_success here, so this
+ * failure is reported to the caller as NULL with a success code.
+ */
+ return NULL;
+ }
+ uselocale(newloc);
+ }
+#elif defined(HAVE_SETLOCALE)
+ {
+ char *tmplocale;
+ tmplocale = setlocale(LC_NUMERIC, NULL);
+ if (tmplocale)
+ oldlocale = strdup(tmplocale);
+ setlocale(LC_NUMERIC, "C");
+ }
+#endif
+
+ while (PEEK_CHAR(c, tok)) // Note: c might be '\0' !
+ {
+
+ /* Jumped to whenever the state changed and the same character c has to be
+ * dispatched again without advancing the input.
+ */
+ redo_char:
+ switch (state)
+ {
+
+ case json_tokener_state_eatws:
+ /* Advance until we change state */
+ while (is_ws_char(c))
+ {
+ if ((!ADVANCE_CHAR(str, tok)) || (!PEEK_CHAR(c, tok)))
+ goto out;
+ }
+ if (c == '/' && !(tok->flags & JSON_TOKENER_STRICT))
+ {
+ printbuf_reset(tok->pb);
+ printbuf_memappend_fast(tok->pb, &c, 1);
+ state = json_tokener_state_comment_start;
+ }
+ else
+ {
+ state = saved_state;
+ goto redo_char;
+ }
+ break;
+
+ case json_tokener_state_start:
+ switch (c)
+ {
+ case '{':
+ state = json_tokener_state_eatws;
+ saved_state = json_tokener_state_object_field_start;
+ current = json_object_new_object();
+ if (current == NULL)
+ goto out;
+ break;
+ case '[':
+ state = json_tokener_state_eatws;
+ saved_state = json_tokener_state_array;
+ current = json_object_new_array();
+ if (current == NULL)
+ goto out;
+ break;
+ case 'I':
+ case 'i':
+ state = json_tokener_state_inf;
+ printbuf_reset(tok->pb);
+ tok->st_pos = 0;
+ goto redo_char;
+ case 'N':
+ case 'n':
+ state = json_tokener_state_null; // or NaN
+ printbuf_reset(tok->pb);
+ tok->st_pos = 0;
+ goto redo_char;
+ case '\'':
+ if (tok->flags & JSON_TOKENER_STRICT)
+ {
+ /* in STRICT mode only double-quote are allowed */
+ tok->err = json_tokener_error_parse_unexpected;
+ goto out;
+ }
+ /* FALLTHRU */
+ case '"':
+ state = json_tokener_state_string;
+ printbuf_reset(tok->pb);
+ tok->quote_char = c;
+ break;
+ case 'T':
+ case 't':
+ case 'F':
+ case 'f':
+ state = json_tokener_state_boolean;
+ printbuf_reset(tok->pb);
+ tok->st_pos = 0;
+ goto redo_char;
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ case '-':
+ state = json_tokener_state_number;
+ printbuf_reset(tok->pb);
+ tok->is_double = 0;
+ goto redo_char;
+ default: tok->err = json_tokener_error_parse_unexpected; goto out;
+ }
+ break;
+
+ case json_tokener_state_finish:
+ /* A value is complete. At depth 0 parsing stops; otherwise the value is
+ * handed to the parent level through obj and this level is popped.
+ */
+ if (tok->depth == 0)
+ goto out;
+ obj = json_object_get(current);
+ json_tokener_reset_level(tok, tok->depth);
+ tok->depth--;
+ goto redo_char;
+
+ case json_tokener_state_inf: /* aka starts with 'i' (or 'I', or "-i", or "-I") */
+ {
+ /* If we were guaranteed to have len set, then we could (usually) handle
+ * the entire "Infinity" check in a single strncmp (strncasecmp), but
+ * since len might be -1 (i.e. "read until \0"), we need to check it
+ * a character at a time.
+ * Trying to handle it both ways would make this code considerably more
+ * complicated with likely little performance benefit.
+ */
+ int is_negative = 0;
+
+ /* Note: tok->st_pos must be 0 when state is set to json_tokener_state_inf */
+ while (tok->st_pos < (int)json_inf_str_len)
+ {
+ char inf_char = *str;
+ if (inf_char != json_inf_str[tok->st_pos] &&
+ ((tok->flags & JSON_TOKENER_STRICT) ||
+ inf_char != json_inf_str_invert[tok->st_pos])
+ )
+ {
+ tok->err = json_tokener_error_parse_unexpected;
+ goto out;
+ }
+ tok->st_pos++;
+ (void)ADVANCE_CHAR(str, tok);
+ if (!PEEK_CHAR(c, tok))
+ {
+ /* out of input chars, for now at least */
+ goto out;
+ }
+ }
+ /* We checked the full length of "Infinity", so create the object.
+ * When handling -Infinity, the number parsing code will have dropped
+ * the "-" into tok->pb for us, so check it now.
+ */
+ if (printbuf_length(tok->pb) > 0 && *(tok->pb->buf) == '-')
+ {
+ is_negative = 1;
+ }
+ current = json_object_new_double(is_negative ? -INFINITY : INFINITY);
+ if (current == NULL)
+ goto out;
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+ }
+ break;
+ case json_tokener_state_null: /* aka starts with 'n' */
+ {
+ /* Characters are appended to tok->pb one at a time and the prefix
+ * collected so far is compared against "null" and then "NaN".
+ */
+ int size;
+ int size_nan;
+ printbuf_memappend_fast(tok->pb, &c, 1);
+ size = json_min(tok->st_pos + 1, json_null_str_len);
+ size_nan = json_min(tok->st_pos + 1, json_nan_str_len);
+ if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ strncasecmp(json_null_str, tok->pb->buf, size) == 0) ||
+ (strncmp(json_null_str, tok->pb->buf, size) == 0))
+ {
+ if (tok->st_pos == json_null_str_len)
+ {
+ current = NULL;
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+ }
+ }
+ else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ strncasecmp(json_nan_str, tok->pb->buf, size_nan) == 0) ||
+ (strncmp(json_nan_str, tok->pb->buf, size_nan) == 0))
+ {
+ if (tok->st_pos == json_nan_str_len)
+ {
+ current = json_object_new_double(NAN);
+ if (current == NULL)
+ goto out;
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+ }
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_null;
+ goto out;
+ }
+ tok->st_pos++;
+ }
+ break;
+
+ case json_tokener_state_comment_start:
+ if (c == '*')
+ {
+ state = json_tokener_state_comment;
+ }
+ else if (c == '/')
+ {
+ state = json_tokener_state_comment_eol;
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_comment;
+ goto out;
+ }
+ printbuf_memappend_fast(tok->pb, &c, 1);
+ break;
+
+ case json_tokener_state_comment:
+ {
+ /* Advance until we change state */
+ const char *case_start = str;
+ while (c != '*')
+ {
+ if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ goto out;
+ }
+ }
+ printbuf_memappend_fast(tok->pb, case_start, 1 + str - case_start);
+ state = json_tokener_state_comment_end;
+ }
+ break;
+
+ case json_tokener_state_comment_eol:
+ {
+ /* Advance until we change state */
+ const char *case_start = str;
+ while (c != '\n')
+ {
+ if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ goto out;
+ }
+ }
+ printbuf_memappend_fast(tok->pb, case_start, str - case_start);
+ MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
+ state = json_tokener_state_eatws;
+ }
+ break;
+
+ case json_tokener_state_comment_end:
+ printbuf_memappend_fast(tok->pb, &c, 1);
+ if (c == '/')
+ {
+ MC_DEBUG("json_tokener_comment: %s\n", tok->pb->buf);
+ state = json_tokener_state_eatws;
+ }
+ else
+ {
+ state = json_tokener_state_comment;
+ }
+ break;
+
+ case json_tokener_state_string:
+ {
+ /* Advance until we change state */
+ const char *case_start = str;
+ while (1)
+ {
+ if (c == tok->quote_char)
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ current =
+ json_object_new_string_len(tok->pb->buf, tok->pb->bpos);
+ if (current == NULL)
+ goto out;
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ break;
+ }
+ else if (c == '\\')
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ saved_state = json_tokener_state_string;
+ state = json_tokener_state_string_escape;
+ break;
+ }
+ if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ goto out;
+ }
+ }
+ }
+ break;
+
+ case json_tokener_state_string_escape:
+ switch (c)
+ {
+ case '"':
+ case '\\':
+ case '/':
+ printbuf_memappend_fast(tok->pb, &c, 1);
+ state = saved_state;
+ break;
+ case 'b':
+ case 'n':
+ case 'r':
+ case 't':
+ case 'f':
+ if (c == 'b')
+ printbuf_memappend_fast(tok->pb, "\b", 1);
+ else if (c == 'n')
+ printbuf_memappend_fast(tok->pb, "\n", 1);
+ else if (c == 'r')
+ printbuf_memappend_fast(tok->pb, "\r", 1);
+ else if (c == 't')
+ printbuf_memappend_fast(tok->pb, "\t", 1);
+ else if (c == 'f')
+ printbuf_memappend_fast(tok->pb, "\f", 1);
+ state = saved_state;
+ break;
+ case 'u':
+ tok->ucs_char = 0;
+ tok->st_pos = 0;
+ state = json_tokener_state_escape_unicode;
+ break;
+ default: tok->err = json_tokener_error_parse_string; goto out;
+ }
+ break;
+
+ // ===================================================
+
+ case json_tokener_state_escape_unicode:
+ {
+ /* Handle a 4-byte \uNNNN sequence, or two sequences if a surrogate pair */
+ while (1)
+ {
+ if (!c || !is_hex_char(c))
+ {
+ tok->err = json_tokener_error_parse_string;
+ goto out;
+ }
+ /* Digits arrive most significant first: digit st_pos fills bits
+ * (3 - st_pos) * 4 and up.
+ */
+ tok->ucs_char |=
+ ((unsigned int)jt_hexdigit(c) << ((3 - tok->st_pos) * 4));
+ tok->st_pos++;
+ if (tok->st_pos >= 4)
+ break;
+
+ (void)ADVANCE_CHAR(str, tok);
+ if (!PEEK_CHAR(c, tok))
+ {
+ /*
+ * We're out of characters in the current call to
+ * json_tokener_parse(), but a subsequent call might
+ * provide us with more, so leave our current state
+ * as-is (including tok->high_surrogate) and return.
+ */
+ goto out;
+ }
+ }
+ tok->st_pos = 0;
+
+ /* Now, we have a full \uNNNN sequence in tok->ucs_char */
+
+ /* If the *previous* sequence was a high surrogate ... */
+ if (tok->high_surrogate)
+ {
+ if (IS_LOW_SURROGATE(tok->ucs_char))
+ {
+ /* Recalculate the ucs_char, then fall thru to process normally */
+ tok->ucs_char = DECODE_SURROGATE_PAIR(tok->high_surrogate,
+ tok->ucs_char);
+ }
+ else
+ {
+ /* High surrogate was not followed by a low surrogate
+ * Replace the high and process the rest normally
+ */
+ printbuf_memappend_fast(tok->pb,
+ (char *)utf8_replacement_char, 3);
+ }
+ tok->high_surrogate = 0;
+ }
+
+ /* Emit tok->ucs_char as UTF-8: 1, 2, 3 or 4 bytes depending on range. */
+ if (tok->ucs_char < 0x80)
+ {
+ unsigned char unescaped_utf[1];
+ unescaped_utf[0] = tok->ucs_char;
+ printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 1);
+ }
+ else if (tok->ucs_char < 0x800)
+ {
+ unsigned char unescaped_utf[2];
+ unescaped_utf[0] = 0xc0 | (tok->ucs_char >> 6);
+ unescaped_utf[1] = 0x80 | (tok->ucs_char & 0x3f);
+ printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 2);
+ }
+ else if (IS_HIGH_SURROGATE(tok->ucs_char))
+ {
+ /*
+ * The next two characters should be \u, HOWEVER,
+ * we can't simply peek ahead here, because the
+ * characters we need might not be passed to us
+ * until a subsequent call to json_tokener_parse.
+ * Instead, transition through a couple of states.
+ * (now):
+ * _escape_unicode => _unicode_need_escape
+ * (see a '\\' char):
+ * _unicode_need_escape => _unicode_need_u
+ * (see a 'u' char):
+ * _unicode_need_u => _escape_unicode
+ * ...and we'll end up back around here.
+ */
+ tok->high_surrogate = tok->ucs_char;
+ tok->ucs_char = 0;
+ state = json_tokener_state_escape_unicode_need_escape;
+ break;
+ }
+ else if (IS_LOW_SURROGATE(tok->ucs_char))
+ {
+ /* Got a low surrogate not preceded by a high */
+ printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ }
+ else if (tok->ucs_char < 0x10000)
+ {
+ unsigned char unescaped_utf[3];
+ unescaped_utf[0] = 0xe0 | (tok->ucs_char >> 12);
+ unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
+ unescaped_utf[2] = 0x80 | (tok->ucs_char & 0x3f);
+ printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 3);
+ }
+ else if (tok->ucs_char < 0x110000)
+ {
+ unsigned char unescaped_utf[4];
+ unescaped_utf[0] = 0xf0 | ((tok->ucs_char >> 18) & 0x07);
+ unescaped_utf[1] = 0x80 | ((tok->ucs_char >> 12) & 0x3f);
+ unescaped_utf[2] = 0x80 | ((tok->ucs_char >> 6) & 0x3f);
+ unescaped_utf[3] = 0x80 | (tok->ucs_char & 0x3f);
+ printbuf_memappend_fast(tok->pb, (char *)unescaped_utf, 4);
+ }
+ else
+ {
+ /* Don't know what we got--insert the replacement char */
+ printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ }
+ state = saved_state; // i.e. _state_string or _state_object_field
+ }
+ break;
+
+ case json_tokener_state_escape_unicode_need_escape:
+ // We get here after processing a high_surrogate
+ // require a '\\' char
+ if (!c || c != '\\')
+ {
+ /* Got a high surrogate without another sequence following
+ * it. Put a replacement char in for the high surrogate
+ * and pop back up to _state_string or _state_object_field.
+ */
+ printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ tok->high_surrogate = 0;
+ tok->ucs_char = 0;
+ tok->st_pos = 0;
+ state = saved_state;
+ goto redo_char;
+ }
+ state = json_tokener_state_escape_unicode_need_u;
+ break;
+
+ case json_tokener_state_escape_unicode_need_u:
+ /* We already had a \ char, check that it's \u */
+ if (!c || c != 'u')
+ {
+ /* Got a high surrogate with some non-unicode escape
+ * sequence following it.
+ * Put a replacement char in for the high surrogate
+ * and handle the escape sequence normally.
+ */
+ printbuf_memappend_fast(tok->pb, (char *)utf8_replacement_char, 3);
+ tok->high_surrogate = 0;
+ tok->ucs_char = 0;
+ tok->st_pos = 0;
+ state = json_tokener_state_string_escape;
+ goto redo_char;
+ }
+ state = json_tokener_state_escape_unicode;
+ break;
+
+ // ===================================================
+
+ case json_tokener_state_boolean:
+ {
+ /* Same prefix-matching scheme as the null state, against "true"
+ * and "false".
+ */
+ int size1, size2;
+ printbuf_memappend_fast(tok->pb, &c, 1);
+ size1 = json_min(tok->st_pos + 1, json_true_str_len);
+ size2 = json_min(tok->st_pos + 1, json_false_str_len);
+ if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ strncasecmp(json_true_str, tok->pb->buf, size1) == 0) ||
+ (strncmp(json_true_str, tok->pb->buf, size1) == 0))
+ {
+ if (tok->st_pos == json_true_str_len)
+ {
+ current = json_object_new_boolean(1);
+ if (current == NULL)
+ goto out;
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+ }
+ }
+ else if ((!(tok->flags & JSON_TOKENER_STRICT) &&
+ strncasecmp(json_false_str, tok->pb->buf, size2) == 0) ||
+ (strncmp(json_false_str, tok->pb->buf, size2) == 0))
+ {
+ if (tok->st_pos == json_false_str_len)
+ {
+ current = json_object_new_boolean(0);
+ if (current == NULL)
+ goto out;
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+ }
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_boolean;
+ goto out;
+ }
+ tok->st_pos++;
+ }
+ break;
+
+ case json_tokener_state_number:
+ {
+ /* Advance until we change state */
+ const char *case_start = str;
+ int case_len = 0;
+ int is_exponent = 0;
+ int neg_sign_ok = 1;
+ int pos_sign_ok = 0;
+ if (printbuf_length(tok->pb) > 0)
+ {
+ /* We don't save all state from the previous incremental parse
+ so we need to re-generate it based on the saved string so far.
+ */
+ char *e_loc = strchr(tok->pb->buf, 'e');
+ if (!e_loc)
+ e_loc = strchr(tok->pb->buf, 'E');
+ if (e_loc)
+ {
+ char *last_saved_char =
+ &tok->pb->buf[printbuf_length(tok->pb) - 1];
+ is_exponent = 1;
+ pos_sign_ok = neg_sign_ok = 1;
+ /* If the "e" isn't at the end, we can't start with a '-' */
+ if (e_loc != last_saved_char)
+ {
+ neg_sign_ok = 0;
+ pos_sign_ok = 0;
+ }
+ // else leave it set to 1, i.e. start of the new input
+ }
+ }
+
+ while (c && ((c >= '0' && c <= '9') ||
+ (!is_exponent && (c == 'e' || c == 'E')) ||
+ (neg_sign_ok && c == '-') || (pos_sign_ok && c == '+') ||
+ (!tok->is_double && c == '.')))
+ {
+ pos_sign_ok = neg_sign_ok = 0;
+ ++case_len;
+
+ /* non-digit characters checks */
+ /* note: since the main loop condition to get here was
+ * an input starting with 0-9 or '-', we are
+ * protected from input starting with '.' or
+ * e/E.
+ */
+ switch (c)
+ {
+ case '.':
+ tok->is_double = 1;
+ pos_sign_ok = 1;
+ neg_sign_ok = 1;
+ break;
+ case 'e': /* FALLTHRU */
+ case 'E':
+ is_exponent = 1;
+ tok->is_double = 1;
+ /* the exponent part can begin with a negative sign */
+ pos_sign_ok = neg_sign_ok = 1;
+ break;
+ default: break;
+ }
+
+ if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ {
+ printbuf_memappend_fast(tok->pb, case_start, case_len);
+ goto out;
+ }
+ }
+ /*
+ Now we know c isn't a valid number char, but check whether
+ it might have been intended to be, and return a potentially
+ more understandable error right away.
+ However, if we're at the top-level, use the number as-is
+ because c can be part of a new object to parse on the
+ next call to json_tokener_parse().
+ */
+ if (tok->depth > 0 && c != ',' && c != ']' && c != '}' && c != '/' &&
+ c != 'I' && c != 'i' && !is_ws_char(c))
+ {
+ tok->err = json_tokener_error_parse_number;
+ goto out;
+ }
+ if (case_len > 0)
+ printbuf_memappend_fast(tok->pb, case_start, case_len);
+
+ // Check for -Infinity
+ if (tok->pb->buf[0] == '-' && case_len <= 1 && (c == 'i' || c == 'I'))
+ {
+ state = json_tokener_state_inf;
+ tok->st_pos = 0;
+ goto redo_char;
+ }
+ if (tok->is_double && !(tok->flags & JSON_TOKENER_STRICT))
+ {
+ /* Trim some chars off the end, to allow things
+ like "123e+" to parse ok. */
+ while (printbuf_length(tok->pb) > 1)
+ {
+ char last_char = tok->pb->buf[printbuf_length(tok->pb) - 1];
+ if (last_char != 'e' && last_char != 'E' &&
+ last_char != '-' && last_char != '+')
+ {
+ break;
+ }
+ tok->pb->buf[printbuf_length(tok->pb) - 1] = '\0';
+ printbuf_length(tok->pb)--;
+ }
+ }
+ }
+ {
+ /* Convert the collected text: negative integers via int64, other
+ * integers via uint64 (stored as int64 when they fit), and anything
+ * flagged is_double via json_tokener_parse_double().
+ */
+ int64_t num64;
+ uint64_t numuint64;
+ double numd;
+ if (!tok->is_double && tok->pb->buf[0] == '-' &&
+ json_parse_int64(tok->pb->buf, &num64) == 0)
+ {
+ current = json_object_new_int64(num64);
+ if (current == NULL)
+ goto out;
+ }
+ else if (!tok->is_double && tok->pb->buf[0] != '-' &&
+ json_parse_uint64(tok->pb->buf, &numuint64) == 0)
+ {
+ /* In strict mode a non-zero value with a leading '0' is rejected. */
+ if (numuint64 && tok->pb->buf[0] == '0' &&
+ (tok->flags & JSON_TOKENER_STRICT))
+ {
+ tok->err = json_tokener_error_parse_number;
+ goto out;
+ }
+ if (numuint64 <= INT64_MAX)
+ {
+ num64 = (uint64_t)numuint64;
+ current = json_object_new_int64(num64);
+ if (current == NULL)
+ goto out;
+ }
+ else
+ {
+ current = json_object_new_uint64(numuint64);
+ if (current == NULL)
+ goto out;
+ }
+ }
+ else if (tok->is_double &&
+ json_tokener_parse_double(
+ tok->pb->buf, printbuf_length(tok->pb), &numd) == 0)
+ {
+ current = json_object_new_double_s(numd, tok->pb->buf);
+ if (current == NULL)
+ goto out;
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_number;
+ goto out;
+ }
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+ }
+ break;
+
+ case json_tokener_state_array_after_sep:
+ case json_tokener_state_array:
+ if (c == ']')
+ {
+ // Minimize memory usage; assume parsed objs are unlikely to be changed
+ json_object_array_shrink(current, 0);
+
+ if (state == json_tokener_state_array_after_sep &&
+ (tok->flags & JSON_TOKENER_STRICT))
+ {
+ tok->err = json_tokener_error_parse_unexpected;
+ goto out;
+ }
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ }
+ else
+ {
+ /* Parse the element one level down; this level resumes in
+ * _array_add once the child level finishes.
+ */
+ if (tok->depth >= tok->max_depth - 1)
+ {
+ tok->err = json_tokener_error_depth;
+ goto out;
+ }
+ state = json_tokener_state_array_add;
+ tok->depth++;
+ json_tokener_reset_level(tok, tok->depth);
+ goto redo_char;
+ }
+ break;
+
+ case json_tokener_state_array_add:
+ /* NOTE(review): on failure tok->err is not set before leaving the loop. */
+ if (json_object_array_add(current, obj) != 0)
+ goto out;
+ saved_state = json_tokener_state_array_sep;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+
+ case json_tokener_state_array_sep:
+ if (c == ']')
+ {
+ // Minimize memory usage; assume parsed objs are unlikely to be changed
+ json_object_array_shrink(current, 0);
+
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ }
+ else if (c == ',')
+ {
+ saved_state = json_tokener_state_array_after_sep;
+ state = json_tokener_state_eatws;
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_array;
+ goto out;
+ }
+ break;
+
+ case json_tokener_state_object_field_start:
+ case json_tokener_state_object_field_start_after_sep:
+ if (c == '}')
+ {
+ if (state == json_tokener_state_object_field_start_after_sep &&
+ (tok->flags & JSON_TOKENER_STRICT))
+ {
+ tok->err = json_tokener_error_parse_unexpected;
+ goto out;
+ }
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ }
+ else if (c == '"' || c == '\'')
+ {
+ tok->quote_char = c;
+ printbuf_reset(tok->pb);
+ state = json_tokener_state_object_field;
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_object_key_name;
+ goto out;
+ }
+ break;
+
+ case json_tokener_state_object_field:
+ {
+ /* Advance until we change state */
+ const char *case_start = str;
+ while (1)
+ {
+ if (c == tok->quote_char)
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ /* NOTE(review): the strdup() result is not checked for NULL. */
+ obj_field_name = strdup(tok->pb->buf);
+ saved_state = json_tokener_state_object_field_end;
+ state = json_tokener_state_eatws;
+ break;
+ }
+ else if (c == '\\')
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ saved_state = json_tokener_state_object_field;
+ state = json_tokener_state_string_escape;
+ break;
+ }
+ if (!ADVANCE_CHAR(str, tok) || !PEEK_CHAR(c, tok))
+ {
+ printbuf_memappend_fast(tok->pb, case_start,
+ str - case_start);
+ goto out;
+ }
+ }
+ }
+ break;
+
+ case json_tokener_state_object_field_end:
+ if (c == ':')
+ {
+ saved_state = json_tokener_state_object_value;
+ state = json_tokener_state_eatws;
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_object_key_sep;
+ goto out;
+ }
+ break;
+
+ case json_tokener_state_object_value:
+ /* Parse the member value one level down; this level resumes in
+ * _object_value_add once the child level finishes.
+ */
+ if (tok->depth >= tok->max_depth - 1)
+ {
+ tok->err = json_tokener_error_depth;
+ goto out;
+ }
+ state = json_tokener_state_object_value_add;
+ tok->depth++;
+ json_tokener_reset_level(tok, tok->depth);
+ goto redo_char;
+
+ case json_tokener_state_object_value_add:
+ /* NOTE(review): the return value of json_object_object_add() is ignored. */
+ json_object_object_add(current, obj_field_name, obj);
+ free(obj_field_name);
+ obj_field_name = NULL;
+ saved_state = json_tokener_state_object_sep;
+ state = json_tokener_state_eatws;
+ goto redo_char;
+
+ case json_tokener_state_object_sep:
+ /* { */
+ if (c == '}')
+ {
+ saved_state = json_tokener_state_finish;
+ state = json_tokener_state_eatws;
+ }
+ else if (c == ',')
+ {
+ saved_state = json_tokener_state_object_field_start_after_sep;
+ state = json_tokener_state_eatws;
+ }
+ else
+ {
+ tok->err = json_tokener_error_parse_object_value_sep;
+ goto out;
+ }
+ break;
+ }
+ (void)ADVANCE_CHAR(str, tok);
+ if (!c) // This is the char *before* advancing
+ break;
+ } /* while(PEEK_CHAR) */
+
+out:
+ /* A non-zero nBytes means a multi-byte UTF-8 sequence is still waiting for
+ * continuation bytes.
+ */
+ if ((tok->flags & JSON_TOKENER_VALIDATE_UTF8) && (nBytes != 0))
+ {
+ tok->err = json_tokener_error_parse_utf8_string;
+ }
+ if (c && (state == json_tokener_state_finish) && (tok->depth == 0) &&
+ (tok->flags & (JSON_TOKENER_STRICT | JSON_TOKENER_ALLOW_TRAILING_CHARS)) ==
+ JSON_TOKENER_STRICT)
+ {
+ /* unexpected char after JSON data */
+ tok->err = json_tokener_error_parse_unexpected;
+ }
+ if (!c)
+ {
+ /* We hit an eof char (0) */
+ if (state != json_tokener_state_finish && saved_state != json_tokener_state_finish)
+ tok->err = json_tokener_error_parse_eof;
+ }
+
+#ifdef HAVE_USELOCALE
+ uselocale(oldlocale);
+ freelocale(newloc);
+#elif defined(HAVE_SETLOCALE)
+ setlocale(LC_NUMERIC, oldlocale);
+ free(oldlocale);
+#endif
+
+ if (tok->err == json_tokener_success)
+ {
+ json_object *ret = json_object_get(current);
+ int ii;
+
+ /* Partially reset, so we parse additional objects on subsequent calls. */
+ for (ii = tok->depth; ii >= 0; ii--)
+ json_tokener_reset_level(tok, ii);
+ return ret;
+ }
+
+ MC_DEBUG("json_tokener_parse_ex: error %s at offset %d\n", json_tokener_errors[tok->err],
+ tok->char_offset);
+ return NULL;
+}
+
+/* Check one input byte against the UTF-8 sequence in progress.
+ *
+ * *nBytes holds the number of continuation bytes still expected. When it is
+ * zero, c must be an ASCII byte or a lead byte of a 2-, 3- or 4-byte sequence
+ * (which sets *nBytes to 1, 2 or 3). Otherwise c must be a continuation byte
+ * (10xxxxxx) and *nBytes is decremented.
+ *
+ * Returns 1 if the byte is acceptable at this position, 0 if not.
+ */
+static json_bool json_tokener_validate_utf8(const char c, unsigned int *nBytes)
+{
+    const unsigned char byte = c;
+
+    if (*nBytes != 0)
+    {
+        /* In the middle of a sequence: only continuation bytes are allowed. */
+        if ((byte & 0xC0) != 0x80)
+            return 0;
+        (*nBytes)--;
+        return 1;
+    }
+
+    if (byte < 0x80)
+        return 1; /* plain ASCII */
+
+    if ((byte & 0xe0) == 0xc0)
+    {
+        *nBytes = 1;
+        return 1;
+    }
+    if ((byte & 0xf0) == 0xe0)
+    {
+        *nBytes = 2;
+        return 1;
+    }
+    if ((byte & 0xf8) == 0xf0)
+    {
+        *nBytes = 3;
+        return 1;
+    }
+
+    /* Stray continuation byte or an invalid lead byte. */
+    return 0;
+}
+
+/**
+ * Replace the tokener's flag word with flags (the previous value is
+ * overwritten, not OR-ed). The parser tests it against the JSON_TOKENER_*
+ * flag bits such as JSON_TOKENER_STRICT and JSON_TOKENER_VALIDATE_UTF8.
+ */
+void json_tokener_set_flags(struct json_tokener *tok, int flags)
+{
+ tok->flags = flags;
+}
+
+/**
+ * Return tok->char_offset, which json_tokener_parse_ex() resets to 0 on each
+ * call and increments for every character it consumes, as a size_t.
+ */
+size_t json_tokener_get_parse_end(struct json_tokener *tok)
+{
+ assert(tok->char_offset >= 0); /* Drop this line when char_offset becomes a size_t */
+ return (size_t)tok->char_offset;
+}
+
+/* Convert the text in buf to a double with strtod().
+ *
+ * @param buf    NUL-terminated number text
+ * @param len    number of characters of buf that must be consumed
+ * @param retval receives the strtod() result, whether or not parsing succeeds
+ * @return 0 if strtod() stopped exactly len characters into buf, 1 otherwise.
+ */
+static int json_tokener_parse_double(const char *buf, int len, double *retval)
+{
+    char *stop = NULL;
+
+    *retval = strtod(buf, &stop);
+
+    /* Success only when the whole len-character span was consumed. */
+    return (stop == buf + len) ? 0 : 1;
+}