summaryrefslogtreecommitdiffstats
path: root/src/spdk/lib/json/json_parse.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/spdk/lib/json/json_parse.c668
1 files changed, 668 insertions, 0 deletions
diff --git a/src/spdk/lib/json/json_parse.c b/src/spdk/lib/json/json_parse.c
new file mode 100644
index 00000000..8639d5ff
--- /dev/null
+++ b/src/spdk/lib/json/json_parse.c
@@ -0,0 +1,668 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright (c) Intel Corporation.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in
+ * the documentation and/or other materials provided with the
+ * distribution.
+ * * Neither the name of Intel Corporation nor the names of its
+ * contributors may be used to endorse or promote products derived
+ * from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "spdk/json.h"
+
+#include "spdk_internal/utf.h"
+
+#define SPDK_JSON_MAX_NESTING_DEPTH 64
+
+static int
+hex_value(uint8_t c)
+{
+#define V(x, y) [x] = y + 1
+ static const int8_t val[256] = {
+ V('0', 0), V('1', 1), V('2', 2), V('3', 3), V('4', 4),
+ V('5', 5), V('6', 6), V('7', 7), V('8', 8), V('9', 9),
+ V('A', 0xA), V('B', 0xB), V('C', 0xC), V('D', 0xD), V('E', 0xE), V('F', 0xF),
+ V('a', 0xA), V('b', 0xB), V('c', 0xC), V('d', 0xD), V('e', 0xE), V('f', 0xF),
+ };
+#undef V
+
+ return val[c] - 1;
+}
+
+static int
+json_decode_string_escape_unicode(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
+{
+ uint8_t *str = *strp;
+ int v0, v1, v2, v3;
+ uint32_t val;
+ uint32_t surrogate_high = 0;
+ int rc;
+decode:
+ /* \uXXXX */
+ assert(buf_end > str);
+
+ if (*str++ != '\\') { return SPDK_JSON_PARSE_INVALID; }
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+
+ if (*str++ != 'u') { return SPDK_JSON_PARSE_INVALID; }
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+
+ if ((v3 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+
+ if ((v2 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+
+ if ((v1 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+
+ if ((v0 = hex_value(*str++)) < 0) { return SPDK_JSON_PARSE_INVALID; }
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+
+ val = v0 | (v1 << 4) | (v2 << 8) | (v3 << 12);
+
+ if (surrogate_high) {
+ /* We already parsed the high surrogate, so this should be the low part. */
+ if (!utf16_valid_surrogate_low(val)) {
+ return SPDK_JSON_PARSE_INVALID;
+ }
+
+ /* Convert UTF-16 surrogate pair into codepoint and fall through to utf8_encode. */
+ val = utf16_decode_surrogate_pair(surrogate_high, val);
+ } else if (utf16_valid_surrogate_high(val)) {
+ surrogate_high = val;
+
+ /*
+ * We parsed a \uXXXX sequence that decoded to the first half of a
+ * UTF-16 surrogate pair, so it must be immediately followed by another
+ * \uXXXX escape.
+ *
+ * Loop around to get the low half of the surrogate pair.
+ */
+ if (buf_end == str) { return SPDK_JSON_PARSE_INCOMPLETE; }
+ goto decode;
+ } else if (utf16_valid_surrogate_low(val)) {
+ /*
+ * We found the second half of surrogate pair without the first half;
+ * this is an invalid encoding.
+ */
+ return SPDK_JSON_PARSE_INVALID;
+ }
+
+ /*
+ * Convert Unicode escape (or surrogate pair) to UTF-8 in place.
+ *
+ * This is safe (will not write beyond the buffer) because the \uXXXX sequence is 6 bytes
+ * (or 12 bytes for surrogate pairs), and the longest possible UTF-8 encoding of a
+ * single codepoint is 4 bytes.
+ */
+ if (out) {
+ rc = utf8_encode_unsafe(out, val);
+ } else {
+ rc = utf8_codepoint_len(val);
+ }
+ if (rc < 0) {
+ return SPDK_JSON_PARSE_INVALID;
+ }
+
+ *strp = str; /* update input pointer */
+ return rc; /* return number of bytes decoded */
+}
+
+static int
+json_decode_string_escape_twochar(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
+{
+ static const uint8_t escapes[256] = {
+ ['b'] = '\b',
+ ['f'] = '\f',
+ ['n'] = '\n',
+ ['r'] = '\r',
+ ['t'] = '\t',
+ ['/'] = '/',
+ ['"'] = '"',
+ ['\\'] = '\\',
+ };
+ uint8_t *str = *strp;
+ uint8_t c;
+
+ assert(buf_end > str);
+ if (buf_end - str < 2) {
+ return SPDK_JSON_PARSE_INCOMPLETE;
+ }
+
+ assert(str[0] == '\\');
+
+ c = escapes[str[1]];
+ if (c) {
+ if (out) {
+ *out = c;
+ }
+ *strp += 2; /* consumed two bytes */
+ return 1; /* produced one byte */
+ }
+
+ return SPDK_JSON_PARSE_INVALID;
+}
+
+/*
+ * Decode JSON string backslash escape.
+ * \param strp pointer to pointer to first character of escape (the backslash).
+ * *strp is also advanced to indicate how much input was consumed.
+ *
+ * \return Number of bytes appended to out
+ */
+static int
+json_decode_string_escape(uint8_t **strp, uint8_t *buf_end, uint8_t *out)
+{
+ int rc;
+
+ rc = json_decode_string_escape_twochar(strp, buf_end, out);
+ if (rc > 0) {
+ return rc;
+ }
+
+ return json_decode_string_escape_unicode(strp, buf_end, out);
+}
+
+/*
+ * Decode JSON string in place.
+ *
+ * \param str_start Pointer to the beginning of the string (the opening " character).
+ *
+ * \return Number of bytes in decoded string (beginning from start).
+ */
+static int
+json_decode_string(uint8_t *str_start, uint8_t *buf_end, uint8_t **str_end, uint32_t flags)
+{
+ uint8_t *str = str_start;
+ uint8_t *out = str_start + 1; /* Decode string in place (skip the initial quote) */
+ int rc;
+
+ if (buf_end - str_start < 2) {
+ /*
+ * Shortest valid string (the empty string) is two bytes (""),
+ * so this can't possibly be valid
+ */
+ *str_end = str;
+ return SPDK_JSON_PARSE_INCOMPLETE;
+ }
+
+ if (*str++ != '"') {
+ *str_end = str;
+ return SPDK_JSON_PARSE_INVALID;
+ }
+
+ while (str < buf_end) {
+ if (str[0] == '"') {
+ /*
+ * End of string.
+ * Update str_end to point at next input byte and return output length.
+ */
+ *str_end = str + 1;
+ return out - str_start - 1;
+ } else if (str[0] == '\\') {
+ rc = json_decode_string_escape(&str, buf_end,
+ flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE ? out : NULL);
+ assert(rc != 0);
+ if (rc < 0) {
+ *str_end = str;
+ return rc;
+ }
+ out += rc;
+ } else if (str[0] <= 0x1f) {
+ /* control characters must be escaped */
+ *str_end = str;
+ return SPDK_JSON_PARSE_INVALID;
+ } else {
+ rc = utf8_valid(str, buf_end);
+ if (rc == 0) {
+ *str_end = str;
+ return SPDK_JSON_PARSE_INCOMPLETE;
+ } else if (rc < 0) {
+ *str_end = str;
+ return SPDK_JSON_PARSE_INVALID;
+ }
+
+ if (out && out != str && (flags & SPDK_JSON_PARSE_FLAG_DECODE_IN_PLACE)) {
+ memmove(out, str, rc);
+ }
+ out += rc;
+ str += rc;
+ }
+ }
+
+ /* If execution gets here, we ran out of buffer. */
+ *str_end = str;
+ return SPDK_JSON_PARSE_INCOMPLETE;
+}
+
+static int
+json_valid_number(uint8_t *start, uint8_t *buf_end)
+{
+ uint8_t *p = start;
+ uint8_t c;
+
+ if (p >= buf_end) { return -1; }
+
+ c = *p++;
+ if (c >= '1' && c <= '9') { goto num_int_digits; }
+ if (c == '0') { goto num_frac_or_exp; }
+ if (c == '-') { goto num_int_first_digit; }
+ p--;
+ goto done_invalid;
+
+num_int_first_digit:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c == '0') { goto num_frac_or_exp; }
+ if (c >= '1' && c <= '9') { goto num_int_digits; }
+ p--;
+ }
+ goto done_invalid;
+
+num_int_digits:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c >= '0' && c <= '9') { goto num_int_digits; }
+ if (c == '.') { goto num_frac_first_digit; }
+ if (c == 'e' || c == 'E') { goto num_exp_sign; }
+ p--;
+ }
+ goto done_valid;
+
+num_frac_or_exp:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c == '.') { goto num_frac_first_digit; }
+ if (c == 'e' || c == 'E') { goto num_exp_sign; }
+ p--;
+ }
+ goto done_valid;
+
+num_frac_first_digit:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c >= '0' && c <= '9') { goto num_frac_digits; }
+ p--;
+ }
+ goto done_invalid;
+
+num_frac_digits:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c >= '0' && c <= '9') { goto num_frac_digits; }
+ if (c == 'e' || c == 'E') { goto num_exp_sign; }
+ p--;
+ }
+ goto done_valid;
+
+num_exp_sign:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c >= '0' && c <= '9') { goto num_exp_digits; }
+ if (c == '-' || c == '+') { goto num_exp_first_digit; }
+ p--;
+ }
+ goto done_invalid;
+
+num_exp_first_digit:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c >= '0' && c <= '9') { goto num_exp_digits; }
+ p--;
+ }
+ goto done_invalid;
+
+num_exp_digits:
+ if (spdk_likely(p != buf_end)) {
+ c = *p++;
+ if (c >= '0' && c <= '9') { goto num_exp_digits; }
+ p--;
+ }
+ goto done_valid;
+
+done_valid:
+ /* Valid end state */
+ return p - start;
+
+done_invalid:
+ /* Invalid end state */
+ if (p == buf_end) {
+ /* Hit the end of the buffer - the stream is incomplete. */
+ return SPDK_JSON_PARSE_INCOMPLETE;
+ }
+
+ /* Found an invalid character in an invalid end state */
+ return SPDK_JSON_PARSE_INVALID;
+}
+
+static int
+json_valid_comment(const uint8_t *start, const uint8_t *buf_end)
+{
+ const uint8_t *p = start;
+ bool multiline;
+
+ assert(buf_end > p);
+ if (buf_end - p < 2) {
+ return SPDK_JSON_PARSE_INCOMPLETE;
+ }
+
+ if (p[0] != '/') {
+ return SPDK_JSON_PARSE_INVALID;
+ }
+ if (p[1] == '*') {
+ multiline = true;
+ } else if (p[1] == '/') {
+ multiline = false;
+ } else {
+ return SPDK_JSON_PARSE_INVALID;
+ }
+ p += 2;
+
+ if (multiline) {
+ while (p != buf_end - 1) {
+ if (p[0] == '*' && p[1] == '/') {
+ /* Include the terminating star and slash in the comment */
+ return p - start + 2;
+ }
+ p++;
+ }
+ } else {
+ while (p != buf_end) {
+ if (*p == '\r' || *p == '\n') {
+ /* Do not include the line terminator in the comment */
+ return p - start;
+ }
+ p++;
+ }
+ }
+
+ return SPDK_JSON_PARSE_INCOMPLETE;
+}
+
+struct json_literal {
+ enum spdk_json_val_type type;
+ uint32_t len;
+ uint8_t str[8];
+};
+
+/*
+ * JSON only defines 3 possible literals; they can be uniquely identified by bits
+ * 3 and 4 of the first character:
+ * 'f' = 0b11[00]110
+ * 'n' = 0b11[01]110
+ * 't' = 0b11[10]100
+ * These two bits can be used as an index into the g_json_literals array.
+ */
+static const struct json_literal g_json_literals[] = {
+ {SPDK_JSON_VAL_FALSE, 5, "false"},
+ {SPDK_JSON_VAL_NULL, 4, "null"},
+ {SPDK_JSON_VAL_TRUE, 4, "true"},
+ {}
+};
+
+static int
+match_literal(const uint8_t *start, const uint8_t *end, const uint8_t *literal, size_t len)
+{
+ assert(end >= start);
+ if ((size_t)(end - start) < len) {
+ return SPDK_JSON_PARSE_INCOMPLETE;
+ }
+
+ if (memcmp(start, literal, len) != 0) {
+ return SPDK_JSON_PARSE_INVALID;
+ }
+
+ return len;
+}
+
+ssize_t
+spdk_json_parse(void *json, size_t size, struct spdk_json_val *values, size_t num_values,
+ void **end, uint32_t flags)
+{
+ uint8_t *json_end = json + size;
+ enum spdk_json_val_type containers[SPDK_JSON_MAX_NESTING_DEPTH];
+ size_t con_value[SPDK_JSON_MAX_NESTING_DEPTH];
+ enum spdk_json_val_type con_type = SPDK_JSON_VAL_INVALID;
+ bool trailing_comma = false;
+ size_t depth = 0; /* index into containers */
+ size_t cur_value = 0; /* index into values */
+ size_t con_start_value;
+ uint8_t *data = json;
+ uint8_t *new_data;
+ int rc = 0;
+ const struct json_literal *lit;
+ enum {
+ STATE_VALUE, /* initial state */
+ STATE_VALUE_SEPARATOR, /* value separator (comma) */
+ STATE_NAME, /* "name": value */
+ STATE_NAME_SEPARATOR, /* colon */
+ STATE_END, /* parsed the complete value, so only whitespace is valid */
+ } state = STATE_VALUE;
+
+#define ADD_VALUE(t, val_start_ptr, val_end_ptr) \
+ if (values && cur_value < num_values) { \
+ values[cur_value].type = t; \
+ values[cur_value].start = val_start_ptr; \
+ values[cur_value].len = val_end_ptr - val_start_ptr; \
+ } \
+ cur_value++
+
+ while (data < json_end) {
+ uint8_t c = *data;
+
+ switch (c) {
+ case ' ':
+ case '\t':
+ case '\r':
+ case '\n':
+ /* Whitespace is allowed between any tokens. */
+ data++;
+ break;
+
+ case 't':
+ case 'f':
+ case 'n':
+ /* true, false, or null */
+ if (state != STATE_VALUE) { goto done_invalid; }
+ lit = &g_json_literals[(c >> 3) & 3]; /* See comment above g_json_literals[] */
+ assert(lit->str[0] == c);
+ rc = match_literal(data, json_end, lit->str, lit->len);
+ if (rc < 0) { goto done_rc; }
+ ADD_VALUE(lit->type, data, data + rc);
+ data += rc;
+ state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
+ trailing_comma = false;
+ break;
+
+ case '"':
+ if (state != STATE_VALUE && state != STATE_NAME) { goto done_invalid; }
+ rc = json_decode_string(data, json_end, &new_data, flags);
+ if (rc < 0) {
+ data = new_data;
+ goto done_rc;
+ }
+ /*
+ * Start is data + 1 to skip initial quote.
+ * Length is data + rc - 1 to skip both quotes.
+ */
+ ADD_VALUE(state == STATE_VALUE ? SPDK_JSON_VAL_STRING : SPDK_JSON_VAL_NAME,
+ data + 1, data + rc - 1);
+ data = new_data;
+ if (state == STATE_NAME) {
+ state = STATE_NAME_SEPARATOR;
+ } else {
+ state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
+ }
+ trailing_comma = false;
+ break;
+
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ if (state != STATE_VALUE) { goto done_invalid; }
+ rc = json_valid_number(data, json_end);
+ if (rc < 0) { goto done_rc; }
+ ADD_VALUE(SPDK_JSON_VAL_NUMBER, data, data + rc);
+ data += rc;
+ state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
+ trailing_comma = false;
+ break;
+
+ case '{':
+ case '[':
+ if (state != STATE_VALUE) { goto done_invalid; }
+ if (depth == SPDK_JSON_MAX_NESTING_DEPTH) {
+ rc = SPDK_JSON_PARSE_MAX_DEPTH_EXCEEDED;
+ goto done_rc;
+ }
+ if (c == '{') {
+ con_type = SPDK_JSON_VAL_OBJECT_BEGIN;
+ state = STATE_NAME;
+ } else {
+ con_type = SPDK_JSON_VAL_ARRAY_BEGIN;
+ state = STATE_VALUE;
+ }
+ con_value[depth] = cur_value;
+ containers[depth++] = con_type;
+ ADD_VALUE(con_type, data, data + 1);
+ data++;
+ trailing_comma = false;
+ break;
+
+ case '}':
+ case ']':
+ if (trailing_comma) { goto done_invalid; }
+ if (depth == 0) { goto done_invalid; }
+ con_type = containers[--depth];
+ con_start_value = con_value[depth];
+ if (values && con_start_value < num_values) {
+ values[con_start_value].len = cur_value - con_start_value - 1;
+ }
+ if (c == '}') {
+ if (state != STATE_NAME && state != STATE_VALUE_SEPARATOR) {
+ goto done_invalid;
+ }
+ if (con_type != SPDK_JSON_VAL_OBJECT_BEGIN) {
+ goto done_invalid;
+ }
+ ADD_VALUE(SPDK_JSON_VAL_OBJECT_END, data, data + 1);
+ } else {
+ if (state != STATE_VALUE && state != STATE_VALUE_SEPARATOR) {
+ goto done_invalid;
+ }
+ if (con_type != SPDK_JSON_VAL_ARRAY_BEGIN) {
+ goto done_invalid;
+ }
+ ADD_VALUE(SPDK_JSON_VAL_ARRAY_END, data, data + 1);
+ }
+ con_type = depth == 0 ? SPDK_JSON_VAL_INVALID : containers[depth - 1];
+ data++;
+ state = depth ? STATE_VALUE_SEPARATOR : STATE_END;
+ trailing_comma = false;
+ break;
+
+ case ',':
+ if (state != STATE_VALUE_SEPARATOR) { goto done_invalid; }
+ data++;
+ assert(con_type == SPDK_JSON_VAL_ARRAY_BEGIN ||
+ con_type == SPDK_JSON_VAL_OBJECT_BEGIN);
+ state = con_type == SPDK_JSON_VAL_ARRAY_BEGIN ? STATE_VALUE : STATE_NAME;
+ trailing_comma = true;
+ break;
+
+ case ':':
+ if (state != STATE_NAME_SEPARATOR) { goto done_invalid; }
+ data++;
+ state = STATE_VALUE;
+ break;
+
+ case '/':
+ if (!(flags & SPDK_JSON_PARSE_FLAG_ALLOW_COMMENTS)) {
+ goto done_invalid;
+ }
+ rc = json_valid_comment(data, json_end);
+ if (rc < 0) { goto done_rc; }
+ /* Skip over comment */
+ data += rc;
+ break;
+
+ default:
+ goto done_invalid;
+ }
+
+ if (state == STATE_END) {
+ break;
+ }
+ }
+
+ if (state == STATE_END) {
+ /* Skip trailing whitespace */
+ while (data < json_end) {
+ uint8_t c = *data;
+
+ if (c == ' ' || c == '\t' || c == '\r' || c == '\n') {
+ data++;
+ } else {
+ break;
+ }
+ }
+
+ /*
+ * These asserts are just for sanity checking - they are guaranteed by the allowed
+ * state transitions.
+ */
+ assert(depth == 0);
+ assert(trailing_comma == false);
+ assert(data <= json_end);
+ if (end) {
+ *end = data;
+ }
+ return cur_value;
+ }
+
+ /* Invalid end state - ran out of data */
+ rc = SPDK_JSON_PARSE_INCOMPLETE;
+
+done_rc:
+ assert(rc < 0);
+ if (end) {
+ *end = data;
+ }
+ return rc;
+
+done_invalid:
+ rc = SPDK_JSON_PARSE_INVALID;
+ goto done_rc;
+}