diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-10 21:30:40 +0000 |
commit | 133a45c109da5310add55824db21af5239951f93 (patch) | |
tree | ba6ac4c0a950a0dda56451944315d66409923918 /contrib/libucl/ucl_parser.c | |
parent | Initial commit. (diff) | |
download | rspamd-133a45c109da5310add55824db21af5239951f93.tar.xz rspamd-133a45c109da5310add55824db21af5239951f93.zip |
Adding upstream version 3.8.1. (refs: upstream/3.8.1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'contrib/libucl/ucl_parser.c')
-rw-r--r-- | contrib/libucl/ucl_parser.c | 3212 |
1 files changed, 3212 insertions, 0 deletions
diff --git a/contrib/libucl/ucl_parser.c b/contrib/libucl/ucl_parser.c new file mode 100644 index 0000000..354bfe8 --- /dev/null +++ b/contrib/libucl/ucl_parser.c @@ -0,0 +1,3212 @@ +/* Copyright (c) 2013, Vsevolod Stakhov + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED ''AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED + * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL AUTHOR BE LIABLE FOR ANY + * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND + * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <math.h> +#include "ucl.h" +#include "ucl_internal.h" +#include "ucl_chartable.h" + +/** + * @file ucl_parser.c + * The implementation of ucl parser + */ + +struct ucl_parser_saved_state { + unsigned int line; + unsigned int column; + size_t remain; + const unsigned char *pos; +}; + +/** + * Move up to len characters + * @param parser + * @param begin + * @param len + * @return new position in chunk + */ +#define ucl_chunk_skipc(chunk, p) \ +do { \ + if (p == chunk->end) { \ + break; \ + } \ + if (*(p) == '\n') { \ + (chunk)->line ++; \ + (chunk)->column = 0; \ + } \ + else (chunk)->column ++; \ + (p++); \ + (chunk)->pos ++; \ + (chunk)->remain --; \ +} while (0) + +static inline void +ucl_set_err (struct ucl_parser *parser, int code, const char *str, UT_string **err) +{ + const char *fmt_string, *filename; + struct ucl_chunk *chunk = parser->chunks; + + if (parser->cur_file) { + filename = parser->cur_file; + } + else { + filename = "<unknown>"; + } + + if (chunk->pos < chunk->end) { + if (isgraph (*chunk->pos)) { + fmt_string = "error while parsing %s: " + "line: %d, column: %d - '%s', character: '%c'"; + } + else { + fmt_string = "error while parsing %s: " + "line: %d, column: %d - '%s', character: '0x%02x'"; + } + ucl_create_err (err, fmt_string, + filename, chunk->line, chunk->column, + str, *chunk->pos); + } + else { + ucl_create_err (err, "error while parsing %s: at the end of chunk: %s", + filename, str); + } + + parser->err_code = code; + parser->state = UCL_STATE_ERROR; +} + +static void +ucl_save_comment (struct ucl_parser *parser, const char *begin, size_t len) +{ + ucl_object_t *nobj; + + if (len > 0 && begin != NULL) { + nobj = ucl_object_fromstring_common (begin, len, 0); + + if (parser->last_comment) { + /* We need to append data to an existing object */ + DL_APPEND (parser->last_comment, nobj); + } + else { + parser->last_comment = nobj; + } + } +} + +static void +ucl_attach_comment (struct ucl_parser *parser, ucl_object_t *obj, 
bool before) +{ + if (parser->last_comment) { + ucl_object_insert_key (parser->comments, parser->last_comment, + (const char *)&obj, sizeof (void *), true); + + if (before) { + parser->last_comment->flags |= UCL_OBJECT_INHERITED; + } + + parser->last_comment = NULL; + } +} + +/** + * Skip all comments from the current pos resolving nested and multiline comments + * @param parser + * @return + */ +static bool +ucl_skip_comments (struct ucl_parser *parser) +{ + struct ucl_chunk *chunk = parser->chunks; + const unsigned char *p, *beg = NULL; + int comments_nested = 0; + bool quoted = false; + + p = chunk->pos; + +start: + if (chunk->remain > 0 && *p == '#') { + if (parser->state != UCL_STATE_SCOMMENT && + parser->state != UCL_STATE_MCOMMENT) { + beg = p; + + while (p < chunk->end) { + if (*p == '\n') { + if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { + ucl_save_comment (parser, beg, p - beg); + beg = NULL; + } + + ucl_chunk_skipc (chunk, p); + + goto start; + } + ucl_chunk_skipc (chunk, p); + } + } + } + else if (chunk->remain >= 2 && *p == '/') { + if (p[1] == '*') { + beg = p; + ucl_chunk_skipc (chunk, p); + comments_nested ++; + ucl_chunk_skipc (chunk, p); + + while (p < chunk->end) { + if (*p == '"' && *(p - 1) != '\\') { + quoted = !quoted; + } + + if (!quoted) { + if (*p == '*') { + ucl_chunk_skipc (chunk, p); + if (chunk->remain > 0 && *p == '/') { + comments_nested --; + if (comments_nested == 0) { + if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { + ucl_save_comment (parser, beg, p - beg + 1); + beg = NULL; + } + + ucl_chunk_skipc (chunk, p); + goto start; + } + } + ucl_chunk_skipc (chunk, p); + } + else if (p[0] == '/' && chunk->remain >= 2 && p[1] == '*') { + comments_nested ++; + ucl_chunk_skipc (chunk, p); + ucl_chunk_skipc (chunk, p); + continue; + } + } + + ucl_chunk_skipc (chunk, p); + } + if (comments_nested != 0) { + ucl_set_err (parser, UCL_ENESTED, + "unfinished multiline comment", &parser->err); + return false; + } + } + } + + if (beg && p > beg 
&& (parser->flags & UCL_PARSER_SAVE_COMMENTS)) { + ucl_save_comment (parser, beg, p - beg); + } + + return true; +} + +/** + * Return multiplier for a character + * @param c multiplier character + * @param is_bytes if true use 1024 multiplier + * @return multiplier + */ +static inline unsigned long +ucl_lex_num_multiplier (const unsigned char c, bool is_bytes) { + const struct { + char c; + long mult_normal; + long mult_bytes; + } multipliers[] = { + {'m', 1000 * 1000, 1024 * 1024}, + {'k', 1000, 1024}, + {'g', 1000 * 1000 * 1000, 1024 * 1024 * 1024} + }; + int i; + + for (i = 0; i < 3; i ++) { + if (tolower (c) == multipliers[i].c) { + if (is_bytes) { + return multipliers[i].mult_bytes; + } + return multipliers[i].mult_normal; + } + } + + return 1; +} + + +/** + * Return multiplier for time scaling + * @param c + * @return + */ +static inline double +ucl_lex_time_multiplier (const unsigned char c) { + const struct { + char c; + double mult; + } multipliers[] = { + {'m', 60}, + {'h', 60 * 60}, + {'d', 60 * 60 * 24}, + {'w', 60 * 60 * 24 * 7}, + {'y', 60 * 60 * 24 * 365} + }; + int i; + + for (i = 0; i < 5; i ++) { + if (tolower (c) == multipliers[i].c) { + return multipliers[i].mult; + } + } + + return 1; +} + +/** + * Return true if a character is a end of an atom + * @param c + * @return + */ +static inline bool +ucl_lex_is_atom_end (const unsigned char c) +{ + return ucl_test_character (c, UCL_CHARACTER_VALUE_END); +} + +static inline bool +ucl_lex_is_comment (const unsigned char c1, const unsigned char c2) +{ + if (c1 == '/') { + if (c2 == '*') { + return true; + } + } + else if (c1 == '#') { + return true; + } + return false; +} + +/** + * Check variable found + * @param parser + * @param ptr + * @param remain + * @param out_len + * @param strict + * @param found + * @return + */ +static inline const char * +ucl_check_variable_safe (struct ucl_parser *parser, const char *ptr, size_t remain, + size_t *out_len, bool strict, bool *found) +{ + struct ucl_variable 
*var; + unsigned char *dst; + size_t dstlen; + bool need_free = false; + + LL_FOREACH (parser->variables, var) { + if (strict) { + if (remain == var->var_len) { + if (memcmp (ptr, var->var, var->var_len) == 0) { + *out_len += var->value_len; + *found = true; + return (ptr + var->var_len); + } + } + } + else { + if (remain >= var->var_len) { + if (memcmp (ptr, var->var, var->var_len) == 0) { + *out_len += var->value_len; + *found = true; + return (ptr + var->var_len); + } + } + } + } + + /* XXX: can only handle ${VAR} */ + if (!(*found) && parser->var_handler != NULL && strict) { + /* Call generic handler */ + if (parser->var_handler (ptr, remain, &dst, &dstlen, &need_free, + parser->var_data)) { + *found = true; + if (need_free) { + free (dst); + } + return (ptr + remain); + } + } + + return ptr; +} + +/** + * Check for a variable in a given string + * @param parser + * @param ptr + * @param remain + * @param out_len + * @param vars_found + * @return + */ +static const char * +ucl_check_variable (struct ucl_parser *parser, const char *ptr, + size_t remain, size_t *out_len, bool *vars_found) +{ + const char *p, *end, *ret = ptr; + bool found = false; + + if (*ptr == '{') { + /* We need to match the variable enclosed in braces */ + p = ptr + 1; + end = ptr + remain; + while (p < end) { + if (*p == '}') { + ret = ucl_check_variable_safe (parser, ptr + 1, p - ptr - 1, + out_len, true, &found); + if (found) { + /* {} must be excluded actually */ + ret ++; + if (!*vars_found) { + *vars_found = true; + } + } + else { + *out_len += 2; + } + break; + } + p ++; + } + if(p == end) { + (*out_len) ++; + } + } + else if (*ptr != '$') { + /* Not count escaped dollar sign */ + ret = ucl_check_variable_safe (parser, ptr, remain, out_len, false, &found); + if (found && !*vars_found) { + *vars_found = true; + } + if (!found) { + (*out_len) ++; + } + } + else { + ret ++; + (*out_len) ++; + } + + return ret; +} + +/** + * Expand a single variable + * @param parser + * @param ptr + * 
@param in_len + * @param dest + * @param out_len + * @return + */ +static const char * +ucl_expand_single_variable (struct ucl_parser *parser, const char *ptr, + size_t in_len, unsigned char **dest, size_t out_len) +{ + unsigned char *d = *dest, *dst; + const char *p = ptr + 1, *ret; + struct ucl_variable *var; + size_t dstlen; + bool need_free = false; + bool found = false; + bool strict = false; + + ret = ptr + 1; + /* For the $ sign */ + in_len --; + + if (*p == '$') { + *d++ = *p++; + *dest = d; + return p; + } + else if (*p == '{') { + p ++; + in_len --; + strict = true; + ret += 2; + } + + LL_FOREACH (parser->variables, var) { + if (out_len >= var->value_len && in_len >= (var->var_len + (strict ? 1 : 0))) { + if (memcmp (p, var->var, var->var_len) == 0) { + if (!strict || p[var->var_len] == '}') { + memcpy (d, var->value, var->value_len); + ret += var->var_len; + d += var->value_len; + found = true; + break; + } + } + } + } + + if (!found) { + if (strict && parser->var_handler != NULL) { + dstlen = out_len; + + if (parser->var_handler (p, in_len, &dst, &dstlen, &need_free, + parser->var_data)) { + if (dstlen > out_len) { + /* We do not have enough space! 
*/ + if (need_free) { + free (dst); + } + } + else { + memcpy(d, dst, dstlen); + ret += in_len; + d += dstlen; + found = true; + + if (need_free) { + free(dst); + } + } + } + } + + /* Leave variable as is, in this case we use dest */ + if (!found) { + if (strict && out_len >= 2) { + /* Copy '${' */ + memcpy (d, ptr, 2); + d += 2; + ret --; + } + else { + memcpy (d, ptr, 1); + d ++; + } + } + } + + *dest = d; + return ret; +} + +/** + * Expand variables in string + * @param parser + * @param dst + * @param src + * @param in_len + * @return + */ +static ssize_t +ucl_expand_variable (struct ucl_parser *parser, unsigned char **dst, + const char *src, size_t in_len) +{ + const char *p, *end = src + in_len; + unsigned char *d, *d_end; + size_t out_len = 0; + bool vars_found = false; + + if (parser->flags & UCL_PARSER_DISABLE_MACRO) { + *dst = NULL; + return in_len; + } + + p = src; + while (p != end) { + if (*p == '$' && p + 1 != end) { + p = ucl_check_variable (parser, p + 1, end - p - 1, &out_len, &vars_found); + } + else { + p ++; + out_len ++; + } + } + + if (!vars_found) { + /* Trivial case */ + *dst = NULL; + return in_len; + } + + *dst = UCL_ALLOC (out_len + 1); + if (*dst == NULL) { + return in_len; + } + + d = *dst; + d_end = d + out_len; + p = src; + while (p != end && d != d_end) { + if (*p == '$' && p + 1 != end) { + p = ucl_expand_single_variable (parser, p, end - p, &d, d_end - d); + } + else { + *d++ = *p++; + } + } + + *d = '\0'; + + return out_len; +} + +/** + * Store or copy pointer to the trash stack + * @param parser parser object + * @param src src string + * @param dst destination buffer (trash stack pointer) + * @param dst_const const destination pointer (e.g. 
value of object) + * @param in_len input length + * @param need_unescape need to unescape source (and copy it) + * @param need_lowercase need to lowercase value (and copy) + * @param need_expand need to expand variables (and copy as well) + * @param unescape_squote unescape single quoted string + * @return output length (excluding \0 symbol) + */ +static inline ssize_t +ucl_copy_or_store_ptr (struct ucl_parser *parser, + const unsigned char *src, unsigned char **dst, + const char **dst_const, size_t in_len, + bool need_unescape, bool need_lowercase, bool need_expand, + bool unescape_squote) +{ + ssize_t ret = -1, tret; + unsigned char *tmp; + + if (need_unescape || need_lowercase || + (need_expand && parser->variables != NULL) || + !(parser->flags & UCL_PARSER_ZEROCOPY)) { + /* Copy string */ + *dst = UCL_ALLOC (in_len + 1); + if (*dst == NULL) { + ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for a string", + &parser->err); + return false; + } + if (need_lowercase) { + ret = ucl_strlcpy_tolower (*dst, src, in_len + 1); + } + else { + ret = ucl_strlcpy_unsafe (*dst, src, in_len + 1); + } + + if (need_unescape) { + if (!unescape_squote) { + ret = ucl_unescape_json_string (*dst, ret); + } + else { + ret = ucl_unescape_squoted_string (*dst, ret); + } + } + + if (need_expand) { + tmp = *dst; + tret = ret; + ret = ucl_expand_variable (parser, dst, tmp, ret); + if (*dst == NULL) { + /* Nothing to expand */ + *dst = tmp; + ret = tret; + } + else { + /* Free unexpanded value */ + UCL_FREE (in_len + 1, tmp); + } + } + *dst_const = *dst; + } + else { + *dst_const = src; + ret = in_len; + } + + return ret; +} + +/** + * Create and append an object at the specified level + * @param parser + * @param is_array + * @param level + * @return + */ +static inline ucl_object_t * +ucl_parser_add_container (ucl_object_t *obj, struct ucl_parser *parser, + bool is_array, uint32_t level, bool has_obrace) +{ + struct ucl_stack *st; + ucl_object_t *nobj; + + if (obj == NULL) { 
+ nobj = ucl_object_new_full (is_array ? UCL_ARRAY : UCL_OBJECT, parser->chunks->priority); + if (nobj == NULL) { + goto enomem0; + } + } else { + if (obj->type == (is_array ? UCL_OBJECT : UCL_ARRAY)) { + /* Bad combination for merge: array and object */ + ucl_set_err (parser, UCL_EMERGE, + "cannot merge an object with an array", + &parser->err); + + return NULL; + } + nobj = obj; + nobj->type = is_array ? UCL_ARRAY : UCL_OBJECT; + } + + if (!is_array) { + if (nobj->value.ov == NULL) { + nobj->value.ov = ucl_hash_create (parser->flags & UCL_PARSER_KEY_LOWERCASE); + if (nobj->value.ov == NULL) { + goto enomem1; + } + } + parser->state = UCL_STATE_KEY; + } else { + parser->state = UCL_STATE_VALUE; + } + + st = UCL_ALLOC (sizeof (struct ucl_stack)); + + if (st == NULL) { + goto enomem1; + } + + st->obj = nobj; + + if (level >= UINT16_MAX) { + ucl_set_err (parser, UCL_ENESTED, + "objects are nesting too deep (over 65535 limit)", + &parser->err); + if (nobj != obj) { + ucl_object_unref (obj); + } + + UCL_FREE(sizeof (struct ucl_stack), st); + + return NULL; + } + + + st->e.params.level = level; + st->e.params.line = parser->chunks->line; + st->chunk = parser->chunks; + + if (has_obrace) { + st->e.params.flags = UCL_STACK_HAS_OBRACE; + } + else { + st->e.params.flags = 0; + } + + LL_PREPEND (parser->stack, st); + parser->cur_obj = nobj; + + return nobj; +enomem1: + if (nobj != obj) + ucl_object_unref (nobj); +enomem0: + ucl_set_err (parser, UCL_EINTERNAL, "cannot allocate memory for an object", + &parser->err); + return NULL; +} + +int +ucl_maybe_parse_number (ucl_object_t *obj, + const char *start, const char *end, const char **pos, + bool allow_double, bool number_bytes, bool allow_time) +{ + const char *p = start, *c = start; + char *endptr; + bool got_dot = false, got_exp = false, need_double = false, + is_time = false, valid_start = false, is_hex = false; + int is_neg = 0; + double dv = 0; + int64_t lv = 0; + + if (*p == '-') { + is_neg = 1; + c ++; + p ++; + } + 
while (p < end) { + if (is_hex && isxdigit (*p)) { + p ++; + } + else if (isdigit (*p)) { + valid_start = true; + p ++; + } + else if (!is_hex && (*p == 'x' || *p == 'X')) { + is_hex = true; + allow_double = false; + c = p + 1; + p ++; + } + else if (allow_double) { + if (p == c) { + /* Empty digits sequence, not a number */ + *pos = start; + return EINVAL; + } + else if (*p == '.') { + if (got_dot) { + /* Double dots, not a number */ + *pos = start; + return EINVAL; + } + else { + got_dot = true; + need_double = true; + p ++; + } + } + else if (*p == 'e' || *p == 'E') { + if (got_exp) { + /* Double exp, not a number */ + *pos = start; + return EINVAL; + } + else { + got_exp = true; + need_double = true; + p ++; + if (p >= end) { + *pos = start; + return EINVAL; + } + if (!isdigit (*p) && *p != '+' && *p != '-') { + /* Wrong exponent sign */ + *pos = start; + return EINVAL; + } + else { + p ++; + } + } + } + else { + /* Got the end of the number, need to check */ + break; + } + } + else if (!allow_double && *p == '.') { + /* Unexpected dot */ + *pos = start; + return EINVAL; + } + else { + break; + } + } + + if (!valid_start || p == c) { + *pos = start; + return EINVAL; + } + + char numbuf[128]; + + if ((size_t)(p - c + 1) >= sizeof(numbuf)) { + *pos = start; + return EINVAL; + } + + if (is_neg) { + numbuf[0] = '-'; + ucl_strlcpy (&numbuf[1], c, p - c + 1); + } + else { + ucl_strlcpy (numbuf, c, p - c + 1); + } + + errno = 0; + if (need_double) { + dv = strtod (numbuf, &endptr); + } + else { + if (is_hex) { + lv = strtoimax (numbuf, &endptr, 16); + } + else { + lv = strtoimax (numbuf, &endptr, 10); + } + } + if (errno == ERANGE) { + *pos = start; + return ERANGE; + } + + /* Now check endptr and move it from numbuf to the real ending */ + if (endptr != NULL) { + long shift = endptr - numbuf - is_neg; + endptr = (char *)c + shift; + } + if (endptr >= end) { + p = end; + goto set_obj; + } + if (endptr == NULL || ucl_lex_is_atom_end (*endptr) || *endptr == '\0') { + p 
= endptr; + goto set_obj; + } + + if (endptr < end && endptr != start) { + switch (*p) { + case 'm': + case 'M': + case 'g': + case 'G': + case 'k': + case 'K': + if (end - p >= 2) { + if (p[1] == 's' || p[1] == 'S') { + /* Milliseconds */ + if (!need_double) { + need_double = true; + dv = lv; + } + is_time = true; + if (p[0] == 'm' || p[0] == 'M') { + dv /= 1000.; + } + else { + dv *= ucl_lex_num_multiplier (*p, false); + } + p += 2; + if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { + *pos = start; + return EINVAL; + } + goto set_obj; + } + else if (number_bytes || (p[1] == 'b' || p[1] == 'B')) { + /* Bytes */ + if (need_double) { + need_double = false; + lv = dv; + } + lv *= ucl_lex_num_multiplier (*p, true); + p += 2; + if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { + *pos = start; + return EINVAL; + } + goto set_obj; + } + else if (ucl_lex_is_atom_end (p[1])) { + if (need_double) { + dv *= ucl_lex_num_multiplier (*p, false); + } + else { + lv *= ucl_lex_num_multiplier (*p, number_bytes); + } + p ++; + goto set_obj; + } + else if (allow_time && end - p >= 3) { + if (tolower (p[0]) == 'm' && + tolower (p[1]) == 'i' && + tolower (p[2]) == 'n') { + /* Minutes */ + if (!need_double) { + need_double = true; + dv = lv; + } + is_time = true; + dv *= 60.; + p += 3; + if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { + *pos = start; + return EINVAL; + } + goto set_obj; + } + } + } + else { + if (need_double) { + dv *= ucl_lex_num_multiplier (*p, false); + } + else { + lv *= ucl_lex_num_multiplier (*p, number_bytes); + } + p ++; + if (end - p > 0 && !ucl_lex_is_atom_end (*p)) { + *pos = start; + return EINVAL; + } + goto set_obj; + } + break; + case 'S': + case 's': + if (allow_time && + (p == end - 1 || ucl_lex_is_atom_end (p[1]))) { + if (!need_double) { + need_double = true; + dv = lv; + } + p ++; + is_time = true; + goto set_obj; + } + break; + case 'h': + case 'H': + case 'd': + case 'D': + case 'w': + case 'W': + case 'Y': + case 'y': + if (allow_time && + (p == end 
- 1 || ucl_lex_is_atom_end (p[1]))) { + if (!need_double) { + need_double = true; + dv = lv; + } + is_time = true; + dv *= ucl_lex_time_multiplier (*p); + p ++; + goto set_obj; + } + break; + case '\t': + case ' ': + while (p < end && ucl_test_character(*p, UCL_CHARACTER_WHITESPACE)) { + p++; + } + if (ucl_lex_is_atom_end(*p)) + goto set_obj; + break; + } + } + else if (endptr == end) { + /* Just a number at the end of chunk */ + p = end; + goto set_obj; + } + + *pos = c; + return EINVAL; + +set_obj: + if (obj != NULL) { + if (allow_double && (need_double || is_time)) { + if (!is_time) { + obj->type = UCL_FLOAT; + } + else { + obj->type = UCL_TIME; + } + obj->value.dv = dv; + } + else { + obj->type = UCL_INT; + obj->value.iv = lv; + } + } + *pos = p; + return 0; +} + +/** + * Parse possible number + * @param parser + * @param chunk + * @param obj + * @return true if a number has been parsed + */ +static bool +ucl_lex_number (struct ucl_parser *parser, + struct ucl_chunk *chunk, ucl_object_t *obj) +{ + const unsigned char *pos; + int ret; + + ret = ucl_maybe_parse_number (obj, chunk->pos, chunk->end, (const char **)&pos, + true, false, ((parser->flags & UCL_PARSER_NO_TIME) == 0)); + + if (ret == 0) { + chunk->remain -= pos - chunk->pos; + chunk->column += pos - chunk->pos; + chunk->pos = pos; + return true; + } + else if (ret == ERANGE) { + ucl_set_err (parser, UCL_ESYNTAX, "numeric value out of range", + &parser->err); + } + + return false; +} + +/** + * Parse quoted string with possible escapes + * @param parser + * @param chunk + * @param need_unescape + * @param ucl_escape + * @param var_expand + * @return true if a string has been parsed + */ +static bool +ucl_lex_json_string (struct ucl_parser *parser, + struct ucl_chunk *chunk, + bool *need_unescape, + bool *ucl_escape, + bool *var_expand) +{ + const unsigned char *p = chunk->pos; + unsigned char c; + int i; + + while (p < chunk->end) { + c = *p; + if (c < 0x1F) { + /* Unmasked control character */ + if (c == 
'\n') { + ucl_set_err (parser, UCL_ESYNTAX, "unexpected newline", + &parser->err); + } + else { + ucl_set_err (parser, UCL_ESYNTAX, "unexpected control character", + &parser->err); + } + return false; + } + else if (c == '\\') { + ucl_chunk_skipc (chunk, p); + if (p >= chunk->end) { + ucl_set_err (parser, UCL_ESYNTAX, "unfinished escape character", + &parser->err); + return false; + } + c = *p; + if (ucl_test_character (c, UCL_CHARACTER_ESCAPE)) { + if (c == 'u') { + ucl_chunk_skipc (chunk, p); + for (i = 0; i < 4 && p < chunk->end; i ++) { + if (!isxdigit (*p)) { + ucl_set_err (parser, UCL_ESYNTAX, "invalid utf escape", + &parser->err); + return false; + } + ucl_chunk_skipc (chunk, p); + } + if (p >= chunk->end) { + ucl_set_err (parser, UCL_ESYNTAX, + "unfinished escape character", + &parser->err); + return false; + } + } + else { + ucl_chunk_skipc (chunk, p); + } + } + *need_unescape = true; + *ucl_escape = true; + continue; + } + else if (c == '"') { + ucl_chunk_skipc (chunk, p); + return true; + } + else if (ucl_test_character (c, UCL_CHARACTER_UCL_UNSAFE)) { + *ucl_escape = true; + } + else if (c == '$') { + *var_expand = true; + } + ucl_chunk_skipc (chunk, p); + } + + ucl_set_err (parser, UCL_ESYNTAX, + "no quote at the end of json string", + &parser->err); + return false; +} + +/** + * Process single quoted string + * @param parser + * @param chunk + * @param need_unescape + * @return + */ +static bool +ucl_lex_squoted_string (struct ucl_parser *parser, + struct ucl_chunk *chunk, bool *need_unescape) +{ + const unsigned char *p = chunk->pos; + unsigned char c; + + while (p < chunk->end) { + c = *p; + if (c == '\\') { + ucl_chunk_skipc (chunk, p); + + if (p >= chunk->end) { + ucl_set_err (parser, UCL_ESYNTAX, + "unfinished escape character", + &parser->err); + return false; + } + else { + ucl_chunk_skipc (chunk, p); + } + + *need_unescape = true; + continue; + } + else if (c == '\'') { + ucl_chunk_skipc (chunk, p); + return true; + } + + ucl_chunk_skipc 
(chunk, p); + } + + ucl_set_err (parser, UCL_ESYNTAX, + "no quote at the end of single quoted string", + &parser->err); + return false; +} + +static void +ucl_parser_append_elt (struct ucl_parser *parser, ucl_hash_t *cont, + ucl_object_t *top, + ucl_object_t *elt) +{ + ucl_object_t *nobj; + + if ((parser->flags & UCL_PARSER_NO_IMPLICIT_ARRAYS) == 0) { + /* Implicit array */ + top->flags |= UCL_OBJECT_MULTIVALUE; + DL_APPEND (top, elt); + parser->stack->obj->len ++; + } + else { + if ((top->flags & UCL_OBJECT_MULTIVALUE) != 0) { + /* Just add to the explicit array */ + ucl_array_append (top, elt); + } + else { + /* Convert to an array */ + nobj = ucl_object_typed_new (UCL_ARRAY); + nobj->key = top->key; + nobj->keylen = top->keylen; + nobj->flags |= UCL_OBJECT_MULTIVALUE; + ucl_array_append (nobj, top); + ucl_array_append (nobj, elt); + ucl_hash_replace (cont, top, nobj); + } + } +} + +bool +ucl_parser_process_object_element (struct ucl_parser *parser, ucl_object_t *nobj) +{ + ucl_hash_t *container; + ucl_object_t *tobj = NULL, *cur; + char errmsg[256]; + + container = parser->stack->obj->value.ov; + + DL_FOREACH (parser->stack->obj, cur) { + tobj = __DECONST (ucl_object_t *, ucl_hash_search_obj (cur->value.ov, nobj)); + + if (tobj != NULL) { + break; + } + } + + + if (tobj == NULL) { + container = ucl_hash_insert_object (container, nobj, + parser->flags & UCL_PARSER_KEY_LOWERCASE); + if (container == NULL) { + return false; + } + nobj->prev = nobj; + nobj->next = NULL; + parser->stack->obj->len ++; + } + else { + unsigned priold = ucl_object_get_priority (tobj), + prinew = ucl_object_get_priority (nobj); + switch (parser->chunks->strategy) { + + case UCL_DUPLICATE_APPEND: + /* + * The logic here is the following: + * + * - if we have two objects with the same priority, then we form an + * implicit or explicit array + * - if a new object has bigger priority, then we overwrite an old one + * - if a new object has lower priority, then we ignore it + */ + /* Special 
case for inherited objects */ + if (tobj->flags & UCL_OBJECT_INHERITED) { + prinew = priold + 1; + } + + if (priold == prinew) { + ucl_parser_append_elt (parser, container, tobj, nobj); + } + else if (priold > prinew) { + /* + * We add this new object to a list of trash objects just to ensure + * that it won't come to any real object + * XXX: rather inefficient approach + */ + DL_APPEND (parser->trash_objs, nobj); + } + else { + ucl_hash_replace (container, tobj, nobj); + ucl_object_unref (tobj); + } + + break; + + case UCL_DUPLICATE_REWRITE: + /* We just rewrite old values regardless of priority */ + ucl_hash_replace (container, tobj, nobj); + ucl_object_unref (tobj); + + break; + + case UCL_DUPLICATE_ERROR: + snprintf(errmsg, sizeof(errmsg), + "duplicate element for key '%s' found", + nobj->key); + ucl_set_err (parser, UCL_EMERGE, errmsg, &parser->err); + return false; + + case UCL_DUPLICATE_MERGE: + /* + * Here we do have some old object so we just push it on top of objects stack + * Check priority and then perform the merge on the remaining objects + */ + if (tobj->type == UCL_OBJECT || tobj->type == UCL_ARRAY) { + ucl_object_unref (nobj); + nobj = tobj; + } + else if (priold == prinew) { + ucl_parser_append_elt (parser, container, tobj, nobj); + } + else if (priold > prinew) { + /* + * We add this new object to a list of trash objects just to ensure + * that it won't come to any real object + * XXX: rather inefficient approach + */ + DL_APPEND (parser->trash_objs, nobj); + } + else { + ucl_hash_replace (container, tobj, nobj); + ucl_object_unref (tobj); + } + break; + } + } + + parser->stack->obj->value.ov = container; + parser->cur_obj = nobj; + ucl_attach_comment (parser, nobj, false); + + return true; +} + +/** + * Parse a key in an object + * @param parser + * @param chunk + * @param next_key + * @param end_of_object + * @return true if a key has been parsed + */ +static bool +ucl_parse_key (struct ucl_parser *parser, struct ucl_chunk *chunk, + bool 
*next_key, bool *end_of_object) +{ + const unsigned char *p, *c = NULL, *end, *t; + const char *key = NULL; + bool got_quote = false, got_eq = false, got_semicolon = false, + need_unescape = false, ucl_escape = false, var_expand = false, + got_content = false, got_sep = false; + ucl_object_t *nobj; + ssize_t keylen; + + p = chunk->pos; + + if (*p == '.') { + /* It is macro actually */ + if (!(parser->flags & UCL_PARSER_DISABLE_MACRO)) { + ucl_chunk_skipc (chunk, p); + } + + parser->prev_state = parser->state; + parser->state = UCL_STATE_MACRO_NAME; + *end_of_object = false; + return true; + } + while (p < chunk->end) { + /* + * A key must start with alpha, number, '/' or '_' and end with space character + */ + if (c == NULL) { + if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { + if (!ucl_skip_comments (parser)) { + return false; + } + p = chunk->pos; + } + else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { + ucl_chunk_skipc (chunk, p); + } + else if (ucl_test_character (*p, UCL_CHARACTER_KEY_START)) { + /* The first symbol */ + c = p; + ucl_chunk_skipc (chunk, p); + got_content = true; + } + else if (*p == '"') { + /* JSON style key */ + c = p + 1; + got_quote = true; + got_content = true; + ucl_chunk_skipc (chunk, p); + } + else if (*p == '}') { + /* We have actually end of an object */ + *end_of_object = true; + return true; + } + else if (*p == '.') { + ucl_chunk_skipc (chunk, p); + parser->prev_state = parser->state; + parser->state = UCL_STATE_MACRO_NAME; + return true; + } + else { + /* Invalid identifier */ + ucl_set_err (parser, UCL_ESYNTAX, "key must begin with a letter", + &parser->err); + return false; + } + } + else { + /* Parse the body of a key */ + if (!got_quote) { + if (ucl_test_character (*p, UCL_CHARACTER_KEY)) { + got_content = true; + ucl_chunk_skipc (chunk, p); + } + else if (ucl_test_character (*p, UCL_CHARACTER_KEY_SEP)) { + end = p; + break; + } + else { + ucl_set_err (parser, UCL_ESYNTAX, "invalid character in 
a key", + &parser->err); + return false; + } + } + else { + /* We need to parse json like quoted string */ + if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { + return false; + } + /* Always escape keys obtained via json */ + end = chunk->pos - 1; + p = chunk->pos; + break; + } + } + } + + if (p >= chunk->end && got_content) { + ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); + return false; + } + else if (!got_content) { + return true; + } + *end_of_object = false; + /* We are now at the end of the key, need to parse the rest */ + while (p < chunk->end) { + if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { + ucl_chunk_skipc (chunk, p); + } + else if (*p == '=') { + if (!got_eq && !got_semicolon) { + ucl_chunk_skipc (chunk, p); + got_eq = true; + } + else { + ucl_set_err (parser, UCL_ESYNTAX, "unexpected '=' character", + &parser->err); + return false; + } + } + else if (*p == ':') { + if (!got_eq && !got_semicolon) { + ucl_chunk_skipc (chunk, p); + got_semicolon = true; + } + else { + ucl_set_err (parser, UCL_ESYNTAX, "unexpected ':' character", + &parser->err); + return false; + } + } + else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { + /* Check for comment */ + if (!ucl_skip_comments (parser)) { + return false; + } + p = chunk->pos; + } + else { + /* Start value */ + break; + } + } + + if (p >= chunk->end && got_content) { + ucl_set_err (parser, UCL_ESYNTAX, "unfinished key", &parser->err); + return false; + } + + got_sep = got_semicolon || got_eq; + + if (!got_sep) { + /* + * Maybe we have more keys nested, so search for termination character. + * Possible choices: + * 1) key1 key2 ... keyN [:=] value <- we treat that as error + * 2) key1 ... 
keyN {} or [] <- we treat that as nested objects + * 3) key1 value[;,\n] <- we treat that as linear object + */ + t = p; + *next_key = false; + while (ucl_test_character (*t, UCL_CHARACTER_WHITESPACE)) { + t ++; + } + /* Check first non-space character after a key */ + if (*t != '{' && *t != '[') { + while (t < chunk->end) { + if (*t == ',' || *t == ';' || *t == '\n' || *t == '\r') { + break; + } + else if (*t == '{' || *t == '[') { + *next_key = true; + break; + } + t ++; + } + } + } + + /* Create a new object */ + nobj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); + if (nobj == NULL) { + return false; + } + keylen = ucl_copy_or_store_ptr (parser, c, &nobj->trash_stack[UCL_TRASH_KEY], + &key, end - c, need_unescape, parser->flags & UCL_PARSER_KEY_LOWERCASE, + false, false); + if (keylen == -1) { + ucl_object_unref (nobj); + return false; + } + else if (keylen == 0) { + ucl_set_err (parser, UCL_ESYNTAX, "empty keys are not allowed", &parser->err); + ucl_object_unref (nobj); + return false; + } + + nobj->key = key; + nobj->keylen = keylen; + + if (!ucl_parser_process_object_element (parser, nobj)) { + return false; + } + + if (ucl_escape) { + nobj->flags |= UCL_OBJECT_NEED_KEY_ESCAPE; + } + + + return true; +} + +/** + * Parse a cl string + * @param parser + * @param chunk + * @param var_expand + * @param need_unescape + * @return true if a key has been parsed + */ +static bool +ucl_parse_string_value (struct ucl_parser *parser, + struct ucl_chunk *chunk, bool *var_expand, bool *need_unescape) +{ + const unsigned char *p; + enum { + UCL_BRACE_ROUND = 0, + UCL_BRACE_SQUARE, + UCL_BRACE_FIGURE + }; + int braces[3][2] = {{0, 0}, {0, 0}, {0, 0}}; + + p = chunk->pos; + + while (p < chunk->end) { + + /* Skip pairs of figure braces */ + if (*p == '{') { + braces[UCL_BRACE_FIGURE][0] ++; + } + else if (*p == '}') { + braces[UCL_BRACE_FIGURE][1] ++; + if (braces[UCL_BRACE_FIGURE][1] <= braces[UCL_BRACE_FIGURE][0]) { + /* This is not a termination symbol, 
continue */ + ucl_chunk_skipc (chunk, p); + continue; + } + } + /* Skip pairs of square braces */ + else if (*p == '[') { + braces[UCL_BRACE_SQUARE][0] ++; + } + else if (*p == ']') { + braces[UCL_BRACE_SQUARE][1] ++; + if (braces[UCL_BRACE_SQUARE][1] <= braces[UCL_BRACE_SQUARE][0]) { + /* This is not a termination symbol, continue */ + ucl_chunk_skipc (chunk, p); + continue; + } + } + else if (*p == '$') { + *var_expand = true; + } + else if (*p == '\\') { + *need_unescape = true; + ucl_chunk_skipc (chunk, p); + if (p < chunk->end) { + ucl_chunk_skipc (chunk, p); + } + continue; + } + + if (ucl_lex_is_atom_end (*p) || (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { + break; + } + ucl_chunk_skipc (chunk, p); + } + + return true; +} + +/** + * Parse multiline string ending with \n{term}\n + * @param parser + * @param chunk + * @param term + * @param term_len + * @param beg + * @param var_expand + * @return size of multiline string or 0 in case of error + */ +static int +ucl_parse_multiline_string (struct ucl_parser *parser, + struct ucl_chunk *chunk, const unsigned char *term, + int term_len, unsigned char const **beg, + bool *var_expand) +{ + const unsigned char *p, *c, *tend; + bool newline = false; + int len = 0; + + p = chunk->pos; + + c = p; + + while (p < chunk->end) { + if (newline) { + if (chunk->end - p < term_len) { + return 0; + } + else if (memcmp (p, term, term_len) == 0) { + tend = p + term_len; + if (*tend != '\n' && *tend != ';' && *tend != ',') { + /* Incomplete terminator */ + ucl_chunk_skipc (chunk, p); + continue; + } + len = p - c; + chunk->remain -= term_len; + chunk->pos = p + term_len; + chunk->column = term_len; + *beg = c; + break; + } + } + if (*p == '\n') { + newline = true; + } + else { + if (*p == '$') { + *var_expand = true; + } + newline = false; + } + ucl_chunk_skipc (chunk, p); + } + + return len; +} + +static inline ucl_object_t* +ucl_parser_get_container (struct ucl_parser *parser) +{ + ucl_object_t *t, *obj = NULL; + + 
if (parser == NULL || parser->stack == NULL || parser->stack->obj == NULL) { + return NULL; + } + + if (parser->stack->obj->type == UCL_ARRAY) { + /* Object must be allocated */ + obj = ucl_object_new_full (UCL_NULL, parser->chunks->priority); + t = parser->stack->obj; + + if (!ucl_array_append (t, obj)) { + ucl_object_unref (obj); + return NULL; + } + + parser->cur_obj = obj; + ucl_attach_comment (parser, obj, false); + } + else { + /* Object has been already allocated */ + obj = parser->cur_obj; + } + + return obj; +} + +/** + * Handle value data + * @param parser + * @param chunk + * @return + */ +static bool +ucl_parse_value (struct ucl_parser *parser, struct ucl_chunk *chunk) +{ + const unsigned char *p, *c; + ucl_object_t *obj = NULL; + unsigned int stripped_spaces; + ssize_t str_len; + bool need_unescape = false, ucl_escape = false, var_expand = false; + + p = chunk->pos; + + /* Skip any spaces and comments */ + if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) || + (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1]))) { + while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { + ucl_chunk_skipc (chunk, p); + } + if (!ucl_skip_comments (parser)) { + return false; + } + p = chunk->pos; + } + + while (p < chunk->end) { + c = p; + switch (*p) { + case '"': + ucl_chunk_skipc (chunk, p); + + if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, + &var_expand)) { + return false; + } + + obj = ucl_parser_get_container (parser); + if (!obj) { + return false; + } + + str_len = chunk->pos - c - 2; + obj->type = UCL_STRING; + if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, false, + var_expand, false)) == -1) { + return false; + } + + obj->len = str_len; + parser->state = UCL_STATE_AFTER_VALUE; + + return true; + break; + case '\'': + ucl_chunk_skipc (chunk, p); + + if (!ucl_lex_squoted_string (parser, chunk, &need_unescape)) { 
+ return false; + } + + obj = ucl_parser_get_container (parser); + if (!obj) { + return false; + } + + str_len = chunk->pos - c - 2; + obj->type = UCL_STRING; + obj->flags |= UCL_OBJECT_SQUOTED; + + if ((str_len = ucl_copy_or_store_ptr (parser, c + 1, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, false, + var_expand, true)) == -1) { + return false; + } + + obj->len = str_len; + + parser->state = UCL_STATE_AFTER_VALUE; + + return true; + break; + case '{': + obj = ucl_parser_get_container (parser); + if (obj == NULL) { + return false; + } + /* We have a new object */ + if (parser->stack) { + obj = ucl_parser_add_container (obj, parser, false, + parser->stack->e.params.level, true); + } + else { + return false; + } + if (obj == NULL) { + return false; + } + + ucl_chunk_skipc (chunk, p); + + return true; + break; + case '[': + obj = ucl_parser_get_container (parser); + if (obj == NULL) { + return false; + } + /* We have a new array */ + if (parser->stack) { + obj = ucl_parser_add_container (obj, parser, true, + parser->stack->e.params.level, true); + } + else { + return false; + } + + if (obj == NULL) { + return false; + } + + ucl_chunk_skipc (chunk, p); + + return true; + break; + case ']': + /* We have the array ending */ + if (parser->stack && parser->stack->obj->type == UCL_ARRAY) { + parser->state = UCL_STATE_AFTER_VALUE; + return true; + } + else { + goto parse_string; + } + break; + case '<': + obj = ucl_parser_get_container (parser); + /* We have something like multiline value, which must be <<[A-Z]+\n */ + if (chunk->end - p > 3) { + if (memcmp (p, "<<", 2) == 0) { + p += 2; + /* We allow only uppercase characters in multiline definitions */ + while (p < chunk->end && *p >= 'A' && *p <= 'Z') { + p ++; + } + if(p == chunk->end) { + ucl_set_err (parser, UCL_ESYNTAX, + "unterminated multiline value", &parser->err); + return false; + } + if (*p =='\n') { + /* Set chunk positions and start multiline parsing */ + chunk->remain -= p 
- c + 1; + c += 2; + chunk->pos = p + 1; + chunk->column = 0; + chunk->line ++; + if ((str_len = ucl_parse_multiline_string (parser, chunk, c, + p - c, &c, &var_expand)) == 0) { + ucl_set_err (parser, UCL_ESYNTAX, + "unterminated multiline value", &parser->err); + return false; + } + + obj->type = UCL_STRING; + obj->flags |= UCL_OBJECT_MULTILINE; + if ((str_len = ucl_copy_or_store_ptr (parser, c, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len - 1, false, + false, var_expand, false)) == -1) { + return false; + } + obj->len = str_len; + + parser->state = UCL_STATE_AFTER_VALUE; + + return true; + } + } + } + /* Fallback to ordinary strings */ + /* FALLTHRU */ + default: +parse_string: + if (obj == NULL) { + obj = ucl_parser_get_container (parser); + } + + /* Parse atom */ + if (ucl_test_character (*p, UCL_CHARACTER_VALUE_DIGIT_START)) { + if (!ucl_lex_number (parser, chunk, obj)) { + if (parser->state == UCL_STATE_ERROR) { + return false; + } + } + else { + parser->state = UCL_STATE_AFTER_VALUE; + return true; + } + /* Fallback to normal string */ + } + + if (!ucl_parse_string_value (parser, chunk, &var_expand, + &need_unescape)) { + return false; + } + /* Cut trailing spaces */ + stripped_spaces = 0; + while (ucl_test_character (*(chunk->pos - 1 - stripped_spaces), + UCL_CHARACTER_WHITESPACE)) { + stripped_spaces ++; + } + str_len = chunk->pos - c - stripped_spaces; + if (str_len <= 0) { + ucl_set_err (parser, UCL_ESYNTAX, "string value must not be empty", + &parser->err); + return false; + } + else if (str_len == 4 && memcmp (c, "null", 4) == 0) { + obj->len = 0; + obj->type = UCL_NULL; + } + else if (str_len == 3 && memcmp (c, "nan", 3) == 0) { + obj->len = 0; + obj->type = UCL_FLOAT; + obj->value.dv = NAN; + } + else if (str_len == 3 && memcmp (c, "inf", 3) == 0) { + obj->len = 0; + obj->type = UCL_FLOAT; + obj->value.dv = INFINITY; + } + else if (!ucl_maybe_parse_boolean (obj, c, str_len)) { + obj->type = UCL_STRING; + if ((str_len = 
ucl_copy_or_store_ptr (parser, c, + &obj->trash_stack[UCL_TRASH_VALUE], + &obj->value.sv, str_len, need_unescape, + false, var_expand, false)) == -1) { + return false; + } + obj->len = str_len; + } + + parser->state = UCL_STATE_AFTER_VALUE; + + return true; + break; + } + } + + return true; +} + +/** + * Handle after value data + * @param parser + * @param chunk + * @return + */ +static bool +ucl_parse_after_value (struct ucl_parser *parser, struct ucl_chunk *chunk) +{ + const unsigned char *p; + bool got_sep = false; + struct ucl_stack *st; + + p = chunk->pos; + + while (p < chunk->end) { + if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { + /* Skip whitespaces */ + ucl_chunk_skipc (chunk, p); + } + else if (chunk->remain >= 2 && ucl_lex_is_comment (p[0], p[1])) { + /* Skip comment */ + if (!ucl_skip_comments (parser)) { + return false; + } + /* Treat comment as a separator */ + got_sep = true; + p = chunk->pos; + } + else if (ucl_test_character (*p, UCL_CHARACTER_VALUE_END)) { + if (*p == '}' || *p == ']') { + if (parser->stack == NULL) { + ucl_set_err (parser, UCL_ESYNTAX, + "end of array or object detected without corresponding start", + &parser->err); + return false; + } + if ((*p == '}' && parser->stack->obj->type == UCL_OBJECT) || + (*p == ']' && parser->stack->obj->type == UCL_ARRAY)) { + + /* Pop all nested objects from a stack */ + st = parser->stack; + + if (!(st->e.params.flags & UCL_STACK_HAS_OBRACE)) { + parser->err_code = UCL_EUNPAIRED; + ucl_create_err (&parser->err, + "%s:%d object closed with } is not opened with { at line %d", + chunk->fname ? 
chunk->fname : "memory", + parser->chunks->line, st->e.params.line); + + return false; + } + + parser->stack = st->next; + UCL_FREE (sizeof (struct ucl_stack), st); + + if (parser->cur_obj) { + ucl_attach_comment (parser, parser->cur_obj, true); + } + + while (parser->stack != NULL) { + st = parser->stack; + + if (st->next == NULL) { + break; + } + else if (st->next->e.params.level == st->e.params.level) { + break; + } + + + parser->stack = st->next; + parser->cur_obj = st->obj; + UCL_FREE (sizeof (struct ucl_stack), st); + } + } + else { + ucl_set_err (parser, UCL_ESYNTAX, + "unexpected terminating symbol detected", + &parser->err); + return false; + } + + if (parser->stack == NULL) { + /* Ignore everything after a top object */ + return true; + } + else { + ucl_chunk_skipc (chunk, p); + } + got_sep = true; + } + else { + /* Got a separator */ + got_sep = true; + ucl_chunk_skipc (chunk, p); + } + } + else { + /* Anything else */ + if (!got_sep) { + ucl_set_err (parser, UCL_ESYNTAX, "delimiter is missing", + &parser->err); + return false; + } + return true; + } + } + + return true; +} + +static bool +ucl_skip_macro_as_comment (struct ucl_parser *parser, + struct ucl_chunk *chunk) +{ + const unsigned char *p, *c; + enum { + macro_skip_start = 0, + macro_has_symbols, + macro_has_obrace, + macro_has_quote, + macro_has_backslash, + macro_has_sqbrace, + macro_save + } state = macro_skip_start, prev_state = macro_skip_start; + + p = chunk->pos; + c = chunk->pos; + + while (p < chunk->end) { + switch (state) { + case macro_skip_start: + if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE)) { + state = macro_has_symbols; + } + else if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { + state = macro_save; + continue; + } + + ucl_chunk_skipc (chunk, p); + break; + + case macro_has_symbols: + if (*p == '{') { + state = macro_has_sqbrace; + } + else if (*p == '(') { + state = macro_has_obrace; + } + else if (*p == '"') { + state = macro_has_quote; + } + else if 
(*p == '\n') { + state = macro_save; + continue; + } + + ucl_chunk_skipc (chunk, p); + break; + + case macro_has_obrace: + if (*p == '\\') { + prev_state = state; + state = macro_has_backslash; + } + else if (*p == ')') { + state = macro_has_symbols; + } + + ucl_chunk_skipc (chunk, p); + break; + + case macro_has_sqbrace: + if (*p == '\\') { + prev_state = state; + state = macro_has_backslash; + } + else if (*p == '}') { + state = macro_save; + } + + ucl_chunk_skipc (chunk, p); + break; + + case macro_has_quote: + if (*p == '\\') { + prev_state = state; + state = macro_has_backslash; + } + else if (*p == '"') { + state = macro_save; + } + + ucl_chunk_skipc (chunk, p); + break; + + case macro_has_backslash: + state = prev_state; + ucl_chunk_skipc (chunk, p); + break; + + case macro_save: + if (parser->flags & UCL_PARSER_SAVE_COMMENTS) { + ucl_save_comment (parser, c, p - c); + } + + return true; + } + } + + return false; +} + +/** + * Handle macro data + * @param parser + * @param chunk + * @param marco + * @param macro_start + * @param macro_len + * @return + */ +static bool +ucl_parse_macro_value (struct ucl_parser *parser, + struct ucl_chunk *chunk, struct ucl_macro *macro, + unsigned char const **macro_start, size_t *macro_len) +{ + const unsigned char *p, *c; + bool need_unescape = false, ucl_escape = false, var_expand = false; + + p = chunk->pos; + + switch (*p) { + case '"': + /* We have macro value encoded in quotes */ + c = p; + ucl_chunk_skipc (chunk, p); + if (!ucl_lex_json_string (parser, chunk, &need_unescape, &ucl_escape, &var_expand)) { + return false; + } + + *macro_start = c + 1; + *macro_len = chunk->pos - c - 2; + p = chunk->pos; + break; + case '{': + /* We got a multiline macro body */ + ucl_chunk_skipc (chunk, p); + /* Skip spaces at the beginning */ + while (p < chunk->end) { + if (ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { + ucl_chunk_skipc (chunk, p); + } + else { + break; + } + } + c = p; + while (p < chunk->end) { + if (*p 
== '}') { + break; + } + ucl_chunk_skipc (chunk, p); + } + *macro_start = c; + *macro_len = p - c; + ucl_chunk_skipc (chunk, p); + break; + default: + /* Macro is not enclosed in quotes or braces */ + c = p; + while (p < chunk->end) { + if (ucl_lex_is_atom_end (*p)) { + break; + } + ucl_chunk_skipc (chunk, p); + } + *macro_start = c; + *macro_len = p - c; + break; + } + + /* We are at the end of a macro */ + /* Skip ';' and space characters and return to previous state */ + while (p < chunk->end) { + if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && *p != ';') { + break; + } + ucl_chunk_skipc (chunk, p); + } + return true; +} + +/** + * Parse macro arguments as UCL object + * @param parser parser structure + * @param chunk the current data chunk + * @return + */ +static ucl_object_t * +ucl_parse_macro_arguments (struct ucl_parser *parser, + struct ucl_chunk *chunk) +{ + ucl_object_t *res = NULL; + struct ucl_parser *params_parser; + int obraces = 1, ebraces = 0, state = 0; + const unsigned char *p, *c; + size_t args_len = 0; + struct ucl_parser_saved_state saved; + + saved.column = chunk->column; + saved.line = chunk->line; + saved.pos = chunk->pos; + saved.remain = chunk->remain; + p = chunk->pos; + + if (*p != '(' || chunk->remain < 2) { + return NULL; + } + + /* Set begin and start */ + ucl_chunk_skipc (chunk, p); + c = p; + + while ((p) < (chunk)->end) { + switch (state) { + case 0: + /* Parse symbols and check for '(', ')' and '"' */ + if (*p == '(') { + obraces ++; + } + else if (*p == ')') { + ebraces ++; + } + else if (*p == '"') { + state = 1; + } + /* Check pairing */ + if (obraces == ebraces) { + state = 99; + } + else { + args_len ++; + } + /* Check overflow */ + if (chunk->remain == 0) { + goto restore_chunk; + } + ucl_chunk_skipc (chunk, p); + break; + case 1: + /* We have quote character, so skip all but quotes */ + if (*p == '"' && *(p - 1) != '\\') { + state = 0; + } + if (chunk->remain == 0) { + goto restore_chunk; + } + args_len 
++; + ucl_chunk_skipc (chunk, p); + break; + case 99: + /* + * We have read the full body of arguments, so we need to parse and set + * object from that + */ + params_parser = ucl_parser_new (parser->flags); + if (!ucl_parser_add_chunk (params_parser, c, args_len)) { + ucl_set_err (parser, UCL_ESYNTAX, "macro arguments parsing error", + &parser->err); + } + else { + res = ucl_parser_get_object (params_parser); + } + ucl_parser_free (params_parser); + + return res; + + break; + } + } + + return res; + +restore_chunk: + chunk->column = saved.column; + chunk->line = saved.line; + chunk->pos = saved.pos; + chunk->remain = saved.remain; + + return NULL; +} + +#define SKIP_SPACES_COMMENTS(parser, chunk, p) do { \ + while ((p) < (chunk)->end) { \ + if (!ucl_test_character (*(p), UCL_CHARACTER_WHITESPACE_UNSAFE)) { \ + if ((chunk)->remain >= 2 && ucl_lex_is_comment ((p)[0], (p)[1])) { \ + if (!ucl_skip_comments (parser)) { \ + return false; \ + } \ + p = (chunk)->pos; \ + } \ + break; \ + } \ + ucl_chunk_skipc (chunk, p); \ + } \ +} while(0) + +/** + * Handle the main states of rcl parser + * @param parser parser structure + * @return true if chunk has been parsed and false in case of error + */ +static bool +ucl_state_machine (struct ucl_parser *parser) +{ + ucl_object_t *obj, *macro_args; + struct ucl_chunk *chunk = parser->chunks; + const unsigned char *p, *c = NULL, *macro_start = NULL; + unsigned char *macro_escaped; + size_t macro_len = 0; + struct ucl_macro *macro = NULL; + bool next_key = false, end_of_object = false, ret; + + if (parser->top_obj == NULL) { + parser->state = UCL_STATE_INIT; + } + + p = chunk->pos; + while (chunk->pos < chunk->end) { + switch (parser->state) { + case UCL_STATE_INIT: + /* + * At the init state we can either go to the parse array or object + * if we got [ or { correspondingly or can just treat new data as + * a key of newly created object + */ + if (!ucl_skip_comments (parser)) { + parser->prev_state = parser->state; + parser->state = 
UCL_STATE_ERROR; + return false; + } + else { + bool seen_obrace = false; + + /* Skip any spaces */ + while (p < chunk->end && ucl_test_character (*p, + UCL_CHARACTER_WHITESPACE_UNSAFE)) { + ucl_chunk_skipc (chunk, p); + } + + p = chunk->pos; + + if (p < chunk->end) { + if (*p == '[') { + parser->state = UCL_STATE_VALUE; + ucl_chunk_skipc (chunk, p); + seen_obrace = true; + } + else { + + if (*p == '{') { + ucl_chunk_skipc (chunk, p); + parser->state = UCL_STATE_KEY_OBRACE; + seen_obrace = true; + } + else { + parser->state = UCL_STATE_KEY; + } + } + } + + if (parser->top_obj == NULL) { + if (parser->state == UCL_STATE_VALUE) { + obj = ucl_parser_add_container (NULL, parser, true, 0, + seen_obrace); + } + else { + obj = ucl_parser_add_container (NULL, parser, false, 0, + seen_obrace); + } + + if (obj == NULL) { + return false; + } + + parser->top_obj = obj; + parser->cur_obj = obj; + } + + } + break; + case UCL_STATE_KEY: + case UCL_STATE_KEY_OBRACE: + /* Skip any spaces */ + while (p < chunk->end && ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE)) { + ucl_chunk_skipc (chunk, p); + } + if (p == chunk->end || *p == '}') { + /* We have the end of an object */ + parser->state = UCL_STATE_AFTER_VALUE; + continue; + } + if (parser->stack == NULL) { + /* No objects are on stack, but we want to parse a key */ + ucl_set_err (parser, UCL_ESYNTAX, "top object is finished but the parser " + "expects a key", &parser->err); + parser->prev_state = parser->state; + parser->state = UCL_STATE_ERROR; + return false; + } + if (!ucl_parse_key (parser, chunk, &next_key, &end_of_object)) { + parser->prev_state = parser->state; + parser->state = UCL_STATE_ERROR; + return false; + } + + if (end_of_object) { + p = chunk->pos; + parser->state = UCL_STATE_AFTER_VALUE; + continue; + } + else if (parser->state != UCL_STATE_MACRO_NAME) { + if (next_key && parser->stack->obj->type == UCL_OBJECT) { + /* Parse more keys and nest objects accordingly */ + obj = ucl_parser_add_container 
(parser->cur_obj, + parser, + false, + parser->stack->e.params.level + 1, + parser->state == UCL_STATE_KEY_OBRACE); + if (obj == NULL) { + return false; + } + } + else { + parser->state = UCL_STATE_VALUE; + } + } + else { + c = chunk->pos; + } + p = chunk->pos; + break; + case UCL_STATE_VALUE: + /* We need to check what we do have */ + if (!parser->cur_obj || !ucl_parse_value (parser, chunk)) { + parser->prev_state = parser->state; + parser->state = UCL_STATE_ERROR; + return false; + } + /* State is set in ucl_parse_value call */ + p = chunk->pos; + break; + case UCL_STATE_AFTER_VALUE: + if (!ucl_parse_after_value (parser, chunk)) { + parser->prev_state = parser->state; + parser->state = UCL_STATE_ERROR; + return false; + } + + if (parser->stack != NULL) { + if (parser->stack->obj->type == UCL_OBJECT) { + parser->state = UCL_STATE_KEY; + } + else { + /* Array */ + parser->state = UCL_STATE_VALUE; + } + } + else { + /* Skip everything at the end */ + return true; + } + + p = chunk->pos; + break; + case UCL_STATE_MACRO_NAME: + if (parser->flags & UCL_PARSER_DISABLE_MACRO) { + if (!ucl_skip_macro_as_comment (parser, chunk)) { + /* We have invalid macro */ + ucl_create_err (&parser->err, + "error at %s:%d at column %d: invalid macro", + chunk->fname ? chunk->fname : "memory", + chunk->line, + chunk->column); + parser->state = UCL_STATE_ERROR; + return false; + } + else { + p = chunk->pos; + parser->state = parser->prev_state; + } + } + else { + if (!ucl_test_character (*p, UCL_CHARACTER_WHITESPACE_UNSAFE) && + *p != '(') { + ucl_chunk_skipc (chunk, p); + } + else { + if (c != NULL && p - c > 0) { + /* We got macro name */ + macro_len = (size_t) (p - c); + HASH_FIND (hh, parser->macroes, c, macro_len, macro); + if (macro == NULL) { + ucl_create_err (&parser->err, + "error at %s:%d at column %d: " + "unknown macro: '%.*s', character: '%c'", + chunk->fname ? 
chunk->fname : "memory", + chunk->line, + chunk->column, + (int) (p - c), + c, + *chunk->pos); + parser->state = UCL_STATE_ERROR; + return false; + } + /* Now we need to skip all spaces */ + SKIP_SPACES_COMMENTS(parser, chunk, p); + parser->state = UCL_STATE_MACRO; + } + else { + /* We have invalid macro name */ + ucl_create_err (&parser->err, + "error at %s:%d at column %d: invalid macro name", + chunk->fname ? chunk->fname : "memory", + chunk->line, + chunk->column); + parser->state = UCL_STATE_ERROR; + return false; + } + } + } + break; + case UCL_STATE_MACRO: + if (*chunk->pos == '(') { + macro_args = ucl_parse_macro_arguments (parser, chunk); + p = chunk->pos; + if (macro_args) { + SKIP_SPACES_COMMENTS(parser, chunk, p); + } + } + else { + macro_args = NULL; + } + if (!ucl_parse_macro_value (parser, chunk, macro, + ¯o_start, ¯o_len)) { + parser->prev_state = parser->state; + parser->state = UCL_STATE_ERROR; + return false; + } + macro_len = ucl_expand_variable (parser, ¯o_escaped, + macro_start, macro_len); + parser->state = parser->prev_state; + + if (macro_escaped == NULL && macro != NULL) { + if (macro->is_context) { + ret = macro->h.context_handler (macro_start, macro_len, + macro_args, + parser->top_obj, + macro->ud); + } + else { + ret = macro->h.handler (macro_start, macro_len, macro_args, + macro->ud); + } + } + else if (macro != NULL) { + if (macro->is_context) { + ret = macro->h.context_handler (macro_escaped, macro_len, + macro_args, + parser->top_obj, + macro->ud); + } + else { + ret = macro->h.handler (macro_escaped, macro_len, macro_args, + macro->ud); + } + + UCL_FREE (macro_len + 1, macro_escaped); + } + else { + ret = false; + ucl_set_err (parser, UCL_EINTERNAL, + "internal error: parser has macro undefined", &parser->err); + } + + /* + * Chunk can be modified within macro handler + */ + chunk = parser->chunks; + p = chunk->pos; + + if (macro_args) { + ucl_object_unref (macro_args); + } + + if (!ret) { + return false; + } + break; + default: + 
ucl_set_err (parser, UCL_EINTERNAL, + "internal error: parser is in an unknown state", &parser->err); + parser->state = UCL_STATE_ERROR; + return false; + } + } + + if (parser->last_comment) { + if (parser->cur_obj) { + ucl_attach_comment (parser, parser->cur_obj, true); + } + else if (parser->stack && parser->stack->obj) { + ucl_attach_comment (parser, parser->stack->obj, true); + } + else if (parser->top_obj) { + ucl_attach_comment (parser, parser->top_obj, true); + } + else { + ucl_object_unref (parser->last_comment); + } + } + + if (parser->stack != NULL && parser->state != UCL_STATE_ERROR) { + struct ucl_stack *st; + bool has_error = false; + + LL_FOREACH (parser->stack, st) { + if (st->chunk != parser->chunks) { + break; /* Not our chunk, give up */ + } + if (st->e.params.flags & UCL_STACK_HAS_OBRACE) { + if (parser->err == NULL) { + utstring_new (parser->err); + } + + utstring_printf (parser->err, "%s:%d unmatched open brace at %d; ", + chunk->fname ? chunk->fname : "memory", + parser->chunks->line, + st->e.params.line); + + has_error = true; + } + } + + if (has_error) { + parser->err_code = UCL_EUNPAIRED; + + return false; + } + } + + return true; +} + +#define UPRM_SAFE(fn, a, b, c, el) do { \ + if (!fn(a, b, c, a)) \ + goto el; \ + } while (0) + +struct ucl_parser* +ucl_parser_new (int flags) +{ + struct ucl_parser *parser; + + parser = UCL_ALLOC (sizeof (struct ucl_parser)); + if (parser == NULL) { + return NULL; + } + + memset (parser, 0, sizeof (struct ucl_parser)); + + UPRM_SAFE(ucl_parser_register_macro, parser, "include", ucl_include_handler, e0); + UPRM_SAFE(ucl_parser_register_macro, parser, "try_include", ucl_try_include_handler, e0); + UPRM_SAFE(ucl_parser_register_macro, parser, "includes", ucl_includes_handler, e0); + UPRM_SAFE(ucl_parser_register_macro, parser, "priority", ucl_priority_handler, e0); + UPRM_SAFE(ucl_parser_register_macro, parser, "load", ucl_load_handler, e0); + UPRM_SAFE(ucl_parser_register_context_macro, parser, "inherit", 
ucl_inherit_handler, e0); + + parser->flags = flags; + parser->includepaths = NULL; + + if (flags & UCL_PARSER_SAVE_COMMENTS) { + parser->comments = ucl_object_typed_new (UCL_OBJECT); + } + + if (!(flags & UCL_PARSER_NO_FILEVARS)) { + /* Initial assumption about filevars */ + ucl_parser_set_filevars (parser, NULL, false); + } + + return parser; +e0: + ucl_parser_free(parser); + return NULL; +} + +bool +ucl_parser_set_default_priority (struct ucl_parser *parser, unsigned prio) +{ + if (parser == NULL) { + return false; + } + + parser->default_priority = prio; + + return true; +} + +int +ucl_parser_get_default_priority (struct ucl_parser *parser) +{ + if (parser == NULL) { + return -1; + } + + return parser->default_priority; +} + +bool +ucl_parser_register_macro (struct ucl_parser *parser, const char *macro, + ucl_macro_handler handler, void* ud) +{ + struct ucl_macro *new; + + if (macro == NULL || handler == NULL) { + return false; + } + + new = UCL_ALLOC (sizeof (struct ucl_macro)); + if (new == NULL) { + return false; + } + + memset (new, 0, sizeof (struct ucl_macro)); + new->h.handler = handler; + new->name = strdup (macro); + if (new->name == NULL) { + UCL_FREE (sizeof (struct ucl_macro), new); + return false; + } + new->ud = ud; + HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); + return true; +} + +bool +ucl_parser_register_context_macro (struct ucl_parser *parser, const char *macro, + ucl_context_macro_handler handler, void* ud) +{ + struct ucl_macro *new; + + if (macro == NULL || handler == NULL) { + return false; + } + + new = UCL_ALLOC (sizeof (struct ucl_macro)); + if (new == NULL) { + return false; + } + + memset (new, 0, sizeof (struct ucl_macro)); + new->h.context_handler = handler; + new->name = strdup (macro); + if (new->name == NULL) { + UCL_FREE (sizeof (struct ucl_macro), new); + return false; + } + new->ud = ud; + new->is_context = true; + HASH_ADD_KEYPTR (hh, parser->macroes, new->name, strlen (new->name), new); + 
return true; +} + +void +ucl_parser_register_variable (struct ucl_parser *parser, const char *var, + const char *value) +{ + struct ucl_variable *new = NULL, *cur; + + if (var == NULL) { + return; + } + + /* Find whether a variable already exists */ + LL_FOREACH (parser->variables, cur) { + if (strcmp (cur->var, var) == 0) { + new = cur; + break; + } + } + + if (value == NULL) { + + if (new != NULL) { + /* Remove variable */ + DL_DELETE (parser->variables, new); + free (new->var); + free (new->value); + UCL_FREE (sizeof (struct ucl_variable), new); + } + else { + /* Do nothing */ + return; + } + } + else { + if (new == NULL) { + new = UCL_ALLOC (sizeof (struct ucl_variable)); + if (new == NULL) { + return; + } + memset (new, 0, sizeof (struct ucl_variable)); + new->var = strdup (var); + new->var_len = strlen (var); + new->value = strdup (value); + new->value_len = strlen (value); + + DL_APPEND (parser->variables, new); + } + else { + free (new->value); + new->value = strdup (value); + new->value_len = strlen (value); + } + } +} + +void +ucl_parser_set_variables_handler (struct ucl_parser *parser, + ucl_variable_handler handler, void *ud) +{ + parser->var_handler = handler; + parser->var_data = ud; +} + +bool +ucl_parser_add_chunk_full (struct ucl_parser *parser, const unsigned char *data, + size_t len, unsigned priority, enum ucl_duplicate_strategy strat, + enum ucl_parse_type parse_type) +{ + struct ucl_chunk *chunk; + struct ucl_parser_special_handler *special_handler; + + if (parser == NULL) { + return false; + } + + if (data == NULL && len != 0) { + ucl_create_err (&parser->err, "invalid chunk added"); + return false; + } + + if (parser->state != UCL_STATE_ERROR) { + chunk = UCL_ALLOC (sizeof (struct ucl_chunk)); + if (chunk == NULL) { + ucl_create_err (&parser->err, "cannot allocate chunk structure"); + return false; + } + + memset (chunk, 0, sizeof (*chunk)); + + /* Apply all matching handlers from the first to the last */ + LL_FOREACH 
(parser->special_handlers, special_handler) { + if ((special_handler->flags & UCL_SPECIAL_HANDLER_PREPROCESS_ALL) || + (len >= special_handler->magic_len && + memcmp (data, special_handler->magic, special_handler->magic_len) == 0)) { + unsigned char *ndata = NULL; + size_t nlen = 0; + + if (!special_handler->handler (parser, data, len, &ndata, &nlen, + special_handler->user_data)) { + UCL_FREE(sizeof (struct ucl_chunk), chunk); + ucl_create_err (&parser->err, "call for external handler failed"); + + return false; + } + + struct ucl_parser_special_handler_chain *nchain; + nchain = UCL_ALLOC (sizeof (*nchain)); + nchain->begin = ndata; + nchain->len = nlen; + nchain->special_handler = special_handler; + + /* Free order is reversed */ + LL_PREPEND (chunk->special_handlers, nchain); + + data = ndata; + len = nlen; + } + } + + if (parse_type == UCL_PARSE_AUTO && len > 0) { + /* We need to detect parse type by the first symbol */ + if ((*data & 0x80) == 0x80 && (*data >= 0xdc && *data <= 0xdf)) { + parse_type = UCL_PARSE_MSGPACK; + } + else if (*data == '(') { + parse_type = UCL_PARSE_CSEXP; + } + else { + parse_type = UCL_PARSE_UCL; + } + } + + chunk->begin = data; + chunk->remain = len; + chunk->pos = chunk->begin; + chunk->end = chunk->begin + len; + chunk->line = 1; + chunk->column = 0; + chunk->priority = priority; + chunk->strategy = strat; + chunk->parse_type = parse_type; + + if (parser->cur_file) { + chunk->fname = strdup (parser->cur_file); + } + + LL_PREPEND (parser->chunks, chunk); + parser->recursion ++; + + if (parser->recursion > UCL_MAX_RECURSION) { + ucl_create_err (&parser->err, "maximum include nesting limit is reached: %d", + parser->recursion); + return false; + } + + if (len > 0) { + /* Need to parse something */ + switch (parse_type) { + default: + case UCL_PARSE_UCL: + return ucl_state_machine (parser); + case UCL_PARSE_MSGPACK: + return ucl_parse_msgpack (parser); + case UCL_PARSE_CSEXP: + return ucl_parse_csexp (parser); + } + } + else { + /* 
Just add empty chunk and go forward */ + if (parser->top_obj == NULL) { + /* + * In case of empty object, create one to indicate that we've + * read something + */ + parser->top_obj = ucl_object_new_full (UCL_OBJECT, priority); + } + + return true; + } + } + + ucl_create_err (&parser->err, "a parser is in an invalid state"); + + return false; +} + +bool +ucl_parser_add_chunk_priority (struct ucl_parser *parser, + const unsigned char *data, size_t len, unsigned priority) +{ + /* We dereference parser, so this check is essential */ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_chunk_full (parser, data, len, + priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); +} + +bool +ucl_parser_add_chunk (struct ucl_parser *parser, const unsigned char *data, + size_t len) +{ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_chunk_full (parser, data, len, + parser->default_priority, UCL_DUPLICATE_APPEND, UCL_PARSE_UCL); +} + +bool +ucl_parser_insert_chunk (struct ucl_parser *parser, const unsigned char *data, + size_t len) +{ + if (parser == NULL || parser->top_obj == NULL) { + return false; + } + + bool res; + struct ucl_chunk *chunk; + + int state = parser->state; + parser->state = UCL_STATE_INIT; + + /* Prevent inserted chunks from unintentionally closing the current object */ + if (parser->stack != NULL && parser->stack->next != NULL) { + parser->stack->e.params.level = parser->stack->next->e.params.level; + } + + res = ucl_parser_add_chunk_full (parser, data, len, parser->chunks->priority, + parser->chunks->strategy, parser->chunks->parse_type); + + /* Remove chunk from the stack */ + chunk = parser->chunks; + if (chunk != NULL) { + parser->chunks = chunk->next; + ucl_chunk_free (chunk); + parser->recursion --; + } + + parser->state = state; + + return res; +} + +bool +ucl_parser_add_string_priority (struct ucl_parser *parser, const char *data, + size_t len, unsigned priority) +{ + if (data == NULL) { + ucl_create_err (&parser->err, 
"invalid string added"); + return false; + } + if (len == 0) { + len = strlen (data); + } + + return ucl_parser_add_chunk_priority (parser, + (const unsigned char *)data, len, priority); +} + +bool +ucl_parser_add_string (struct ucl_parser *parser, const char *data, + size_t len) +{ + if (parser == NULL) { + return false; + } + + return ucl_parser_add_string_priority (parser, + (const unsigned char *)data, len, parser->default_priority); +} + +bool +ucl_set_include_path (struct ucl_parser *parser, ucl_object_t *paths) +{ + if (parser == NULL || paths == NULL) { + return false; + } + + if (parser->includepaths == NULL) { + parser->includepaths = ucl_object_copy (paths); + } + else { + ucl_object_unref (parser->includepaths); + parser->includepaths = ucl_object_copy (paths); + } + + if (parser->includepaths == NULL) { + return false; + } + + return true; +} + +unsigned char ucl_parser_chunk_peek (struct ucl_parser *parser) +{ + if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL || + parser->chunks->pos == parser->chunks->end) { + return 0; + } + + return( *parser->chunks->pos ); +} + +bool ucl_parser_chunk_skip (struct ucl_parser *parser) +{ + if (parser == NULL || parser->chunks == NULL || parser->chunks->pos == NULL || parser->chunks->end == NULL || + parser->chunks->pos == parser->chunks->end) { + return false; + } + + const unsigned char *p = parser->chunks->pos; + ucl_chunk_skipc( parser->chunks, p ); + if( parser->chunks->pos != NULL ) return true; + return false; +} + +ucl_object_t* +ucl_parser_get_current_stack_object (struct ucl_parser *parser, unsigned int depth) +{ + ucl_object_t *obj; + + if (parser == NULL || parser->stack == NULL) { + return NULL; + } + + struct ucl_stack *stack = parser->stack; + if(stack == NULL || stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT) + { + return NULL; + } + + for( unsigned int i = 0; i < depth; ++i ) + { + stack = stack->next; + if(stack == NULL || 
stack->obj == NULL || ucl_object_type (stack->obj) != UCL_OBJECT) + { + return NULL; + } + } + + obj = ucl_object_ref (stack->obj); + return obj; +} + |