diff options
Diffstat (limited to '')
-rw-r--r-- | src/xml.c | 1421 |
1 files changed, 1421 insertions, 0 deletions
diff --git a/src/xml.c b/src/xml.c new file mode 100644 index 0000000..ac9e72b --- /dev/null +++ b/src/xml.c @@ -0,0 +1,1421 @@ +/** + * @file xml.c + * @author Radek Krejci <rkrejci@cesnet.cz> + * @author Michal Vasko <mvasko@cesnet.cz> + * @brief Generic XML parser implementation for libyang + * + * Copyright (c) 2015 - 2021 CESNET, z.s.p.o. + * + * This source code is licensed under BSD 3-Clause License (the "License"). + * You may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://opensource.org/licenses/BSD-3-Clause + */ + +#define _GNU_SOURCE + +#include "xml.h" + +#include <assert.h> +#include <ctype.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> + +#include "compat.h" +#include "in_internal.h" +#include "ly_common.h" +#include "out_internal.h" +#include "tree.h" +#include "tree_schema_internal.h" + +/* Move input p by s characters, if EOF log with lyxml_ctx c */ +#define move_input(c, s) \ + ly_in_skip(c->in, s); \ + LY_CHECK_ERR_RET(!c->in->current[0], LOGVAL(c->ctx, LY_VCODE_EOF), LY_EVALID) + +/* Ignore whitespaces in the input string p */ +#define ign_xmlws(c) \ + while (is_xmlws(*(c)->in->current)) { \ + if (*(c)->in->current == '\n') { \ + LY_IN_NEW_LINE((c)->in); \ + } \ + ly_in_skip(c->in, 1); \ + } + +static LY_ERR lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, + ly_bool *dynamic); + +/** + * @brief Ignore and skip any characters until the delim of the size delim_len is read, including the delim + * + * @param[in] xmlctx XML parser context to provide input handler and libyang context + * @param[in] in input handler to read the data, it is updated only in case the section is correctly terminated. + * @param[in] delim Delimiter to detect end of the section. + * @param[in] delim_len Length of the delimiter string to use. + * @param[in] sectname Section name to refer in error message. + */ +LY_ERR +skip_section(struct lyxml_ctx *xmlctx, const char *delim, size_t delim_len, const char *sectname) +{ + size_t i; + register const char *input, *a, *b; + uint64_t parsed = 0, newlines = 0; + + for (input = xmlctx->in->current; *input; ++input, ++parsed) { + if (*input != *delim) { + if (*input == '\n') { + ++newlines; + } + continue; + } + a = input; + b = delim; + for (i = 0; i < delim_len; ++i) { + if (*a++ != *b++) { + break; + } + } + if (i == delim_len) { + /* delim found */ + xmlctx->in->line += newlines; + ly_in_skip(xmlctx->in, parsed + delim_len); + return LY_SUCCESS; + } + } + + /* delim not found, + * do not update input handler to refer to the beginning of the section in error message */ + LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, sectname); + return LY_EVALID; +} + +/** + * @brief Check/Get an XML identifier from the input string. + * + * The identifier must have at least one valid character complying the name start character constraints. + * The identifier is terminated by the first character, which does not comply to the name character constraints. + * + * See https://www.w3.org/TR/xml-names/#NT-NCName + * + * @param[in] xmlctx XML context. + * @param[out] start Pointer to the start of the identifier. + * @param[out] end Pointer ot the end of the identifier. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_parse_identifier(struct lyxml_ctx *xmlctx, const char **start, const char **end) +{ + const char *s, *in; + uint32_t c; + size_t parsed; + LY_ERR rc; + + in = s = xmlctx->in->current; + + /* check NameStartChar (minus colon) */ + LY_CHECK_ERR_RET(ly_getutf8(&in, &c, &parsed), + LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), + LY_EVALID); + LY_CHECK_ERR_RET(!is_xmlqnamestartchar(c), + LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Identifier \"%s\" starts with an invalid character.", in - parsed), + LY_EVALID); + + /* check rest of the identifier */ + do { + /* move only successfully parsed bytes */ + ly_in_skip(xmlctx->in, parsed); + + rc = ly_getutf8(&in, &c, &parsed); + LY_CHECK_ERR_RET(rc, LOGVAL(xmlctx->ctx, LY_VCODE_INCHAR, in[0]), LY_EVALID); + } while (is_xmlqnamechar(c)); + + *start = s; + *end = xmlctx->in->current; + return LY_SUCCESS; +} + +/** + * @brief Add namespace definition into XML context. + * + * Namespaces from a single element are supposed to be added sequentially together (not interleaved by a namespace from other + * element). This mimic namespace visibility, since the namespace defined in element E is not visible from its parents or + * siblings. On the other hand, namespace from a parent element can be redefined in a child element. This is also reflected + * by lyxml_ns_get() which returns the most recent namespace definition for the given prefix. + * + * When leaving processing of a subtree of some element (after it is removed from xmlctx->elements), caller is supposed to call + * lyxml_ns_rm() to remove all the namespaces defined in such an element from the context. + * + * @param[in] xmlctx XML context to work with. + * @param[in] prefix Pointer to the namespace prefix. Can be NULL for default namespace. + * @param[in] prefix_len Length of the prefix. + * @param[in] uri Namespace URI (value) to store directly. Value is always spent. + * @return LY_ERR values. + */ +LY_ERR +lyxml_ns_add(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, char *uri) +{ + LY_ERR rc = LY_SUCCESS; + struct lyxml_ns *ns; + uint32_t i; + + /* check for duplicates */ + if (xmlctx->ns.count) { + i = xmlctx->ns.count; + do { + --i; + ns = xmlctx->ns.objs[i]; + if (ns->depth < xmlctx->elements.count) { + /* only namespaces of parents, no need to check further */ + break; + } else if (prefix && ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) { + if (!strcmp(ns->uri, uri)) { + /* exact same prefix and namespace, ignore */ + goto cleanup; + } + + LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Duplicate XML NS prefix \"%s\" used for namespaces \"%s\" and \"%s\".", + ns->prefix, ns->uri, uri); + rc = LY_EVALID; + goto cleanup; + } else if (!prefix && !ns->prefix) { + if (!strcmp(ns->uri, uri)) { + /* exact same default namespace, ignore */ + goto cleanup; + } + + LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Duplicate default XML namespaces \"%s\" and \"%s\".", ns->uri, uri); + rc = LY_EVALID; + goto cleanup; + } + } while (i); + } + + ns = malloc(sizeof *ns); + LY_CHECK_ERR_GOTO(!ns, LOGMEM(xmlctx->ctx); rc = LY_EMEM, cleanup); + + /* we need to connect the depth of the element where the namespace is defined with the + * namespace record to be able to maintain (remove) the record when the parser leaves + * (to its sibling or back to the parent) the element where the namespace was defined */ + ns->depth = xmlctx->elements.count; + + ns->uri = uri; + if (prefix) { + ns->prefix = strndup(prefix, prefix_len); + LY_CHECK_ERR_GOTO(!ns->prefix, LOGMEM(xmlctx->ctx); free(ns); rc = LY_EMEM, cleanup); + } else { + ns->prefix = NULL; + } + + rc = ly_set_add(&xmlctx->ns, ns, 1, NULL); + LY_CHECK_ERR_GOTO(rc, free(ns->prefix); free(ns), cleanup); + + /* successfully stored */ + uri = NULL; + +cleanup: + free(uri); + return rc; +} + +void +lyxml_ns_rm(struct lyxml_ctx *xmlctx) +{ + struct lyxml_ns *ns; + uint32_t u; + + if (!xmlctx->ns.count) { + return; + } + + u = xmlctx->ns.count; + do { + --u; + ns = (struct lyxml_ns *)xmlctx->ns.objs[u]; + + if (ns->depth != xmlctx->elements.count + 1) { + /* we are done, the namespaces from a single element are supposed to be together */ + break; + } + + /* remove the ns structure */ + free(ns->prefix); + free(ns->uri); + free(ns); + --xmlctx->ns.count; + } while (u); + + if (!xmlctx->ns.count) { + /* cleanup the xmlctx's namespaces storage */ + ly_set_erase(&xmlctx->ns, NULL); + } +} + +const struct lyxml_ns * +lyxml_ns_get(const struct ly_set *ns_set, const char *prefix, size_t prefix_len) +{ + struct lyxml_ns *ns; + uint32_t u; + + if (!ns_set->count) { + return NULL; + } + + u = ns_set->count; + do { + --u; + ns = (struct lyxml_ns *)ns_set->objs[u]; + + if (prefix && prefix_len) { + if (ns->prefix && !ly_strncmp(ns->prefix, prefix, prefix_len)) { + return ns; + } + } else if (!ns->prefix) { + /* default namespace */ + return ns; + } + } while (u); + + return NULL; +} + +/** + * @brief Skip in the input until EOF or just after the opening tag. + * Handles special XML constructs (comment, cdata, doctype). + * + * @param[in] xmlctx XML context to use. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_skip_until_end_or_after_otag(struct lyxml_ctx *xmlctx) +{ + const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */ + const char *endtag, *sectname; + size_t endtag_len; + + while (1) { + ign_xmlws(xmlctx); + + if (xmlctx->in->current[0] == '\0') { + /* EOF */ + if (xmlctx->elements.count) { + LOGVAL(ctx, LY_VCODE_EOF); + return LY_EVALID; + } + return LY_SUCCESS; + } else if (xmlctx->in->current[0] != '<') { + LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), xmlctx->in->current, + "element tag start ('<')"); + return LY_EVALID; + } + move_input(xmlctx, 1); + + if (xmlctx->in->current[0] == '!') { + move_input(xmlctx, 1); + /* sections to ignore */ + if (!strncmp(xmlctx->in->current, "--", 2)) { + /* comment */ + move_input(xmlctx, 2); + sectname = "Comment"; + endtag = "-->"; + endtag_len = ly_strlen_const("-->"); + } else if (!strncmp(xmlctx->in->current, "DOCTYPE", ly_strlen_const("DOCTYPE"))) { + /* Document type declaration - not supported */ + LOGVAL(ctx, LY_VCODE_NSUPP, "Document Type Declaration"); + return LY_EVALID; + } else { + LOGVAL(ctx, LYVE_SYNTAX, "Unknown XML section \"%.20s\".", &xmlctx->in->current[-2]); + return LY_EVALID; + } + LY_CHECK_RET(skip_section(xmlctx, endtag, endtag_len, sectname)); + } else if (xmlctx->in->current[0] == '?') { + LY_CHECK_RET(skip_section(xmlctx, "?>", 2, "Declaration")); + } else { + /* other non-WS character */ + break; + } + } + + return LY_SUCCESS; +} + +/** + * @brief Parse QName. + * + * @param[in] xmlctx XML context to use. + * @param[out] prefix Parsed prefix, may be NULL. + * @param[out] prefix_len Length of @p prefix. + * @param[out] name Parsed name. + * @param[out] name_len Length of @p name. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_parse_qname(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) +{ + const char *start, *end; + + *prefix = NULL; + *prefix_len = 0; + + LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end)); + if (end[0] == ':') { + /* we have prefixed identifier */ + *prefix = start; + *prefix_len = end - start; + + move_input(xmlctx, 1); + LY_CHECK_RET(lyxml_parse_identifier(xmlctx, &start, &end)); + } + + *name = start; + *name_len = end - start; + return LY_SUCCESS; +} + +/** + * @brief Prepare buffer for new data. + * + * @param[in] ctx Context for logging. + * @param[in,out] in XML input data. + * @param[in,out] offset Current offset in @p in. + * @param[in] need_space Needed additional free space that is allocated. + * @param[in,out] buf Dynamic buffer. + * @param[in,out] len Current @p buf length (used characters). + * @param[in,out] size Current @p buf size (allocated characters). + * @return LY_ERR value. + */ +static LY_ERR +lyxml_parse_value_use_buf(const struct ly_ctx *ctx, const char **in, size_t *offset, size_t need_space, char **buf, + size_t *len, size_t *size) +{ +#define BUFSIZE 24 +#define BUFSIZE_STEP 128 + + if (!*buf) { + /* prepare output buffer */ + *buf = malloc(BUFSIZE); + LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM); + *size = BUFSIZE; + } + + /* allocate needed space */ + while (*len + *offset + need_space >= *size) { + *buf = ly_realloc(*buf, *size + BUFSIZE_STEP); + LY_CHECK_ERR_RET(!*buf, LOGMEM(ctx), LY_EMEM); + *size += BUFSIZE_STEP; + } + + if (*offset) { + /* store what we have so far */ + memcpy(&(*buf)[*len], *in, *offset); + *len += *offset; + *in += *offset; + *offset = 0; + } + + return LY_SUCCESS; + +#undef BUFSIZE +#undef BUFSIZE_STEP +} + +/** + * @brief Parse XML text content (value). + * + * @param[in] xmlctx XML context to use. + * @param[in] endchar Expected character to mark value end. + * @param[out] value Parsed value. + * @param[out] length Length of @p value. + * @param[out] ws_only Whether the value is empty/white-spaces only. + * @param[out] dynamic Whether the value was dynamically allocated. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_parse_value(struct lyxml_ctx *xmlctx, char endchar, char **value, size_t *length, ly_bool *ws_only, ly_bool *dynamic) +{ + const struct ly_ctx *ctx = xmlctx->ctx; /* shortcut */ + const char *in = xmlctx->in->current, *start, *in_aux, *p; + char *buf = NULL; + size_t offset; /* read offset in input buffer */ + size_t len; /* length of the output string (write offset in output buffer) */ + size_t size = 0; /* size of the output buffer */ + uint32_t n; + size_t u; + ly_bool ws = 1; + + assert(xmlctx); + + /* init */ + start = in; + offset = len = 0; + + /* parse */ + while (in[offset]) { + if (in[offset] == '&') { + /* non WS */ + ws = 0; + + /* use buffer and allocate enough for the offset and next character, + * we will need 4 bytes at most since we support only the predefined + * (one-char) entities and character references */ + LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, 4, &buf, &len, &size)); + + ++offset; + if (in[offset] != '#') { + /* entity reference - only predefined references are supported */ + if (!strncmp(&in[offset], "lt;", ly_strlen_const("lt;"))) { + buf[len++] = '<'; + in += ly_strlen_const("<"); + } else if (!strncmp(&in[offset], "gt;", ly_strlen_const("gt;"))) { + buf[len++] = '>'; + in += ly_strlen_const(">"); + } else if (!strncmp(&in[offset], "amp;", ly_strlen_const("amp;"))) { + buf[len++] = '&'; + in += ly_strlen_const("&"); + } else if (!strncmp(&in[offset], "apos;", ly_strlen_const("apos;"))) { + buf[len++] = '\''; + in += ly_strlen_const("'"); + } else if (!strncmp(&in[offset], "quot;", ly_strlen_const("quot;"))) { + buf[len++] = '\"'; + in += ly_strlen_const("""); + } else { + LOGVAL(ctx, LYVE_SYNTAX, "Entity reference \"%.*s\" not supported, only predefined references allowed.", + 10, &in[offset - 1]); + goto error; + } + offset = 0; + } else { + p = &in[offset - 1]; + /* character reference */ + ++offset; + if (isdigit(in[offset])) { + for (n = 0; isdigit(in[offset]); offset++) { + n = (LY_BASE_DEC * n) + (in[offset] - '0'); + } + } else if ((in[offset] == 'x') && isxdigit(in[offset + 1])) { + for (n = 0, ++offset; isxdigit(in[offset]); offset++) { + if (isdigit(in[offset])) { + u = (in[offset] - '0'); + } else if (in[offset] > 'F') { + u = LY_BASE_DEC + (in[offset] - 'a'); + } else { + u = LY_BASE_DEC + (in[offset] - 'A'); + } + n = (LY_BASE_HEX * n) + u; + } + } else { + LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.12s\".", p); + goto error; + + } + + if (in[offset] != ';') { + LOGVAL(ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(&in[offset]), &in[offset], ";"); + goto error; + } + ++offset; + if (ly_pututf8(&buf[len], n, &u)) { + LOGVAL(ctx, LYVE_SYNTAX, "Invalid character reference \"%.12s\" (0x%08" PRIx32 ").", p, n); + goto error; + } + len += u; + in += offset; + offset = 0; + } + } else if (!strncmp(in + offset, "<![CDATA[", ly_strlen_const("<![CDATA["))) { + /* CDATA, find the end */ + in_aux = strstr(in + offset + ly_strlen_const("<![CDATA["), "]]>"); + if (!in_aux) { + LOGVAL(xmlctx->ctx, LY_VCODE_NTERM, "CDATA"); + goto error; + } + u = in_aux - (in + offset + ly_strlen_const("<![CDATA[")); + + /* use buffer, allocate enough for the whole CDATA */ + LY_CHECK_RET(lyxml_parse_value_use_buf(ctx, &in, &offset, u, &buf, &len, &size)); + + /* skip CDATA tag */ + in += ly_strlen_const("<![CDATA["); + assert(!offset); + + /* analyze CDATA for non WS and newline chars */ + for (n = 0; n < u; ++n) { + if (in[n] == '\n') { + LY_IN_NEW_LINE(xmlctx->in); + } else if (!is_xmlws(in[n])) { + ws = 0; + } + } + + /* copy CDATA */ + memcpy(buf + len, in, u); + len += u; + + /* move input skipping the end tag */ + in += u + ly_strlen_const("]]>"); + } else if (in[offset] == endchar) { + /* end of string */ + if (buf) { + /* realloc exact size string */ + buf = ly_realloc(buf, len + offset + 1); + LY_CHECK_ERR_RET(!buf, LOGMEM(ctx), LY_EMEM); + size = len + offset + 1; + if (offset) { + memcpy(&buf[len], in, offset); + } + + /* set terminating NULL byte */ + buf[len + offset] = '\0'; + } + len += offset; + in += offset; + goto success; + } else { + if (!is_xmlws(in[offset])) { + /* non WS */ + ws = 0; + } + + /* log lines */ + if (in[offset] == '\n') { + LY_IN_NEW_LINE(xmlctx->in); + } + + /* continue */ + in_aux = &in[offset]; + LY_CHECK_ERR_GOTO(ly_getutf8(&in_aux, &n, &u), + LOGVAL(ctx, LY_VCODE_INCHAR, in[offset]), error); + offset += u; + } + } + + /* EOF reached before endchar */ + LOGVAL(ctx, LY_VCODE_EOF); + +error: + free(buf); + return LY_EVALID; + +success: + if (buf) { + *value = buf; + *dynamic = 1; + } else { + *value = (char *)start; + *dynamic = 0; + } + *length = len; + *ws_only = ws; + + xmlctx->in->current = in; + return LY_SUCCESS; +} + +/** + * @brief Parse XML closing element and match it to a stored starting element. + * + * @param[in] xmlctx XML context to use. + * @param[in] prefix Expected closing element prefix. + * @param[in] prefix_len Length of @p prefix. + * @param[in] name Expected closing element name. + * @param[in] name_len Length of @p name. + * @param[in] empty Whether we are parsing a special "empty" element (with joined starting and closing tag) with no value. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_close_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len, + ly_bool empty) +{ + struct lyxml_elem *e; + + /* match opening and closing element tags */ + if (!xmlctx->elements.count) { + LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", + (int)name_len, name); + return LY_EVALID; + } + + e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1]; + if ((e->prefix_len != prefix_len) || (e->name_len != name_len) || + (prefix_len && strncmp(prefix, e->prefix, e->prefix_len)) || strncmp(name, e->name, e->name_len)) { + LOGVAL(xmlctx->ctx, LYVE_SYNTAX, "Opening (\"%.*s%s%.*s\") and closing (\"%.*s%s%.*s\") elements tag mismatch.", + (int)e->prefix_len, e->prefix ? e->prefix : "", e->prefix ? ":" : "", (int)e->name_len, e->name, + (int)prefix_len, prefix ? prefix : "", prefix ? ":" : "", (int)name_len, name); + return LY_EVALID; + } + + /* opening and closing element tags matches, remove record from the opening tags list */ + ly_set_rm_index(&xmlctx->elements, xmlctx->elements.count - 1, free); + + /* remove also the namespaces connected with the element */ + lyxml_ns_rm(xmlctx); + + /* skip WS */ + ign_xmlws(xmlctx); + + /* special "<elem/>" element */ + if (empty && (xmlctx->in->current[0] == '/')) { + move_input(xmlctx, 1); + } + + /* parse closing tag */ + if (xmlctx->in->current[0] != '>') { + LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), + xmlctx->in->current, "element tag termination ('>')"); + return LY_EVALID; + } + + /* move after closing tag without checking for EOF */ + ly_in_skip(xmlctx->in, 1); + + return LY_SUCCESS; +} + +/** + * @brief Store parsed opening element and parse any included namespaces. + * + * @param[in] xmlctx XML context to use. + * @param[in] prefix Parsed starting element prefix. + * @param[in] prefix_len Length of @p prefix. + * @param[in] name Parsed starting element name. + * @param[in] name_len Length of @p name. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_open_element(struct lyxml_ctx *xmlctx, const char *prefix, size_t prefix_len, const char *name, size_t name_len) +{ + LY_ERR ret = LY_SUCCESS; + struct lyxml_elem *e; + const char *prev_input; + uint64_t prev_line; + char *value; + size_t parsed, value_len; + ly_bool ws_only, dynamic, is_ns; + uint32_t c; + + /* store element opening tag information */ + e = malloc(sizeof *e); + LY_CHECK_ERR_RET(!e, LOGMEM(xmlctx->ctx), LY_EMEM); + e->name = name; + e->prefix = prefix; + e->name_len = name_len; + e->prefix_len = prefix_len; + + LY_CHECK_RET(ly_set_add(&xmlctx->elements, e, 1, NULL)); + if (xmlctx->elements.count > LY_MAX_BLOCK_DEPTH) { + LOGERR(xmlctx->ctx, LY_EINVAL, "The maximum number of open elements has been exceeded."); + return LY_EINVAL; + } + + /* skip WS */ + ign_xmlws(xmlctx); + + /* parse and store all namespaces */ + prev_input = xmlctx->in->current; + prev_line = xmlctx->in->line; + is_ns = 1; + while ((xmlctx->in->current[0] != '\0') && !(ret = ly_getutf8(&xmlctx->in->current, &c, &parsed))) { + if (!is_xmlqnamestartchar(c)) { + break; + } + xmlctx->in->current -= parsed; + + /* parse attribute name */ + LY_CHECK_GOTO(ret = lyxml_parse_qname(xmlctx, &prefix, &prefix_len, &name, &name_len), cleanup); + + /* parse the value */ + LY_CHECK_GOTO(ret = lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic), cleanup); + + /* store every namespace */ + if ((prefix && !ly_strncmp("xmlns", prefix, prefix_len)) || (!prefix && !ly_strncmp("xmlns", name, name_len))) { + ret = lyxml_ns_add(xmlctx, prefix ? name : NULL, prefix ? name_len : 0, + dynamic ? value : strndup(value, value_len)); + dynamic = 0; + LY_CHECK_GOTO(ret, cleanup); + } else { + /* not a namespace */ + is_ns = 0; + } + if (dynamic) { + free(value); + } + + /* skip WS */ + ign_xmlws(xmlctx); + + if (is_ns) { + /* we can actually skip all the namespaces as there is no reason to parse them again */ + prev_input = xmlctx->in->current; + prev_line = xmlctx->in->line; + } + } + +cleanup: + if (!ret) { + xmlctx->in->current = prev_input; + xmlctx->in->line = prev_line; + } + return ret; +} + +/** + * @brief Move parser to the attribute content and parse it. + * + * @param[in] xmlctx XML context to use. + * @param[out] value Parsed attribute value. + * @param[out] value_len Length of @p value. + * @param[out] ws_only Whether the value is empty/white-spaces only. + * @param[out] dynamic Whether the value was dynamically allocated. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_next_attr_content(struct lyxml_ctx *xmlctx, const char **value, size_t *value_len, ly_bool *ws_only, ly_bool *dynamic) +{ + char quot; + + /* skip WS */ + ign_xmlws(xmlctx); + + /* skip '=' */ + if (xmlctx->in->current[0] == '\0') { + LOGVAL(xmlctx->ctx, LY_VCODE_EOF); + return LY_EVALID; + } else if (xmlctx->in->current[0] != '=') { + LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), + xmlctx->in->current, "'='"); + return LY_EVALID; + } + move_input(xmlctx, 1); + + /* skip WS */ + ign_xmlws(xmlctx); + + /* find quotes */ + if (xmlctx->in->current[0] == '\0') { + LOGVAL(xmlctx->ctx, LY_VCODE_EOF); + return LY_EVALID; + } else if ((xmlctx->in->current[0] != '\'') && (xmlctx->in->current[0] != '\"')) { + LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(xmlctx->in->current), + xmlctx->in->current, "either single or double quotation mark"); + return LY_EVALID; + } + + /* remember quote */ + quot = xmlctx->in->current[0]; + move_input(xmlctx, 1); + + /* parse attribute value */ + LY_CHECK_RET(lyxml_parse_value(xmlctx, quot, (char **)value, value_len, ws_only, dynamic)); + + /* move after ending quote (without checking for EOF) */ + ly_in_skip(xmlctx->in, 1); + + return LY_SUCCESS; +} + +/** + * @brief Move parser to the next attribute and parse it. + * + * @param[in] xmlctx XML context to use. + * @param[out] prefix Parsed attribute prefix. + * @param[out] prefix_len Length of @p prefix. + * @param[out] name Parsed attribute name. + * @param[out] name_len Length of @p name. + * @return LY_ERR value. + */ +static LY_ERR +lyxml_next_attribute(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len) +{ + const char *in; + char *value; + uint32_t c; + size_t parsed, value_len; + ly_bool ws_only, dynamic; + + /* skip WS */ + ign_xmlws(xmlctx); + + /* parse only possible attributes */ + while ((xmlctx->in->current[0] != '>') && (xmlctx->in->current[0] != '/')) { + in = xmlctx->in->current; + if (in[0] == '\0') { + LOGVAL(xmlctx->ctx, LY_VCODE_EOF); + return LY_EVALID; + } else if ((ly_getutf8(&in, &c, &parsed) || !is_xmlqnamestartchar(c))) { + LOGVAL(xmlctx->ctx, LY_VCODE_INSTREXP, LY_VCODE_INSTREXP_len(in - parsed), in - parsed, + "element tag end ('>' or '/>') or an attribute"); + return LY_EVALID; + } + + /* parse attribute name */ + LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len)); + + if ((!*prefix || ly_strncmp("xmlns", *prefix, *prefix_len)) && (*prefix || ly_strncmp("xmlns", *name, *name_len))) { + /* standard attribute */ + break; + } + + /* namespace, skip it */ + LY_CHECK_RET(lyxml_next_attr_content(xmlctx, (const char **)&value, &value_len, &ws_only, &dynamic)); + if (dynamic) { + free(value); + } + + /* skip WS */ + ign_xmlws(xmlctx); + } + + return LY_SUCCESS; +} + +/** + * @brief Move parser to the next element and parse it. + * + * @param[in] xmlctx XML context to use. + * @param[out] prefix Parsed element prefix. + * @param[out] prefix_len Length of @p prefix. + * @param[out] name Parse element name. + * @param[out] name_len Length of @p name. + * @param[out] closing Flag if the element is closing (includes '/'). + * @return LY_ERR value. + */ +static LY_ERR +lyxml_next_element(struct lyxml_ctx *xmlctx, const char **prefix, size_t *prefix_len, const char **name, size_t *name_len, + ly_bool *closing) +{ + /* skip WS until EOF or after opening tag '<' */ + LY_CHECK_RET(lyxml_skip_until_end_or_after_otag(xmlctx)); + if (xmlctx->in->current[0] == '\0') { + /* set return values */ + *prefix = *name = NULL; + *prefix_len = *name_len = 0; + return LY_SUCCESS; + } + + if (xmlctx->in->current[0] == '/') { + move_input(xmlctx, 1); + *closing = 1; + } else { + *closing = 0; + } + + /* skip WS */ + ign_xmlws(xmlctx); + + /* parse element name */ + LY_CHECK_RET(lyxml_parse_qname(xmlctx, prefix, prefix_len, name, name_len)); + + return LY_SUCCESS; +} + +LY_ERR +lyxml_ctx_new(const struct ly_ctx *ctx, struct ly_in *in, struct lyxml_ctx **xmlctx_p) +{ + LY_ERR ret = LY_SUCCESS; + struct lyxml_ctx *xmlctx; + ly_bool closing; + + /* new context */ + xmlctx = calloc(1, sizeof *xmlctx); + LY_CHECK_ERR_RET(!xmlctx, LOGMEM(ctx), LY_EMEM); + xmlctx->ctx = ctx; + xmlctx->in = in; + + ly_log_location(NULL, NULL, NULL, in); + + /* parse next element, if any */ + LY_CHECK_GOTO(ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, + &xmlctx->name_len, &closing), cleanup); + + if (xmlctx->in->current[0] == '\0') { + /* update status */ + xmlctx->status = LYXML_END; + } else if (closing) { + LOGVAL(ctx, LYVE_SYNTAX, "Stray closing element tag (\"%.*s\").", (int)xmlctx->name_len, xmlctx->name); + ret = LY_EVALID; + goto cleanup; + } else { + /* open an element, also parses all enclosed namespaces */ + LY_CHECK_GOTO(ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len), cleanup); + + /* update status */ + xmlctx->status = LYXML_ELEMENT; + } + +cleanup: + if (ret) { + lyxml_ctx_free(xmlctx); + } else { + *xmlctx_p = xmlctx; + } + return ret; +} + +LY_ERR +lyxml_ctx_next(struct lyxml_ctx *xmlctx) +{ + LY_ERR ret = LY_SUCCESS; + ly_bool closing; + struct lyxml_elem *e; + + /* if the value was not used, free it */ + if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) { + free((char *)xmlctx->value); + xmlctx->value = NULL; + xmlctx->dynamic = 0; + } + + switch (xmlctx->status) { + case LYXML_ELEM_CONTENT: + /* content |</elem> */ + + /* handle special case when empty content for "<elem/>" was returned */ + if (xmlctx->in->current[0] == '/') { + assert(xmlctx->elements.count); + e = (struct lyxml_elem *)xmlctx->elements.objs[xmlctx->elements.count - 1]; + + /* close the element (parses closing tag) */ + ret = lyxml_close_element(xmlctx, e->prefix, e->prefix_len, e->name, e->name_len, 1); + LY_CHECK_GOTO(ret, cleanup); + + /* update status */ + xmlctx->status = LYXML_ELEM_CLOSE; + break; + } + /* fall through */ + case LYXML_ELEM_CLOSE: + /* </elem>| <elem2>* */ + + /* parse next element, if any */ + ret = lyxml_next_element(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len, &closing); + LY_CHECK_GOTO(ret, cleanup); + + if (xmlctx->in->current[0] == '\0') { + /* update status */ + xmlctx->status = LYXML_END; + } else if (closing) { + /* close an element (parses also closing tag) */ + ret = lyxml_close_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len, 0); + LY_CHECK_GOTO(ret, cleanup); + + /* update status */ + xmlctx->status = LYXML_ELEM_CLOSE; + } else { + /* open an element, also parses all enclosed namespaces */ + ret = lyxml_open_element(xmlctx, xmlctx->prefix, xmlctx->prefix_len, xmlctx->name, xmlctx->name_len); + LY_CHECK_GOTO(ret, cleanup); + + /* update status */ + xmlctx->status = LYXML_ELEMENT; + } + break; + + case LYXML_ELEMENT: + /* <elem| attr='val'* > content */ + case LYXML_ATTR_CONTENT: + /* attr='val'| attr='val'* > content */ + + /* parse attribute name, if any */ + ret = lyxml_next_attribute(xmlctx, &xmlctx->prefix, &xmlctx->prefix_len, &xmlctx->name, &xmlctx->name_len); + LY_CHECK_GOTO(ret, cleanup); + + if (xmlctx->in->current[0] == '>') { + /* no attributes but a closing tag */ + ly_in_skip(xmlctx->in, 1); + if (!xmlctx->in->current[0]) { + LOGVAL(xmlctx->ctx, LY_VCODE_EOF); + ret = LY_EVALID; + goto cleanup; + } + + /* parse element content */ + ret = lyxml_parse_value(xmlctx, '<', (char **)&xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, + &xmlctx->dynamic); + LY_CHECK_GOTO(ret, cleanup); + + if (!xmlctx->value_len) { + /* empty value should by alocated staticaly, but check for in any case */ + if (xmlctx->dynamic) { + free((char *) xmlctx->value); + } + /* use empty value, easier to work with */ + xmlctx->value = ""; + xmlctx->dynamic = 0; + } + + /* update status */ + xmlctx->status = LYXML_ELEM_CONTENT; + } else if (xmlctx->in->current[0] == '/') { + /* no content but we still return it */ + xmlctx->value = ""; + xmlctx->value_len = 0; + xmlctx->ws_only = 1; + xmlctx->dynamic = 0; + + /* update status */ + xmlctx->status = LYXML_ELEM_CONTENT; + } else { + /* update status */ + xmlctx->status = LYXML_ATTRIBUTE; + } + break; + + case LYXML_ATTRIBUTE: + /* attr|='val' */ + + /* skip formatting and parse value */ + ret = lyxml_next_attr_content(xmlctx, &xmlctx->value, &xmlctx->value_len, &xmlctx->ws_only, &xmlctx->dynamic); + LY_CHECK_GOTO(ret, cleanup); + + /* update status */ + xmlctx->status = LYXML_ATTR_CONTENT; + break; + + case LYXML_END: + /* </elem> |EOF */ + /* nothing to do */ + break; + } + +cleanup: + if (ret) { + /* invalidate context */ + xmlctx->status = LYXML_END; + } + return ret; +} + +LY_ERR +lyxml_ctx_peek(struct lyxml_ctx *xmlctx, enum LYXML_PARSER_STATUS *next) +{ + LY_ERR ret = LY_SUCCESS; + const char *prefix, *name, *prev_input; + size_t prefix_len, name_len; + ly_bool closing; + + prev_input = xmlctx->in->current; + + switch (xmlctx->status) { + case LYXML_ELEM_CONTENT: + if (xmlctx->in->current[0] == '/') { + *next = LYXML_ELEM_CLOSE; + break; + } + /* fall through */ + case LYXML_ELEM_CLOSE: + /* parse next element, if any */ + ret = lyxml_next_element(xmlctx, &prefix, &prefix_len, &name, &name_len, &closing); + LY_CHECK_GOTO(ret, cleanup); + + if (xmlctx->in->current[0] == '\0') { + *next = LYXML_END; + } else if (closing) { + *next = LYXML_ELEM_CLOSE; + } else { + *next = LYXML_ELEMENT; + } + break; + case LYXML_ELEMENT: + case LYXML_ATTR_CONTENT: + /* parse attribute name, if any */ + ret = lyxml_next_attribute(xmlctx, &prefix, &prefix_len, &name, &name_len); + LY_CHECK_GOTO(ret, cleanup); + + if ((xmlctx->in->current[0] == '>') || (xmlctx->in->current[0] == '/')) { + *next = LYXML_ELEM_CONTENT; + } else { + *next = LYXML_ATTRIBUTE; + } + break; + case LYXML_ATTRIBUTE: + *next = LYXML_ATTR_CONTENT; + break; + case LYXML_END: + *next = LYXML_END; + break; + } + +cleanup: + xmlctx->in->current = prev_input; + return ret; +} + +/** + * @brief Free all namespaces in XML context. + * + * @param[in] xmlctx XML context to use. + */ +static void +lyxml_ns_rm_all(struct lyxml_ctx *xmlctx) +{ + struct lyxml_ns *ns; + uint32_t i; + + for (i = 0; i < xmlctx->ns.count; ++i) { + ns = xmlctx->ns.objs[i]; + + free(ns->prefix); + free(ns->uri); + free(ns); + } + ly_set_erase(&xmlctx->ns, NULL); +} + +void +lyxml_ctx_free(struct lyxml_ctx *xmlctx) +{ + if (!xmlctx) { + return; + } + + ly_log_location_revert(0, 0, 0, 1); + + if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) { + free((char *)xmlctx->value); + } + ly_set_erase(&xmlctx->elements, free); + lyxml_ns_rm_all(xmlctx); + free(xmlctx); +} + +/** + * @brief Duplicate an XML element. + * + * @param[in] elem Element to duplicate. + * @return Element duplicate. + * @return NULL on error. + */ +static struct lyxml_elem * +lyxml_elem_dup(const struct lyxml_elem *elem) +{ + struct lyxml_elem *dup; + + dup = malloc(sizeof *dup); + LY_CHECK_ERR_RET(!dup, LOGMEM(NULL), NULL); + + memcpy(dup, elem, sizeof *dup); + + return dup; +} + +/** + * @brief Duplicate an XML namespace. + * + * @param[in] ns Namespace to duplicate. + * @return Namespace duplicate. + * @return NULL on error. + */ +static struct lyxml_ns * +lyxml_ns_dup(const struct lyxml_ns *ns) +{ + struct lyxml_ns *dup; + + dup = malloc(sizeof *dup); + LY_CHECK_ERR_RET(!dup, LOGMEM(NULL), NULL); + + if (ns->prefix) { + dup->prefix = strdup(ns->prefix); + LY_CHECK_ERR_RET(!dup->prefix, LOGMEM(NULL); free(dup), NULL); + } else { + dup->prefix = NULL; + } + dup->uri = strdup(ns->uri); + LY_CHECK_ERR_RET(!dup->uri, LOGMEM(NULL); free(dup->prefix); free(dup), NULL); + dup->depth = ns->depth; + + return dup; +} + +LY_ERR +lyxml_ctx_backup(struct lyxml_ctx *xmlctx, struct lyxml_ctx *backup) +{ + uint32_t i; + + /* first make shallow copy */ + memcpy(backup, xmlctx, sizeof *backup); + + if ((xmlctx->status == LYXML_ELEM_CONTENT) && xmlctx->dynamic) { + /* it was backed up, do not free */ + xmlctx->dynamic = 0; + } + + /* backup in */ + backup->b_current = xmlctx->in->current; + backup->b_line = xmlctx->in->line; + + /* duplicate elements */ + backup->elements.objs = malloc(xmlctx->elements.size * sizeof(struct lyxml_elem)); + LY_CHECK_ERR_RET(!backup->elements.objs, LOGMEM(xmlctx->ctx), LY_EMEM); + for (i = 0; i < xmlctx->elements.count; ++i) { + backup->elements.objs[i] = lyxml_elem_dup(xmlctx->elements.objs[i]); + LY_CHECK_RET(!backup->elements.objs[i], LY_EMEM); + } + + /* duplicate ns */ + backup->ns.objs = malloc(xmlctx->ns.size * sizeof(struct lyxml_ns)); + LY_CHECK_ERR_RET(!backup->ns.objs, LOGMEM(xmlctx->ctx), LY_EMEM); + for (i = 0; i < xmlctx->ns.count; ++i) { + backup->ns.objs[i] = lyxml_ns_dup(xmlctx->ns.objs[i]); + LY_CHECK_RET(!backup->ns.objs[i], LY_EMEM); + } + + return LY_SUCCESS; +} + +void +lyxml_ctx_restore(struct lyxml_ctx *xmlctx, struct lyxml_ctx *backup) +{ + if (((xmlctx->status == LYXML_ELEM_CONTENT) || (xmlctx->status == LYXML_ATTR_CONTENT)) && xmlctx->dynamic) { + /* free dynamic value */ + free((char *)xmlctx->value); + } + + /* free elements */ + ly_set_erase(&xmlctx->elements, free); + + /* free ns */ + lyxml_ns_rm_all(xmlctx); + + /* restore in */ + xmlctx->in->current = backup->b_current; + xmlctx->in->line = backup->b_line; + backup->in = xmlctx->in; + + /* restore backup */ + memcpy(xmlctx, backup, sizeof *xmlctx); +} + +LY_ERR +lyxml_dump_text(struct ly_out *out, const char *text, ly_bool attribute) +{ + LY_ERR ret; + + if (!text) { + return 0; + } + + for (uint64_t u = 0; text[u]; u++) { + switch (text[u]) { + case '&': + ret = ly_print_(out, "&"); + break; + case '<': + ret = ly_print_(out, "<"); + break; + case '>': + /* not needed, just for readability */ + ret = ly_print_(out, ">"); + break; + case '"': + if (attribute) { + ret = ly_print_(out, """); + break; + } + /* fall through */ + default: + ret = ly_write_(out, &text[u], 1); + break; + } + LY_CHECK_RET(ret); + } + + return LY_SUCCESS; +} + +LY_ERR +lyxml_value_compare(const struct ly_ctx *ctx1, const char *value1, void *val_prefix_data1, + const struct ly_ctx *ctx2, const char *value2, void *val_prefix_data2) +{ + const char *value1_iter, *value2_iter; + const char *value1_next, *value2_next; + uint32_t value1_len, value2_len; + ly_bool is_prefix1, is_prefix2; + const struct lys_module *mod1, *mod2; + LY_ERR ret; + + if (!value1 && !value2) { + return LY_SUCCESS; + } + if ((value1 && !value2) || (!value1 && value2)) { + return LY_ENOT; + } + + if (!ctx2) { + ctx2 = ctx1; + } + + ret = LY_SUCCESS; + for (value1_iter = value1, value2_iter = value2; + value1_iter && value2_iter; + value1_iter = value1_next, value2_iter = value2_next) { + if ((ret = ly_value_prefix_next(value1_iter, NULL, &value1_len, &is_prefix1, &value1_next))) { + break; + } + if ((ret = ly_value_prefix_next(value2_iter, NULL, &value2_len, &is_prefix2, &value2_next))) { + break; + } + + if (is_prefix1 != is_prefix2) { + ret = LY_ENOT; + break; + } + + if (!is_prefix1) { + if (value1_len != value2_len) { + ret = LY_ENOT; + break; + } + if (strncmp(value1_iter, value2_iter, value1_len)) { + ret = LY_ENOT; + break; + } + continue; + } + + mod1 = mod2 = NULL; + if (val_prefix_data1) { + /* find module of the first prefix, if any */ + mod1 = ly_resolve_prefix(ctx1, value1_iter, value1_len, LY_VALUE_XML, val_prefix_data1); + } + if (val_prefix_data2) { + mod2 = ly_resolve_prefix(ctx2, value2_iter, value2_len, LY_VALUE_XML, val_prefix_data2); + } + if (!mod1 || !mod2) { + /* not a prefix or maps to different namespaces */ + ret = LY_ENOT; + break; + } + + if (mod1->ctx == mod2->ctx) { + /* same contexts */ + if ((mod1->name != mod2->name) || (mod1->revision != mod2->revision)) { + ret = LY_ENOT; + break; + } + } else { + /* different contexts */ + if (strcmp(mod1->name, mod2->name)) { + ret = LY_ENOT; + break; + } + + if (mod1->revision || mod2->revision) { + if (!mod1->revision || !mod2->revision) { + ret = LY_ENOT; + break; + } + if (strcmp(mod1->revision, mod2->revision)) { + ret = LY_ENOT; + break; + } + } + } + } + + if (value1_iter || value2_iter) { + ret = LY_ENOT; + } + + return ret; +} |