diff options
Diffstat (limited to 'xml.cc')
-rw-r--r-- | xml.cc | 463 |
1 files changed, 463 insertions, 0 deletions
@@ -0,0 +1,463 @@ +/*************************************************************************** + * xml.cc -- Simple library to emit XML. * + ***********************IMPORTANT NMAP LICENSE TERMS************************ + * + * The Nmap Security Scanner is (C) 1996-2023 Nmap Software LLC ("The Nmap + * Project"). Nmap is also a registered trademark of the Nmap Project. + * + * This program is distributed under the terms of the Nmap Public Source + * License (NPSL). The exact license text applying to a particular Nmap + * release or source code control revision is contained in the LICENSE + * file distributed with that version of Nmap or source code control + * revision. More Nmap copyright/legal information is available from + * https://nmap.org/book/man-legal.html, and further information on the + * NPSL license itself can be found at https://nmap.org/npsl/ . This + * header summarizes some key points from the Nmap license, but is no + * substitute for the actual license text. + * + * Nmap is generally free for end users to download and use themselves, + * including commercial use. It is available from https://nmap.org. + * + * The Nmap license generally prohibits companies from using and + * redistributing Nmap in commercial products, but we sell a special Nmap + * OEM Edition with a more permissive license and special features for + * this purpose. See https://nmap.org/oem/ + * + * If you have received a written Nmap license agreement or contract + * stating terms other than these (such as an Nmap OEM license), you may + * choose to use and redistribute Nmap under those terms instead. + * + * The official Nmap Windows builds include the Npcap software + * (https://npcap.com) for packet capture and transmission. It is under + * separate license terms which forbid redistribution without special + * permission. So the official Nmap Windows builds may not be redistributed + * without special permission (such as an Nmap OEM license). + * + * Source is provided to this software because we believe users have a + * right to know exactly what a program is going to do before they run it. + * This also allows you to audit the software for security holes. + * + * Source code also allows you to port Nmap to new platforms, fix bugs, and add + * new features. You are highly encouraged to submit your changes as a Github PR + * or by email to the dev@nmap.org mailing list for possible incorporation into + * the main distribution. Unless you specify otherwise, it is understood that + * you are offering us very broad rights to use your submissions as described in + * the Nmap Public Source License Contributor Agreement. This is important + * because we fund the project by selling licenses with various terms, and also + * because the inability to relicense code has caused devastating problems for + * other Free Software projects (such as KDE and NASM). + * + * The free version of Nmap is distributed in the hope that it will be + * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Warranties, + * indemnification and commercial support are all available through the + * Npcap OEM program--see https://nmap.org/oem/ + * + ***************************************************************************/ + +/* $Id: xml.cc 15135 2009-08-19 21:05:21Z david $ */ + +/* +This is a simple library for writing XML. It handles two main things: +keeping track of the element stack, and escaping text where necessary. +If you wanted to write this XML: + <?xml version="1.0" encoding="UTF-8"?> + <!DOCTYPE elem> + <elem name="&10.5"></elem> +these are the functions you would call. Each one is followed by the text +it prints enclosed in ||. + +xml_start_document("elem") |<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE elem>| +xml_newline(); |\n| +xml_open_start_tag("elem"); |<elem| +xml_attribute("name", "&%.1f", 10.5); | name="&10.5"| +xml_close_start_tag(); |>| +xml_end_tag(); |</elem>| + +The typical use is to call xml_open_start_tag, then call xml_attribute a +number of times. That is followed by xml_close_empty_tag, or else +xml_close_start_tag followed by xml_end_tag later on. You can call +xml_start_tag if there are no attributes. Whenever a start tag is opened +with xml_open_start_tag or xml_start_tag, the element name is pushed on +the tag stack. xml_end_tag pops the element stack and closes the element +it finds. + +Here is a summary of all the elementary writing functions. The functions +return 0 on success and -1 on error. The terms "start" and "end" refer +to start and end tags and the start and end of comments. The terms +"open" and "close" refer only to start tags and processing instructions. + +xml_start_comment() |<!--| +xml_end_comment() |-->| +xml_open_pi("elem") |<?elem| +xml_close_pi() |?>| +xml_open_start_tag("elem") |<elem| +xml_close_start_tag() |>| +xml_close_empty_tag() |/>| +xml_start_tag("elem") |<elem>| +xml_end_tag() |</elem>| +xml_attribute("name", "val") | name="val"| +xml_newline() |\n| + +Additional functions are + +xml_write_raw Raw unescaped output. +xml_write_escaped XML-escaped output. +xml_write_escaped_v XML-escaped output, with a va_list. +xml_start_document Writes <?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE elem>. +xml_depth Returns the size of the element stack. + +The library makes it harder but not impossible to make non-well-formed +XML. For example, you can call xml_start_tag, xml_end_tag, +xml_start_tag, xml_end_tag to create a document with two root elements. +Things like element names aren't checked to be sure they're legal. Text +given to these functions should be ASCII or UTF-8. + +All writing is done with log_write(LOG_XML), so if LOG_XML hasn't been +opened, calling these functions has no effect. +*/ + +#include "output.h" +#include "xml.h" +#include <nbase.h> + +#include <assert.h> +#include <stdarg.h> +#include <stdio.h> +#include <list> + +struct xml_writer { + /* Sanity checking: Don't open a new tag while still defining + attributes for another, like "<elem1<elem2". */ + bool tag_open; + /* Has the root element been started yet? If so, and if + element_stack.size() == 0, then the document is finished. */ + bool root_written; + std::list<const char *> element_stack; +}; + +static struct xml_writer xml; + +char *xml_unescape(const char *str) { + char *result = NULL; + size_t n = 0, len; + const char *p; + int i; + + i = 0; + for (p = str; *p != '\0'; p++) { + const char *repl; + char buf[32]; + + if (*p != '&') { + /* Based on the asumption that ampersand is only used for escaping. */ + buf[0] = *p; + buf[1] = '\0'; + repl = buf; + } else if (strncmp(p, "<", 4) == 0) { + repl = "<"; + p += 3; + } else if (strncmp(p, ">", 4) == 0) { + repl = ">"; + p += 3; + } else if (strncmp(p, "&", 5) == 0) { + repl = "&"; + p += 4; + } else if (strncmp(p, """, 6) == 0) { + repl = "\""; + p += 5; + } else if (strncmp(p, "'", 6) == 0) { + repl = "\'"; + p += 5; + } else if (strncmp(p, "-", 5) == 0) { + repl = "-"; + p += 4; + } else { + /* Escaped control characters and anything outside of ASCII. */ + Strncpy(buf, p + 3, sizeof(buf)); + char *q; + q = strchr(buf, ';'); + if(!q) + buf[0] = '\0'; + else + *q = '\0'; + repl = buf; + } + + len = strlen(repl); + /* Double the size of the result buffer if necessary. */ + if (i == 0 || i + len > n) { + n = (i + len) * 2; + result = (char *) safe_realloc(result, n + 1); + } + memcpy(result + i, repl, len); + i += len; + } + /* Trim to length. (Also does initial allocation when str is empty.) */ + result = (char *) safe_realloc(result, i + 1); + result[i] = '\0'; + + return result; +} + +/* Escape a string for inclusion in XML. This gets <>&, "' for attribute + values, -- for inside comments, and characters with value > 0x7F. It + also gets control characters with value < 0x20 to avoid parser + normalization of \r\n\t in attribute values. If this is not desired + in some cases, we'll have to add a parameter to control this. */ +static char *escape(const char *str) { + /* result is the result buffer; n + 1 is the allocated size. Double the + allocation when space runs out. */ + char *result = NULL; + size_t n = 0, len; + const char *p; + int i; + + i = 0; + for (p = str; *p != '\0'; p++) { + const char *repl; + char buf[32]; + + if (*p == '<') + repl = "<"; + else if (*p == '>') + repl = ">"; + else if (*p == '&') + repl = "&"; + else if (*p == '"') + repl = """; + else if (*p == '\'') + repl = "'"; + else if (*p == '-' && p > str && *(p - 1) == '-') { + /* Escape -- for comments. */ + repl = "-"; + } else if (*p < 0x20 || (unsigned char) *p > 0x7F) { + /* Escape control characters and anything outside of ASCII. We have to + emit UTF-8 and an easy way to do that is to emit ASCII. */ + Snprintf(buf, sizeof(buf), "&#x%x;", (unsigned char) *p); + repl = buf; + } else { + /* Unescaped character. */ + buf[0] = *p; + buf[1] = '\0'; + repl = buf; + } + + len = strlen(repl); + /* Double the size of the result buffer if necessary. */ + if (i == 0 || i + len > n) { + n = (i + len) * 2; + result = (char *) safe_realloc(result, n + 1); + } + memcpy(result + i, repl, len); + i += len; + } + /* Trim to length. (Also does initial allocation when str is empty.) */ + result = (char *) safe_realloc(result, i + 1); + result[i] = '\0'; + + return result; +} + +/* Write data directly to the XML file with no escaping. Make sure you + know what you're doing. */ +int xml_write_raw(const char *fmt, ...) { + va_list va; + char *s; + + va_start(va, fmt); + alloc_vsprintf(&s, fmt, va); + va_end(va); + if (s == NULL) + return -1; + + log_write(LOG_XML, "%s", s); + free(s); + + return 0; +} + +/* Write data directly to the XML file after escaping it. */ +int xml_write_escaped(const char *fmt, ...) { + va_list va; + int n; + + va_start(va, fmt); + n = xml_write_escaped_v(fmt, va); + va_end(va); + + return n; +} + +/* Write data directly to the XML file after escaping it. This version takes a + va_list like vprintf. */ +int xml_write_escaped_v(const char *fmt, va_list va) { + char *s, *esc_s; + + alloc_vsprintf(&s, fmt, va); + if (s == NULL) + return -1; + esc_s = escape(s); + free(s); + if (esc_s == NULL) + return -1; + + log_write(LOG_XML, "%s", esc_s); + free(esc_s); + + return 0; +} + +/* Write the XML declaration: <?xml version="1.0" encoding="UTF-8"?> + * and the DOCTYPE declaration: <!DOCTYPE rootnode> + */ +int xml_start_document(const char *rootnode) { + if (xml_open_pi("xml") < 0) + return -1; + if (xml_attribute("version", "1.0") < 0) + return -1; + /* Practically, Nmap only uses ASCII, but UTF-8 encompasses ASCII and allows + * for future expansion */ + if (xml_attribute("encoding", "UTF-8") < 0) + return -1; + if (xml_close_pi() < 0) + return -1; + if (xml_newline() < 0) + return -1; + + log_write(LOG_XML, "<!DOCTYPE %s>\n", rootnode); + + return 0; +} + +int xml_start_comment() { + log_write(LOG_XML, "<!--"); + + return 0; +} + +int xml_end_comment() { + log_write(LOG_XML, "-->"); + + return 0; +} + +int xml_open_pi(const char *name) { + assert(!xml.tag_open); + log_write(LOG_XML, "<?%s", name); + xml.tag_open = true; + + return 0; +} + +int xml_close_pi() { + assert(xml.tag_open); + log_write(LOG_XML, "?>"); + xml.tag_open = false; + + return 0; +} + +/* Open a start tag, like "<name". The tag must be later closed with + xml_close_start_tag or xml_close_empty_tag. Usually the tag is closed + after writing some attributes. */ +int xml_open_start_tag(const char *name, const bool write) { + assert(!xml.tag_open); + if (write) + log_write(LOG_XML, "<%s", name); + xml.element_stack.push_back(name); + xml.tag_open = true; + xml.root_written = true; + + return 0; +} + +int xml_close_start_tag(const bool write) { + assert(xml.tag_open); + if(write) + log_write(LOG_XML, ">"); + xml.tag_open = false; + + return 0; +} + +/* Close an empty-element tag. It should have been opened with + xml_open_start_tag. */ +int xml_close_empty_tag() { + assert(xml.tag_open); + assert(!xml.element_stack.empty()); + xml.element_stack.pop_back(); + log_write(LOG_XML, "/>"); + xml.tag_open = false; + + return 0; +} + +int xml_start_tag(const char *name, const bool write) { + if (xml_open_start_tag(name, write) < 0) + return -1; + if (xml_close_start_tag(write) < 0) + return -1; + + return 0; +} + +/* Write an end tag for the element at the top of the element stack. */ +int xml_end_tag() { + const char *name; + + assert(!xml.tag_open); + assert(!xml.element_stack.empty()); + name = xml.element_stack.back(); + xml.element_stack.pop_back(); + + log_write(LOG_XML, "</%s>", name); + + return 0; +} + +/* Write an attribute. The only place this makes sense is between + xml_open_start_tag and either xml_close_start_tag or + xml_close_empty_tag. */ +int xml_attribute(const char *name, const char *fmt, ...) { + va_list va; + char *val, *esc_val; + + assert(xml.tag_open); + + va_start(va, fmt); + alloc_vsprintf(&val, fmt, va); + va_end(va); + if (val == NULL) + return -1; + esc_val = escape(val); + free(val); + if (esc_val == NULL) + return -1; + + log_write(LOG_XML, " %s=\"%s\"", name, esc_val); + free(esc_val); + + return 0; +} + +int xml_newline() { + log_write(LOG_XML, "\n"); + + return 0; +} + +/* Return the size of the element stack. */ +int xml_depth() { + return xml.element_stack.size(); +} + +/* Return true iff a root element has been started. */ +bool xml_tag_open() { + return xml.tag_open; +} + +/* Return true iff a root element has been started. */ +bool xml_root_written() { + return xml.root_written; +} |