1 files changed, 463 insertions, 0 deletions
diff --git a/xml.cc b/xml.cc
new file mode 100644
index 0000000..d392bfa
--- /dev/null
+++ b/xml.cc
@@ -0,0 +1,463 @@
+/***************************************************************************
+ * xml.cc -- Simple library to emit XML.                                   *
+ ***********************IMPORTANT NMAP LICENSE TERMS************************
+ *
+ * The Nmap Security Scanner is (C) 1996-2023 Nmap Software LLC ("The Nmap
+ * Project"). Nmap is also a registered trademark of the Nmap Project.
+ *
+ * This program is distributed under the terms of the Nmap Public Source
+ * License (NPSL). The exact license text applying to a particular Nmap
+ * release or source code control revision is contained in the LICENSE
+ * file distributed with that version of Nmap or source code control
+ * revision. More Nmap copyright/legal information is available from
+ * https://nmap.org/book/man-legal.html, and further information on the
+ * NPSL license itself can be found at https://nmap.org/npsl/ . This
+ * header summarizes some key points from the Nmap license, but is no
+ * substitute for the actual license text.
+ *
+ * Nmap is generally free for end users to download and use themselves,
+ * including commercial use. It is available from https://nmap.org.
+ *
+ * The Nmap license generally prohibits companies from using and
+ * redistributing Nmap in commercial products, but we sell a special Nmap
+ * OEM Edition with a more permissive license and special features for
+ * this purpose. See https://nmap.org/oem/
+ *
+ * If you have received a written Nmap license agreement or contract
+ * stating terms other than these (such as an Nmap OEM license), you may
+ * choose to use and redistribute Nmap under those terms instead.
+ *
+ * The official Nmap Windows builds include the Npcap software
+ * (https://npcap.com) for packet capture and transmission. It is under
+ * separate license terms which forbid redistribution without special
+ * permission. So the official Nmap Windows builds may not be redistributed
+ * without special permission (such as an Nmap OEM license).
+ *
+ * Source is provided to this software because we believe users have a
+ * right to know exactly what a program is going to do before they run it.
+ * This also allows you to audit the software for security holes.
+ *
+ * Source code also allows you to port Nmap to new platforms, fix bugs, and add
+ * new features. You are highly encouraged to submit your changes as a Github PR
+ * or by email to the dev@nmap.org mailing list for possible incorporation into
+ * the main distribution. Unless you specify otherwise, it is understood that
+ * you are offering us very broad rights to use your submissions as described in
+ * the Nmap Public Source License Contributor Agreement. This is important
+ * because we fund the project by selling licenses with various terms, and also
+ * because the inability to relicense code has caused devastating problems for
+ * other Free Software projects (such as KDE and NASM).
+ *
+ * The free version of Nmap is distributed in the hope that it will be
+ * useful, but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. Warranties,
+ * indemnification and commercial support are all available through the
+ * Npcap OEM program--see https://nmap.org/oem/
+ *
+ ***************************************************************************/
+
+/* $Id: xml.cc 15135 2009-08-19 21:05:21Z david $ */
+
+/*
+This is a simple library for writing XML. It handles two main things:
+keeping track of the element stack, and escaping text where necessary.
+If you wanted to write this XML:
+  <?xml version="1.0" encoding="UTF-8"?>
+  <!DOCTYPE elem>
+  <elem name="&amp;10.5"></elem>
+these are the functions you would call. Each one is followed by the text
+it prints enclosed in || (the blank line added by xml_newline is omitted above).
+
+xml_start_document("elem");            |<?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE elem>\n|
+xml_newline();                         |\n|
+xml_open_start_tag("elem");            |<elem|
+xml_attribute("name", "&%.1f", 10.5);  | name="&amp;10.5"|
+xml_close_start_tag();                 |>|
+xml_end_tag();                         |</elem>|
+
+The typical use is to call xml_open_start_tag, then call xml_attribute a
+number of times. That is followed by xml_close_empty_tag, or else
+xml_close_start_tag followed by xml_end_tag later on. You can call
+xml_start_tag if there are no attributes. Whenever a start tag is opened
+with xml_open_start_tag or xml_start_tag, the element name is pushed on
+the element stack. xml_end_tag pops the element stack and closes the element
+it finds.
+
+Here is a summary of all the elementary writing functions. The functions
+return 0 on success and -1 on error. The terms "start" and "end" refer
+to start and end tags and the start and end of comments. The terms
+"open" and "close" refer only to start tags and processing instructions.
+
+xml_start_comment()           |<!--|
+xml_end_comment()             |-->|
+xml_open_pi("elem")           |<?elem|
+xml_close_pi()                |?>|
+xml_open_start_tag("elem")    |<elem|
+xml_close_start_tag()         |>|
+xml_close_empty_tag()         |/>|
+xml_start_tag("elem")         |<elem>|
+xml_end_tag()                 |</elem>|
+xml_attribute("name", "val")  | name="val"|
+xml_newline()                 |\n|
+
+Additional functions are
+
+xml_write_raw                 Raw unescaped output.
+xml_write_escaped             XML-escaped output.
+xml_write_escaped_v           XML-escaped output, with a va_list.
+xml_start_document            Writes <?xml version="1.0" encoding="UTF-8"?>\n<!DOCTYPE elem>\n.
+xml_depth                     Returns the size of the element stack.
+
+The library makes it harder but not impossible to make non-well-formed
+XML. For example, you can call xml_start_tag, xml_end_tag,
+xml_start_tag, xml_end_tag to create a document with two root elements.
+Things like element names aren't checked to be sure they're legal. Text
+given to these functions should be ASCII or UTF-8.
+
+All writing is done with log_write(LOG_XML), so if LOG_XML hasn't been
+opened, calling these functions has no effect.
+*/
+
+#include "output.h"
+#include "xml.h"
+#include <nbase.h>
+
+#include <assert.h>
+#include <stdarg.h>
+#include <stdio.h>
+#include <list>
+
+struct xml_writer {
+  /* True while a start tag or processing instruction has been opened but
+     not yet closed. Guards against output like "<elem1<elem2". */
+  bool tag_open;
+  /* Set once any start tag has been opened. If it is set and
+     element_stack.size() == 0, then the document is finished. */
+  bool root_written;
+  std::list<const char *> element_stack; /* Open elements, innermost last. */
+};
+
+static struct xml_writer xml; /* Writer state shared by all functions below. */
+
+/* Undo escape(): return a newly allocated string (owned by the caller) in
+   which &lt; &gt; &amp; &quot; &apos; and the numeric references &#NN;
+   and &#xHH; are decoded. A numeric reference becomes one byte, mirroring
+   the per-byte "&#x%x;" form that escape() emits. An '&' that does not
+   start a complete reference to a nonzero byte is copied unchanged. */
+char *xml_unescape(const char *str) {
+  static const struct { const char *ent; size_t len; char ch; } named[] = {
+    {"&lt;", 4, '<'}, {"&gt;", 4, '>'}, {"&amp;", 5, '&'},
+    {"&quot;", 6, '"'}, {"&apos;", 6, '\''}
+  };
+  char *result;
+  size_t i = 0, k;
+  const char *p;
+
+  /* Decoding never lengthens the text, so strlen(str) + 1 bytes suffice. */
+  result = (char *) safe_realloc(NULL, strlen(str) + 1);
+  for (p = str; *p != '\0'; p++) {
+    char c = *p;
+
+    if (c == '&' && p[1] == '#') {
+      /* Numeric reference: decimal "&#NN;" or hexadecimal "&#xHH;". */
+      const char *q = p + 2;
+      unsigned int base = 10, val = 0, ndigits = 0;
+
+      if (*q == 'x') {
+        base = 16;
+        q++;
+      }
+      for (; val <= 0xFF; q++, ndigits++) {
+        unsigned int d;
+        if (*q >= '0' && *q <= '9')
+          d = *q - '0';
+        else if (base == 16 && *q >= 'a' && *q <= 'f')
+          d = *q - 'a' + 10;
+        else if (base == 16 && *q >= 'A' && *q <= 'F')
+          d = *q - 'A' + 10;
+        else
+          break;
+        val = val * base + d;
+      }
+      /* Accept only a terminated reference naming a nonzero single byte. */
+      if (ndigits > 0 && *q == ';' && val >= 1 && val <= 0xFF) {
+        c = (char) val;
+        p = q;
+      }
+    } else if (c == '&') {
+      for (k = 0; k < sizeof(named) / sizeof(named[0]); k++) {
+        if (strncmp(p, named[k].ent, named[k].len) == 0) {
+          c = named[k].ch;
+          p += named[k].len - 1;
+          break;
+        }
+      }
+    }
+    result[i++] = c;
+  }
+  result[i] = '\0';
+
+  /* Trim to length. */
+  return (char *) safe_realloc(result, i + 1);
+}
+
+/* Return a newly allocated copy of str, escaped for inclusion in XML. The
+   characters <>& are replaced, as are "' (for attribute values) and any
+   dash that directly follows another dash (so "--" cannot appear inside
+   a comment). Control characters below 0x20 are replaced too, to avoid
+   parser normalization of \r\n\t in attribute values, and so are bytes
+   above 0x7F. All of this is unconditional; a parameter would have to be
+   added to make any of it optional. */
+static char *escape(const char *str) {
+  /* out is the output buffer: cap + 1 bytes are allocated and the first
+     used bytes are filled in. */
+  char *out = NULL;
+  size_t cap = 0, rlen;
+  const char *cur;
+  int used = 0;
+
+  for (cur = str; *cur != '\0'; cur++) {
+    char tmp[32];
+    const char *repl = tmp;
+
+    /* Pick the replacement text for this byte. tmp holds the replacements
+       that have to be built on the fly. */
+    switch (*cur) {
+    case '<':  repl = "&lt;";   break;
+    case '>':  repl = "&gt;";   break;
+    case '&':  repl = "&amp;";  break;
+    case '"':  repl = "&quot;"; break;
+    case '\'': repl = "&apos;"; break;
+    default:
+      if (*cur == '-' && cur > str && cur[-1] == '-') {
+        /* Second or later dash of a run. */
+        repl = "&#45;";
+      } else if (*cur < 0x20 || (unsigned char) *cur > 0x7F) {
+        /* One numeric reference per byte keeps the output pure ASCII. */
+        Snprintf(tmp, sizeof(tmp), "&#x%x;", (unsigned char) *cur);
+      } else {
+        /* Ordinary character, copied as is. */
+        tmp[0] = *cur;
+        tmp[1] = '\0';
+      }
+      break;
+    }
+
+    rlen = strlen(repl);
+    /* Grow to twice the needed size, on the first pass and whenever the
+       next piece would not fit. */
+    if (used == 0 || used + rlen > cap) {
+      cap = (used + rlen) * 2;
+      out = (char *) safe_realloc(out, cap + 1);
+    }
+    memcpy(out + used, repl, rlen);
+    used += rlen;
+  }
+  /* Trim to length; for an empty str this is also the first allocation. */
+  out = (char *) safe_realloc(out, used + 1);
+  out[used] = '\0';
+
+  return out;
+}
+
+/* Format the arguments printf-style and write the result to the XML log
+   with no escaping at all. Make sure you know what you're doing. Returns
+   0 on success, or -1 if formatting yields NULL. */
+int xml_write_raw(const char *fmt, ...) {
+  char *text;
+  va_list args;
+
+  va_start(args, fmt);
+  alloc_vsprintf(&text, fmt, args);
+  va_end(args);
+  if (text != NULL) {
+    log_write(LOG_XML, "%s", text);
+    free(text);
+    return 0;
+  }
+  return -1;
+}
+
+/* printf-style front end to xml_write_escaped_v: collects the variable
+   arguments and hands that function's return value back to the caller. */
+int xml_write_escaped(const char *fmt, ...) {
+  va_list args;
+  int rc;
+
+  va_start(args, fmt);
+  rc = xml_write_escaped_v(fmt, args);
+  va_end(args);
+  return rc;
+}
+
+/* va_list variant of xml_write_escaped, in the manner of vprintf: format
+   the arguments, XML-escape the formatted text, and write it to the XML
+   log. Returns 0 on success, or -1 if either step yields NULL. */
+int xml_write_escaped_v(const char *fmt, va_list va) {
+  char *formatted, *escaped;
+
+  alloc_vsprintf(&formatted, fmt, va);
+  /* Escape only if formatting produced a string; the formatted copy is
+     released either way (free(NULL) is a no-op). */
+  escaped = (formatted != NULL) ? escape(formatted) : NULL;
+  free(formatted);
+  if (escaped == NULL)
+    return -1;
+
+  log_write(LOG_XML, "%s", escaped);
+  free(escaped);
+  return 0;
+}
+
+/* Start the document: write the XML declaration
+ *   <?xml version="1.0" encoding="UTF-8"?>
+ * and a newline, then the DOCTYPE declaration <!DOCTYPE rootnode> and
+ * another newline. Returns 0 on success, or -1 as soon as one of the calls
+ * that write the XML declaration fails.
+ */
+int xml_start_document(const char *rootnode) {
+  /* Practically, Nmap only uses ASCII, but UTF-8 encompasses ASCII and
+   * allows for future expansion. */
+  if (xml_open_pi("xml") < 0
+      || xml_attribute("version", "1.0") < 0
+      || xml_attribute("encoding", "UTF-8") < 0
+      || xml_close_pi() < 0
+      || xml_newline() < 0)
+    return -1;
+
+  /* rootnode is written as given, without escaping. */
+  log_write(LOG_XML, "<!DOCTYPE %s>\n", rootnode);
+
+  return 0;
+}
+
+/* Write the comment opener "<!--" to the XML log. Always returns 0. */
+int xml_start_comment() {
+  log_write(LOG_XML, "<!--");
+  return 0;
+}
+
+/* Write the comment terminator "-->" to the XML log. Always returns 0. */
+int xml_end_comment() {
+  log_write(LOG_XML, "-->");
+  return 0;
+}
+
+/* Begin a processing instruction by writing "<?name" and marking the writer
+   as having an open tag. Asserts that nothing is open. Always returns 0. */
+int xml_open_pi(const char *name) {
+  assert(!xml.tag_open);
+  xml.tag_open = true;
+  log_write(LOG_XML, "<?%s", name);
+  return 0;
+}
+
+/* Finish a processing instruction by writing "?>" and clearing the open-tag
+   flag. Asserts that something is open. Always returns 0. */
+int xml_close_pi() {
+  assert(xml.tag_open);
+  xml.tag_open = false;
+  log_write(LOG_XML, "?>");
+  return 0;
+}
+
+/* Begin a start tag. "<name" is emitted only when write is true, but in
+   either case name is pushed on the element stack (the pointer itself is
+   stored, not a copy) and the writer is marked as having an open tag and a
+   started root. Close the tag later with xml_close_start_tag or
+   xml_close_empty_tag, usually after writing attributes. Always returns 0. */
+int xml_open_start_tag(const char *name, const bool write) {
+  assert(!xml.tag_open);
+  if (write)
+    log_write(LOG_XML, "<%s", name);
+  xml.element_stack.push_back(name);
+  xml.root_written = xml.tag_open = true;
+  return 0;
+}
+
+/* Close the open start tag; ">" is emitted only if write. Returns 0. */
+int xml_close_start_tag(const bool write) {
+  assert(xml.tag_open);
+  xml.tag_open = false;
+  if (write)
+    log_write(LOG_XML, ">");
+  return 0;
+}
+
+/* Finish the open tag as an empty element by writing "/>". The name that
+   xml_open_start_tag pushed is popped off the element stack here, so no
+   xml_end_tag call is needed for it. Always returns 0. */
+int xml_close_empty_tag() {
+  assert(xml.tag_open);
+  assert(!xml.element_stack.empty());
+  xml.tag_open = false;
+  xml.element_stack.pop_back();
+  log_write(LOG_XML, "/>");
+  return 0;
+}
+
+/* Write a complete attribute-less start tag, "<name>", by opening and then
+   closing it. write is passed through to both steps. Returns 0, or -1 if
+   either step fails. */
+int xml_start_tag(const char *name, const bool write) {
+  const bool failed = xml_open_start_tag(name, write) < 0
+                      || xml_close_start_tag(write) < 0;
+  return failed ? -1 : 0;
+}
+
+/* Close the innermost open element: pop its name off the element stack and
+   write "</name>". No start tag may be pending and the stack must not be
+   empty; both conditions are asserted. Always returns 0. */
+int xml_end_tag() {
+  assert(!xml.tag_open);
+  assert(!xml.element_stack.empty());
+
+  /* Take the name before popping; the stack stores only the pointer. */
+  const char *const top = xml.element_stack.back();
+  xml.element_stack.pop_back();
+  log_write(LOG_XML, "</%s>", top);
+  return 0;
+}
+
+/* Write one attribute, in the form  name="value"  preceded by a space. The
+   value is formatted printf-style from fmt and then XML-escaped; name is
+   written as given. The only place this makes sense is while a tag or PI
+   is open, which is asserted. Returns 0 on success, or -1 if formatting or
+   escaping yields NULL. */
+int xml_attribute(const char *name, const char *fmt, ...) {
+  char *raw, *quoted;
+  va_list args;
+
+  assert(xml.tag_open);
+
+  va_start(args, fmt);
+  alloc_vsprintf(&raw, fmt, args);
+  va_end(args);
+  /* Escape only a successfully formatted value; free(NULL) is a no-op. */
+  quoted = (raw != NULL) ? escape(raw) : NULL;
+  free(raw);
+  if (quoted == NULL)
+    return -1;
+
+  log_write(LOG_XML, " %s=\"%s\"", name, quoted);
+  free(quoted);
+  return 0;
+}
+
+/* Write a single "\n" to the XML log. Always returns 0. */
+int xml_newline() {
+  log_write(LOG_XML, "\n");
+  return 0;
+}
+
+/* Number of elements currently open, i.e. the element stack's size. */
+int xml_depth() {
+  return (int) xml.element_stack.size();
+}
+
+/* Return true iff a start tag or processing instruction is currently open. */
+bool xml_tag_open() {
+  return xml.tag_open;
+}
+
+/* Return true iff a start tag has been opened (flag is never reset here). */
+bool xml_root_written() {
+  return xml.root_written;
+}