summaryrefslogtreecommitdiffstats
path: root/src/raptor_xml.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/raptor_xml.c')
-rw-r--r--src/raptor_xml.c1100
1 files changed, 1100 insertions, 0 deletions
diff --git a/src/raptor_xml.c b/src/raptor_xml.c
new file mode 100644
index 0000000..d89030c
--- /dev/null
+++ b/src/raptor_xml.c
@@ -0,0 +1,1100 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_xml.c - Raptor XML routines
+ *
+ * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+/* Raptor includes */
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+
+#ifndef STANDALONE
+
+/**
+ * raptor_new_xml_element:
+ * @name: QName naming the XML element
+ * @xml_language: the in-scope XML language (or NULL)
+ * @xml_base: the in-scope XML base URI (or NULL)
+ *
+ * Constructor - create a new XML element from a QName
+ *
+ * @name, @xml_language and @xml_base are stored as given (not copied)
+ * and are released by raptor_free_xml_element().  If construction
+ * fails, none of the arguments are freed by this function.
+ *
+ * Return value: a new #raptor_xml_element or NULL on failure
+ **/
+raptor_xml_element*
+raptor_new_xml_element(raptor_qname *name,
+                       const unsigned char *xml_language,
+                       raptor_uri *xml_base)
+{
+  raptor_xml_element* element;
+
+  element = RAPTOR_CALLOC(raptor_xml_element*, 1, sizeof(*element));
+  if(element) {
+    /* Take over the caller's name, language and base URI */
+    element->name = name;
+    element->xml_language = xml_language;
+    element->base_uri = xml_base;
+
+    /* The namespace sequence is created on first declaration */
+    element->declared_nspaces = NULL;
+
+    /* Buffer for accumulated character data */
+    element->content_cdata_sb = raptor_new_stringbuffer();
+    if(!element->content_cdata_sb) {
+      /* Only the element shell is released; the arguments stay with the caller */
+      RAPTOR_FREE(raptor_xml_element, element);
+      element = NULL;
+    }
+  }
+
+  return element;
+}
+
+
+/**
+ * raptor_new_xml_element_from_namespace_local_name:
+ * @ns: namespace
+ * @name: the XML element local name
+ * @xml_language: the in-scope XML language (or NULL)
+ * @xml_base: base uri (or NULL)
+ *
+ * Constructor - create a new XML element from an XML namespace and a local name
+ *
+ * A QName is built from @ns and @name, and @xml_base (when given) is
+ * copied with raptor_uri_copy().  @xml_language is handed to
+ * raptor_new_xml_element() unchanged.  If the element cannot be
+ * created, the QName and the URI copy made here are released.
+ *
+ * Added in 1.4.16.
+ *
+ * Return value: a new #raptor_xml_element or NULL on failure
+ */
+raptor_xml_element*
+raptor_new_xml_element_from_namespace_local_name(raptor_namespace *ns,
+                                                 const unsigned char *name,
+                                                 const unsigned char *xml_language,
+                                                 raptor_uri *xml_base)
+{
+  raptor_xml_element *new_element;
+  raptor_uri *base_copy = NULL;
+  raptor_qname *element_qname;
+
+  element_qname = raptor_new_qname_from_namespace_local_name(ns->nstack->world,
+                                                             ns, name, NULL);
+  if(!element_qname)
+    return NULL;
+
+  if(xml_base)
+    base_copy = raptor_uri_copy(xml_base);
+
+  new_element = raptor_new_xml_element(element_qname, xml_language, base_copy);
+  if(new_element)
+    return new_element;
+
+  /* Construction failed: undo what was allocated in this function */
+  raptor_free_qname(element_qname);
+  if(base_copy)
+    raptor_free_uri(base_copy);
+
+  return NULL;
+}
+
+
+/**
+ * raptor_free_xml_element:
+ * @element: XML Element (may be NULL, in which case nothing is done)
+ *
+ * Destructor - destroy a raptor_xml_element object.
+ *
+ * Releases the attribute QNames and their array, the character data
+ * buffer, the base URI, the language string, the element name and
+ * the declared namespaces sequence before freeing the element itself.
+ **/
+void
+raptor_free_xml_element(raptor_xml_element *element)
+{
+  unsigned int idx;
+
+  if(!element)
+    return;
+
+  /* Individual attribute slots may be empty, so test each one */
+  for(idx = 0; idx < element->attribute_count; idx++) {
+    if(element->attributes[idx])
+      raptor_free_qname(element->attributes[idx]);
+  }
+
+  if(element->attributes) {
+    RAPTOR_FREE(raptor_qname_array, element->attributes);
+  }
+
+  if(element->content_cdata_sb) {
+    raptor_free_stringbuffer(element->content_cdata_sb);
+  }
+
+  if(element->base_uri) {
+    raptor_free_uri(element->base_uri);
+  }
+
+  if(element->xml_language) {
+    RAPTOR_FREE(char*, element->xml_language);
+  }
+
+  raptor_free_qname(element->name);
+
+  if(element->declared_nspaces) {
+    raptor_free_sequence(element->declared_nspaces);
+  }
+
+  RAPTOR_FREE(raptor_element, element);
+}
+
+
+/**
+ * raptor_xml_element_get_name:
+ * @xml_element: XML Element
+ *
+ * Get the XML Name of an XML element
+ *
+ * The returned pointer is the one stored in the element, not a copy.
+ *
+ * Return value: The Name.
+ **/
+raptor_qname*
+raptor_xml_element_get_name(raptor_xml_element *xml_element)
+{
+  raptor_qname *element_name = xml_element->name;
+
+  return element_name;
+}
+
+
+/**
+ * raptor_xml_element_set_attributes:
+ * @xml_element: XML Element
+ * @attributes: Array of XML Qname attributes with values
+ * @count: Length of array
+ *
+ * Set the attributes on an XML element.
+ *
+ * The @attributes array becomes owned by the element after this function.
+ * Any array previously stored on the element is overwritten without
+ * being freed here.
+ **/
+void
+raptor_xml_element_set_attributes(raptor_xml_element* xml_element,
+                                  raptor_qname **attributes, int count)
+{
+  /* Record the length and the array; nothing is copied */
+  xml_element->attribute_count = count;
+  xml_element->attributes = attributes;
+}
+
+
+/**
+ * raptor_xml_element_get_attributes:
+ * @xml_element: XML Element
+ *
+ * Get the array of attributes on the XML element.
+ *
+ * Use raptor_xml_element_get_attributes_count() to get the count
+ * of the array size.  The array returned is the element's own
+ * storage, not a copy.
+ *
+ * Return value: the array of qnames or NULL if none are present.
+ **/
+raptor_qname**
+raptor_xml_element_get_attributes(raptor_xml_element* xml_element)
+{
+  raptor_qname **attribute_array = xml_element->attributes;
+
+  return attribute_array;
+}
+
+
+/**
+ * raptor_xml_element_get_attributes_count:
+ * @xml_element: XML Element
+ *
+ * Get the number of attributes on the XML element.
+ *
+ * This is the length recorded by raptor_xml_element_set_attributes().
+ *
+ * Return value: Integer number of attributes - 0 or more.
+ **/
+int
+raptor_xml_element_get_attributes_count(raptor_xml_element* xml_element)
+{
+  int attribute_total = xml_element->attribute_count;
+
+  return attribute_total;
+}
+
+
+/*
+ * raptor_xml_element_nspace_conflicts:
+ * @existing: namespace already associated with the element
+ * @candidate: namespace that is about to be declared
+ *
+ * INTERNAL - Test whether two namespaces cannot both be declared on
+ * one element: the same namespace object, two default (prefix-less)
+ * namespaces, or two namespaces with equal prefixes.
+ *
+ * Return value: non-0 if the namespaces conflict
+ */
+static int
+raptor_xml_element_nspace_conflicts(const raptor_namespace *existing,
+                                    const raptor_namespace *candidate)
+{
+  if(existing == candidate)
+    return 1;
+
+  if(!existing->prefix && !candidate->prefix)
+    return 1;
+
+  if(existing->prefix && candidate->prefix &&
+     !strcmp((const char*)existing->prefix, (const char*)candidate->prefix))
+    return 1;
+
+  return 0;
+}
+
+
+/**
+ * raptor_xml_element_declare_namespace:
+ * @xml_element: XML Element
+ * @nspace: raptor_namespace to declare
+ *
+ * Declare a namespace on the XML Element.
+ *
+ * The declaration is refused when @nspace conflicts with the
+ * namespace of the element name or with a namespace declared
+ * earlier on this element (same namespace, a second default
+ * namespace, or a repeated prefix).  It also fails when the
+ * sequence holding the declarations cannot be created or extended.
+ * The sequence stores the @nspace pointer itself and is created
+ * without a free handler.
+ *
+ * Return value: non-0 if namespace cannot be declared
+ **/
+int
+raptor_xml_element_declare_namespace(raptor_xml_element* xml_element,
+                                     raptor_namespace *nspace)
+{
+  int i;
+  const raptor_namespace *ns;
+
+  if(!xml_element->declared_nspaces) {
+    xml_element->declared_nspaces = raptor_new_sequence(NULL, NULL);
+    /* Without a sequence there is nowhere to record the declaration */
+    if(!xml_element->declared_nspaces)
+      return 1;
+  }
+
+  /* The namespace of the element name counts as already declared */
+  ns = xml_element->name->nspace;
+  if(ns && raptor_xml_element_nspace_conflicts(ns, nspace))
+    return 1;
+
+  /* raptor_sequence_get_at() yields NULL past the last entry */
+  for(i = 0;
+      (ns = (const raptor_namespace*)raptor_sequence_get_at(xml_element->declared_nspaces, i));
+      i++) {
+    if(raptor_xml_element_nspace_conflicts(ns, nspace))
+      return 1;
+  }
+
+  /* Report a failed push instead of claiming success */
+  if(raptor_sequence_push(xml_element->declared_nspaces, nspace))
+    return 1;
+
+  return 0;
+}
+
+
+#ifdef RAPTOR_DEBUG
+/*
+ * raptor_print_xml_element:
+ * @element: XML element to print
+ * @stream: FILE* to print to
+ *
+ * Debug helper (only compiled with RAPTOR_DEBUG) - print the element
+ * name on one line followed, when the element has attributes, by a
+ * line listing each non-NULL attribute as name='value'.
+ */
+void
+raptor_print_xml_element(raptor_xml_element *element, FILE* stream)
+{
+  unsigned int idx;
+  int need_separator = 0;
+
+  raptor_qname_print(stream, element->name);
+  fputc('\n', stream);
+
+  if(!element->attribute_count)
+    return;
+
+  fputs(" attributes: ", stream);
+  for(idx = 0; idx < element->attribute_count; idx++) {
+    raptor_qname *attribute = element->attributes[idx];
+
+    /* Empty slots are skipped */
+    if(!attribute)
+      continue;
+
+    /* A single space goes between entries, not before the first */
+    if(need_separator)
+      fputc(' ', stream);
+    raptor_qname_print(stream, attribute);
+    fprintf(stream, "='%s'", attribute->value);
+    need_separator = 1;
+  }
+  fputc('\n', stream);
+}
+#endif
+
+
+/*
+ * One namespace declaration queued by raptor_xml_element_write()
+ * while it builds a start tag.
+ */
+struct nsd
+{
+ /* namespace the declaration is for */
+ const raptor_namespace *nspace;
+ /* text returned by raptor_namespace_format_as_xml(); freed after it is written */
+ unsigned char *declaration;
+ /* length reported by raptor_namespace_format_as_xml() for @declaration */
+ size_t length;
+};
+
+
+/*
+ * raptor_nsd_compare:
+ * @a: pointer to a struct nsd
+ * @b: pointer to a struct nsd
+ *
+ * INTERNAL - qsort() comparison of two namespace declarations by
+ * their declaration text using strcmp().  Entries without
+ * declaration text sort before those with text, and two such
+ * entries compare equal so that the ordering is consistent.
+ *
+ * Return value: <0, 0 or >0 as @a sorts before, with or after @b
+ */
+static int
+raptor_nsd_compare(const void *a, const void *b)
+{
+  const struct nsd* nsd_a = (const struct nsd*)a;
+  const struct nsd* nsd_b = (const struct nsd*)b;
+
+  /* Sort NULLs earlier */
+  if(!nsd_a->declaration)
+    return nsd_b->declaration ? -1 : 0;
+
+  if(!nsd_b->declaration)
+    return 1;
+
+  return strcmp((const char*)nsd_a->declaration,
+                (const char*)nsd_b->declaration);
+}
+
+
+/*
+ * raptor_xml_element_write_add_nsd:
+ * @decls: array of queued namespace declarations
+ * @count_p: pointer to the number of used entries in @decls (updated)
+ * @nspace: namespace to declare
+ *
+ * INTERNAL - Queue a declaration for @nspace unless the same
+ * namespace object has already been queued.  The caller must have
+ * sized @decls so that one more entry fits.
+ *
+ * Return value: non-0 if the declaration text could not be created
+ */
+static int
+raptor_xml_element_write_add_nsd(struct nsd *decls, size_t *count_p,
+                                 const raptor_namespace *nspace)
+{
+  size_t j;
+  size_t length = 0;
+  unsigned char *declaration;
+
+  /* check it wasn't an earlier declaration too */
+  for(j = 0; j < *count_p; j++) {
+    if(decls[j].nspace == nspace)
+      return 0;
+  }
+
+  declaration = raptor_namespace_format_as_xml(nspace, &length);
+  if(!declaration)
+    return 1;
+
+  decls[*count_p].declaration = declaration;
+  decls[*count_p].length = length;
+  decls[*count_p].nspace = nspace;
+  (*count_p)++;
+
+  return 0;
+}
+
+
+/**
+ * raptor_xml_element_write:
+ * @element: XML element to format
+ * @nstack: Namespace stack context to use in formatting (or NULL)
+ * @is_empty: non-0 if element is empty
+ * @is_end: non-0 if this is an end element (else is a start element)
+ * @depth: XML element depth
+ * @iostr: iostream object
+ *
+ * Write a formatted XML element to a #raptor_iostream
+ *
+ * For a start element with a namespace stack, declarations are
+ * written for the namespace of the element name and of each
+ * attribute when not already in scope, and for every namespace
+ * declared on the element.  Each namespace is declared once, the
+ * declarations are sorted by their text, and each one is started on
+ * @nstack at @depth.  Attributes are written after the declarations
+ * with their values escaped for a double-quoted attribute.
+ *
+ * If the declarations cannot be prepared, nothing is written.
+ *
+ * Return value: non-0 on failure
+*/
+int
+raptor_xml_element_write(raptor_xml_element *element,
+                         raptor_namespace_stack *nstack,
+                         int is_empty,
+                         int is_end,
+                         int depth,
+                         raptor_iostream* iostr)
+{
+  struct nsd *nspace_declarations = NULL;
+  size_t nspace_declarations_count = 0;
+  unsigned int i;
+
+  /* Namespace declarations only appear on start tags */
+  if(nstack && !is_end) {
+    int failed = 0;
+    /* max is 1 per element and 1 for each attribute + size of declared */
+    size_t nspace_max_count = (size_t)element->attribute_count + 1;
+
+    if(element->declared_nspaces)
+      nspace_max_count += (size_t)raptor_sequence_size(element->declared_nspaces);
+
+    nspace_declarations = RAPTOR_CALLOC(struct nsd*, nspace_max_count,
+                                        sizeof(struct nsd));
+    if(!nspace_declarations)
+      return 1;
+
+    /* namespace of the element name */
+    if(element->name->nspace &&
+       !raptor_namespaces_namespace_in_scope(nstack, element->name->nspace))
+      failed = raptor_xml_element_write_add_nsd(nspace_declarations,
+                                                &nspace_declarations_count,
+                                                element->name->nspace);
+
+    /* namespaces of the attributes */
+    if(!failed && element->attributes) {
+      for(i = 0; !failed && i < element->attribute_count; i++) {
+        const raptor_namespace *attr_nspace = element->attributes[i]->nspace;
+
+        /* skip if none, same as the element's (handled above) or in scope */
+        if(!attr_nspace ||
+           attr_nspace == element->name->nspace ||
+           raptor_namespaces_namespace_in_scope(nstack, attr_nspace))
+          continue;
+
+        failed = raptor_xml_element_write_add_nsd(nspace_declarations,
+                                                  &nspace_declarations_count,
+                                                  attr_nspace);
+      }
+    }
+
+    /* namespaces explicitly declared on the element */
+    if(!failed && element->declared_nspaces) {
+      unsigned int declared_size;
+
+      declared_size = (unsigned int)raptor_sequence_size(element->declared_nspaces);
+      for(i = 0; !failed && i < declared_size; i++) {
+        raptor_namespace* nspace;
+
+        nspace = (raptor_namespace*)raptor_sequence_get_at(element->declared_nspaces, i);
+        failed = raptor_xml_element_write_add_nsd(nspace_declarations,
+                                                  &nspace_declarations_count,
+                                                  nspace);
+      }
+    }
+
+    if(failed) {
+      /* Nothing has been written yet; release what was queued */
+      size_t n;
+
+      for(n = 0; n < nspace_declarations_count; n++)
+        RAPTOR_FREE(char*, nspace_declarations[n].declaration);
+      RAPTOR_FREE(stringarray, nspace_declarations);
+      return 1;
+    }
+  }
+
+
+  raptor_iostream_write_byte('<', iostr);
+  if(is_end)
+    raptor_iostream_write_byte('/', iostr);
+
+  if(element->name->nspace && element->name->nspace->prefix_length > 0) {
+    raptor_iostream_counted_string_write((const char*)element->name->nspace->prefix,
+                                         element->name->nspace->prefix_length,
+                                         iostr);
+    raptor_iostream_write_byte(':', iostr);
+  }
+  raptor_iostream_counted_string_write((const char*)element->name->local_name,
+                                       element->name->local_name_length,
+                                       iostr);
+
+  /* declare namespaces */
+  if(nspace_declarations_count) {
+    /* sort them into the canonical order */
+    qsort((void*)nspace_declarations,
+          nspace_declarations_count, sizeof(struct nsd),
+          raptor_nsd_compare);
+    /* add them */
+    for(i = 0; i < nspace_declarations_count; i++) {
+      raptor_iostream_write_byte(' ', iostr);
+      raptor_iostream_counted_string_write((const char*)nspace_declarations[i].declaration,
+                                           nspace_declarations[i].length,
+                                           iostr);
+      RAPTOR_FREE(char*, nspace_declarations[i].declaration);
+      nspace_declarations[i].declaration = NULL;
+
+      raptor_namespace_stack_start_namespace(nstack,
+                                             (raptor_namespace*)nspace_declarations[i].nspace,
+                                             depth);
+    }
+  }
+
+
+  if(!is_end && element->attributes) {
+    for(i = 0; i < element->attribute_count; i++) {
+      raptor_iostream_write_byte(' ', iostr);
+
+      if(element->attributes[i]->nspace &&
+         element->attributes[i]->nspace->prefix_length > 0) {
+        raptor_iostream_counted_string_write((char*)element->attributes[i]->nspace->prefix,
+                                             element->attributes[i]->nspace->prefix_length,
+                                             iostr);
+        raptor_iostream_write_byte(':', iostr);
+      }
+
+      raptor_iostream_counted_string_write((const char*)element->attributes[i]->local_name,
+                                           element->attributes[i]->local_name_length,
+                                           iostr);
+
+      raptor_iostream_counted_string_write("=\"", 2, iostr);
+
+      raptor_xml_escape_string_write(element->attributes[i]->value,
+                                     element->attributes[i]->value_length,
+                                     '"',
+                                     iostr);
+      raptor_iostream_write_byte('"', iostr);
+    }
+  }
+
+  if(is_empty)
+    raptor_iostream_write_byte('/', iostr);
+
+  raptor_iostream_write_byte('>', iostr);
+
+  if(nspace_declarations)
+    RAPTOR_FREE(stringarray, nspace_declarations);
+
+  return 0;
+}
+
+
+/**
+ * raptor_xml_element_get_language:
+ * @xml_element: XML Element
+ *
+ * Get the XML language of the element.
+ *
+ * The string returned is the one stored in the element, not a copy.
+ *
+ * Return value: XML language or NULL if none in scope
+ **/
+const unsigned char*
+raptor_xml_element_get_language(raptor_xml_element* xml_element)
+{
+  const unsigned char *language = xml_element->xml_language;
+
+  return language;
+}
+
+
+/**
+ * raptor_valid_xml_ID:
+ * @rdf_parser: RDF parser (not used by this function)
+ * @string: The NUL-terminated string to check.
+ *
+ * Check the string matches the xml:ID value constraints.
+ *
+ * This checks the syntax part of the xml:ID validity constraint,
+ * that it matches [ VC: Name Token ] as amended by XML Namespaces:
+ *
+ * See <ulink url="http://www.w3.org/TR/REC-xml-names/#NT-NCName">Namespaces in XML NCName</ulink>
+ *
+ * The check is made with raptor_xml_name_check() using the XML 1.1
+ * rules when RAPTOR_XML_1_1 is defined and the XML 1.0 rules
+ * otherwise.
+ *
+ * Return value: non-zero if the ID string is valid
+ **/
+int
+raptor_valid_xml_ID(raptor_parser *rdf_parser, const unsigned char *string)
+{
+#ifdef RAPTOR_XML_1_1
+  #define XML_ID_XML_VERSION 11
+#else
+  #define XML_ID_XML_VERSION 10
+#endif
+
+  return raptor_xml_name_check(string, strlen((const char*)string),
+                               XML_ID_XML_VERSION);
+}
+
+
+/**
+ * raptor_xml_escape_string_any:
+ * @world: raptor world
+ * @string: string to XML escape (UTF-8)
+ * @len: length of string
+ * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length
+ * @length: buffer size, not counting the terminating NUL (0 disables the size check)
+ * @quote: optional quote character to escape for attribute content, or 0
+ * @xml_version: XML 1.0 (10) or XML 1.1 (11)
+ *
+ * Return an XML-escaped version of a string.
+ *
+ * Follows
+ * <ulink url="http://www.w3.org/TR/xml-c14n#ProcessingModel">Canonical XML rules on Text Nodes and Attribute Nodes</ulink>
+ *
+ * Both:
+ * Replaces <literal>&amp;</literal> and <literal>&lt;</literal>
+ * with <literal>&amp;amp;</literal> and <literal>&amp;lt;</literal>
+ * respectively, preserving other characters.
+ *
+ * Text Nodes:
+ * <literal>&gt;</literal> is turned into <literal>&amp;gt;</literal>
+ * ##xD is turned into <literal>&amp;##xD;</literal>
+ *
+ * Attribute Nodes:
+ * <literal>&gt;</literal> is generated not <literal>&amp;gt;</literal>.
+ * ##x9, ##xA and ##xD are turned into
+ * <literal>&amp;##x9;</literal>,
+ * <literal>&amp;##xA;</literal> and
+ * <literal>&amp;##xD;</literal>
+ * entities.
+ *
+ * If @quote is given it can be either of '\'' or '\"'
+ * which will be turned into <literal>&amp;apos;</literal> or
+ * <literal>&amp;quot;</literal> respectively.
+ * Any other value of @quote, including ASCII NUL ('\0'), is treated
+ * as no quote character (text node rules).
+ *
+ * U+007F and the control characters below U+0020 other than ##x9,
+ * ##xA and ##xD are written as hexadecimal character references when
+ * @xml_version is 11 or more.  For earlier versions, and always for
+ * U+0000, an error is logged and the character is left out of the
+ * output.
+ *
+ * If @buffer is NULL, no work is done but the size of buffer
+ * required is returned. The output in buffer remains in UTF-8.
+ *
+ * If the input @string is empty, a single NUL will be written to the
+ * buffer.
+ *
+ * The value returned does not count the terminating NUL that is
+ * written to @buffer.  If @length is non-zero and smaller than the
+ * size required, 0 is returned and @buffer is not written.
+ *
+ * Return value: the number of bytes required / used or <0 on failure.
+ **/
+int
+raptor_xml_escape_string_any(raptor_world *world,
+ const unsigned char *string, size_t len,
+ unsigned char *buffer, size_t length,
+ char quote,
+ int xml_version)
+{
+ size_t l;
+ size_t new_len = 0;
+ const unsigned char *p;
+ unsigned char *q;
+ int unichar_len;
+ raptor_unichar unichar;
+
+ if(!string)
+ return -1;
+
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1);
+
+ raptor_world_open(world);
+
+ /* Only the two XML quote characters select attribute escaping */
+ if(quote != '\"' && quote != '\'')
+ quote='\0';
+
+ /* Pass 1: validate the UTF-8 and add up the escaped length in new_len */
+ for(l = len, p = string; l; p++, l--) {
+ if(*p > 0x7f) {
+ unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar);
+ if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > l) {
+ raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
+ "Bad UTF-8 encoding.");
+ return -1;
+ }
+ } else {
+ unichar=*p;
+ unichar_len = 1;
+ }
+
+ if(unichar == '&')
+ /* &amp; */
+ new_len+= 5;
+ else if(unichar == '<' || (!quote && unichar == '>'))
+ /* &lt; or &gt; */
+ new_len+= 4;
+ else if(quote && unichar == (unsigned long)quote)
+ /* &apos; or &quot; */
+ new_len+= 6;
+ else if(unichar == 0x0d ||
+ (quote && (unichar == 0x09 || unichar == 0x0a)))
+ /* &#xD; or &#x9; or &#xA; */
+ new_len+= 5;
+ else if(unichar == 0x7f ||
+ (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) {
+ if(!unichar || xml_version < 11) {
+ /* Logged only: the character adds nothing to new_len */
+ raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
+ "Cannot write illegal XML 1.0 character U+%6lX.",
+ unichar);
+ } else {
+ /* &#xX; */
+ new_len+= 5;
+ /* one more byte when the reference needs two hex digits */
+ if(unichar > 0x0f)
+ new_len++;
+ }
+ } else
+ new_len+= unichar_len;
+
+ unichar_len--; /* since loop does len-- */
+ p += unichar_len; l -= unichar_len;
+ }
+
+ /* A non-zero @length that is too small gives 0, not a negative value */
+ if(length && new_len > length)
+ return 0;
+
+ /* Length-only request */
+ if(!buffer)
+ return RAPTOR_BAD_CAST(int, new_len);
+
+ /* Pass 2: write the escaped form, taking the same branches as pass 1 */
+ for(l = len, p = string, q = buffer; l; p++, l--) {
+ if(*p > 0x7f) {
+ unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar);
+ /* if the UTF-8 encoding is bad, we already did return -1 above */
+ } else {
+ unichar=*p;
+ unichar_len = 1;
+ }
+
+ if(unichar == '&') {
+ memcpy(q, "&amp;", 5);
+ q+= 5;
+ } else if(unichar == '<') {
+ memcpy(q, "&lt;", 4);
+ q+= 4;
+ } else if(!quote && unichar == '>') {
+ memcpy(q, "&gt;", 4);
+ q+= 4;
+ } else if(quote && unichar == RAPTOR_GOOD_CAST(unsigned long, quote)) {
+ if(quote == '\'')
+ memcpy(q, "&apos;", 6);
+ else
+ memcpy(q, "&quot;", 6);
+ q+= 6;
+ } else if(unichar == 0x0d ||
+ (quote && (unichar == 0x09 || unichar == 0x0a))) {
+ /* &#xX; */
+ *q++='&';
+ *q++='#';
+ *q++='x';
+ if(unichar == 0x09)
+ *q++ = '9';
+ else
+ /* 0x0a gives 'A' and 0x0d gives 'D' */
+ *q++ = RAPTOR_GOOD_CAST(unsigned char, 'A' + (RAPTOR_GOOD_CAST(char, unichar) - 0x0a));
+ *q++= ';';
+ } else if(unichar == 0x7f ||
+ (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) {
+ if(!unichar || xml_version < 11) {
+ /* Logged a second time; nothing is written for this character */
+ raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
+ "Cannot write illegal XML 1.0 character U+%6lX.",
+ unichar);
+ } else {
+ /* &#xX; */
+ *q++ = '&';
+ *q++ = '#';
+ *q++ = 'x';
+ q += raptor_format_integer((char*)q, 3,
+ RAPTOR_GOOD_CAST(unsigned int, unichar),
+ /* base */ 16, -1, '\0');
+ *q++ = ';';
+ }
+ } else {
+ /* coverity[negative_returns]
+ * negative unichar_len values are checked and cause return -1 above */
+ memcpy(q, p, unichar_len);
+ q+= unichar_len;
+ }
+
+ unichar_len--; /* since loop does len-- */
+ p += unichar_len; l -= unichar_len;
+ }
+
+ /* Terminate new string */
+ *q = '\0';
+
+ return RAPTOR_BAD_CAST(int, new_len);
+}
+
+
+/**
+ * raptor_xml_escape_string:
+ * @world: raptor world
+ * @string: string to XML 1.0 escape (UTF-8)
+ * @len: length of string
+ * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length.
+ * @length: buffer size
+ * @quote: optional quote character to escape for attribute content, or 0
+ *
+ * Return an XML 1.0-escaped version of a string.
+ *
+ * This calls raptor_xml_escape_string_any() with an XML version
+ * of 10; see that function for the conditions on parameters.
+ *
+ * Return value: the number of bytes required / used or <0 on failure.
+ **/
+int
+raptor_xml_escape_string(raptor_world *world,
+                         const unsigned char *string, size_t len,
+                         unsigned char *buffer, size_t length,
+                         char quote)
+{
+  int result;
+
+  if(!string)
+    return -1;
+
+  RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1);
+
+  raptor_world_open(world);
+
+  /* 10 selects the XML 1.0 rules */
+  result = raptor_xml_escape_string_any(world, string, len,
+                                        buffer, length,
+                                        quote,
+                                        10);
+  return result;
+}
+
+
+/**
+ * raptor_xml_escape_string_any_write:
+ * @string: string to XML escape (UTF-8)
+ * @len: length of string
+ * @quote: optional quote character to escape for attribute content, or 0
+ * @xml_version: XML version - 10 (XML 1.0) or 11 (XML 1.1)
+ * @iostr: the #raptor_iostream to write to
+ *
+ * Write an XML-escaped version of a string to an iostream.
+ *
+ * See raptor_xml_escape_string() for the escapes performed and
+ * the conditions on @quote and @string. XML 1.1 allows additional
+ * characters in XML such as U+0001 to U+001F inclusive.
+ *
+ * Any @xml_version other than 10 is handled as 11.  A character that
+ * cannot be written for the version in use is reported through the
+ * log and skipped; that alone does not make the call fail.  Invalid
+ * UTF-8 stops the output and is reported as failure.
+ *
+ * Return value: non 0 on failure
+ **/
+int
+raptor_xml_escape_string_any_write(const unsigned char *string,
+                                   size_t len,
+                                   char quote,
+                                   int xml_version,
+                                   raptor_iostream* iostr)
+{
+  const unsigned char *cursor = string;
+  size_t remaining = len;
+
+  if(xml_version != 10)
+    xml_version = 11;
+
+  /* Only the two XML quote characters select attribute escaping */
+  if(quote != '\"' && quote != '\'')
+    quote = '\0';
+
+  while(remaining) {
+    raptor_unichar unichar = *cursor;
+    int unichar_len = 1;
+
+    /* Bytes above 0x7f start a multi-byte UTF-8 sequence */
+    if(*cursor > 0x7f) {
+      unichar_len = raptor_unicode_utf8_string_get_char(cursor, remaining,
+                                                        &unichar);
+      if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > remaining) {
+        raptor_log_error(raptor_iostream_get_world(iostr),
+                         RAPTOR_LOG_LEVEL_ERROR, NULL,
+                         "Bad UTF-8 encoding.");
+        return 1;
+      }
+    }
+
+    if(unichar == '&') {
+      raptor_iostream_counted_string_write("&amp;", 5, iostr);
+    } else if(unichar == '<') {
+      raptor_iostream_counted_string_write("&lt;", 4, iostr);
+    } else if(!quote && unichar == '>') {
+      raptor_iostream_counted_string_write("&gt;", 4, iostr);
+    } else if(quote && unichar == (unsigned long)quote) {
+      if(quote == '\'') {
+        raptor_iostream_counted_string_write("&apos;", 6, iostr);
+      } else {
+        raptor_iostream_counted_string_write("&quot;", 6, iostr);
+      }
+    } else if(unichar == 0x0d ||
+              (quote && (unichar == 0x09 || unichar == 0x0a))) {
+      /* &#xX; */
+      raptor_iostream_counted_string_write("&#x", 3, iostr);
+      if(unichar == 0x09) {
+        raptor_iostream_write_byte('9', iostr);
+      } else {
+        /* 0x0a gives 'A' and 0x0d gives 'D' */
+        raptor_iostream_write_byte('A'+ ((char)unichar-0x0a), iostr);
+      }
+      raptor_iostream_write_byte(';', iostr);
+    } else if(unichar == 0x7f ||
+              (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) {
+      if(!unichar || xml_version < 11) {
+        raptor_log_error_formatted(raptor_iostream_get_world(iostr),
+                                   RAPTOR_LOG_LEVEL_ERROR, NULL,
+                                   "Cannot write illegal XML 1.0 character U+%6lX.",
+                                   unichar);
+      } else {
+        /* one hex digit below 0x10, otherwise two */
+        int width;
+
+        if(unichar < 0x10)
+          width = 1;
+        else
+          width = 2;
+
+        /* &#xX; */
+        raptor_iostream_counted_string_write("&#x", 3, iostr);
+        raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), width, iostr);
+        raptor_iostream_write_byte(';', iostr);
+      }
+    } else {
+      /* Everything else is copied through unchanged */
+      raptor_iostream_counted_string_write((const char*)cursor, unichar_len, iostr);
+    }
+
+    /* Step over every byte of the character just handled */
+    cursor += unichar_len;
+    remaining -= unichar_len;
+  }
+
+  return 0;
+}
+
+
+/**
+ * raptor_xml_escape_string_write:
+ * @string: string to XML 1.0 escape (UTF-8)
+ * @len: length of string
+ * @quote: optional quote character to escape for attribute content, or 0
+ * @iostr: the #raptor_iostream to write to
+ *
+ * Write an XML 1.0-escaped version of a string to an iostream.
+ *
+ * This calls raptor_xml_escape_string_any_write() with an XML
+ * version of 10; see that function for the escapes performed and
+ * the conditions on @quote and @string.
+ *
+ * Return value: non 0 on failure
+ **/
+int
+raptor_xml_escape_string_write(const unsigned char *string,
+                               size_t len,
+                               char quote,
+                               raptor_iostream* iostr)
+{
+  int status;
+
+  /* 10 selects the XML 1.0 rules */
+  status = raptor_xml_escape_string_any_write(string, len, quote, 10, iostr);
+
+  return status;
+}
+
+
+/**
+ * raptor_xml_name_check:
+ * @string: UTF-8 name string
+ * @length: length of string
+ * @xml_version: XML version
+ *
+ * Check a string is a legal XML name (and legal UTF8).
+ *
+ * xml_version is either 10 (for XML 1.0) or 11 for (XML 1.1). Any
+ * other version fails.
+ *
+ * The first character is tested against the name start character
+ * rules for the version and every later character against the name
+ * character rules.  A string that cannot be decoded as UTF-8, or
+ * that holds a code point above raptor_unicode_max_codepoint, fails.
+ * Note that a @length of 0 passes, since no character is examined.
+ *
+ * Return value: Non 0 if the string is a legal XML name
+ **/
+int
+raptor_xml_name_check(const unsigned char *string, size_t length,
+                      int xml_version)
+{
+  /* A flag rather than a running position: nothing can overflow */
+  int at_start = 1;
+
+  if(xml_version != 10 && xml_version != 11)
+    return 0;
+
+  while(length > 0) {
+    raptor_unichar unichar = 0;
+    int unichar_len;
+    int is_legal;
+
+    unichar_len = raptor_unicode_utf8_string_get_char(string, length, &unichar);
+    /* <= 0 also rejects a zero-length decode, which would never advance */
+    if(unichar_len <= 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > length)
+      return 0;
+
+    if(unichar > raptor_unicode_max_codepoint)
+      return 0;
+
+    if(at_start) {
+      /* start of name */
+      if(xml_version == 10)
+        is_legal = raptor_unicode_is_xml10_namestartchar(unichar);
+      else
+        is_legal = raptor_unicode_is_xml11_namestartchar(unichar);
+    } else {
+      /* rest of name */
+      if(xml_version == 10)
+        is_legal = raptor_unicode_is_xml10_namechar(unichar);
+      else
+        is_legal = raptor_unicode_is_xml11_namechar(unichar);
+    }
+
+    if(!is_legal)
+      return 0;
+
+    at_start = 0;
+    string += unichar_len;
+    length -= (size_t)unichar_len;
+  }
+
+  return 1;
+}
+
+
+#endif
+
+
+
+
+#ifdef STANDALONE
+
+/* static prototypes */
+void raptor_bad_string_print(const unsigned char *input, FILE *stream);
+int main(int argc, char *argv[]);
+
+/*
+ * raptor_bad_string_print:
+ * @input: NUL-terminated byte string
+ * @stream: FILE* to print to
+ *
+ * Test helper - print @input with each byte that is not printable
+ * according to isprint() shown as a \xHH escape.
+ */
+void
+raptor_bad_string_print(const unsigned char *input, FILE *stream)
+{
+  while(*input) {
+    /* isprint() needs a value in the unsigned char range, so the
+     * byte is never converted to a (possibly signed) char */
+    unsigned char c = *input;
+
+    if(isprint(c))
+      fputc(c, stream);
+    else
+      fprintf(stream, "\\x%02X", (unsigned int)c);
+    input++;
+  }
+}
+
+
+/*
+ * main:
+ *
+ * Test program for raptor_xml_escape_string().  Each test value is
+ * escaped twice: once with a NULL buffer to get the length needed
+ * and once into a buffer of that length plus one byte for the NUL.
+ * The result is compared with the expected text.
+ *
+ * Return value: the number of failed tests
+ */
+int
+main(int argc, char *argv[])
+{
+  raptor_world *world;
+  const char *program = raptor_basename(argv[0]);
+  struct tv {
+    const char *string;
+    const char quote;
+    const char *result;
+  };
+  struct tv *t;
+  struct tv test_values[]={
+    {"", 0, ""},
+
+    {"&", 0, "&amp;"},
+    {"<", 0, "&lt;"},
+    {">", 0, "&gt;"},
+    {"\x09", 0, "\x09"},
+    {"\x0a", 0, "\x0a"},
+    {"\x0d", 0, "&#xD;"},
+
+    {"'&'", '\'', "&apos;&amp;&apos;"},
+    {"'<'", '\'', "&apos;&lt;&apos;"},
+    {"'>'", '\'', "&apos;>&apos;"},
+    {"\x09", '\'', "&#x9;"},
+    {"\x0a", '\'', "&#xA;"},
+    {"\x0d", '\'', "&#xD;"},
+
+    {"\"&\"", '\"', "&quot;&amp;&quot;"},
+    {"\"<\"", '\"', "&quot;&lt;&quot;"},
+    {"\">\"", '\"', "&quot;>&quot;"},
+    {"\x09", '\"', "&#x9;"},
+    {"\x0a", '\"', "&#xA;"},
+    {"\x0d", '\"', "&#xD;"},
+
+    {"&amp;", 0, "&amp;amp;"},
+    {"<foo>", 0, "&lt;foo&gt;"},
+#if 0
+    {"\x1f", 0, "&#x1F;"},
+    {"\xc2\x80", 0, "&#x80;"},
+    {"\xe0\xa0\x80", 0, "&#x0800;"},
+    {"\xf0\x90\x80\x80", 0, "&#x10000;"},
+
+    {"\x7f", 0, "&#x7F;"},
+    {"\xdf\xbf", 0, "&#x07FF;"},
+    {"\xef\xbf\xbd", 0, "&#xFFFD;"},
+    {"\xf4\x8f\xbf\xbf", 0, "&#x10FFFF;"},
+
+    {"\xc3\xbf", 0, "&#xFF;"},
+    {"\xf0\x8f\xbf\xbf", 0, "&#xFFFF;"},
+#endif
+    {NULL, 0, 0}
+  };
+  int i;
+  int failures = 0;
+
+  world = raptor_new_world();
+  if(!world || raptor_world_open(world))
+    exit(1);
+
+  for(i = 0; (t=&test_values[i]) && t->string; i++) {
+    const unsigned char *utf8_string = (const unsigned char*)t->string;
+    char quote = t->quote;
+    size_t utf8_string_len = strlen((const char*)utf8_string);
+    unsigned char *xml_string;
+    int xml_string_len = 0;
+
+    /* First call: NULL buffer, so only the length needed comes back */
+    xml_string_len = raptor_xml_escape_string(world,
+                                              utf8_string, utf8_string_len,
+                                              NULL, 0, quote);
+    if(xml_string_len < 0) {
+      fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '",
+              program);
+      raptor_bad_string_print(utf8_string, stderr);
+      fputs("'\n", stderr);
+      failures++;
+      continue;
+    }
+
+    /* One extra byte for the terminating NUL */
+    xml_string = RAPTOR_MALLOC(unsigned char*, xml_string_len + 1);
+    if(!xml_string) {
+      fprintf(stderr, "%s: Out of memory\n", program);
+      failures++;
+      break;
+    }
+
+    xml_string_len = raptor_xml_escape_string(world,
+                                              utf8_string, utf8_string_len,
+                                              xml_string, xml_string_len, quote);
+    if(xml_string_len < 0) {
+      fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '",
+              program);
+      raptor_bad_string_print(utf8_string, stderr);
+      fputs("'\n", stderr);
+      failures++;
+      /* the buffer is released on every path out of this iteration */
+      RAPTOR_FREE(char*, xml_string);
+      continue;
+    }
+    if(strcmp((const char*)xml_string, t->result)) {
+      fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '",
+              program);
+      raptor_bad_string_print(utf8_string, stderr);
+      fprintf(stderr, "', expected '%s', result was '%s'\n",
+              t->result, xml_string);
+      failures++;
+      RAPTOR_FREE(char*, xml_string);
+      continue;
+    }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+    fprintf(stderr, "%s: raptor_xml_escape_string escaped string to '%s' ok\n",
+            program, xml_string);
+#endif
+    RAPTOR_FREE(char*, xml_string);
+  }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+  if(!failures)
+    fprintf(stderr, "%s: raptor_xml_escape_string all tests OK\n", program);
+#endif
+
+  raptor_free_world(world);
+
+  return failures;
+}
+
+#endif