summary refs log tree commit diff stats
path: root/src/raptor_sax2.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/raptor_sax2.c')
-rw-r--r-- src/raptor_sax2.c 1026
1 files changed, 1026 insertions, 0 deletions
diff --git a/src/raptor_sax2.c b/src/raptor_sax2.c
new file mode 100644
index 0000000..450f7ac
--- /dev/null
+++ b/src/raptor_sax2.c
@@ -0,0 +1,1026 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_sax2.c - Raptor SAX2 API
+ *
+ * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+/* Raptor includes */
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+
+/* Define this for far too much output */
+#undef RAPTOR_DEBUG_CDATA
+
+
+/*
+ * raptor_sax2_init - SAX2 module initialisation
+ * @world: raptor world (not used here)
+ *
+ * Performs no work at present.
+ *
+ * Return value: always 0
+ */
+int
+raptor_sax2_init(raptor_world* world)
+{
+  (void)world; /* nothing to set up */
+
+  return 0;
+}
+
+
+/*
+ * raptor_sax2_finish - SAX2 module termination
+ * @world: raptor world (not used here)
+ *
+ * Performs no work at present.
+ */
+void
+raptor_sax2_finish(raptor_world* world)
+{
+  (void)world; /* nothing to tear down */
+}
+
+
+/**
+ * raptor_new_sax2:
+ * @world: raptor world
+ * @locator: raptor locator to use for errors (must not be NULL)
+ * @user_data: pointer context information to pass to SAX handlers
+ *
+ * Constructor - Create a new SAX2 with error handlers
+ *
+ * The @world, @locator and @user_data pointers are stored in the new
+ * object as given.  The object starts out enabled, with no handlers
+ * set and with its options initialised for the SAX2 option area.
+ *
+ * Return value: new #raptor_sax2 object or NULL on failure (no
+ * @locator, the world could not be opened, or out of memory)
+ */
+raptor_sax2*
+raptor_new_sax2(raptor_world *world, raptor_locator *locator,
+                void* user_data)
+{
+  raptor_sax2* sax2;
+
+  RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
+
+  if(!locator)
+    return NULL;
+
+  /* raptor_world_open() returns non-0 on failure; do not construct
+   * an object on top of a world that could not be opened
+   */
+  if(raptor_world_open(world))
+    return NULL;
+
+  /* zero-filled so every handler and pointer field starts as NULL */
+  sax2 = RAPTOR_CALLOC(raptor_sax2*, 1, sizeof(*sax2));
+  if(!sax2)
+    return NULL;
+
+#ifdef RAPTOR_XML_LIBXML
+  sax2->magic = RAPTOR_LIBXML_MAGIC;
+#endif
+
+  sax2->world = world;
+  sax2->locator = locator;
+  sax2->user_data = user_data;
+
+  sax2->enabled = 1;
+
+  raptor_object_options_init(&sax2->options, RAPTOR_OPTION_AREA_SAX2);
+
+  return sax2;
+}
+
+
+/**
+ * raptor_free_sax2:
+ * @sax2: SAX2 object (NULL is accepted and ignored)
+ *
+ * Destructor - destroy a SAX2 object
+ *
+ * Releases the libxml parser context (when built with libxml), every
+ * XML element still on the element stack, the namespace stack, the
+ * base URI and the options before freeing the object itself.
+ */
+void
+raptor_free_sax2(raptor_sax2 *sax2)
+{
+  if(!sax2)
+    return;
+
+#ifdef RAPTOR_XML_LIBXML
+  if(sax2->xc) {
+    raptor_libxml_free(sax2->xc);
+    sax2->xc = NULL;
+  }
+#endif
+
+  /* drain whatever is left on the element stack */
+  for(;;) {
+    raptor_xml_element *top = raptor_xml_element_pop(sax2);
+
+    if(!top)
+      break;
+    raptor_free_xml_element(top);
+  }
+
+  raptor_namespaces_clear(&sax2->namespaces);
+
+  if(sax2->base_uri)
+    raptor_free_uri(sax2->base_uri);
+
+  raptor_object_options_clear(&sax2->options);
+
+  RAPTOR_FREE(raptor_sax2, sax2);
+}
+
+
+/**
+ * raptor_sax2_set_start_element_handler:
+ * @sax2: SAX2 object
+ * @handler: start element handler (NULL for none)
+ *
+ * Set SAX2 start element handler.
+ *
+ * Replaces any handler stored earlier.  The handler is called by
+ * raptor_sax2_start_element() with the SAX2 user data and the newly
+ * pushed XML element.
+ */
+void
+raptor_sax2_set_start_element_handler(raptor_sax2* sax2,
+                                      raptor_sax2_start_element_handler handler)
+{
+  sax2->start_element_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_end_element_handler:
+ * @sax2: SAX2 object
+ * @handler: end element handler (NULL for none)
+ *
+ * Set SAX2 end element handler.
+ *
+ * Replaces any handler stored earlier.  The handler is called by
+ * raptor_sax2_end_element() with the SAX2 user data and the current
+ * XML element, before that element is popped and freed.
+ */
+void
+raptor_sax2_set_end_element_handler(raptor_sax2* sax2,
+                                    raptor_sax2_end_element_handler handler)
+{
+  sax2->end_element_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_characters_handler:
+ * @sax2: SAX2 object
+ * @handler: characters handler (NULL for none)
+ *
+ * Set SAX2 characters handler.
+ *
+ * Replaces any handler stored earlier.  The handler is called by
+ * raptor_sax2_characters() with the SAX2 user data, the current XML
+ * element, and the character data with its length.
+ */
+void
+raptor_sax2_set_characters_handler(raptor_sax2* sax2,
+                                   raptor_sax2_characters_handler handler)
+{
+  sax2->characters_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_cdata_handler:
+ * @sax2: SAX2 object
+ * @handler: CDATA handler (NULL for none)
+ *
+ * Set SAX2 CDATA handler.
+ *
+ * Replaces any handler stored earlier.  The handler is called by
+ * raptor_sax2_cdata() with the SAX2 user data, the current XML
+ * element, and the CDATA content with its length.
+ */
+void
+raptor_sax2_set_cdata_handler(raptor_sax2* sax2,
+                              raptor_sax2_cdata_handler handler)
+{
+  sax2->cdata_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_comment_handler:
+ * @sax2: SAX2 object
+ * @handler: comment handler (NULL for none)
+ *
+ * Set SAX2 XML comment handler.
+ *
+ * Replaces any handler stored earlier.  The handler is called by
+ * raptor_sax2_comment() with the SAX2 user data, the current XML
+ * element and the comment string.
+ */
+void
+raptor_sax2_set_comment_handler(raptor_sax2* sax2,
+                                raptor_sax2_comment_handler handler)
+{
+  sax2->comment_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_unparsed_entity_decl_handler:
+ * @sax2: SAX2 object
+ * @handler: unparsed entity declaration handler (NULL for none)
+ *
+ * Set SAX2 XML unparsed entity declaration handler.
+ *
+ * Replaces any handler stored earlier.  The handler is called by
+ * raptor_sax2_unparsed_entity_decl() with the SAX2 user data followed
+ * by the entity name, base, system ID, public ID and notation name.
+ */
+void
+raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2* sax2,
+                                             raptor_sax2_unparsed_entity_decl_handler handler)
+{
+  sax2->unparsed_entity_decl_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_external_entity_ref_handler:
+ * @sax2: SAX2 object
+ * @handler: entity reference handler (NULL for none)
+ *
+ * Set SAX2 XML entity reference handler.
+ *
+ * Replaces any handler stored earlier.  When no handler is set,
+ * raptor_sax2_external_entity_ref() logs an error and returns 0
+ * instead of resolving the reference.
+ */
+void
+raptor_sax2_set_external_entity_ref_handler(raptor_sax2* sax2,
+                                            raptor_sax2_external_entity_ref_handler handler)
+{
+  sax2->external_entity_ref_handler = handler;
+}
+
+
+/**
+ * raptor_sax2_set_namespace_handler:
+ * @sax2: #raptor_sax2 object
+ * @handler: new namespace callback function (NULL for none)
+ *
+ * Set the XML namespace handler function.
+ *
+ * Each time an xmlns declaration is processed while starting an
+ * element, @handler is called with the SAX2 user data and the
+ * #raptor_namespace that was created for it.  The prefix is NULL for
+ * a default namespace declaration.
+ *
+ * No de-duplication is done: a namespace that is declared in several
+ * places of a document is reported once per declaration.
+ *
+ */
+void
+raptor_sax2_set_namespace_handler(raptor_sax2* sax2,
+                                  raptor_namespace_handler handler)
+{
+  sax2->namespace_handler = handler;
+}
+
+
+/*
+ * raptor_xml_element_pop - remove the current element from the stack
+ * @sax2: SAX2 object
+ *
+ * Makes the parent of the current element the new current element
+ * and clears the root element pointer when the root itself is
+ * removed.  The removed element is not freed here.
+ *
+ * Return value: the removed element or NULL if the stack was empty
+ */
+raptor_xml_element*
+raptor_xml_element_pop(raptor_sax2 *sax2)
+{
+  raptor_xml_element *top = sax2->current_element;
+
+  if(top) {
+    sax2->current_element = top->parent;
+
+    /* the root has just been removed */
+    if(top == sax2->root_element)
+      sax2->root_element = NULL;
+  }
+
+  return top;
+}
+
+
+/*
+ * raptor_xml_element_push - make an element the current element
+ * @sax2: SAX2 object
+ * @element: XML element to add on top of the stack
+ *
+ * Links @element to the previous current element as its parent.  The
+ * first element pushed while no root is recorded becomes the root.
+ */
+void
+raptor_xml_element_push(raptor_sax2 *sax2, raptor_xml_element* element)
+{
+  if(!sax2->root_element)
+    sax2->root_element = element;
+
+  element->parent = sax2->current_element;
+  sax2->current_element = element;
+}
+
+
+/**
+ * raptor_xml_element_is_empty:
+ * @xml_element: XML Element
+ *
+ * Check if an XML Element is empty.
+ *
+ * An element counts as empty when neither character data nor a child
+ * element has been recorded as seen inside it.
+ *
+ * Return value: non-0 if the element is empty.
+ */
+int
+raptor_xml_element_is_empty(raptor_xml_element* xml_element)
+{
+  return !(xml_element->content_cdata_seen ||
+           xml_element->content_element_seen);
+}
+
+
+/**
+ * raptor_sax2_inscope_xml_language:
+ * @sax2: SAX2 object
+ *
+ * Get the in-scope XML language
+ *
+ * Walks from the current element outwards through its ancestors and
+ * returns the first language found.  The result may be "" when
+ * xml:lang="" was given; NULL means that no element in scope carries
+ * an xml:lang.
+ *
+ * Return value: shared pointer to the XML language or NULL if none is in scope.
+ */
+const unsigned char*
+raptor_sax2_inscope_xml_language(raptor_sax2 *sax2)
+{
+  raptor_xml_element* scope = sax2->current_element;
+
+  while(scope) {
+    if(scope->xml_language)
+      return scope->xml_language;
+
+    scope = scope->parent;
+  }
+
+  return NULL;
+}
+
+
+/**
+ * raptor_sax2_inscope_base_uri:
+ * @sax2: SAX2 object
+ *
+ * Get the in-scope base URI
+ *
+ * The nearest base URI set on the current element or one of its
+ * ancestors wins; when none of them has one, the base URI stored in
+ * the SAX2 object itself is used.
+ *
+ * Return value: the in-scope base URI shared object or NULL if none is in scope.
+ */
+raptor_uri*
+raptor_sax2_inscope_base_uri(raptor_sax2 *sax2)
+{
+  raptor_uri* found = sax2->base_uri; /* fallback: document base */
+  raptor_xml_element *scope;
+
+  for(scope = sax2->current_element; scope; scope = scope->parent) {
+    if(scope->base_uri) {
+      found = scope->base_uri;
+      break;
+    }
+  }
+
+  return found;
+}
+
+
+/**
+ * raptor_sax2_set_uri_filter:
+ * @sax2: SAX2 object
+ * @filter: URI filter function (NULL for none)
+ * @user_data: User data to pass to filter function
+ *
+ * Set URI filter function for SAX2 internal retrievals.
+ *
+ * The filter is consulted by raptor_sax2_check_load_uri_string(); a
+ * non-0 result from it forbids loading the URI.
+ **/
+void
+raptor_sax2_set_uri_filter(raptor_sax2* sax2,
+                           raptor_uri_filter_func filter,
+                           void *user_data)
+{
+  sax2->uri_filter_user_data = user_data;
+  sax2->uri_filter = filter;
+}
+
+
+/*
+ * raptor_sax2_get_depth - get the current element nesting depth
+ * @sax2: SAX2 object
+ *
+ * Return value: the depth counter stored in @sax2
+ */
+int
+raptor_sax2_get_depth(raptor_sax2 *sax2)
+{
+  int depth = sax2->depth;
+
+  return depth;
+}
+
+/*
+ * raptor_sax2_inc_depth - go one element nesting level deeper
+ * @sax2: SAX2 object
+ */
+void
+raptor_sax2_inc_depth(raptor_sax2 *sax2)
+{
+  sax2->depth += 1;
+}
+
+/*
+ * raptor_sax2_dec_depth - go one element nesting level back up
+ * @sax2: SAX2 object
+ */
+void
+raptor_sax2_dec_depth(raptor_sax2 *sax2)
+{
+  sax2->depth -= 1;
+}
+
+
+static void raptor_sax2_simple_error(void* user_data, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3);
+
+/*
+ * raptor_sax2_simple_error - Error from a sax2 - Internal
+ * @user_data: the #raptor_sax2 object (nothing is logged when NULL)
+ * @message: printf-style format string
+ *
+ * Matches the raptor_simple_message_handler API but reports the
+ * message at error level through the world and locator of the sax2.
+ */
+static void
+raptor_sax2_simple_error(void* user_data, const char *message, ...)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+  va_list args;
+
+  if(!sax2)
+    return;
+
+  va_start(args, message);
+  raptor_log_error_varargs(sax2->world,
+                           RAPTOR_LOG_LEVEL_ERROR,
+                           sax2->locator,
+                           message, args);
+  va_end(args);
+}
+
+
+
+/**
+ * raptor_sax2_parse_start:
+ * @sax2: sax2 object
+ * @base_uri: base URI (or NULL for none)
+ *
+ * Start an XML SAX2 parse.
+ *
+ * Resets the depth and the root/current element pointers, replaces
+ * the stored base URI with a copy of @base_uri, drops any libxml
+ * parser context left from an earlier parse (when built with libxml)
+ * and re-creates the namespace stack.
+ *
+ * There is no return value: if the namespace stack cannot be
+ * initialised, a fatal error is logged and @sax2 is put into the
+ * failed state.
+ */
+void
+raptor_sax2_parse_start(raptor_sax2* sax2, raptor_uri *base_uri)
+{
+  raptor_uri* new_base_uri;
+
+  sax2->depth = 0;
+  sax2->root_element = NULL;
+  sax2->current_element = NULL;
+
+  /* Take the new copy before releasing the old base URI so that
+   * passing in the URI object that @sax2 already holds is safe.
+   */
+  new_base_uri = base_uri ? raptor_uri_copy(base_uri) : NULL;
+  if(sax2->base_uri)
+    raptor_free_uri(sax2->base_uri);
+  sax2->base_uri = new_base_uri;
+
+#ifdef RAPTOR_XML_LIBXML
+  raptor_libxml_sax_init(sax2);
+
+#if LIBXML_VERSION < 20425
+  sax2->first_read = 1;
+#endif
+
+  /* a new context is created by the first raptor_sax2_parse_chunk() */
+  if(sax2->xc) {
+    raptor_libxml_free(sax2->xc);
+    sax2->xc = NULL;
+  }
+#endif
+
+  raptor_namespaces_clear(&sax2->namespaces);
+
+  if(raptor_namespaces_init(sax2->world, &sax2->namespaces, 1)) {
+    /* log a fatal error and set sax2 to failed state
+       since the function signature does not currently support returning an error */
+    raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL, sax2->locator,
+                     "raptor_namespaces_init() failed");
+    sax2->failed = 1;
+  }
+}
+
+
+/**
+ * raptor_sax2_parse_chunk:
+ * @sax2: sax2 object
+ * @buffer: input buffer
+ * @len: input buffer length
+ * @is_end: non-0 if end of data
+ *
+ * Parse a chunk of XML data generating SAX2 events
+ *
+ * When @sax2 has no parser context yet, a libxml push parser context
+ * is created and seeded with @buffer; that call then returns 0 unless
+ * @is_end is set, in which case the parse is finished immediately.
+ * A @len of 0 on such a first call is reported as an error.  On later
+ * calls @buffer is fed to xmlParseChunk(); a @len of 0 there only
+ * signals the end of the document.
+ *
+ * When built without RAPTOR_XML_LIBXML this function only returns 1.
+ *
+ * Return value: non-0 on failure
+ */
+int
+raptor_sax2_parse_chunk(raptor_sax2* sax2, const unsigned char *buffer,
+ size_t len, int is_end)
+{
+#ifdef RAPTOR_XML_LIBXML
+ /* parser context - NULL until the first chunk has been seen */
+ xmlParserCtxtPtr xc = sax2->xc;
+ int rc;
+
+ if(!xc) {
+ int libxml_options = 0;
+
+ if(!len) {
+ /* no data given at all - nothing to create a parser from */
+ raptor_sax2_update_document_locator(sax2, sax2->locator);
+ raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_ERROR, sax2->locator,
+ "XML Parsing failed - no element found");
+ return 1;
+ }
+
+ xc = xmlCreatePushParserCtxt(&sax2->sax, sax2, /* user data */
+ (char*)buffer, RAPTOR_BAD_CAST(int, len),
+ NULL);
+ if(!xc)
+ goto handle_error;
+
+#ifdef RAPTOR_LIBXML_XML_PARSE_NONET
+ if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_NET))
+ libxml_options |= XML_PARSE_NONET;
+#endif
+#ifdef HAVE_XMLCTXTUSEOPTIONS
+ xmlCtxtUseOptions(xc, libxml_options);
+#endif
+
+ xc->userData = sax2; /* user data */
+ xc->vctxt.userData = sax2; /* user data */
+ xc->vctxt.error = (xmlValidityErrorFunc)raptor_libxml_validation_error;
+ xc->vctxt.warning = (xmlValidityWarningFunc)raptor_libxml_validation_warning;
+ xc->replaceEntities = 1;
+
+ sax2->xc = xc;
+
+ if(is_end)
+ len = 0; /* buffer was already given to the new context; fall through to the end-of-document call below */
+ else
+ return 0;
+ }
+
+ if(!len) {
+ rc = xmlParseChunk(xc, (char*)buffer, 0, 1);
+ return rc; /* the libxml result is passed back unfiltered on this path */
+ }
+
+
+ /* This works around some libxml versions that fail to work
+ * if the buffer size is larger than the entire file
+ * and thus the entire parsing is done in one operation.
+ *
+ * The code below:
+ * 2.4.19 (oldest tested) to 2.4.24 - required
+ * 2.4.25 - works with or without it
+ * 2.4.26 or later - fails with this code
+ */
+
+#if LIBXML_VERSION < 20425
+ if(sax2->first_read && is_end) {
+ /* parse all but the last character */
+ rc = xmlParseChunk(xc, (char*)buffer, len-1, 0);
+ if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
+ goto handle_error;
+ /* last character */
+ rc = xmlParseChunk(xc, (char*)buffer + (len-1), 1, 0);
+ if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
+ goto handle_error;
+ /* end */
+ xmlParseChunk(xc, (char*)buffer, 0, 1);
+ return 0;
+ }
+#endif
+
+#if LIBXML_VERSION < 20425
+ sax2->first_read = 0;
+#endif
+
+ rc = xmlParseChunk(xc, (char*)buffer, RAPTOR_BAD_CAST(int, len), is_end);
+ if(rc && rc != XML_WAR_UNDECLARED_ENTITY) /* libxml: non 0 is failure */
+ goto handle_error;
+ if(is_end)
+ return 0;
+
+ return rc; /* NOTE(review): rc is 0 or XML_WAR_UNDECLARED_ENTITY here, so that warning code reaches the caller as non-0 - confirm this is intended */
+
+ handle_error:
+#endif
+
+ return 1;
+}
+
+
+/**
+ * raptor_sax2_set_option:
+ * @sax2: #raptor_sax2 SAX2 object
+ * @option: option to set from enumerated #raptor_option values
+ * @string: string option value (or NULL)
+ * @integer: integer option value
+ *
+ * Set SAX2 option.
+ *
+ * For a numeric option, a non-NULL @string is converted to an
+ * integer and takes precedence over @integer.
+ *
+ * For a non-numeric option, a NULL @string is an error.
+ *
+ * The @string values used are copied.
+ *
+ * The allowed options are available via
+ * raptor_world_get_option_description().
+ *
+ * Return value: non 0 on failure or if the option is unknown
+ */
+int
+raptor_sax2_set_option(raptor_sax2 *sax2, raptor_option option,
+                       char* string, int integer)
+{
+  int rc;
+
+  /* the work is done by the shared object-options code */
+  rc = raptor_object_options_set_option(&sax2->options, option,
+                                        string, integer);
+
+  return rc;
+}
+
+
+/*
+ * raptor_sax2_update_document_locator - refresh a locator from the parser
+ * @sax2: SAX2 object
+ * @locator: raptor locator to update
+ *
+ * Hands over to the libxml locator update when built with libxml;
+ * otherwise this does nothing.
+ */
+void
+raptor_sax2_update_document_locator(raptor_sax2* sax2,
+                                    raptor_locator* locator)
+{
+#ifdef RAPTOR_XML_LIBXML
+  raptor_libxml_update_document_locator(sax2, locator);
+#endif
+}
+
+
+/*
+ * raptor_sax2_start_element - start of an element
+ * @user_data: the #raptor_sax2 object
+ * @name: element name as delivered by the XML parser
+ * @atts: NULL-terminated array of alternating attribute name and
+ *   value pointers, or NULL when the element has no attributes
+ *
+ * Does nothing when the sax2 is in the failed state or not enabled.
+ *
+ * Otherwise, in order:
+ *  - with libxml, every attribute value is replaced by a freshly
+ *    allocated whitespace-normalized copy and the original is freed
+ *  - the element depth is incremented
+ *  - attributes whose name starts with "xml" are consumed:
+ *    xmlns* declare namespaces (reported to the namespace handler),
+ *    xml:lang and xml:base set the language and base URI of the
+ *    element; any other xml* attribute is dropped
+ *  - the remaining attributes are turned into qnames
+ *  - the new element is pushed and the start element handler called
+ *
+ * Consumed attribute names are set to NULL inside @atts while this
+ * runs; the array is put back to its incoming state before returning.
+ *
+ * NOTE(review): on the failure paths the depth stays incremented and
+ * no element is pushed, so a following end-element event is not
+ * balanced - confirm how callers are expected to recover.
+ */
+void
+raptor_sax2_start_element(void* user_data, const unsigned char *name,
+                          const unsigned char **atts)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+  raptor_qname* el_name;
+  unsigned char **xml_atts_copy = NULL;
+  size_t xml_atts_size = 0;
+  int all_atts_count = 0;
+  int ns_attributes_count = 0;
+  raptor_qname** named_attrs = NULL;
+  raptor_xml_element* xml_element = NULL;
+  unsigned char *xml_language = NULL;
+  raptor_uri *xml_base = NULL;
+
+  if(sax2->failed || !sax2->enabled)
+    return;
+
+#ifdef RAPTOR_XML_LIBXML
+  if(atts) {
+    int i;
+
+    /* Do XML attribute value normalization: strip leading and
+     * trailing white space and collapse inner runs to one space
+     */
+    for(i = 0; atts[i]; i += 2) {
+      unsigned char *value = (unsigned char*)atts[i+1];
+      unsigned char *src = value;
+      unsigned char *dst = xmlStrdup(value);
+
+      if(!dst) {
+        raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL,
+                         sax2->locator, "Out of memory");
+        return;
+      }
+
+      /* the copy replaces the original; it is never longer */
+      atts[i+1] = dst;
+
+      while(*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
+        src++;
+      while(*src) {
+        if(*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) {
+          while(*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09)
+            src++;
+          /* no space is written for trailing white space */
+          if(*src)
+            *dst++ = 0x20;
+        } else {
+          *dst++ = *src++;
+        }
+      }
+      *dst = '\0';
+      xmlFree(value);
+    }
+  }
+#endif
+
+  raptor_sax2_inc_depth(sax2);
+
+  if(atts) {
+    int i;
+
+    /* Save passed in XML attributes pointers so we can
+     * NULL the pointers when they get handled below (various atts[i]=NULL)
+     */
+    for(i = 0; atts[i]; i++) ;
+    xml_atts_size = sizeof(unsigned char*) * i;
+    if(xml_atts_size) {
+      xml_atts_copy = RAPTOR_MALLOC(unsigned char**, xml_atts_size);
+      if(!xml_atts_copy)
+        goto fail;
+      memcpy(xml_atts_copy, atts, xml_atts_size);
+    }
+
+    /* XML attributes processing:
+     *   xmlns*   - XML namespaces (Namespaces in XML REC)
+     *     Deleted and used to synthesise namespaces declarations
+     *   xml:lang - XML language (XML REC)
+     *     Deleted and optionally normalised to lowercase
+     *   xml:base - XML Base (XML Base REC)
+     *     Deleted and used to set the in-scope base URI for this XML element
+     */
+    for(i = 0; atts[i]; i+= 2) {
+      all_atts_count++;
+
+      if(strncmp((char*)atts[i], "xml", 3)) {
+        /* count and skip non xml* attributes */
+        ns_attributes_count++;
+        continue;
+      }
+
+      /* synthesise the XML namespace events
+       *
+       * strncmp() rather than memcmp(): the name may be shorter than
+       * 5 bytes (e.g. "xml") and must not be read past its NUL
+       */
+      if(!strncmp((const char*)atts[i], "xmlns", 5)) {
+        /* "xmlns" alone declares the default namespace (NULL prefix) */
+        const unsigned char *prefix = atts[i][5] ? &atts[i][6] : NULL;
+        const unsigned char *namespace_name = atts[i+1];
+
+        raptor_namespace* nspace;
+        nspace = raptor_new_namespace(&sax2->namespaces,
+                                      prefix, namespace_name,
+                                      raptor_sax2_get_depth(sax2));
+
+        if(nspace) {
+          raptor_namespaces_start_namespace(&sax2->namespaces, nspace);
+
+          if(sax2->namespace_handler)
+            (*sax2->namespace_handler)(sax2->user_data, nspace);
+        }
+      } else if(!strcmp((char*)atts[i], "xml:lang")) {
+        size_t lang_len = strlen((char*)atts[i+1]);
+        xml_language = RAPTOR_MALLOC(unsigned char*, lang_len + 1);
+        if(!xml_language) {
+          raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL,
+                           sax2->locator, "Out of memory");
+          goto fail;
+        }
+
+        /* optionally normalize language to lowercase */
+        if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NORMALIZE_LANGUAGE)) {
+          unsigned char *from = (unsigned char*)atts[i+1];
+          unsigned char *to = xml_language;
+
+          while(*from) {
+            if(isupper(*from))
+              *to++ = RAPTOR_GOOD_CAST(unsigned char, tolower(*from++));
+            else
+              *to++ = *from++;
+          }
+          *to = '\0';
+        } else
+          memcpy(xml_language, atts[i+1], lang_len + 1); /* Copy NUL */
+      } else if(!strcmp((char*)atts[i], "xml:base")) {
+        raptor_uri* base_uri;
+        raptor_uri* xuri;
+        base_uri = raptor_sax2_inscope_base_uri(sax2);
+        xuri = raptor_new_uri_relative_to_base(sax2->world, base_uri, atts[i+1]);
+        /* only derive the xml:base URI when resolving succeeded */
+        if(xuri) {
+          xml_base = raptor_new_uri_for_xmlbase(xuri);
+          raptor_free_uri(xuri);
+        }
+      }
+
+      /* delete all xml attributes whether processed above or not */
+      atts[i] = NULL;
+    }
+  }
+
+
+  /* Create new element structure */
+  el_name = raptor_new_qname(&sax2->namespaces, name, NULL);
+  if(!el_name)
+    goto fail;
+
+#ifdef __clang_analyzer__
+  /* clang --analyze does not know about ownership of next call */
+  if(xml_language) {
+    free(xml_language); xml_language = NULL;
+  }
+  if(xml_base) {
+    raptor_free_uri(xml_base); xml_base = NULL;
+  }
+#endif
+  xml_element = raptor_new_xml_element(el_name, xml_language, xml_base);
+  if(!xml_element) {
+    raptor_free_qname(el_name);
+    goto fail;
+  }
+  /* xml_language,xml_base now owned by xml_element */
+  xml_language = NULL;
+  xml_base = NULL;
+
+  /* Turn string attributes into namespaced-attributes */
+  if(ns_attributes_count) {
+    int i;
+    int offset = 0;
+
+    /* Allocate new array to hold namespaced-attributes */
+    named_attrs = RAPTOR_CALLOC(raptor_qname**, ns_attributes_count,
+                                sizeof(raptor_qname*));
+    if(!named_attrs) {
+      raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL,
+                       sax2->locator, "Out of memory");
+      goto fail;
+    }
+
+    for(i = 0; i < all_atts_count; i++) {
+      raptor_qname* attr;
+
+      /* Skip previously processed attributes */
+      if(!atts[i<<1])
+        continue;
+
+      /* the qname for attribute i is stored at named_attrs[offset] */
+      attr = raptor_new_qname(&sax2->namespaces, atts[i<<1], atts[(i<<1)+1]);
+      if(!attr) { /* failed - tidy up and return */
+        int j;
+
+        /* Only the first 'offset' slots are filled; 'i' also counts
+         * the skipped xml* attributes and can exceed the array size.
+         * Each qname is released with its own destructor.
+         */
+        for(j = 0; j < offset; j++)
+          raptor_free_qname(named_attrs[j]);
+        RAPTOR_FREE(raptor_qname_array, named_attrs);
+        goto fail;
+      }
+
+      named_attrs[offset++] = attr;
+    }
+  } /* end if ns_attributes_count */
+
+
+  if(named_attrs)
+    raptor_xml_element_set_attributes(xml_element,
+                                      named_attrs, ns_attributes_count);
+
+  raptor_xml_element_push(sax2, xml_element);
+
+  if(sax2->start_element_handler)
+    sax2->start_element_handler(sax2->user_data, xml_element);
+
+  if(xml_atts_copy) {
+    /* Restore passed in XML attributes, free the copy */
+    memcpy((void*)atts, xml_atts_copy, xml_atts_size);
+    RAPTOR_FREE(cstringpointer, xml_atts_copy);
+  }
+
+  return;
+
+  fail:
+  if(xml_atts_copy) {
+    /* Restore passed in XML attributes here too so the parser gets
+     * its array back intact, then free the copy
+     */
+    memcpy((void*)atts, xml_atts_copy, xml_atts_size);
+    RAPTOR_FREE(cstringpointer, xml_atts_copy);
+  }
+  if(xml_base)
+    raptor_free_uri(xml_base);
+  if(xml_language)
+    RAPTOR_FREE(char*, xml_language);
+  if(xml_element)
+    raptor_free_xml_element(xml_element);
+}
+
+
+/*
+ * raptor_sax2_end_element - end of an element
+ * @user_data: the #raptor_sax2 object
+ * @name: element name from the XML parser (not used)
+ *
+ * Does nothing when the sax2 is in the failed state or not enabled.
+ * Otherwise reports the current element to the end element handler,
+ * ends the namespaces declared at this depth, pops and frees the
+ * element and decrements the depth.
+ */
+void
+raptor_sax2_end_element(void* user_data, const unsigned char *name)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+  raptor_xml_element* closing;
+
+  if(sax2->failed || !sax2->enabled)
+    return;
+
+  closing = sax2->current_element;
+  if(closing) {
+#ifdef RAPTOR_DEBUG_VERBOSE
+    fprintf(stderr, "\nraptor_rdfxml_end_element_handler: End ns-element: ");
+    raptor_qname_print(stderr, closing->name);
+    fputc('\n', stderr);
+#endif
+
+    /* the handler sees the element while it is still on the stack */
+    if(sax2->end_element_handler)
+      sax2->end_element_handler(sax2->user_data, closing);
+  }
+
+  raptor_namespaces_end_for_depth(&sax2->namespaces,
+                                  raptor_sax2_get_depth(sax2));
+
+  closing = raptor_xml_element_pop(sax2);
+  if(closing)
+    raptor_free_xml_element(closing);
+
+  raptor_sax2_dec_depth(sax2);
+}
+
+
+
+
+/*
+ * raptor_sax2_characters - character data
+ * @user_data: the #raptor_sax2 object
+ * @s: character data
+ * @len: length of @s
+ *
+ * Passes the data on to the characters handler together with the
+ * current element, unless the sax2 has failed, is not enabled or has
+ * no such handler.
+ */
+void
+raptor_sax2_characters(void* user_data, const unsigned char *s, int len)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+
+  if(!sax2->failed && sax2->enabled && sax2->characters_handler)
+    sax2->characters_handler(sax2->user_data, sax2->current_element, s, len);
+}
+
+
+/*
+ * raptor_sax2_cdata - CDATA section content, like <![CDATA[...]]>
+ * @user_data: the #raptor_sax2 object
+ * @s: CDATA content
+ * @len: length of @s
+ *
+ * Passes the data on to the CDATA handler together with the current
+ * element, unless the sax2 has failed, is not enabled or has no such
+ * handler.
+ */
+void
+raptor_sax2_cdata(void* user_data, const unsigned char *s, int len)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+
+  if(!sax2->failed && sax2->enabled && sax2->cdata_handler)
+    sax2->cdata_handler(sax2->user_data, sax2->current_element, s, len);
+}
+
+
+/*
+ * raptor_sax2_comment - XML comment
+ * @user_data: the #raptor_sax2 object
+ * @s: comment string
+ *
+ * Passes the comment on to the comment handler together with the
+ * current element, unless the sax2 has failed, is not enabled or has
+ * no such handler.
+ */
+void
+raptor_sax2_comment(void* user_data, const unsigned char *s)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+
+  if(!sax2->failed && sax2->enabled && sax2->comment_handler)
+    sax2->comment_handler(sax2->user_data, sax2->current_element, s);
+}
+
+
+/*
+ * raptor_sax2_unparsed_entity_decl - unparsed (NDATA) entity declaration
+ * @user_data: the #raptor_sax2 object
+ * @entityName: entity name
+ * @base: base
+ * @systemId: system ID
+ * @publicId: public ID
+ * @notationName: notation name
+ *
+ * Passes all arguments unchanged to the unparsed entity declaration
+ * handler, unless the sax2 has failed, is not enabled or has no such
+ * handler.
+ */
+void
+raptor_sax2_unparsed_entity_decl(void* user_data,
+                                 const unsigned char* entityName,
+                                 const unsigned char* base,
+                                 const unsigned char* systemId,
+                                 const unsigned char* publicId,
+                                 const unsigned char* notationName)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+
+  if(!sax2->failed && sax2->enabled && sax2->unparsed_entity_decl_handler)
+    sax2->unparsed_entity_decl_handler(sax2->user_data,
+                                       entityName, base, systemId,
+                                       publicId, notationName);
+}
+
+
+/*
+ * raptor_sax2_external_entity_ref - external entity reference
+ * @user_data: the #raptor_sax2 object
+ * @context: context
+ * @base: base (may be NULL)
+ * @systemId: system ID (may be NULL)
+ * @publicId: public ID (may be NULL)
+ *
+ * Passes the reference on to the external entity reference handler.
+ * When no handler is set, an error naming the reference is logged;
+ * "(None)" is printed for any of @base, @systemId and @publicId that
+ * is NULL.
+ *
+ * Return value: the result of the handler, or 0 when the sax2 has
+ * failed, is not enabled or has no handler
+ */
+int
+raptor_sax2_external_entity_ref(void* user_data,
+                                const unsigned char* context,
+                                const unsigned char* base,
+                                const unsigned char* systemId,
+                                const unsigned char* publicId)
+{
+  raptor_sax2* sax2 = (raptor_sax2*)user_data;
+
+  if(sax2->failed || !sax2->enabled)
+    return 0;
+
+  if(sax2->external_entity_ref_handler)
+    return sax2->external_entity_ref_handler(sax2->user_data,
+                                             context, base, systemId, publicId);
+
+  /* every %s argument is guarded: a NULL pointer must not reach %s */
+  raptor_sax2_simple_error((void*)sax2,
+                           "Failed to handle external entity reference with base %s systemId %s publicId %s",
+                           (base ? (const char*)base : "(None)"),
+                           (systemId ? (const char*)systemId : "(None)"),
+                           (publicId ? (const char*)publicId: "(None)"));
+
+  /* Failed to handle external entity reference */
+  return 0;
+}
+
+
+/**
+ * raptor_sax2_check_load_uri_string:
+ * @sax2: SAX2 object
+ * @uri_string: URI or file URI or file name string
+ *
+ * INTERNAL - Check URI loading policy
+ *
+ * @uri_string is turned into a URI using the SAX2 base URI.  A file
+ * URI may be loaded unless #RAPTOR_OPTION_NO_FILE is set, any other
+ * URI unless #RAPTOR_OPTION_NO_NET is set.  If a URI filter is set
+ * and returns non-0 for the URI, loading is refused regardless of
+ * the options.
+ *
+ * Return value: > 0 if it is OK to load the URI, 0 if not, < 0 on failure
+*/
+int
+raptor_sax2_check_load_uri_string(raptor_sax2* sax2,
+                                  const unsigned char* uri_string)
+{
+  raptor_uri* abs_uri;
+  const unsigned char* abs_uri_string;
+  int abs_uri_is_file;
+  int load_uri = 0;
+
+  abs_uri = raptor_new_uri_from_uri_or_file_string(sax2->world, sax2->base_uri,
+                                                   uri_string);
+  if(!abs_uri)
+    return -1;
+
+  abs_uri_string = raptor_uri_as_string(abs_uri);
+  abs_uri_is_file = raptor_uri_uri_string_is_file_uri(abs_uri_string);
+
+  /* policy from the options: files and network are switched separately */
+  if(!abs_uri_is_file)
+    load_uri = !RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_NET);
+  else
+    load_uri = !RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_FILE);
+
+  /* the filter can only veto, never grant */
+  if(sax2->uri_filter &&
+     sax2->uri_filter(sax2->uri_filter_user_data, abs_uri))
+    load_uri = 0;
+
+  RAPTOR_DEBUG4("URI '%s' Is a file? %s Load URI? %s\n", abs_uri_string,
+                (abs_uri_is_file > 0) ? "YES" : "NO",
+                (load_uri > 0) ? "YES" : "NO");
+
+  raptor_free_uri(abs_uri);
+
+  return load_uri;
+}