summaryrefslogtreecommitdiffstats
path: root/librdfa/rdfa.c
diff options
context:
space:
mode:
Diffstat (limited to 'librdfa/rdfa.c')
-rw-r--r--librdfa/rdfa.c1552
1 files changed, 1552 insertions, 0 deletions
diff --git a/librdfa/rdfa.c b/librdfa/rdfa.c
new file mode 100644
index 0000000..6dadbe0
--- /dev/null
+++ b/librdfa/rdfa.c
@@ -0,0 +1,1552 @@
+/**
+ * Copyright 2008-2011 Digital Bazaar, Inc.
+ *
+ * This file is part of librdfa.
+ *
+ * librdfa is Free Software, and can be licensed under any of the
+ * following three licenses:
+ *
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any
+ * newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE-* at the top of this software distribution for more
+ * information regarding the details of each license.
+ *
+ * The librdfa library is the Fastest RDFa Parser in the Universe. It is
+ * a stream parser, meaning that it takes XML data as input and spits
+ * out RDF triples as it comes across them in the stream. Due to this
+ * processing approach, librdfa has a very, very small memory footprint.
+ * It is also very fast and can operate on hundreds of gigabytes of XML
+ * data without breaking a sweat.
+ *
+ * Usage:
+ *
+ * rdfacontext* context = rdfa_create_context(BASE_URI);
+ * context->callback_data = your_user_data;
+ * rdfa_set_default_graph_triple_handler(context, &default_graph_triple);
+ * rdfa_set_processor_graph_triple_handler(context, &processor_graph_triple);
+ * rdfa_set_buffer_filler(context, &fill_buffer);
+ * rdfa_parse(context);
+ * rdfa_free_context(context);
+ *
+ * @author Manu Sporny
+ */
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#ifdef HAVE_STRINGS_H
+# include <strings.h>
+#endif
+#include <ctype.h>
+#include <libxml/SAX2.h>
+#include "rdfa_utils.h"
+#include "rdfa.h"
+#include "strtok_r.h"
+
+#define READ_BUFFER_SIZE 4096
+#define RDFA_DOCTYPE_STRING_LENGTH 103
+
+/**
+ * Appends a freshly read chunk to the working buffer and sniffs the
+ * accumulated text for document-level information: the host language and
+ * RDFa version, and - once the end of the document head has been seen -
+ * the base IRI given by a <base href="..."> element.
+ *
+ * @param context the current working context; its wb_position,
+ *           host_language, rdfa_version, base and current_object_resource
+ *           members are updated.
+ * @param working_buffer pointer to the working buffer; the buffer is
+ *           reallocated (and the pointer updated) when it must grow.
+ * @param working_buffer_size pointer to the number of bytes allocated to
+ *           the working buffer, not counting the trailing NUL byte; updated
+ *           when the buffer grows.
+ * @param temp_buffer the chunk of data that was just read.
+ * @param bytes_read the number of bytes available in temp_buffer.
+ *
+ * @return bytes_read once the chunk has been appended, or 0 if the working
+ *         buffer could not be enlarged (in which case nothing is appended
+ *         and the buffer, its size and the context are left untouched).
+ */
+static size_t rdfa_init_base(
+   rdfacontext* context, char** working_buffer, size_t* working_buffer_size,
+   char* temp_buffer, size_t bytes_read)
+{
+   char* head_end = NULL;
+   char* base_start = NULL;
+   char* base_end = NULL;
+   char* href_start = NULL;
+   size_t offset = context->wb_position;
+   size_t needed_size = 0;
+
+   if((offset + bytes_read) > *working_buffer_size)
+   {
+      needed_size = (offset + bytes_read) - *working_buffer_size;
+   }
+
+   /* extend the working buffer size */
+   if(needed_size > 0)
+   {
+      size_t growth = sizeof(char) * READ_BUFFER_SIZE;
+      size_t new_size;
+      char* grown;
+
+      if(needed_size > growth)
+         growth += needed_size;
+      new_size = *working_buffer_size + growth;
+
+      /* +1 for NUL at end, to allow strstr() etc. to work. The result is
+       * kept in a temporary so that the original buffer is neither leaked
+       * nor replaced by NULL when the allocation fails. */
+      grown = (char*)realloc(*working_buffer, new_size + 1);
+      if(grown == NULL)
+      {
+         /* NOTE(review): callers are outside this view - confirm that a
+          * return value of 0 is handled as "nothing was buffered". */
+         return 0;
+      }
+
+      *working_buffer = grown;
+      *working_buffer_size = new_size;
+   }
+
+   /* append to the working buffer */
+   memmove(*working_buffer + offset, temp_buffer, bytes_read);
+   /* ensure the buffer is a NUL-terminated string
+    * NOTE(review): when no growth was needed this relies on the buffer
+    * having been allocated with one spare byte by the caller - confirm. */
+   *(*working_buffer + offset + bytes_read) = '\0';
+
+   /* Sniff the beginning of the document for any document information */
+   if(strstr(*working_buffer, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL)
+   {
+      context->host_language = HOST_LANGUAGE_XHTML1;
+      context->rdfa_version = RDFA_VERSION_1_0;
+   }
+   else if(strstr(*working_buffer, "-//W3C//DTD XHTML+RDFa 1.1//EN") != NULL)
+   {
+      context->host_language = HOST_LANGUAGE_XHTML1;
+      context->rdfa_version = RDFA_VERSION_1_1;
+   }
+   else if(strstr(*working_buffer, "<html") != NULL)
+   {
+      context->host_language = HOST_LANGUAGE_HTML;
+      context->rdfa_version = RDFA_VERSION_1_1;
+   }
+   else
+   {
+      context->host_language = HOST_LANGUAGE_XML1;
+      context->rdfa_version = RDFA_VERSION_1_1;
+   }
+
+#ifdef LIBRDFA_IN_RAPTOR
+   /* an RDFa version requested through raptor overrides the sniffed one */
+   if(context->raptor_rdfa_version == 10) {
+      context->host_language = HOST_LANGUAGE_XHTML1;
+      context->rdfa_version = RDFA_VERSION_1_0;
+   } else if(context->raptor_rdfa_version == 11)
+      context->rdfa_version = RDFA_VERSION_1_1;
+#endif
+
+   /* search for the end of the document head */
+   head_end = strstr(*working_buffer, "</head>");
+   if(head_end == NULL)
+      head_end = strstr(*working_buffer, "</HEAD>");
+
+   context->wb_position += bytes_read;
+
+   /* the head is not complete yet, so no base can be extracted */
+   if(head_end == NULL)
+      return bytes_read;
+
+   /* </head> was found, search for <base and extract the base URI */
+   base_start = strstr(*working_buffer, "<base ");
+   if(base_start == NULL)
+      base_start = strstr(*working_buffer, "<BASE ");
+   if(base_start != NULL)
+   {
+      base_end = strchr(base_start, '>');
+      href_start = strstr(base_start, "href=");
+   }
+
+   /* only honour an href that belongs to the <base> element itself, not
+    * one found on some later element */
+   if(href_start != NULL && base_end != NULL && href_start > base_end)
+      href_start = NULL;
+
+   if(href_start != NULL)
+   {
+      char sep = href_start[5];
+
+      /* the value must be quoted; anything else (including the end of the
+       * buffer) cannot be delimited safely */
+      if(sep == '"' || sep == '\'')
+      {
+         char* uri_start = href_start + 6;
+         char* uri_end = strchr(uri_start, sep);
+
+         /* skip unterminated and empty values */
+         if(uri_end != NULL && uri_end != uri_start)
+         {
+            size_t uri_size = (size_t)(uri_end - uri_start);
+            char* temp_uri = (char*)malloc(uri_size + 1);
+
+            if(temp_uri != NULL)
+            {
+               char* cleaned_base;
+
+               memcpy(temp_uri, uri_start, uri_size);
+               temp_uri[uri_size] = '\0';
+
+               /* TODO: This isn't in the processing rules, should it
+                *       be? Setting current_object_resource will make
+                *       sure that the BASE element is inherited by all
+                *       subcontexts. */
+               cleaned_base = rdfa_iri_get_base(temp_uri);
+               context->current_object_resource =
+                  rdfa_replace_string(
+                     context->current_object_resource, cleaned_base);
+
+               /* clean up the base context */
+               context->base =
+                  rdfa_replace_string(context->base, cleaned_base);
+               free(cleaned_base);
+               free(temp_uri);
+            }
+         }
+      }
+   }
+
+   return bytes_read;
+}
+
+#ifdef LIBRDFA_IN_RAPTOR
+/**
+ * qsort() comparator for an array of raptor_namespace pointers.
+ *
+ * Namespaces are ordered by prefix in descending strcmp() order, and
+ * namespaces without a prefix are placed after all prefixed ones. Two
+ * namespaces that both lack a prefix compare equal, so that the ordering
+ * is consistent regardless of the argument order, as qsort() requires.
+ *
+ * @param a pointer to the first raptor_namespace pointer.
+ * @param b pointer to the second raptor_namespace pointer.
+ *
+ * @return a negative, zero or positive value if a sorts before, equal to
+ *         or after b.
+ */
+static int
+raptor_nspace_compare(const void *a, const void *b)
+{
+  raptor_namespace* ns_a = *(raptor_namespace* const*)a;
+  raptor_namespace* ns_b = *(raptor_namespace* const*)b;
+
+  if(!ns_a->prefix && !ns_b->prefix)
+    return 0;
+  if(!ns_a->prefix)
+    return 1;
+  if(!ns_b->prefix)
+    return -1;
+
+  /* arguments are swapped on purpose: descending order */
+  return strcmp((const char*)ns_b->prefix, (const char*)ns_a->prefix);
+}
+#endif
+
+/**
+ * Copies the attribute value delimited by [start, end) into a newly
+ * allocated, NUL-terminated string. The source span is not required to be
+ * NUL-terminated; exactly (end - start) bytes are read.
+ *
+ * @param start pointer to the first byte of the value.
+ * @param end pointer to the byte just after the last byte of the value.
+ *
+ * @return the copy, which the caller must free(), or NULL if the
+ *         allocation failed.
+ */
+static char* rdfa_copy_attribute_value(const char* start, const char* end)
+{
+   size_t length = (size_t)(end - start);
+   char* copy = (char*)malloc(length + 1);
+
+   if(copy != NULL)
+   {
+      memcpy(copy, start, length);
+      copy[length] = '\0';
+   }
+
+   return copy;
+}
+
+/**
+ * Handles the start_element call.
+ *
+ * Creates a new element context, pushes it onto the context stack, appends
+ * the opening tag to the XML Literal being accumulated, collects the
+ * RDFa-specific attributes of the element and then runs the subject,
+ * type, rel/rev and list processing steps for the detected RDFa version.
+ *
+ * @param parser_context the root rdfacontext, which holds the context stack.
+ * @param name the local name of the element.
+ * @param prefix the namespace prefix of the element, or NULL.
+ * @param URI the namespace URI of the element, or NULL.
+ * @param nb_namespaces the number of namespace declarations on the element.
+ * @param namespaces prefix/URI pairs (2 entries per declaration); a prefix
+ *           entry may be NULL.
+ * @param nb_attributes the number of attributes on the element.
+ * @param nb_defaulted the number of defaulted attributes (only reported in
+ *           the debugging output).
+ * @param attributes 5 entries per attribute: local name, prefix, URI,
+ *           pointer to the start of the value and pointer to its end.
+ */
+static void start_element(void *parser_context, const char* name,
+   const char* prefix, const char* URI, int nb_namespaces,
+   const char** namespaces, int nb_attributes, int nb_defaulted,
+   const char** attributes)
+{
+   rdfacontext* root_context = (rdfacontext*)parser_context;
+   rdfalist* context_stack = (rdfalist*)root_context->context_stack;
+   rdfacontext* context = rdfa_create_new_element_context(context_stack);
+   char* xml_lang = NULL;
+   const char* about_curie = NULL;
+   char* about = NULL;
+   const char* src_curie = NULL;
+   char* src = NULL;
+   const char* type_of_curie = NULL;
+   rdfalist* type_of = NULL;
+   const char* rel_curie = NULL;
+   rdfalist* rel = NULL;
+   const char* rev_curie = NULL;
+   rdfalist* rev = NULL;
+   const char* property_curie = NULL;
+   rdfalist* property = NULL;
+   const char* resource_curie = NULL;
+   char* resource = NULL;
+   const char* href_curie = NULL;
+   char* href = NULL;
+   char* content = NULL;
+   const char* datatype_curie = NULL;
+   char* datatype = NULL;
+
+#ifdef LIBRDFA_IN_RAPTOR
+   if(1) {
+      raptor_parser* rdf_parser = (raptor_parser*)context->callback_data;
+      raptor_sax2_update_document_locator(context->sax2,
+                                          &rdf_parser->locator);
+   }
+#endif
+
+   rdfa_push_item(context_stack, context, RDFALIST_FLAG_CONTEXT);
+
+#if defined(DEBUG) && DEBUG > 0
+   if(1) {
+      int i;
+
+      /* dump all arguments sent to this callback */
+      fprintf(stdout, "DEBUG: SAX.startElementNs(%s", (char *) name);
+      if (prefix == NULL)
+         fprintf(stdout, ", NULL");
+      else
+         fprintf(stdout, ", %s", (char *) prefix);
+      if (URI == NULL)
+         fprintf(stdout, ", NULL");
+      else
+         fprintf(stdout, ", '%s'", (char *) URI);
+      fprintf(stdout, ", %d", nb_namespaces);
+
+      /* dump all namespaces */
+      if (namespaces != NULL) {
+         for (i = 0;i < nb_namespaces * 2;i++) {
+            fprintf(stdout, ", xmlns");
+            if (namespaces[i] != NULL)
+               fprintf(stdout, ":%s", namespaces[i]);
+            i++;
+            fprintf(stdout, "='%s'", namespaces[i]);
+         }
+      }
+
+      /* dump all attributes */
+      fprintf(stdout, ", %d, %d", nb_attributes, nb_defaulted);
+      if (attributes != NULL) {
+         for (i = 0;i < nb_attributes * 5;i += 5) {
+            if (attributes[i + 1] != NULL)
+               fprintf(
+                  stdout, ", %s:%s='", attributes[i + 1], attributes[i]);
+            else
+               fprintf(stdout, ", %s='", attributes[i]);
+            fprintf(stdout, "%.4s...', %d", attributes[i + 3],
+               (int)(attributes[i + 4] - attributes[i + 3]));
+         }
+      }
+      fprintf(stdout, ")\n");
+   }
+#endif
+
+   /* start the XML Literal text */
+   if(context->xml_literal == NULL)
+   {
+      context->xml_literal = rdfa_replace_string(context->xml_literal, "<");
+      context->xml_literal_size = 1;
+   }
+   else
+   {
+      context->xml_literal = rdfa_n_append_string(
+         context->xml_literal, &context->xml_literal_size, "<", 1);
+   }
+   context->xml_literal = rdfa_n_append_string(
+      context->xml_literal, &context->xml_literal_size,
+      name, strlen(name));
+
+   if(!context->xml_literal_namespaces_defined)
+   {
+      /* append namespaces to XML Literal */
+#ifdef LIBRDFA_IN_RAPTOR
+      raptor_namespace_stack* nstack = &context->sax2->namespaces;
+      raptor_namespace* ns;
+      raptor_namespace** ns_list = NULL;
+      size_t ns_size;
+#else
+      void** umap = context->uri_mappings;
+#endif
+      const char* umap_key = NULL;
+      void* umap_value = NULL;
+
+      /* if the namespaces are not defined, then neither is the xml:lang */
+      context->xml_literal_xml_lang_defined = 0;
+
+#ifdef LIBRDFA_IN_RAPTOR
+      ns_size = 0;
+      ns_list = raptor_namespace_stack_to_array(nstack, &ns_size);
+      qsort((void*)ns_list, ns_size, sizeof(raptor_namespace*),
+            raptor_nspace_compare);
+
+      while(ns_size > 0)
+#else
+      while(*umap != NULL)
+#endif
+      {
+         unsigned char insert_xmlns_definition = 1;
+
+         /* get the next mapping to process */
+#ifdef LIBRDFA_IN_RAPTOR
+         ns=ns_list[--ns_size];
+
+         umap_key = (const char*)raptor_namespace_get_prefix(ns);
+         if(!umap_key)
+            umap_key=(const char*)XMLNS_DEFAULT_MAPPING;
+         umap_value = (char*)raptor_uri_as_string(raptor_namespace_get_uri(ns));
+#else
+         rdfa_next_mapping(umap++, &umap_key, &umap_value);
+         umap++;
+#endif
+
+         /* check to make sure that the namespace isn't already
+          * defined in the current element. */
+         if(attributes != NULL)
+         {
+            int ai;
+
+            /* The attribute array holds nb_attributes * 5 entries and has
+             * no NULL terminator, so the walk is bounded by the entry
+             * count as well as by the first NULL entry.
+             * NOTE(review): this compares every entry (not just the local
+             * names) with the mapping key, which looks like a leftover
+             * from a NULL-terminated name/value attribute API - confirm
+             * the intended check. */
+            for(ai = 0; ai < nb_attributes * 5 && attributes[ai] != NULL &&
+               insert_xmlns_definition; ai++)
+            {
+               /* if the attribute is a umap_key, skip the definition
+                * of the attribute. */
+               if(strcmp(attributes[ai], umap_key) == 0)
+               {
+                  insert_xmlns_definition = 0;
+               }
+            }
+         }
+
+         /* if the namespace isn't already defined on the element,
+          * copy it to the XML Literal string. */
+         if(insert_xmlns_definition)
+         {
+            /* append the namespace attribute to the XML Literal */
+            context->xml_literal = rdfa_n_append_string(
+               context->xml_literal, &context->xml_literal_size,
+               " xmlns", strlen(" xmlns"));
+
+            /* check to see if we're dumping the standard XHTML namespace or
+             * a user-defined XML namespace */
+            if(strcmp(umap_key, XMLNS_DEFAULT_MAPPING) != 0)
+            {
+               context->xml_literal = rdfa_n_append_string(
+                  context->xml_literal, &context->xml_literal_size, ":", 1);
+               context->xml_literal = rdfa_n_append_string(
+                  context->xml_literal, &context->xml_literal_size,
+                  umap_key, strlen(umap_key));
+            }
+
+            /* append the namespace value */
+            context->xml_literal = rdfa_n_append_string(
+               context->xml_literal, &context->xml_literal_size, "=\"", 2);
+            context->xml_literal = rdfa_n_append_string(
+               context->xml_literal, &context->xml_literal_size,
+               (const char*)umap_value, strlen((char*)umap_value));
+            context->xml_literal = rdfa_n_append_string(
+               context->xml_literal, &context->xml_literal_size, "\"", 1);
+         }
+
+      } /* end while umap not NULL */
+      context->xml_literal_namespaces_defined = 1;
+
+#ifdef LIBRDFA_IN_RAPTOR
+      if(ns_list)
+         raptor_free_memory(ns_list);
+#endif
+   } /* end if namespaces inserted */
+
+#ifdef LIBRDFA_IN_RAPTOR
+   /* Raptor namespace code does this already */
+#else
+   /* 3. For backward compatibility, RDFa Processors should also permit the
+    * definition of mappings via @xmlns. In this case, the value to be mapped
+    * is set by the XML namespace prefix, and the value to map is the value of
+    * the attribute - an IRI. (Note that prefix mapping via @xmlns is
+    * deprecated, and may be removed in a future version of this
+    * specification.) When xmlns is supported, such mappings must be processed
+    * before processing any mappings from @prefix on the same element. */
+   if(namespaces != NULL)
+   {
+      int ni;
+
+      for(ni = 0; ni < nb_namespaces * 2; ni += 2)
+      {
+         const char* ns = namespaces[ni];
+         const char* value = namespaces[ni + 1];
+         /* Regardless of how the mapping is declared, the value to be mapped
+          * must be converted to lower case, and the IRI is not processed in
+          * any way; in particular if it is a relative path it must not be
+          * resolved against the current base. */
+         char* lcns = NULL;
+         if(ns != NULL)
+         {
+            /* convert the namespace string to lowercase */
+            size_t i;
+            size_t ns_length = strlen(ns);
+            lcns = (char*)malloc(ns_length + 1);
+            if(lcns == NULL)
+            {
+               /* out of memory: this mapping cannot be recorded */
+               continue;
+            }
+            /* <= so that the terminating NUL is copied as well; the cast
+             * keeps tolower() defined for bytes above 0x7f */
+            for(i = 0; i <= ns_length; i++)
+            {
+               lcns[i] = (char)tolower((unsigned char)ns[i]);
+            }
+         }
+
+         /* update the URI mappings */
+         rdfa_update_uri_mappings(context, lcns, value);
+
+         free(lcns);
+      }
+   }
+#endif
+
+   /* detect the RDFa version of the document, if specified */
+   if(attributes != NULL)
+   {
+      int ci;
+
+      /* search for a version attribute */
+      for(ci = 0; ci < nb_attributes * 5; ci += 5)
+      {
+         const char* attr = attributes[ci];
+
+         if(strcmp(attr, "version") == 0)
+         {
+            char* value = rdfa_copy_attribute_value(
+               attributes[ci + 3], attributes[ci + 4]);
+
+            if(value == NULL)
+            {
+               continue;
+            }
+
+            if(strstr(value, "RDFa 1.0") != NULL)
+            {
+               context->rdfa_version = RDFA_VERSION_1_0;
+            }
+            else if(strstr(value, "RDFa 1.1") != NULL)
+            {
+               context->rdfa_version = RDFA_VERSION_1_1;
+            }
+
+            free(value);
+         }
+      }
+   }
+
+#ifdef LIBRDFA_IN_RAPTOR
+   if(context->sax2)
+   {
+      /* Raptor handles xml:lang itself but not 'lang' */
+      xml_lang = (char*)raptor_sax2_inscope_xml_language(context->sax2);
+      xml_lang = rdfa_replace_string(NULL, xml_lang);
+   }
+#endif
+
+   /* prepare all of the RDFa-specific attributes we are looking for.
+    * scan all of the attributes for the RDFa-specific attributes */
+   if(attributes != NULL)
+   {
+      int ci;
+
+      if(context->rdfa_version == RDFA_VERSION_1_1)
+      {
+         /* process all vocab and prefix attributes */
+         for(ci = 0; ci < nb_attributes * 5; ci += 5)
+         {
+            const char* attr = attributes[ci];
+            char* value = rdfa_copy_attribute_value(
+               attributes[ci + 3], attributes[ci + 4]);
+
+            if(value == NULL)
+            {
+               continue;
+            }
+
+            /* 2. Next the current element is examined for any change to the
+             * default vocabulary via @vocab. */
+            if(strcmp(attr, "vocab") == 0)
+            {
+               if(strlen(value) < 1)
+               {
+                  /* If the value is empty, then the local default vocabulary
+                   * must be reset to the Host Language defined default
+                   * (if any). */
+                  free(context->default_vocabulary);
+                  context->default_vocabulary = NULL;
+               }
+               else
+               {
+                  char* resolved_uri;
+                  rdftriple* triple;
+
+                  /* If @vocab is present and contains a value, the local
+                   * default vocabulary is updated according to the
+                   * section on CURIE and IRI Processing. */
+                  resolved_uri = rdfa_resolve_uri(context, value);
+                  context->default_vocabulary = rdfa_replace_string(
+                     context->default_vocabulary, resolved_uri);
+
+                  /* The value of @vocab is used to generate a triple */
+                  triple = rdfa_create_triple(
+                     context->base, "http://www.w3.org/ns/rdfa#usesVocabulary",
+                     resolved_uri, RDF_TYPE_IRI, NULL, NULL);
+                  context->default_graph_triple_callback(
+                     triple, context->callback_data);
+
+                  free(resolved_uri);
+               }
+            }
+            else if(strcmp(attr, "prefix") == 0)
+            {
+               /* Mappings are defined via @prefix. */
+               char* working_string = NULL;
+               char* atprefix = NULL;
+               char* iri = NULL;
+               char* saveptr = NULL;
+
+               working_string = rdfa_replace_string(working_string, value);
+
+               /* Values in this attribute are evaluated from beginning to
+                * end (e.g., left to right in typical documents). */
+               atprefix = strtok_r(working_string, ":", &saveptr);
+               while(atprefix != NULL)
+               {
+                  /* find the prefix and IRI mappings while skipping whitespace */
+                  while((*saveptr == ' ' || *saveptr == '\n' ||
+                     *saveptr == '\r' || *saveptr == '\t' || *saveptr == '\f' ||
+                     *saveptr == '\v') && *saveptr != '\0')
+                  {
+                     saveptr++;
+                  }
+                  iri = strtok_r(NULL, RDFA_WHITESPACE, &saveptr);
+
+                  /* update the prefix mappings */
+                  rdfa_update_uri_mappings(context, atprefix, iri);
+
+                  if(!saveptr)
+                     break;
+
+                  while((*saveptr == ' ' || *saveptr == '\n' ||
+                     *saveptr == '\r' || *saveptr == '\t' || *saveptr == '\f' ||
+                     *saveptr == '\v') && *saveptr != '\0')
+                  {
+                     saveptr++;
+                  }
+
+                  /* get the next prefix to process */
+                  atprefix = strtok_r(NULL, ":", &saveptr);
+               }
+
+               free(working_string);
+            }
+            else if(strcmp(attr, "inlist") == 0)
+            {
+               context->inlist_present = 1;
+            }
+            free(value);
+         }
+      }
+
+      /* resolve all of the other RDFa values */
+      for(ci = 0; ci < nb_attributes * 5; ci += 5)
+      {
+         const char* attr;
+         char* value;
+         char* attrns;
+         char* literal_text;
+         size_t value_length;
+         size_t literal_text_length;
+
+         attr = attributes[ci];
+         attrns = (char*)attributes[ci + 1];
+         value_length = (size_t)(attributes[ci + 4] - attributes[ci + 3]);
+
+         value = rdfa_copy_attribute_value(
+            attributes[ci + 3], attributes[ci + 4]);
+         if(value == NULL)
+         {
+            continue;
+         }
+
+         /* append the attribute-value pair to the XML literal; the length
+          * covers the leading space, the name, =" , the value and the
+          * closing quote */
+         literal_text_length = strlen(attr) + value_length + 4;
+         literal_text = (char*)malloc(literal_text_length + 1);
+         if(literal_text != NULL)
+         {
+            snprintf(literal_text, literal_text_length + 1,
+               " %s=\"%s\"", attr, value);
+            context->xml_literal = rdfa_n_append_string(
+               context->xml_literal, &context->xml_literal_size,
+               literal_text, strlen(literal_text));
+            free(literal_text);
+         }
+
+         /* if xml:lang is defined, ensure that it is not overwritten */
+         if(attrns != NULL && strcmp(attrns, "xml") == 0 &&
+            strcmp(attr, "lang") == 0)
+         {
+            context->xml_literal_xml_lang_defined = 1;
+         }
+
+         /* process all of the RDFa attributes */
+         if(strcmp(attr, "about") == 0)
+         {
+            about_curie = value;
+            about = rdfa_resolve_curie(
+               context, about_curie, CURIE_PARSE_ABOUT_RESOURCE);
+         }
+         else if(strcmp(attr, "src") == 0)
+         {
+            src_curie = value;
+            src = rdfa_resolve_curie(context, src_curie, CURIE_PARSE_HREF_SRC);
+         }
+         else if(strcmp(attr, "typeof") == 0)
+         {
+            type_of_curie = value;
+            type_of = rdfa_resolve_curie_list(
+               context, type_of_curie,
+               CURIE_PARSE_INSTANCEOF_DATATYPE);
+         }
+         else if(strcmp(attr, "rel") == 0)
+         {
+            context->rel_present = 1;
+            rel_curie = value;
+            rel = rdfa_resolve_curie_list(
+               context, rel_curie, CURIE_PARSE_RELREV);
+         }
+         else if(strcmp(attr, "rev") == 0)
+         {
+            context->rev_present = 1;
+            rev_curie = value;
+            rev = rdfa_resolve_curie_list(
+               context, rev_curie, CURIE_PARSE_RELREV);
+         }
+         else if(strcmp(attr, "property") == 0)
+         {
+            property_curie = value;
+            property =
+               rdfa_resolve_curie_list(
+                  context, property_curie, CURIE_PARSE_PROPERTY);
+         }
+         else if(strcmp(attr, "resource") == 0)
+         {
+            resource_curie = value;
+            resource = rdfa_resolve_curie(
+               context, resource_curie, CURIE_PARSE_ABOUT_RESOURCE);
+         }
+         else if(strcmp(attr, "href") == 0)
+         {
+            href_curie = value;
+            href =
+               rdfa_resolve_curie(context, href_curie, CURIE_PARSE_HREF_SRC);
+         }
+         else if(strcmp(attr, "content") == 0)
+         {
+            content = rdfa_replace_string(content, value);
+         }
+         else if(strcmp(attr, "datatype") == 0)
+         {
+            datatype_curie = value;
+
+            if(strlen(datatype_curie) == 0)
+            {
+               datatype = rdfa_replace_string(datatype, "");
+            }
+            else
+            {
+               datatype = rdfa_resolve_curie(context, datatype_curie,
+                  CURIE_PARSE_INSTANCEOF_DATATYPE);
+            }
+         }
+         else if((attrns == NULL && strcmp(attr, "lang") == 0) ||
+            (attrns != NULL && strcmp(attrns, "xml") == 0 &&
+               strcmp(attr, "lang") == 0))
+         {
+            xml_lang = rdfa_replace_string(xml_lang, value);
+         }
+
+         /* the *_curie pointers alias value and must not be used past
+          * this point */
+         free(value);
+      }
+   }
+
+   /* The root element has an implicit @about declaration */
+   if(context->depth == 1 && about == NULL && resource == NULL &&
+      href == NULL && src == NULL)
+   {
+      about_curie = "";
+      about = rdfa_resolve_curie(
+         context, about_curie, CURIE_PARSE_ABOUT_RESOURCE);
+   }
+
+   /* The HEAD and BODY element in XHTML and HTML has an implicit
+    * about="" on it.
+    */
+   if(about == NULL && resource == NULL && href == NULL && src == NULL &&
+      (context->parent_subject == NULL || type_of != NULL) &&
+      ((context->host_language == HOST_LANGUAGE_XHTML1 ||
+      context->host_language == HOST_LANGUAGE_HTML) &&
+      (strcasecmp(name, "head") == 0 || strcasecmp(name, "body") == 0)))
+   {
+      about_curie = "";
+      about = rdfa_resolve_curie(
+         context, about_curie, CURIE_PARSE_ABOUT_RESOURCE);
+   }
+
+   /* check to see if we should append an xml:lang to the XML Literal
+    * if one is defined in the context and does not exist on the
+    * element. */
+   if((xml_lang == NULL) && (context->language != NULL) &&
+      !context->xml_literal_xml_lang_defined)
+   {
+      context->xml_literal = rdfa_n_append_string(
+         context->xml_literal, &context->xml_literal_size,
+         " xml:lang=\"", strlen(" xml:lang=\""));
+      context->xml_literal = rdfa_n_append_string(
+         context->xml_literal, &context->xml_literal_size,
+         context->language, strlen(context->language));
+      context->xml_literal = rdfa_n_append_string(
+         context->xml_literal, &context->xml_literal_size, "\"", 1);
+
+      /* ensure that the lang isn't set in a subtree (unless it's overwritten) */
+      context->xml_literal_xml_lang_defined = 1;
+   }
+
+   /* close the XML Literal value */
+   context->xml_literal = rdfa_n_append_string(
+      context->xml_literal, &context->xml_literal_size, ">", 1);
+
+   /* 3. The [current element] is also parsed for any language
+    *    information, and [language] is set in the [current
+    *    evaluation context]; */
+   rdfa_update_language(context, xml_lang);
+
+   /***************** FOR DEBUGGING PURPOSES ONLY ******************/
+#if defined(DEBUG) && DEBUG > 0
+   printf("DEBUG: depth = %u\n", context->depth);
+   if(about != NULL)
+   {
+      printf("DEBUG: @about = %s\n", about);
+   }
+   if(src != NULL)
+   {
+      printf("DEBUG: @src = %s\n", src);
+   }
+   if(type_of != NULL)
+   {
+      printf("DEBUG: @type_of = ");
+      rdfa_print_list(type_of);
+   }
+   if(context->inlist_present)
+   {
+      printf("DEBUG: @inlist = true\n");
+   }
+   if(rel != NULL)
+   {
+      printf("DEBUG: @rel = ");
+      rdfa_print_list(rel);
+   }
+   if(rev != NULL)
+   {
+      printf("DEBUG: @rev = ");
+      rdfa_print_list(rev);
+   }
+   if(property != NULL)
+   {
+      printf("DEBUG: @property = ");
+      rdfa_print_list(property);
+   }
+   if(resource != NULL)
+   {
+      printf("DEBUG: @resource = %s\n", resource);
+   }
+   if(href != NULL)
+   {
+      printf("DEBUG: @href = %s\n", href);
+   }
+   if(content != NULL)
+   {
+      printf("DEBUG: @content = %s\n", content);
+   }
+   if(datatype != NULL)
+   {
+      printf("DEBUG: @datatype = %s\n", datatype);
+   }
+   if(xml_lang != NULL)
+   {
+      printf("DEBUG: @xml:lang = %s\n", xml_lang);
+   }
+#endif
+
+   /* TODO: This isn't part of the processing model, it needs to be
+    * included and is a correction for the last item in step #4.
+    * NOTE(review): prefix here is the element's namespace prefix, not the
+    * @prefix attribute - confirm that this is the intended test. */
+   if((about == NULL) && (src == NULL) && (type_of == NULL) &&
+      (rel == NULL) && (rev == NULL) && (property == NULL) &&
+      (resource == NULL) && (href == NULL) &&
+      (context->default_vocabulary == NULL) && (prefix == NULL))
+   {
+      context->skip_element = 1;
+   }
+
+   if((rel == NULL) && (rev == NULL))
+   {
+      if(context->rdfa_version == RDFA_VERSION_1_0)
+      {
+         /* 4. If the [current element] contains no valid @rel or @rev
+          * URI, obtained according to the section on CURIE and URI
+          * Processing, then the next step is to establish a value for
+          * [new subject]. Any of the attributes that can carry a
+          * resource can set [new subject]; */
+         rdfa_establish_new_1_0_subject(
+            context, name, about, src, resource, href, type_of);
+      }
+      else
+      {
+         rdfa_establish_new_1_1_subject(
+            context, name, about, src, resource, href, type_of, property,
+            content, datatype);
+      }
+   }
+   else
+   {
+      if(context->rdfa_version == RDFA_VERSION_1_0)
+      {
+         /* 5. If the [current element] does contain a valid @rel or @rev
+          * URI, obtained according to the section on CURIE and URI
+          * Processing, then the next step is to establish both a value
+          * for [new subject] and a value for [current object resource]: */
+         rdfa_establish_new_1_0_subject_with_relrev(
+            context, name, about, src, resource, href, type_of);
+      }
+      else
+      {
+         rdfa_establish_new_1_1_subject_with_relrev(
+            context, name, about, src, resource, href, type_of);
+      }
+   }
+
+   if(context->new_subject != NULL)
+   {
+#if defined(DEBUG) && DEBUG > 0
+      printf("DEBUG: new_subject = %s\n", context->new_subject);
+#endif
+
+      /* RDFa 1.0: 6. If in any of the previous steps a [new subject] was set
+       * to a non-null value, it is now used to provide a subject for
+       * type values; */
+      /* RDFa 1.1: 7. If in any of the previous steps a typed resource was set
+       * to a non-null value, it is now used to provide a subject for type
+       * values;
+       */
+      if(type_of != NULL)
+      {
+         rdfa_complete_type_triples(context, type_of);
+      }
+
+      /* Note that none of this block is executed if there is no
+       * [new subject] value, i.e., [new subject] remains null. */
+   }
+
+   if(context->current_object_resource != NULL)
+   {
+      /* If the element contains both the @inlist and the @rel attributes:
+       * the @rel may contain one or more resources, obtained according to
+       * the section on CURIE and IRI Processing each of which is used to
+       * add an entry to the list mapping as follows:
+       * if the local list mapping does not contain a list associated with
+       * the IRI, instantiate a new list and add to local list mappings
+       * add the current object resource to the list associated with the
+       * resource in the local list mapping */
+      if(context->rdfa_version == RDFA_VERSION_1_1 && (rel != NULL) &&
+         context->inlist_present)
+      {
+         rdfresource_t object_type = RDF_TYPE_IRI;
+         if((property != NULL) || (content != NULL))
+         {
+            object_type = RDF_TYPE_PLAIN_LITERAL;
+            if(datatype != NULL)
+            {
+               object_type = RDF_TYPE_TYPED_LITERAL;
+            }
+         }
+         rdfa_establish_new_inlist_triples(
+            context, rel, context->current_object_resource, object_type);
+      }
+
+      /* 7. If in any of the previous steps a [current object resource]
+       * was set to a non-null value, it is now used to generate triples */
+      rdfa_complete_relrev_triples(context, rel, rev);
+   }
+
+   if((context->current_object_resource == NULL) &&
+      context->rdfa_version == RDFA_VERSION_1_1 && (rel != NULL) &&
+      context->inlist_present)
+   {
+      rdfa_save_incomplete_list_triples(context, rel);
+   }
+   else if((context->current_object_resource == NULL) &&
+      ((rel != NULL) || (rev != NULL)))
+   {
+      /* 8. If however [current object resource] was set to null, but
+       * there are predicates present, then they must be stored as
+       * [incomplete triple]s, pending the discovery of a subject that
+       * can be used as the object. Also, [current object resource]
+       * should be set to a newly created [bnode] */
+      rdfa_save_incomplete_triples(context, rel, rev);
+   }
+
+   /* Ensure to re-insert XML Literal namespace information from this
+    * point on... */
+   if(property != NULL)
+   {
+      context->xml_literal_namespaces_defined = 0;
+   }
+
+   /* save these for processing steps #9 and #10 */
+   context->about = rdfa_replace_string(context->about, about);
+   context->resource = rdfa_replace_string(context->resource, resource);
+   context->href = rdfa_replace_string(context->href, href);
+   context->src = rdfa_replace_string(context->src, src);
+   context->content = rdfa_replace_string(context->content, content);
+   context->datatype = rdfa_replace_string(context->datatype, datatype);
+   /* the property list is handed over to the context, not copied */
+   context->property = property;
+
+   /* free the resolved CURIEs */
+   free(about);
+   free(src);
+   rdfa_free_list(type_of);
+   rdfa_free_list(rel);
+   rdfa_free_list(rev);
+   free(xml_lang);
+   free(content);
+   free(resource);
+   free(href);
+   free(datatype);
+}
+
+/**
+ * Handles character data reported by the parser.
+ *
+ * The text is appended to both the plain literal and the XML literal of
+ * the context that is currently on top of the context stack. Nothing is
+ * appended when len is negative or when the temporary copy of the text
+ * cannot be allocated.
+ *
+ * @param parser_context the root rdfacontext, which holds the context stack.
+ * @param s the character data; it is not required to be NUL-terminated.
+ * @param len the number of bytes available in s.
+ */
+static void character_data(
+   void *parser_context, const xmlChar *s, int len)
+{
+   /*xmlParserCtxtPtr parser = (xmlParserCtxtPtr)parser_context;*/
+   rdfalist* context_stack =
+      (rdfalist*)((rdfacontext*)parser_context)->context_stack;
+   rdfacontext* context = (rdfacontext*)
+      context_stack->items[context_stack->num_items - 1]->data;
+   char* buffer;
+
+   if(len < 0)
+   {
+      return;
+   }
+
+   /* make a NUL-terminated copy of the text */
+   buffer = (char*)malloc((size_t)len + 1);
+   if(buffer == NULL)
+   {
+      return;
+   }
+   memcpy(buffer, s, (size_t)len);
+   buffer[len] = '\0';
+
+   /* append the text to the current context's plain literal */
+   if(context->plain_literal == NULL)
+   {
+      context->plain_literal =
+         rdfa_replace_string(context->plain_literal, buffer);
+      context->plain_literal_size = len;
+   }
+   else
+   {
+      context->plain_literal = rdfa_n_append_string(
+         context->plain_literal,
+         &context->plain_literal_size, buffer, len);
+   }
+
+   /* append the text to the current context's XML literal */
+   if(context->xml_literal == NULL)
+   {
+      context->xml_literal =
+         rdfa_replace_string(context->xml_literal, buffer);
+      context->xml_literal_size = len;
+   }
+   else
+   {
+      context->xml_literal = rdfa_n_append_string(
+         context->xml_literal, &context->xml_literal_size, buffer, len);
+   }
+
+   /*printf("plain_literal: %s\n", context->plain_literal);*/
+   /*printf("xml_literal: %s\n", context->xml_literal);*/
+
+   free(buffer);
+}
+
+static void end_element(void* parser_context, const char* name,
+ const char* prefix,const xmlChar* URI)
+{
+ /*xmlParserCtxtPtr parser = (xmlParserCtxtPtr)parser_context;*/
+ rdfalist* context_stack =
+ (rdfalist*)((rdfacontext*)parser_context)->context_stack;
+ rdfacontext* context = (rdfacontext*)rdfa_pop_item(context_stack);
+ rdfacontext* parent_context = (rdfacontext*)
+ context_stack->items[context_stack->num_items - 1]->data;
+
+ /* append the text to the current context's XML literal */
+ size_t name_len = strlen(name);
+ char* buffer = (char*)malloc(name_len + 3 + 1);
+
+#if defined(DEBUG) && DEBUG > 0
+ printf("DEBUG: </%s>\n", name);
+ printf("context->local_list_mappings (start of end_element): ");
+ rdfa_print_mapping(context->local_list_mappings,
+ (print_mapping_value_fp)rdfa_print_triple_list);
+#endif
+
+ buffer[0] = '<';
+ buffer[1] = '/';
+ memcpy(buffer + 2, name, name_len);
+ buffer[name_len + 2] = '>';
+ buffer[name_len + 3] = '\0';
+ if(context->xml_literal == NULL)
+ {
+ context->xml_literal =
+ rdfa_replace_string(context->xml_literal, buffer);
+ context->xml_literal_size = strlen(buffer);
+ }
+ else
+ {
+ context->xml_literal = rdfa_n_append_string(
+ context->xml_literal, &context->xml_literal_size,
+ buffer, strlen(buffer));
+ }
+ free(buffer);
+
+ /* 9. The next step of the iteration is to establish any
+ * [current object literal]; */
+
+ /* generate the complete object literal triples */
+ if(context->property != NULL)
+ {
+ /* save the current xml literal */
+ char* saved_xml_literal = context->xml_literal;
+ char* content_start = NULL;
+ char* content_end = NULL;
+
+ /* ensure to mark only the inner-content of the XML node for
+ * processing the object literal. */
+ buffer = NULL;
+
+ if(context->xml_literal != NULL)
+ {
+ /* get the data between the first tag and the last tag */
+ content_start = strchr(context->xml_literal, '>');
+ content_end = strrchr(context->xml_literal, '<');
+
+ if((content_start != NULL) && (content_end != NULL))
+ {
+ /* set content end to null terminator */
+ context->xml_literal = ++content_start;
+ *content_end = '\0';
+ }
+ }
+
+ /* update the plain literal if the XML Literal is an empty string */
+ if(context->xml_literal != NULL && strlen(context->xml_literal) == 0)
+ {
+ context->plain_literal =
+ rdfa_replace_string(context->plain_literal, "");
+ }
+
+ /* process data between first tag and last tag
+ * this needs the xml literal to be null terminated */
+ if(context->rdfa_version == RDFA_VERSION_1_0)
+ {
+ rdfa_complete_object_literal_triples(context);
+ }
+ else
+ {
+ rdfa_complete_current_property_value_triples(context);
+ }
+
+ if(content_end != NULL)
+ {
+ /* set content end back */
+ *content_end = '<';
+ }
+
+ if(saved_xml_literal != NULL)
+ {
+ /* restore xml literal */
+ context->xml_literal = saved_xml_literal;
+ }
+ }
+
+ /*printf(context->plain_literal);*/
+
+ if(parent_context != NULL) {
+ /* append the XML literal and plain text literals to the parent
+ * literals */
+ if(context->xml_literal != NULL)
+ {
+ if(parent_context->xml_literal == NULL)
+ {
+ parent_context->xml_literal =
+ rdfa_replace_string(
+ parent_context->xml_literal, context->xml_literal);
+ parent_context->xml_literal_size = context->xml_literal_size;
+ }
+ else
+ {
+ parent_context->xml_literal =
+ rdfa_n_append_string(
+ parent_context->xml_literal,
+ &parent_context->xml_literal_size,
+ context->xml_literal, context->xml_literal_size);
+ }
+
+ /* if there is an XML literal, there is probably a plain literal */
+ if(context->plain_literal != NULL)
+ {
+ if(parent_context->plain_literal == NULL)
+ {
+ parent_context->plain_literal =
+ rdfa_replace_string(
+ parent_context->plain_literal, context->plain_literal);
+ parent_context->plain_literal_size =
+ context->plain_literal_size;
+ }
+ else
+ {
+ parent_context->plain_literal =
+ rdfa_n_append_string(
+ parent_context->plain_literal,
+ &parent_context->plain_literal_size,
+ context->plain_literal,
+ context->plain_literal_size);
+ }
+ }
+ }
+
+ /* preserve the bnode count by copying it to the parent_context */
+ parent_context->bnode_count = context->bnode_count;
+ parent_context->underscore_colon_bnode_name = \
+ rdfa_replace_string(parent_context->underscore_colon_bnode_name,
+ context->underscore_colon_bnode_name);
+ }
+
+ /* 10. If the [ skip element ] flag is 'false', and [ new subject ]
+ * was set to a non-null value, then any [ incomplete triple ]s
+ * within the current context should be completed: */
+ if((context->skip_element == 0) && (context->new_subject != NULL))
+ {
+ rdfa_complete_incomplete_triples(context);
+ }
+
+ /* 14. Once all the child elements have been traversed, list triples are
+ * generated, if necessary. */
+ if(context->rdfa_version == RDFA_VERSION_1_1 && (context->new_subject != NULL))
+ {
+ rdfa_complete_list_triples(context);
+
+ if(parent_context != NULL)
+ {
+ /* copy the current mapping to the parent mapping */
+ rdfa_free_mapping(parent_context->local_list_mappings,
+ (free_mapping_value_fp)rdfa_free_list);
+ parent_context->local_list_mappings = rdfa_copy_mapping(
+ (void**)context->local_list_mappings,
+ (copy_mapping_value_fp)rdfa_replace_list);
+
+#if defined(DEBUG) && DEBUG > 0
+ printf("parent_context->local_list_mappings (after copy): ");
+ rdfa_print_mapping(context->local_list_mappings,
+ (print_mapping_value_fp)rdfa_print_triple_list);
+#endif
+ rdfa_free_mapping(context->local_list_mappings,
+ (free_mapping_value_fp)rdfa_free_list);
+ context->local_list_mappings = NULL;
+ }
+ }
+
+ /* free the context */
+ rdfa_free_context(context);
+
+#if defined(DEBUG) && DEBUG > 0
+ printf("-------------------------------------------------------------\n");
+#endif
+}
+
+/**
+ * Stores the given triple handler in the context's
+ * default_graph_triple_callback slot.
+ *
+ * @param context the parsing context to update.
+ * @param th the handler function pointer to store.
+ */
+void rdfa_set_default_graph_triple_handler(rdfacontext* context,
+                                           triple_handler_fp th)
+{
+   context->default_graph_triple_callback = th;
+}
+
+/**
+ * Stores the given triple handler in the context's
+ * processor_graph_triple_callback slot.
+ *
+ * @param context the parsing context to update.
+ * @param th the handler function pointer to store.
+ */
+void rdfa_set_processor_graph_triple_handler(rdfacontext* context,
+                                             triple_handler_fp th)
+{
+   context->processor_graph_triple_callback = th;
+}
+
+/**
+ * Stores the given buffer filler in the context's buffer_filler_callback
+ * slot. rdfa_parse() invokes this callback to obtain input data.
+ *
+ * @param context the parsing context to update.
+ * @param bf the buffer filler function pointer to store.
+ */
+void rdfa_set_buffer_filler(
+   rdfacontext* context, buffer_filler_fp bf)
+{
+   context->buffer_filler_callback = bf;
+}
+
+#ifdef LIBRDFA_IN_RAPTOR
+/* Raptor reports its errors a different way */
+#else
+/**
+ * printf-style error callback; installed as the libxml2 SAX error handler
+ * by rdfa_parse_chunk(). Formats the message, replaces every newline with
+ * a period and emits the result as an RDFA_PROCESSOR_ERROR via
+ * rdfa_processor_triples().
+ *
+ * @param parser_context the rdfacontext, passed as an opaque pointer.
+ * @param msg the printf-style format string.
+ * @param ... the arguments consumed by the format string.
+ */
+static void rdfa_report_error(void* parser_context, char* msg, ...)
+{
+   char error[1024];
+   char* eptr;
+   va_list args;
+   rdfacontext* context = (rdfacontext*)parser_context;
+
+   /* format the error message; the write is bounded by the size of the
+    * buffer so that an over-long message is truncated instead of
+    * overflowing the stack */
+   error[0] = '\0';
+   va_start(args, msg);
+   if(vsnprintf(error, sizeof(error), msg, args) < 0)
+   {
+      /* formatting failed, report an empty message */
+      error[0] = '\0';
+   }
+   va_end(args);
+
+   /* Remove any newlines from the libxml2 error */
+   for(eptr = strchr(error, '\n'); eptr != NULL;
+       eptr = strchr(eptr + 1, '\n'))
+   {
+      *eptr = '.';
+   }
+
+   /* Generate the processor error */
+   rdfa_processor_triples(context, RDFA_PROCESSOR_ERROR, error);
+}
+#endif
+
+#ifdef LIBRDFA_IN_RAPTOR
+
+/**
+ * Raptor SAX2 start-element callback. Repackages the Raptor element name,
+ * namespace and attributes into the argument layout expected by
+ * start_element() and forwards the call.
+ *
+ * The arguments handed to start_element() are:
+ *  - the user data (parsing context)
+ *  - the local name of the element
+ *  - the element namespace prefix, if available
+ *  - the element namespace URI, if available
+ *  - the number of namespace definitions (always 0 here)
+ *  - the array of prefix/URI namespace pairs (always NULL here)
+ *  - the number of attributes on the element
+ *  - the number of defaulted attributes (always 0 here)
+ *  - the array of (localname/prefix/URI/value/end) attribute tuples
+ *
+ * @param user_data opaque pointer forwarded to start_element().
+ * @param xml_element the Raptor element that was opened.
+ */
+static void raptor_rdfa_start_element(void *user_data,
+                                      raptor_xml_element *xml_element)
+{
+   raptor_qname* element_qname = raptor_xml_element_get_name(xml_element);
+   int nb_attributes = raptor_xml_element_get_attributes_count(xml_element);
+   raptor_qname** element_attrs =
+      raptor_xml_element_get_attributes(xml_element);
+   unsigned char* localname =
+      raptor_qname_to_counted_name(element_qname, NULL);
+   const raptor_namespace* element_ns =
+      raptor_qname_get_namespace(element_qname);
+   int nb_namespaces = 0;
+   const char** namespaces = NULL;
+   int nb_defaulted = 0;
+   char** tuples = NULL;
+   const char* element_prefix = NULL;
+   const char* element_uri = NULL;
+
+   if(nb_attributes > 0)
+   {
+      int idx;
+      char** slot;
+
+      /* Five slots per attribute plus one terminating NULL entry. Every
+       * pointer stored below is shared with xml_element or the objects
+       * it contains (qnames, namespaces, uris and values), so only the
+       * array itself is owned by this function. */
+      tuples = (char**)malloc(sizeof(char*) * (1 + (nb_attributes * 5)));
+      slot = tuples;
+      for(idx = 0; idx < nb_attributes; idx++, slot += 5)
+      {
+         raptor_qname* attr_qname = element_attrs[idx];
+         const raptor_namespace* attr_ns = attr_qname->nspace;
+
+         /* 5 tuple: (localname, prefix, URI, value, end) */
+         slot[0] = (char*)attr_qname->local_name;
+         slot[1] = attr_ns ? (char*)attr_ns->prefix : NULL;
+         slot[2] = attr_ns ? (char*)raptor_uri_as_string(attr_ns->uri) : NULL;
+         slot[3] = (char*)attr_qname->value;
+         slot[4] = slot[3] + attr_qname->value_length;
+      }
+      /* slot now points just past the last tuple */
+      *slot = NULL;
+   }
+
+   if(element_ns)
+   {
+      element_prefix = (const char*)raptor_namespace_get_prefix(element_ns);
+      element_uri = (const char*)raptor_uri_as_string(element_ns->uri);
+   }
+
+   start_element(user_data, (const char*)localname,
+                 element_prefix, element_uri,
+                 nb_namespaces, (const char**)namespaces,
+                 nb_attributes, nb_defaulted,
+                 (const char**)tuples);
+
+   /* free(NULL) is a no-op, so no guard is required */
+   free(tuples);
+   raptor_free_memory(localname);
+}
+
+/**
+ * Raptor SAX2 end-element callback. Resolves the element's local name and,
+ * when the element has a namespace, its prefix and URI, then forwards them
+ * to end_element(). Without a namespace, NULL is passed for both.
+ *
+ * @param user_data opaque pointer forwarded to end_element().
+ * @param xml_element the Raptor element that was closed.
+ */
+static void raptor_rdfa_end_element(void *user_data,
+                                    raptor_xml_element* xml_element)
+{
+   raptor_qname* element_qname = raptor_xml_element_get_name(xml_element);
+   unsigned char* localname =
+      raptor_qname_to_counted_name(element_qname, NULL);
+   const raptor_namespace* element_ns =
+      raptor_qname_get_namespace(element_qname);
+   const char* prefix = NULL;
+   const xmlChar* uri = NULL;
+
+   if(element_ns)
+   {
+      prefix = (const char*)element_ns->prefix;
+      uri = (const xmlChar*)raptor_uri_as_string(element_ns->uri);
+   }
+
+   end_element(user_data, (const char*)localname, prefix, uri);
+
+   /* the counted name was allocated by raptor and must be released */
+   raptor_free_memory(localname);
+}
+
+/**
+ * Raptor SAX2 characters callback. Forwards the text run to
+ * character_data(); the element argument is not used.
+ *
+ * @param user_data opaque pointer forwarded to character_data().
+ * @param xml_element the element containing the text (unused).
+ * @param s the character data.
+ * @param len the length of the character data.
+ */
+static void raptor_rdfa_character_data(
+   void *user_data, raptor_xml_element* xml_element,
+   const unsigned char *s, int len)
+{
+   character_data(user_data, (const xmlChar *)s, len);
+}
+
+/**
+ * Raptor SAX2 namespace callback. Relays the namespace to the handler
+ * stored in the context, together with the handler's user data. Does
+ * nothing when no handler is set.
+ *
+ * @param user_data the rdfacontext, passed as an opaque pointer.
+ * @param nspace the namespace reported by Raptor.
+ */
+static void raptor_rdfa_namespace_handler(void *user_data,
+                                          raptor_namespace* nspace)
+{
+   rdfacontext* context = (rdfacontext*)user_data;
+
+   if(!context->namespace_handler)
+   {
+      return;
+   }
+
+   context->namespace_handler(context->namespace_handler_user_data, nspace);
+}
+
+#endif
+
+/**
+ * Prepares a context for chunked parsing: allocates the working buffer,
+ * creates the context stack with the given context as its first item,
+ * initializes the XML back end (Raptor SAX2 or libxml2) and initializes
+ * the context itself.
+ *
+ * @param context the parsing context to prepare.
+ *
+ * @return RDFA_PARSE_SUCCESS on success, or RDFA_PARSE_FAILED if the
+ *         working buffer could not be allocated. In the failure case
+ *         nothing else has been set up.
+ */
+int rdfa_parse_start(rdfacontext* context)
+{
+   /* create the working buffer */
+   context->wb_allocated = sizeof(char) * READ_BUFFER_SIZE;
+   /* +1 for NUL at end, to allow strstr() etc. to work
+    * malloc - only the first char needs to be NUL */
+   context->working_buffer = (char*)malloc(context->wb_allocated + 1);
+   if(context->working_buffer == NULL)
+   {
+      /* out of memory: report failure instead of writing through NULL */
+      context->wb_allocated = 0;
+      return RDFA_PARSE_FAILED;
+   }
+   *context->working_buffer = '\0';
+   context->done = 0;
+   context->context_stack = rdfa_create_list(32);
+
+   /* initialize the context stack */
+   rdfa_push_item(context->context_stack, context, RDFALIST_FLAG_CONTEXT);
+
+#ifdef LIBRDFA_IN_RAPTOR
+   context->sax2 = raptor_new_sax2(context->world, context->locator,
+                                   context);
+
+   /* route the SAX2 events to the librdfa handlers */
+   raptor_sax2_set_start_element_handler(context->sax2,
+                                         raptor_rdfa_start_element);
+   raptor_sax2_set_end_element_handler(context->sax2,
+                                       raptor_rdfa_end_element);
+   raptor_sax2_set_characters_handler(context->sax2,
+                                      raptor_rdfa_character_data);
+   raptor_sax2_set_namespace_handler(context->sax2,
+                                     raptor_rdfa_namespace_handler);
+#else
+   /* init libxml2 */
+   xmlInitParser();
+#endif
+
+   rdfa_init_context(context);
+
+#ifdef LIBRDFA_IN_RAPTOR
+   context->base_uri = raptor_new_uri(context->sax2->world,
+                                      (const unsigned char*)context->base);
+   raptor_sax2_parse_start(context->sax2, context->base_uri);
+#endif
+
+   return RDFA_PARSE_SUCCESS;
+}
+
+/**
+ * Feeds one chunk of document data to the parser.
+ *
+ * Until the pre-read phase is finished, incoming data is handed to
+ * rdfa_init_base(), which searches for the <base> element. Once a base
+ * has been found, or at least 131072 bytes have been pre-read, the XML
+ * parser is started on the working buffer. After that, every chunk is
+ * passed straight to the XML parser.
+ *
+ * @param context the parsing context.
+ * @param data the chunk of data to process.
+ * @param wblen the number of bytes in data.
+ * @param done non-zero if this is the final chunk.
+ *
+ * @return RDFA_PARSE_SUCCESS if the chunk was accepted, RDFA_PARSE_FAILED
+ *         if the context is already marked as done, the XML parser could
+ *         not be created, or the XML parser reported an error.
+ */
+int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done)
+{
+#ifndef LIBRDFA_IN_RAPTOR
+   xmlSAXHandler handler;
+   xmlParserCtxtPtr parser;
+#endif
+
+   /* it is an error to call this before rdfa_parse_start() */
+   if(context->done)
+   {
+      return RDFA_PARSE_FAILED;
+   }
+
+   if(!context->preread)
+   {
+      /* search for the <base> tag and use the href contained therein to
+       * set the parsing context. */
+      context->wb_preread = rdfa_init_base(context,
+         &context->working_buffer, &context->wb_allocated, data, wblen);
+
+      /* continue looking if in first 131072 bytes of data */
+      if(!context->base && context->wb_preread < (1<<17))
+         return RDFA_PARSE_SUCCESS;
+
+#ifdef LIBRDFA_IN_RAPTOR
+      /* term mappings are needed before SAX2 parsing */
+      rdfa_setup_initial_context(context);
+
+      if(raptor_sax2_parse_chunk(context->sax2,
+                                 (const unsigned char*)context->working_buffer,
+                                 context->wb_position, done))
+      {
+         return RDFA_PARSE_FAILED;
+      }
+#else
+      /* create the SAX2 handler structure */
+      memset(&handler, 0, sizeof(xmlSAXHandler));
+      handler.initialized = XML_SAX2_MAGIC;
+      handler.startElementNs = (startElementNsSAX2Func)start_element;
+      handler.endElementNs = (endElementNsSAX2Func)end_element;
+      handler.characters = (charactersSAXFunc)character_data;
+      handler.error = (errorSAXFunc)rdfa_report_error;
+
+      /* create a push-based parser */
+      parser = xmlCreatePushParserCtxt(
+         &handler, context, (const char*)context->working_buffer,
+         context->wb_position, NULL);
+      if(parser == NULL)
+      {
+         /* parser creation failed: report it now instead of storing a
+          * NULL parser and claiming success */
+         return RDFA_PARSE_FAILED;
+      }
+
+      /* ensure that entity substitution is turned on by default */
+      /* NOTE(review): this changes a libxml2 default after the parser
+       * above has already been created - confirm whether it is meant to
+       * apply to this parser as well. */
+      xmlSubstituteEntitiesDefault(1);
+
+      context->parser = parser;
+
+      rdfa_setup_initial_context(context);
+#endif
+
+      context->preread = 1;
+
+      return RDFA_PARSE_SUCCESS;
+   }
+
+   /* otherwise just parse the block passed in */
+#ifdef LIBRDFA_IN_RAPTOR
+   if(raptor_sax2_parse_chunk(context->sax2,
+                              (const unsigned char*)data, wblen, done))
+   {
+      return RDFA_PARSE_FAILED;
+   }
+#else
+   if(xmlParseChunk(context->parser, data, wblen, done))
+   {
+      return RDFA_PARSE_FAILED;
+   }
+#endif
+
+   return RDFA_PARSE_SUCCESS;
+}
+
+/**
+ * Releases the resources acquired for a parse run: the context stack and
+ * the XML back end (the Raptor SAX2 object and base URI, or the libxml2
+ * parser context). Pointers to released back-end objects are reset to
+ * NULL so that they are not left dangling in the context.
+ *
+ * @param context the parsing context whose parse run has ended.
+ */
+void rdfa_parse_end(rdfacontext* context)
+{
+   /* free context stack */
+   rdfa_free_context_stack(context);
+
+   /* Free the XML parser and the like */
+#ifdef LIBRDFA_IN_RAPTOR
+   if(context->base_uri)
+   {
+      raptor_free_uri(context->base_uri);
+      context->base_uri = NULL;
+   }
+   raptor_free_sax2(context->sax2);
+   context->sax2 = NULL;
+#else
+   /* free parser */
+   xmlFreeParserCtxt(context->parser);
+   context->parser = NULL;
+   xmlCleanupParser();
+#endif
+}
+
+/**
+ * Gives access to the context's working buffer.
+ *
+ * @param context the parsing context.
+ * @param blen output parameter; set to the context's wb_allocated value.
+ *
+ * @return the context's working buffer pointer.
+ */
+char* rdfa_get_buffer(rdfacontext* context, size_t* blen)
+{
+   char* buffer = context->working_buffer;
+
+   *blen = context->wb_allocated;
+
+   return buffer;
+}
+
+/**
+ * Parses the first 'bytes' bytes of the context's working buffer via
+ * rdfa_parse_chunk(). A byte count of zero is treated as the end of the
+ * input, and the context's done flag is updated accordingly.
+ *
+ * @param context the parsing context.
+ * @param bytes the number of bytes of the working buffer to parse.
+ *
+ * @return the value returned by rdfa_parse_chunk().
+ */
+int rdfa_parse_buffer(rdfacontext* context, size_t bytes)
+{
+   int finished = (bytes == 0);
+   int status = rdfa_parse_chunk(
+      context, context->working_buffer, bytes, finished);
+
+   context->done = finished;
+
+   return status;
+}
+
+/**
+ * Runs a complete parse: starts the parser, repeatedly asks the buffer
+ * filler callback for data and feeds it to rdfa_parse_chunk() until the
+ * callback returns zero bytes or a chunk fails, then ends the parse.
+ *
+ * @param context the parsing context; its buffer filler callback is
+ *                called to obtain input.
+ *
+ * @return the result of rdfa_parse_start() if it did not succeed,
+ *         otherwise the result of the last rdfa_parse_chunk() call.
+ */
+int rdfa_parse(rdfacontext* context)
+{
+   int status = rdfa_parse_start(context);
+
+   if(status != RDFA_PARSE_SUCCESS)
+   {
+      context->done = 1;
+      return status;
+   }
+
+   for(;;)
+   {
+      /* refill the working buffer; zero bytes signals the end of input */
+      size_t filled = context->buffer_filler_callback(
+         context->working_buffer, context->wb_allocated,
+         context->callback_data);
+      int finished = (filled == 0);
+
+      status = rdfa_parse_chunk(
+         context, context->working_buffer, filled, finished);
+      context->done = finished;
+
+      if(finished || status != RDFA_PARSE_SUCCESS)
+      {
+         break;
+      }
+   }
+
+   rdfa_parse_end(context);
+
+   return status;
+}