diff options
Diffstat (limited to 'librdfa/context.c')
-rw-r--r-- | librdfa/context.c | 591 |
1 files changed, 591 insertions, 0 deletions
diff --git a/librdfa/context.c b/librdfa/context.c new file mode 100644 index 0000000..fca5b07 --- /dev/null +++ b/librdfa/context.c @@ -0,0 +1,591 @@ +/** + * Copyright 2008-2012 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * The librdfa library is the Fastest RDFa Parser in the Universe. It is + * a stream parser, meaning that it takes an XML data as input and spits + * out RDF triples as it comes across them in the stream. Due to this + * processing approach, librdfa has a very, very small memory footprint. + * It is also very fast and can operate on hundreds of gigabytes of XML + * data without breaking a sweat. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <string.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +rdfacontext* rdfa_create_context(const char* base) +{ + rdfacontext* rval = NULL; + size_t base_length = strlen(base); + + /* if the base isn't specified, don't create a context */ + if(base_length > 0) + { + char* cleaned_base; + + /* malloc and init whole context to NULL */ + rval = (rdfacontext*)malloc(sizeof(rdfacontext)); + if(!rval) + return NULL; + + memset(rval, 0, sizeof(rdfacontext)); + + /* clean and initialize base */ + cleaned_base = rdfa_iri_get_base(base); + rval->base = rdfa_replace_string(rval->base, cleaned_base); + free(cleaned_base); + } + else + { +#ifdef LIBRDFA_IN_RAPTOR +#else + printf("librdfa error: Failed to create a parsing context, " + "base IRI was not specified!\n"); +#endif + } + + return rval; +} + +void rdfa_init_context(rdfacontext* context) +{ + /* assume the RDFa processing rules are RDFa 1.1 unless otherwise specified */ + context->rdfa_version = RDFA_VERSION_1_1; + + /* assume the default host language is XML1 */ + context->host_language = HOST_LANGUAGE_XML1; + + /* the [parent subject] is set to the [base] value; */ + context->parent_subject = NULL; + if(context->base != NULL) + { + char* cleaned_base = rdfa_iri_get_base(context->base); + context->parent_subject = + rdfa_replace_string(context->parent_subject, cleaned_base); + free(cleaned_base); + } + + /* the [parent object] is set to null; */ + context->parent_object = NULL; + +#ifdef LIBRDFA_IN_RAPTOR +#else + /* the [list of URI mappings] is cleared; */ + context->uri_mappings = rdfa_create_mapping(MAX_URI_MAPPINGS); +#endif + + /* the [list of incomplete triples] is cleared; */ + context->incomplete_triples = rdfa_create_list(3); + + /* the [language] is set to null. */ + context->language = NULL; + + /* set the [current object resource] to null; */ + context->current_object_resource = NULL; + + /* the list of term mappings is set to null + * (or a list defined in the initial context of the Host Language). */ + context->term_mappings = rdfa_create_mapping(MAX_TERM_MAPPINGS); + + /* the maximum number of list mappings */ + context->list_mappings = rdfa_create_mapping(MAX_LIST_MAPPINGS); + + /* the maximum number of local list mappings */ + context->local_list_mappings = + rdfa_create_mapping(MAX_LOCAL_LIST_MAPPINGS); + + /* the default vocabulary is set to null + * (or a IRI defined in the initial context of the Host Language). */ + context->default_vocabulary = NULL; + + /* whether or not the @inlist attribute is present on the current element */ + context->inlist_present = 0; + + /* whether or not the @rel attribute is present on the current element */ + context->rel_present = 0; + + /* whether or not the @rev attribute is present on the current element */ + context->rev_present = 0; + + /* 1. First, the local values are initialized, as follows: + * + * * the [recurse] flag is set to 'true'; */ + context->recurse = 1; + + /* * the [skip element] flag is set to 'false'; */ + context->skip_element = 0; + + /* * [new subject] is set to null; */ + context->new_subject = NULL; + + /* * [current object resource] is set to null; */ + context->current_object_resource = NULL; + + /* * the [local list of URI mappings] is set to the list of URI + * mappings from the [evaluation context]; + * NOTE: This step is done in rdfa_create_new_element_context() */ + + /* FIXME: Initialize the term mappings and URI mappings based on Host Language */ + + /* * the [local list of incomplete triples] is set to null; */ + context->local_incomplete_triples = rdfa_create_list(3); + + /* * the [current language] value is set to the [language] value + * from the [evaluation context]. + * NOTE: This step is done in rdfa_create_new_element_context() */ +} + +#ifdef LIBRDFA_IN_RAPTOR +#define DECLARE_URI_MAPPING(context, prefix, value) \ +do { \ + raptor_namespace_stack* nstack = &context->sax2->namespaces; \ + raptor_namespace* ns = raptor_new_namespace(nstack, \ + (const unsigned char *)prefix, (const unsigned char*)value, 0); \ + raptor_namespaces_start_namespace(nstack, ns); \ + } while(0) +#else +#define DECLARE_URI_MAPPING(context, prefix, value) \ + rdfa_update_mapping(context->uri_mappings, prefix, value, \ + (update_mapping_value_fp)rdfa_replace_string) +#endif + +void rdfa_setup_initial_context(rdfacontext* context) +{ +#ifdef LIBRDFA_IN_RAPTOR +#else + char* key = NULL; + void* value = NULL; + void** mptr = context->uri_mappings; +#endif + + /* Setup the base RDFa 1.1 prefix and term mappings */ + if(context->rdfa_version == RDFA_VERSION_1_1) + { + /* Setup the base RDFa 1.1 prefix mappings */ + DECLARE_URI_MAPPING(context, + "grddl", "http://www.w3.org/2003/g/data-view#"); + DECLARE_URI_MAPPING(context, + "ma", "http://www.w3.org/ns/ma-ont#"); + DECLARE_URI_MAPPING(context, + "owl", "http://www.w3.org/2002/07/owl#"); + DECLARE_URI_MAPPING(context, + "rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + DECLARE_URI_MAPPING(context, + "rdfa", "http://www.w3.org/ns/rdfa#"); + DECLARE_URI_MAPPING(context, + "rdfs", "http://www.w3.org/2000/01/rdf-schema#"); + DECLARE_URI_MAPPING(context, + "rif", "http://www.w3.org/2007/rif#"); + DECLARE_URI_MAPPING(context, + "skos", "http://www.w3.org/2004/02/skos/core#"); + DECLARE_URI_MAPPING(context, + "skosxl", "http://www.w3.org/2008/05/skos-xl#"); + DECLARE_URI_MAPPING(context, + "wdr", "http://www.w3.org/2007/05/powder#"); + DECLARE_URI_MAPPING(context, + "void", "http://rdfs.org/ns/void#"); + DECLARE_URI_MAPPING(context, + "wdrs", "http://www.w3.org/2007/05/powder-s#"); + DECLARE_URI_MAPPING(context, + "xhv", "http://www.w3.org/1999/xhtml/vocab#"); + DECLARE_URI_MAPPING(context, + "xml", "http://www.w3.org/XML/1998/namespace"); + DECLARE_URI_MAPPING(context, + "xsd", "http://www.w3.org/2001/XMLSchema#"); + DECLARE_URI_MAPPING(context, + "cc", "http://creativecommons.org/ns#"); + DECLARE_URI_MAPPING(context, + "ctag", "http://commontag.org/ns#"); + DECLARE_URI_MAPPING(context, + "dc", "http://purl.org/dc/terms/"); + DECLARE_URI_MAPPING(context, + "dcterms", "http://purl.org/dc/terms/"); + DECLARE_URI_MAPPING(context, + "foaf", "http://xmlns.com/foaf/0.1/"); + DECLARE_URI_MAPPING(context, + "gr", "http://purl.org/goodrelations/v1#"); + DECLARE_URI_MAPPING(context, + "ical", "http://www.w3.org/2002/12/cal/icaltzd#"); + DECLARE_URI_MAPPING(context, + "og", "http://ogp.me/ns#"); + DECLARE_URI_MAPPING(context, + "rev", "http://purl.org/stuff/rev#"); + DECLARE_URI_MAPPING(context, + "sioc", "http://rdfs.org/sioc/ns#"); + DECLARE_URI_MAPPING(context, + "v", "http://rdf.data-vocabulary.org/#"); + DECLARE_URI_MAPPING(context, + "vcard", "http://www.w3.org/2006/vcard/ns#"); + DECLARE_URI_MAPPING(context, + "schema", "http://schema.org/"); + + /* Setup the base RDFa 1.1 term mappings */ + rdfa_update_mapping(context->term_mappings, + "describedby", "http://www.w3.org/2007/05/powder-s#describedby", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "license", "http://www.w3.org/1999/xhtml/vocab#license", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "role", "http://www.w3.org/1999/xhtml/vocab#role", + (update_mapping_value_fp)rdfa_replace_string); + } + + /* Setup the term mappings for XHTML1 */ + if(context->host_language == HOST_LANGUAGE_XHTML1) + { + rdfa_update_mapping(context->term_mappings, + "alternate", "http://www.w3.org/1999/xhtml/vocab#alternate", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "appendix", "http://www.w3.org/1999/xhtml/vocab#appendix", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "cite", "http://www.w3.org/1999/xhtml/vocab#cite", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "bookmark", "http://www.w3.org/1999/xhtml/vocab#bookmark", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "contents", "http://www.w3.org/1999/xhtml/vocab#contents", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "chapter", "http://www.w3.org/1999/xhtml/vocab#chapter", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "copyright", "http://www.w3.org/1999/xhtml/vocab#copyright", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "first", "http://www.w3.org/1999/xhtml/vocab#first", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "glossary", "http://www.w3.org/1999/xhtml/vocab#glossary", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "help", "http://www.w3.org/1999/xhtml/vocab#help", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "icon", "http://www.w3.org/1999/xhtml/vocab#icon", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "index", "http://www.w3.org/1999/xhtml/vocab#index", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "last", "http://www.w3.org/1999/xhtml/vocab#last", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "license", "http://www.w3.org/1999/xhtml/vocab#license", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "meta", "http://www.w3.org/1999/xhtml/vocab#meta", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "next", "http://www.w3.org/1999/xhtml/vocab#next", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "prev", "http://www.w3.org/1999/xhtml/vocab#prev", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "previous", "http://www.w3.org/1999/xhtml/vocab#previous", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "section", "http://www.w3.org/1999/xhtml/vocab#section", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "start", "http://www.w3.org/1999/xhtml/vocab#start", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "stylesheet", "http://www.w3.org/1999/xhtml/vocab#stylesheet", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "subsection", "http://www.w3.org/1999/xhtml/vocab#subsection", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "top", "http://www.w3.org/1999/xhtml/vocab#top", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "up", "http://www.w3.org/1999/xhtml/vocab#up", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "p3pv1", "http://www.w3.org/1999/xhtml/vocab#p3pv1", + (update_mapping_value_fp)rdfa_replace_string); + + /* From the role attribute module */ + rdfa_update_mapping(context->term_mappings, + "role", "http://www.w3.org/1999/xhtml/vocab#role", + (update_mapping_value_fp)rdfa_replace_string); + } + + /* Setup the prefix and term mappings for HTML4 and HTML5 */ + if(context->host_language == HOST_LANGUAGE_HTML) + { + /* No term or prefix mappings as of 2012-04-04 */ + } + +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor does this elsewhere */ +#else + /* Generate namespace triples for all values in the uri_mapping */ + while(*mptr != NULL) + { + rdfa_next_mapping(mptr++, &key, &value); + mptr++; + rdfa_generate_namespace_triple(context, key, value); + } +#endif +} + +/** + * Creates a new context for the current element by cloning certain + * parts of the old context on the top of the given stack. + * + * @param context_stack the context stack that is associated with this + * processing run. + */ +rdfacontext* rdfa_create_new_element_context(rdfalist* context_stack) +{ + rdfacontext* parent_context = (rdfacontext*) + context_stack->items[context_stack->num_items - 1]->data; + rdfacontext* rval = rdfa_create_context(parent_context->base); + + if(!rval) + return NULL; + + /* * Otherwise, the values are: */ + + /* * the [ base ] is set to the [ base ] value of the current + * [ evaluation context ]; */ + rval->base = rdfa_replace_string(rval->base, parent_context->base); + rdfa_init_context(rval); + + /* Set the processing depth as parent + 1 */ + rval->depth = parent_context->depth + 1; + + /* copy the URI mappings */ +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor does this automatically for URIs */ +#else + rdfa_free_mapping(rval->uri_mappings, (free_mapping_value_fp)free); +#endif + rdfa_free_mapping(rval->term_mappings, (free_mapping_value_fp)free); + rdfa_free_mapping(rval->list_mappings, (free_mapping_value_fp)rdfa_free_list); + rdfa_free_mapping(rval->local_list_mappings, (free_mapping_value_fp)rdfa_free_list); +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor does this automatically for URIs */ +#else + rval->uri_mappings = + rdfa_copy_mapping((void**)parent_context->uri_mappings, + (copy_mapping_value_fp)rdfa_replace_string); +#endif + rval->term_mappings = + rdfa_copy_mapping((void**)parent_context->term_mappings, + (copy_mapping_value_fp)rdfa_replace_string); + rval->list_mappings = + rdfa_copy_mapping((void**)parent_context->local_list_mappings, + (copy_mapping_value_fp)rdfa_replace_list); + rval->local_list_mappings = + rdfa_copy_mapping((void**)parent_context->local_list_mappings, + (copy_mapping_value_fp)rdfa_replace_list); + + /* inherit the parent context's host language and RDFa processor mode */ + rval->host_language = parent_context->host_language; + rval->rdfa_version = parent_context->rdfa_version; + + /* inherit the parent context's language */ + if(parent_context->language != NULL) + { + rval->language = + rdfa_replace_string(rval->language, parent_context->language); + } + + /* inherit the parent context's default vocabulary */ + if(parent_context->default_vocabulary != NULL) + { + rval->default_vocabulary = rdfa_replace_string( + rval->default_vocabulary, parent_context->default_vocabulary); + } + + /* set the callbacks callback */ + rval->default_graph_triple_callback = + parent_context->default_graph_triple_callback; + rval->processor_graph_triple_callback = + parent_context->processor_graph_triple_callback; + rval->buffer_filler_callback = parent_context->buffer_filler_callback; + + /* inherit the bnode count, _: bnode name, recurse flag, and state + * of the xml_literal_namespace_insertion */ + rval->bnode_count = parent_context->bnode_count; + rval->underscore_colon_bnode_name = + rdfa_replace_string(rval->underscore_colon_bnode_name, + parent_context->underscore_colon_bnode_name); + rval->recurse = parent_context->recurse; + rval->skip_element = 0; + rval->callback_data = parent_context->callback_data; + rval->xml_literal_namespaces_defined = + parent_context->xml_literal_namespaces_defined; + rval->xml_literal_xml_lang_defined = + parent_context->xml_literal_xml_lang_defined; + +#if 0 + /* inherit the parent context's new_subject + * TODO: This is not anywhere in the syntax processing document */ + if(parent_context->new_subject != NULL) + { + rval->new_subject = rdfa_replace_string( + rval->new_subject, parent_context->new_subject); + } +#endif + + if(parent_context->skip_element == 0) + { + /* o the [ parent subject ] is set to the value of [ new subject ], + * if non-null, or the value of the [ parent subject ] of the + * current [ evaluation context ]; */ + if(parent_context->new_subject != NULL) + { + rval->parent_subject = rdfa_replace_string( + rval->parent_subject, parent_context->new_subject); + } + else + { + rval->parent_subject = rdfa_replace_string( + rval->parent_subject, parent_context->parent_subject); + } + + /* o the [ parent object ] is set to value of [ current object + * resource ], if non-null, or the value of [ new subject ], if + * non-null, or the value of the [ parent subject ] of the + * current [ evaluation context ]; */ + if(parent_context->current_object_resource != NULL) + { + rval->parent_object = + rdfa_replace_string( + rval->parent_object, parent_context->current_object_resource); + } + else if(parent_context->new_subject != NULL) + { + rval->parent_object = + rdfa_replace_string( + rval->parent_object, parent_context->new_subject); + } + else + { + rval->parent_object = + rdfa_replace_string( + rval->parent_object, parent_context->parent_subject); + } + + /* o the [ list of incomplete triples ] is set to the [ local list + * of incomplete triples ]; */ + rval->incomplete_triples = rdfa_replace_list( + rval->incomplete_triples, parent_context->local_incomplete_triples); + } + else + { + rval->parent_subject = rdfa_replace_string( + rval->parent_subject, parent_context->parent_subject); + rval->parent_object = rdfa_replace_string( + rval->parent_object, parent_context->parent_object); + + /* copy the incomplete triples */ + rval->incomplete_triples = rdfa_replace_list( + rval->incomplete_triples, parent_context->incomplete_triples); + + /* copy the local list of incomplete triples */ + rval->local_incomplete_triples = rdfa_replace_list( + rval->local_incomplete_triples, + parent_context->local_incomplete_triples); + } + +#ifdef LIBRDFA_IN_RAPTOR + rval->base_uri = parent_context->base_uri; + rval->sax2 = parent_context->sax2; + rval->namespace_handler = parent_context->namespace_handler; + rval->namespace_handler_user_data = parent_context->namespace_handler_user_data; +#endif + + return rval; +} + +void rdfa_free_context_stack(rdfacontext* context) +{ + /* this field is not NULL only on the rdfacontext* at the top of the stack */ + if(context->context_stack != NULL) + { + void* rval; + /* free the stack ensuring that we do not delete this context if + * it is in the list (which it may be, if parsing ended on error) */ + do + { + rval = rdfa_pop_item(context->context_stack); + if(rval && rval != context) + { + rdfa_free_context((rdfacontext*)rval); + } + } + while(rval); + free(context->context_stack->items); + free(context->context_stack); + context->context_stack = NULL; + } +} + +void rdfa_free_context(rdfacontext* context) +{ + free(context->base); + free(context->default_vocabulary); + free(context->parent_subject); + free(context->parent_object); + +#ifdef LIBRDFA_IN_RAPTOR +#else + rdfa_free_mapping(context->uri_mappings, (free_mapping_value_fp)free); +#endif + + rdfa_free_mapping(context->term_mappings, (free_mapping_value_fp)free); + rdfa_free_list(context->incomplete_triples); + rdfa_free_mapping(context->list_mappings, + (free_mapping_value_fp)rdfa_free_list); + rdfa_free_mapping(context->local_list_mappings, + (free_mapping_value_fp)rdfa_free_list); + free(context->language); + free(context->underscore_colon_bnode_name); + free(context->new_subject); + free(context->current_object_resource); + free(context->about); + free(context->typed_resource); + free(context->resource); + free(context->href); + free(context->src); + free(context->content); + free(context->datatype); + rdfa_free_list(context->property); + free(context->plain_literal); + free(context->xml_literal); + + /* TODO: These should be moved into their own data structure */ + rdfa_free_list(context->local_incomplete_triples); + + rdfa_free_context_stack(context); + free(context->working_buffer); + free(context); +} |