diff options
Diffstat (limited to 'src/raptor_abbrev.c')
-rw-r--r-- | src/raptor_abbrev.c | 642 |
1 files changed, 642 insertions, 0 deletions
diff --git a/src/raptor_abbrev.c b/src/raptor_abbrev.c new file mode 100644 index 0000000..d850e58 --- /dev/null +++ b/src/raptor_abbrev.c @@ -0,0 +1,642 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_abbrev.c - Code common to abbreviating serializers (ttl/rdfxmla) + * + * Copyright (C) 2006, Dave Robillard + * Copyright (C) 2004-2011, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * Copyright (C) 2005, Steve Shepard steveshep@gmail.com + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * raptor_abbrev_node implementation + * + */ + + +static raptor_abbrev_subject* raptor_new_abbrev_subject(raptor_abbrev_node* node); + +/** + * raptor_new_abbrev_node: + * @world: raptor world + * @term: term to use + * + * INTERNAL - Constructor for raptor_abbrev_node + * + * The @term is copied by the constructor. + * + * Return value: new raptor abbrev node or NULL on failure + **/ +raptor_abbrev_node* +raptor_new_abbrev_node(raptor_world* world, raptor_term *term) +{ + raptor_abbrev_node* node = NULL; + + if(term->type == RAPTOR_TERM_TYPE_UNKNOWN) + return NULL; + + node = RAPTOR_CALLOC(raptor_abbrev_node*, 1, sizeof(*node)); + if(node) { + node->world = world; + node->ref_count = 1; + node->term = raptor_term_copy(term); + } + + return node; +} + + +/** + * raptor_new_abbrev_node: + * @node: raptor abbrev node + * + * INTERNAL - Destructor for raptor_abbrev_node + */ +void +raptor_free_abbrev_node(raptor_abbrev_node* node) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(node, raptor_abbrev_node); + + if(--node->ref_count) + return; + + if(node->term) + raptor_free_term(node->term); + + RAPTOR_FREE(raptor_abbrev_node, node); +} + + +/** + * raptor_abbrev_node_compare: + * @node1: node 1 + * @node2: node 2 + * + * INTERNAL - compare two raptor_abbrev_nodes. + * + * This needs to be a strong ordering for use by raptor_avltree. + * This is very performance critical, anything to make it faster is worth it. + * + * Return value: <0, 0 or 1 if @node1 less than, equal or greater + * than @node2 respectively + */ +int +raptor_abbrev_node_compare(raptor_abbrev_node* node1, raptor_abbrev_node* node2) +{ + if(node1 == node2) + return 0; + + return raptor_term_compare(node1->term, node2->term); +} + + +/** + * raptor_abbrev_node_equals: + * @node1: node 1 + * @node2: node 2 + * + * INTERNAL - compare two raptor_abbrev_nodes for equality + * + * Return value: non-0 if nodes are equal + */ +int +raptor_abbrev_node_equals(raptor_abbrev_node* node1, raptor_abbrev_node* node2) +{ + return raptor_term_equals(node1->term, node2->term); +} + + +/** + * raptor_abbrev_node_lookup: + * @nodes: Tree of nodes to search + * @node: Node value to search for + * + * INTERNAL - Look in an avltree of nodes for a node described by parameters + * and if present create it, add it and return it + * + * Return value: the node found/created or NULL on failure + */ +raptor_abbrev_node* +raptor_abbrev_node_lookup(raptor_avltree* nodes, raptor_term* term) +{ + raptor_abbrev_node *lookup_node; + raptor_abbrev_node *rv_node; + + /* Create a temporary node for search comparison. */ + lookup_node = raptor_new_abbrev_node(term->world, term); + + if(!lookup_node) + return NULL; + + rv_node = (raptor_abbrev_node*)raptor_avltree_search(nodes, lookup_node); + + /* If not found, insert/return a new one */ + if(!rv_node) { + + if(raptor_avltree_add(nodes, lookup_node)) + return NULL; + else + return lookup_node; + + /* Found */ + } else { + raptor_free_abbrev_node(lookup_node); + return rv_node; + } +} + + +static raptor_abbrev_node** +raptor_new_abbrev_po(raptor_abbrev_node* predicate, raptor_abbrev_node* object) +{ + raptor_abbrev_node** nodes = NULL; + nodes = RAPTOR_CALLOC(raptor_abbrev_node**, 2, sizeof(raptor_abbrev_node*)); + if(!nodes) + return NULL; + + nodes[0] = predicate; + nodes[1] = object; + + return nodes; +} + + +static void +raptor_free_abbrev_po(raptor_abbrev_node** nodes) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(nodes, raptor_abbrev_node_pair); + + if(nodes[0]) + raptor_free_abbrev_node(nodes[0]); + if(nodes[1]) + raptor_free_abbrev_node(nodes[1]); + + RAPTOR_FREE(raptor_abbrev_nodes, nodes); +} + + +static int +raptor_compare_abbrev_po(raptor_abbrev_node** nodes1, + raptor_abbrev_node** nodes2) +{ + int d; + d = raptor_abbrev_node_compare(nodes1[0], nodes2[0]); + if(!d) + d = raptor_abbrev_node_compare(nodes1[1], nodes2[1]); + + return d; +} + + +#ifdef RAPTOR_DEBUG +static int +raptor_print_abbrev_po(void** object, FILE* handle) +{ + raptor_abbrev_node** nodes = (raptor_abbrev_node**)object; + raptor_abbrev_node* p = nodes[0]; + raptor_abbrev_node* o = nodes[1]; + + if(p && o) { + fputc('[', handle); + raptor_term_print_as_ntriples(p->term, handle); + fputs(" : ", handle); + raptor_term_print_as_ntriples(o->term, handle); + fputs("]\n", handle); + } + return 0; +} +#endif + + +/* + * raptor_abbrev_subject implementation + * + * The subject of triples, with all predicates and values + * linked from them. + * + **/ + + +static raptor_abbrev_subject* +raptor_new_abbrev_subject(raptor_abbrev_node* node) +{ + raptor_abbrev_subject* subject; + + if(!(node->term->type == RAPTOR_TERM_TYPE_URI || + node->term->type == RAPTOR_TERM_TYPE_BLANK)) { + raptor_log_error(node->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Subject node is type %d not a uri or blank node"); + return NULL; + } + + subject = RAPTOR_CALLOC(raptor_abbrev_subject*, 1, sizeof(*subject)); + + if(subject) { + subject->node = node; + subject->node->ref_count++; + subject->node->count_as_subject++; + + subject->node_type = NULL; + + subject->valid = 1; + + subject->properties = + raptor_new_avltree((raptor_data_compare_handler)raptor_compare_abbrev_po, + (raptor_data_free_handler)raptor_free_abbrev_po, + 0); +#ifdef RAPTOR_DEBUG + if(subject->properties) + raptor_avltree_set_print_handler(subject->properties, + (raptor_data_print_handler)raptor_print_abbrev_po); +#endif + + subject->list_items = + raptor_new_sequence((raptor_data_free_handler)raptor_free_abbrev_node, NULL); + + if(!subject->properties || !subject->list_items) { + raptor_free_abbrev_subject(subject); + subject = NULL; + } + + } + + return subject; +} + + +void +raptor_free_abbrev_subject(raptor_abbrev_subject* subject) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(subject, raptor_abbrev_subject); + + if(subject->node) + raptor_free_abbrev_node(subject->node); + + if(subject->node_type) + raptor_free_abbrev_node(subject->node_type); + + if(subject->properties) + raptor_free_avltree(subject->properties); + + if(subject->list_items) + raptor_free_sequence(subject->list_items); + + RAPTOR_FREE(raptor_subject, subject); +} + + +int +raptor_abbrev_subject_valid(raptor_abbrev_subject *subject) +{ + return subject->valid; +} + + +int +raptor_abbrev_subject_invalidate(raptor_abbrev_subject *subject) +{ + subject->valid = 0; + return 0; +} + + + + +/** + * raptor_subject_add_property: + * @subject: subject node to add to + * @predicate: predicate node + * @object: object node + * + * INTERNAL - Add predicate/object pair into properties array of a subject node. + * + * The subject node takes ownership of the predicate/object nodes. + * On error, predicate/object are freed immediately. + * + * Return value: <0 on failure, >0 if pair is a duplicate and it was not added + **/ +int +raptor_abbrev_subject_add_property(raptor_abbrev_subject* subject, + raptor_abbrev_node* predicate, + raptor_abbrev_node* object) +{ + int err; + raptor_abbrev_node** nodes; + + nodes = raptor_new_abbrev_po(predicate, object); + if(!nodes) + return -1; + + predicate->ref_count++; + object->ref_count++; + + if(raptor_avltree_search(subject->properties, nodes)) { + /* Already present - do not add a duplicate triple (s->[p o]) */ + raptor_free_abbrev_po(nodes); + return 1; + } + +#if 0 + fprintf(stderr, "Adding P,O "); + raptor_print_abbrev_po(stderr, nodes); + + raptor_avltree_dump(subject->properties, stderr); +#endif + err = raptor_avltree_add(subject->properties, nodes); + if(err) + return -1; +#if 0 + fprintf(stderr, "Result "); + raptor_avltree_print(subject->properties, stderr); + + raptor_avltree_dump(subject->properties, stderr); + + raptor_avltree_check(subject->properties); + + fprintf(stderr, "\n\n"); +#endif + + return 0; +} + + +int +raptor_abbrev_subject_compare(raptor_abbrev_subject* subject1, + raptor_abbrev_subject* subject2) +{ + return raptor_abbrev_node_compare(subject1->node, subject2->node); +} + + +/** + * raptor_abbrev_subject_find: + * @subjects: AVL-Tree of subject nodes + * @term: node to find + * + * INTERNAL - Find a subject node in an AVL-Tree of subject nodes + * + * Return value: node or NULL if not found or failure + */ +raptor_abbrev_subject* +raptor_abbrev_subject_find(raptor_avltree *subjects, raptor_term* node) +{ + raptor_abbrev_subject* rv_subject = NULL; + raptor_abbrev_node* lookup_node = NULL; + raptor_abbrev_subject* lookup = NULL; + + /* datatype and language are both NULL for a subject node */ + + lookup_node = raptor_new_abbrev_node(node->world, node); + if(!lookup_node) + return NULL; + + lookup = raptor_new_abbrev_subject(lookup_node); + if(!lookup) { + raptor_free_abbrev_node(lookup_node); + return NULL; + } + + rv_subject = (raptor_abbrev_subject*) raptor_avltree_search(subjects, lookup); + + raptor_free_abbrev_subject(lookup); + raptor_free_abbrev_node(lookup_node); + + return rv_subject; +} + + +/** + * raptor_abbrev_subject_lookup: + * @nodes: AVL-Tree of subject nodes + * @subjects: AVL-Tree of URI-subject nodes + * @blanks: AVL-Tree of blank-subject nodes + * @term: node to find + * + * INTERNAL - Find a subject node in the appropriate uri/blank AVL-Tree of subject nodes or add it + * + * Return value: node or NULL on failure + */ +raptor_abbrev_subject* +raptor_abbrev_subject_lookup(raptor_avltree* nodes, + raptor_avltree* subjects, raptor_avltree* blanks, + raptor_term* term) +{ + raptor_avltree *tree; + raptor_abbrev_subject* rv_subject; + + /* Search for specified resource. */ + tree = (term->type == RAPTOR_TERM_TYPE_BLANK) ? blanks : subjects; + rv_subject = raptor_abbrev_subject_find(tree, term); + + /* If not found, create one and insert it */ + if(!rv_subject) { + raptor_abbrev_node* node = raptor_abbrev_node_lookup(nodes, term); + if(node) { + rv_subject = raptor_new_abbrev_subject(node); + if(rv_subject) { + if(raptor_avltree_add(tree, rv_subject)) { + rv_subject = NULL; + } + } + } + } + + return rv_subject; +} + + +#ifdef ABBREV_DEBUG +void +raptor_print_subject(raptor_abbrev_subject* subject) +{ + int i; + unsigned char *subj; + unsigned char *pred; + unsigned char *obj; + raptor_avltree_iterator* iter = NULL; + + /* Note: The raptor_abbrev_node field passed as the first argument for + * raptor_term_to_string() is somewhat arbitrary, since as + * the data structure is designed, the first word in the value union + * is what was passed as the subject/predicate/object of the + * statement. + */ + subj = raptor_term_to_string(subject); + + if(subject->type) { + obj = raptor_term_to_string(subject); + fprintf(stderr,"[%s, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, %s]\n", subj, obj); + RAPTOR_FREE(char*, obj); + } + + for(i = 0; i < raptor_sequence_size(subject->elements); i++) { + + raptor_abbrev_node* o = raptor_sequence_get_at(subject->elements, i); + if(o) { + obj = raptor_term_to_string(o); + fprintf(stderr,"[%s, [rdf:_%d], %s]\n", subj, i, obj); + RAPTOR_FREE(char*, obj); + } + + } + + + iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1); + while(iter) { + raptor_abbrev_node** nodes; + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + raptor_print_abbrev_po(stderr, nodes); + + if(raptor_avltree_iterator_next(iter)) + break; + } + if(iter) + raptor_free_avltree_iterator(iter); + + RAPTOR_FREE(char*, subj); + +} +#endif + + +/* helper functions */ + +/** + * raptor_new_qname_from_resource: + * @namespaces: sequence of namespaces (corresponding to nstack) + * @nstack: #raptor_namespace_stack to use/update + * @namespace_count: size of nstack (may be modified) + * @node: #raptor_abbrev_node to use + * + * INTERNAL - Make an XML QName from the URI associated with the node. + * + * Return value: the QName or NULL on failure + **/ +raptor_qname* +raptor_new_qname_from_resource(raptor_sequence* namespaces, + raptor_namespace_stack* nstack, + int* namespace_count, + raptor_abbrev_node* node) +{ + unsigned char* name = NULL; /* where to split predicate name */ + size_t name_len = 1; + unsigned char *uri_string; + size_t uri_len; + unsigned char *p; + raptor_uri *ns_uri; + raptor_namespace *ns; + raptor_qname *qname; + unsigned char *ns_uri_string; + size_t ns_uri_string_len; + + if(node->term->type != RAPTOR_TERM_TYPE_URI) { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL1("Node must be a URI\n"); +#else + return NULL; +#endif + } + + qname = raptor_new_qname_from_namespace_uri(nstack, node->term->value.uri, 10); + if(qname) + return qname; + + uri_string = raptor_uri_as_counted_string(node->term->value.uri, &uri_len); + + p= uri_string; + name_len = uri_len; + while(name_len >0) { + if(raptor_xml_name_check(p, name_len, 10)) { + name = p; + break; + } + p++; name_len--; + } + + if(!name || (name == uri_string)) + return NULL; + + ns_uri_string_len = uri_len - name_len; + ns_uri_string = RAPTOR_MALLOC(unsigned char*, ns_uri_string_len + 1); + if(!ns_uri_string) + return NULL; + memcpy(ns_uri_string, (const char*)uri_string, ns_uri_string_len); + ns_uri_string[ns_uri_string_len] = '\0'; + + ns_uri = raptor_new_uri_from_counted_string(node->world, ns_uri_string, + ns_uri_string_len); + RAPTOR_FREE(char*, ns_uri_string); + + if(!ns_uri) + return NULL; + + ns = raptor_namespaces_find_namespace_by_uri(nstack, ns_uri); + if(!ns) { + /* The namespace was not declared, so create one */ + unsigned char prefix[2 + MAX_ASCII_INT_SIZE + 1]; + (*namespace_count)++; + prefix[0] = 'n'; + prefix[1] = 's'; + (void)raptor_format_integer(RAPTOR_GOOD_CAST(char*,&prefix[2]), + MAX_ASCII_INT_SIZE + 1, *namespace_count, + /* base */ 10, -1, '\0'); + + ns = raptor_new_namespace_from_uri(nstack, prefix, ns_uri, 0); + + /* We'll most likely need this namespace again. Push it on our + * stack. It will be deleted in raptor_rdfxmla_serialize_terminate() + */ + if(raptor_sequence_push(namespaces, ns)) { + /* namespaces sequence has no free handler so we have to free + * the ns ourselves on error + */ + raptor_free_namespace(ns); + raptor_free_uri(ns_uri); + return NULL; + } + } + + qname = raptor_new_qname_from_namespace_local_name(node->world, ns, name, + NULL); + + raptor_free_uri(ns_uri); + + return qname; +} |