summaryrefslogtreecommitdiffstats
path: root/utils/rdfdiff.c
diff options
context:
space:
mode:
Diffstat (limited to 'utils/rdfdiff.c')
-rw-r--r--utils/rdfdiff.c1069
1 files changed, 1069 insertions, 0 deletions
diff --git a/utils/rdfdiff.c b/utils/rdfdiff.c
new file mode 100644
index 0000000..fa604e5
--- /dev/null
+++ b/utils/rdfdiff.c
@@ -0,0 +1,1069 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * rdfdiff.c - Raptor RDF diff tool
+ *
+ * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
+ * Copyright (C) 2005, Steve Shepard steveshep@gmail.com
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+
+/* Raptor includes */
+#include <raptor2.h>
+#include <raptor_internal.h>
+
+/* for access() and R_OK */
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef WIN32
+#include <io.h>
+#endif
+
+/* many places for getopt */
+#ifdef HAVE_GETOPT_H
+#include <getopt.h>
+#else
+#include <raptor_getopt.h>
+#endif
+#ifdef HAVE_UNISTD_H
+#include <unistd.h>
+#endif
+
+#if 0
+#undef RAPTOR_DEBUG
+#define RAPTOR_DEBUG 2
+#endif
+
+#ifndef RAPTOR_INTERNAL
+#define RAPTOR_MALLOC(type, size) (type)malloc(size)
+#define RAPTOR_CALLOC(type, nmemb, size) (type)calloc(nmemb, size)
+#define RAPTOR_FREE(type, ptr) free((void*)ptr)
+#endif
+
+#ifdef NEED_OPTIND_DECLARATION
+extern int optind;
+extern char *optarg;
+#endif
+
+#define MAX_ASCII_INT_SIZE 13
+#define RDF_NAMESPACE_URI_LEN 43
+#define ORDINAL_STRING_LEN (RDF_NAMESPACE_URI_LEN + MAX_ASCII_INT_SIZE + 1)
+
+#define GETOPT_STRING "bhf:t:u:"
+
+#ifdef HAVE_GETOPT_LONG
+static const struct option long_options[] =
+{
+ /* name, has_arg, flag, val */
+ {"brief" , 0, 0, 'b'},
+ {"help" , 0, 0, 'h'},
+ {"from-format" , 1, 0, 'f'},
+ {"to-format" , 1, 0, 't'},
+ {"base-uri" , 1, 0, 'u'},
+ {NULL , 0, 0, 0}
+};
+#endif
+
+#ifdef HAVE_GETOPT_LONG
+#define HELP_TEXT(short, long, description) " -" short ", --" long " " description
+#define HELP_ARG(short, long) "--" #long
+#define HELP_PAD "\n "
+#else
+#define HELP_TEXT(short, long, description) " -" short " " description
+#define HELP_ARG(short, long) "-" #short
+#define HELP_PAD "\n "
+#endif
+
+typedef struct rdfdiff_link_s { /* node of a singly-linked list of statements */
+ struct rdfdiff_link_s *next; /* following node, NULL at the end of the list */
+ raptor_statement *statement; /* copy made with raptor_statement_copy(); released with raptor_free_statement() when the list is freed */
+} rdfdiff_link;
+
+typedef struct rdfdiff_blank_s { /* everything recorded about one blank node of a file */
+ struct rdfdiff_blank_s *next; /* next blank node in the file's list */
+ raptor_world *world; /* world given to rdfdiff_new_blank(); passed on to statement comparisons */
+ char *blank_id; /* heap copy of the blank node identifier */
+ raptor_statement *owner; /* copy of the most recently seen statement having this node as object, or NULL */
+ rdfdiff_link *first; /* head of the list of statements having this node as subject */
+ rdfdiff_link *last; /* tail of that list */
+ int matched; /* set to 1 by main() once paired with an equal node of the other file */
+} rdfdiff_blank;
+
+typedef struct { /* per-document state: parser, collected statements and counters */
+ raptor_world *world; /* world given to rdfdiff_new_file() */
+ char *name; /* heap copy of the name given to rdfdiff_new_file() */
+ raptor_parser *parser; /* parser created for the requested syntax */
+ rdfdiff_link *first; /* head of the list of statements with neither a blank subject nor a blank object */
+ rdfdiff_link *last; /* tail of that list */
+ rdfdiff_blank *first_blank; /* head of the list of blank nodes seen in this document */
+ rdfdiff_blank *last_blank; /* tail of that list */
+ int statement_count; /* incremented by rdfdiff_collect_statements() for each statement it keeps */
+ int error_count; /* incremented by rdfdiff_log_handler() on error and fatal messages */
+ int warning_count; /* incremented by rdfdiff_log_handler() on warning messages */
+ int difference_count; /* incremented by main() for each statement or blank node without a counterpart */
+} rdfdiff_file;
+
+static int brief = 0; /* set by -b/--brief: report only whether the files differ */
+static char *program = NULL; /* basename of argv[0]; prefix of every diagnostic */
+static const char * const title_string="Raptor RDF diff utility";
+static int ignore_errors = 0; /* when non-zero the log handler only counts errors: no message, no parse abort */
+static int ignore_warnings = 0; /* when non-zero the log handler only counts warnings */
+static int emit_from_header = 1; /* cleared once the heading of the "<" section has been printed */
+static int emit_to_header = 1; /* cleared once the heading of the ">" section has been printed */
+
+static rdfdiff_file* from_file = NULL; /* state of the <from URI> document */
+static rdfdiff_file*to_file = NULL; /* state of the <to URI> document */
+
+static rdfdiff_file* rdfdiff_new_file(raptor_world* world, const unsigned char *name, const char *syntax);
+static void rdfdiff_free_file(rdfdiff_file* file);
+
+static rdfdiff_blank *rdfdiff_find_blank(rdfdiff_blank *first, char *blank_id);
+static rdfdiff_blank *rdfdiff_new_blank(raptor_world *world, char *blank_id);
+static void rdfdiff_free_blank(rdfdiff_blank *blank);
+
+static int rdfdiff_blank_equals(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
+ rdfdiff_file*b1_file, rdfdiff_file*b2_file);
+
+static void rdfdiff_log_handler(void *data, raptor_log_message *message);
+
+static void rdfdiff_collect_statements(void *user_data, raptor_statement *statement);
+
+int main(int argc, char *argv[]);
+
+
+/* NULL-tolerant variant of strcmp().
+ *
+ * Two NULL strings compare equal and a NULL string orders before any
+ * non-NULL string (-1 / 1); when both arguments are non-NULL the result
+ * is whatever strcmp() returns.
+ */
+static int
+safe_strcmp(const char *s1, const char *s2)
+{
+  if(!s1)
+    return s2 ? -1 : 0;
+
+  if(!s2)
+    return 1;
+
+  return strcmp(s1, s2);
+}
+
+
+/* Create the state for one input document.
+ *
+ * @world:  raptor world used to create the parser
+ * @name:   NUL-terminated document name; a private copy is stored
+ * @syntax: parser name handed to raptor_new_parser()
+ *
+ * Returns the new object, or NULL when an allocation fails or no parser
+ * can be created for @syntax (a message is printed in the latter case).
+ * The result is released with rdfdiff_free_file().
+ *
+ * NOTE(review): the log handler is registered on @world rather than on
+ * the parser, so presumably the most recently created file receives
+ * all log messages - confirm against the raptor documentation.
+ */
+static rdfdiff_file*
+rdfdiff_new_file(raptor_world *world, const unsigned char *name, const char *syntax)
+{
+  size_t name_size;
+  rdfdiff_file* file;
+
+  file = RAPTOR_CALLOC(rdfdiff_file*, 1, sizeof(*file));
+  if(!file)
+    return NULL;
+
+  /* size includes the terminating NUL */
+  name_size = strlen((const char*)name) + 1;
+  file->world = world;
+
+  file->name = RAPTOR_MALLOC(char*, name_size);
+  if(!file->name) {
+    rdfdiff_free_file(file);
+    return NULL;
+  }
+  memcpy(file->name, name, name_size);
+
+  file->parser = raptor_new_parser(world, syntax);
+  if(!file->parser) {
+    fprintf(stderr, "%s: Failed to create raptor parser type %s for %s\n",
+            program, syntax, name);
+    rdfdiff_free_file(file);
+    return NULL;
+  }
+
+  raptor_world_set_log_handler(world, file, rdfdiff_log_handler);
+
+  return file;
+}
+
+
+/* Release a file object together with everything it owns: its name,
+ * its parser, every collected statement and every blank node record.
+ * @file must not be NULL; partially initialised objects (NULL name or
+ * parser, empty lists) are accepted.
+ */
+static void
+rdfdiff_free_file(rdfdiff_file* file)
+{
+  rdfdiff_link *link;
+  rdfdiff_blank *blank;
+
+  if(file->name)
+    RAPTOR_FREE(char*, file->name);
+
+  if(file->parser)
+    raptor_free_parser(file->parser);
+
+  /* fetch the successor before the node holding it is freed */
+  link = file->first;
+  while(link) {
+    rdfdiff_link *following = link->next;
+
+    raptor_free_statement(link->statement);
+    RAPTOR_FREE(rdfdiff_link, link);
+    link = following;
+  }
+
+  blank = file->first_blank;
+  while(blank) {
+    rdfdiff_blank *following = blank->next;
+
+    rdfdiff_free_blank(blank);
+    blank = following;
+  }
+
+  RAPTOR_FREE(rdfdiff_file, file);
+}
+
+
+/* Create an empty record for the blank node named @blank_id.
+ *
+ * The identifier is copied; @world is stored for later statement
+ * comparisons.  Returns NULL when an allocation fails.  The result is
+ * released with rdfdiff_free_blank().
+ */
+static rdfdiff_blank *
+rdfdiff_new_blank(raptor_world* world, char *blank_id)
+{
+  size_t id_size;
+  rdfdiff_blank *blank;
+
+  blank = RAPTOR_CALLOC(rdfdiff_blank*, 1, sizeof(*blank));
+  if(!blank)
+    return NULL;
+
+  /* size includes the terminating NUL */
+  id_size = strlen(blank_id) + 1;
+  blank->world = world;
+
+  blank->blank_id = RAPTOR_MALLOC(char*, id_size);
+  if(!blank->blank_id) {
+    rdfdiff_free_blank(blank);
+    return NULL;
+  }
+  memcpy(blank->blank_id, blank_id, id_size);
+
+  return blank;
+}
+
+
+/* Release a blank node record: its identifier, its owner statement and
+ * every statement on its list.  @blank must not be NULL; it is not
+ * unlinked from any list it may be on.
+ */
+static void
+rdfdiff_free_blank(rdfdiff_blank *blank)
+{
+  rdfdiff_link *link;
+
+  if(blank->blank_id)
+    RAPTOR_FREE(char*, blank->blank_id);
+
+  if(blank->owner)
+    raptor_free_statement(blank->owner);
+
+  /* fetch the successor before the node holding it is freed */
+  link = blank->first;
+  while(link) {
+    rdfdiff_link *following = link->next;
+
+    raptor_free_statement(link->statement);
+    RAPTOR_FREE(rdfdiff_link, link);
+    link = following;
+  }
+
+  RAPTOR_FREE(rdfdiff_blank, blank);
+}
+
+
+/* Compare two statements, treating every blank node as a wildcard.
+ *
+ * Subject, predicate and object must have pairwise identical term
+ * types.  URIs are compared with raptor_uri_equals(); literals must
+ * agree on string, language and datatype.  Blank node identifiers are
+ * deliberately NOT compared here - blank nodes are matched separately
+ * by rdfdiff_blank_equals().
+ *
+ * @world is unused.  Returns non-zero when the statements are
+ * considered equal, 0 otherwise or when either statement is NULL.
+ */
+static int
+rdfdiff_statement_equals(raptor_world *world, const raptor_statement *s1, const raptor_statement *s2)
+{
+  int rv = 0;
+
+  if(!s1 || !s2)
+    return 0;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+  fprintf(stderr, "(rdfdiff_statement_equals) Comparing ");
+  raptor_statement_print_as_ntriples(s1, stderr);
+  fprintf(stderr, " to ");
+  raptor_statement_print_as_ntriples(s2, stderr);
+#endif
+
+  /* subjects: same kind, and the same URI unless they are blank */
+  if(s1->subject->type != s2->subject->type)
+    goto done;
+
+  if(s1->subject->type != RAPTOR_TERM_TYPE_BLANK &&
+     !raptor_uri_equals(s1->subject->value.uri, s2->subject->value.uri))
+    goto done;
+
+  /* predicates: same kind and same URI */
+  if(s1->predicate->type != s2->predicate->type)
+    goto done;
+
+  if(!raptor_uri_equals(s1->predicate->value.uri, s2->predicate->value.uri))
+    goto done;
+
+  /* objects */
+  if(s1->object->type != s2->object->type)
+    goto done;
+
+  if(s1->object->type == RAPTOR_TERM_TYPE_LITERAL) {
+    const char *text1 = (const char *)s1->object->value.literal.string;
+    const char *text2 = (const char *)s2->object->value.literal.string;
+    const char *lang1 = (const char *)s1->object->value.literal.language;
+    const char *lang2 = (const char *)s2->object->value.literal.language;
+
+    if(safe_strcmp(text1, text2) != 0)
+      goto done;
+
+    /* both absent, or both present and identical */
+    if(safe_strcmp(lang1, lang2) != 0)
+      goto done;
+
+    rv = raptor_uri_equals(s1->object->value.literal.datatype,
+                           s2->object->value.literal.datatype);
+    goto done;
+  }
+
+  if(s1->object->type != RAPTOR_TERM_TYPE_BLANK &&
+     !raptor_uri_equals(s1->object->value.uri, s2->object->value.uri))
+    goto done;
+
+  rv = 1;
+
+ done:
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+  fprintf(stderr, " : %s\n", (rv ? "equal" : "not equal"));
+#endif
+  return rv;
+}
+
+
+/* One pair of blank nodes whose comparison is in progress.  The pairs
+ * of the enclosing recursion levels are reachable through `outer`; the
+ * records live on the call stack.
+ */
+typedef struct rdfdiff_blank_pair_s {
+  const struct rdfdiff_blank_pair_s *outer;
+  const rdfdiff_blank *b1;
+  const rdfdiff_blank *b2;
+} rdfdiff_blank_pair;
+
+
+/* Return 1 when (b1, b2) is already being compared at this or an
+ * enclosing recursion level, 0 otherwise.
+ */
+static int
+rdfdiff_blank_pair_active(const rdfdiff_blank_pair *pair,
+                          const rdfdiff_blank *b1, const rdfdiff_blank *b2)
+{
+  for(; pair; pair = pair->outer) {
+    if(pair->b1 == b1 && pair->b2 == b2)
+      return 1;
+  }
+
+  return 0;
+}
+
+
+/* Worker for rdfdiff_blank_equals(); @outer is the chain of pairs
+ * under comparison in the enclosing recursion levels (NULL at the top).
+ */
+static int
+rdfdiff_blank_equals_in(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
+                        rdfdiff_file *b1_file, rdfdiff_file *b2_file,
+                        const rdfdiff_blank_pair *outer)
+{
+  rdfdiff_blank_pair self;
+  rdfdiff_link *s1;
+  int equal = 0;
+
+  self.outer = outer;
+  self.b1 = b1;
+  self.b2 = b2;
+
+  /* First compare the "owners": the statements (arcs in) that have
+   * the blank nodes as object. */
+  if(!b1->owner && !b2->owner) {
+    /* Both are "top-level" anonymous objects, i.e. neither is the
+     * object of a statement.  Compare on contents only. */
+    equal = 1;
+  } else if(!b1->owner || !b2->owner) {
+    equal = 0;
+  } else {
+    int anon1 = (b1->owner->subject->type == RAPTOR_TERM_TYPE_BLANK);
+    int anon2 = (b2->owner->subject->type == RAPTOR_TERM_TYPE_BLANK);
+
+    if(!anon1 && !anon2) {
+      /* Neither owner has an anonymous subject: normal comparison of
+       * subject and predicate.  The objects are known to be blank. */
+      equal = rdfdiff_statement_equals(b1->world, b1->owner, b2->owner);
+    } else if(anon1 && anon2) {
+      /* Both owners have anonymous subjects: the containing anonymous
+       * nodes must be equal too. */
+      rdfdiff_blank *p1;
+      rdfdiff_blank *p2;
+
+      p1 = rdfdiff_find_blank(b1_file->first_blank,
+                              (char *)b1->owner->subject->value.blank.string);
+      p2 = rdfdiff_find_blank(b2_file->first_blank,
+                              (char *)b2->owner->subject->value.blank.string);
+
+      if(!p1 || !p2) {
+        /* no record of a containing node: nothing to compare against */
+        equal = 0;
+      } else if(rdfdiff_blank_pair_active(&self, p1, p2)) {
+        /* The owner chain is cyclic (e.g. "_:a <p> _:a") and has come
+         * back to a pair already under comparison.  Recursing again
+         * would never terminate, so assume the owners agree and let
+         * the content comparisons at each level decide. */
+        equal = 1;
+      } else {
+        equal = rdfdiff_blank_equals_in(p1, p2, b1_file, b2_file, &self);
+      }
+    } else {
+      equal = 0;
+    }
+  }
+
+  /* Now compare the contents.  This accounts for the case where a
+   * subject has several properties (of the same predicate value) with
+   * different blank nodes as values.  Every statement of b1 must have
+   * an equal statement in b2; the reverse is not checked. */
+  for(s1 = b1->first; equal && s1; s1 = s1->next) {
+    rdfdiff_link *s2;
+
+    for(s2 = b2->first; s2; s2 = s2->next) {
+      if(rdfdiff_statement_equals(b1->world, s1->statement, s2->statement))
+        break;
+    }
+
+    if(!s2)
+      equal = 0;
+  }
+
+  return equal;
+}
+
+
+/* Decide whether blank node @b1 of @b1_file and blank node @b2 of
+ * @b2_file describe the same thing.
+ *
+ * The nodes are equal when their owner statements agree (recursively,
+ * when the owners themselves have blank subjects) and every statement
+ * about @b1 has an equal statement about @b2.  Cyclic owner chains are
+ * detected and do not cause unbounded recursion.
+ *
+ * Returns non-zero when equal, 0 otherwise.
+ */
+static int
+rdfdiff_blank_equals(const rdfdiff_blank *b1, const rdfdiff_blank *b2,
+                     rdfdiff_file *b1_file, rdfdiff_file *b2_file)
+{
+  return rdfdiff_blank_equals_in(b1, b2, b1_file, b2_file, NULL);
+}
+
+
+/* Log callback registered with raptor_world_set_log_handler().
+ *
+ * @data is the rdfdiff_file given at registration.  Errors and fatal
+ * errors are counted and, unless ignore_errors is set, printed and
+ * followed by an abort of that file's parse.  Warnings are counted and,
+ * unless ignore_warnings is set, printed.  Messages of the remaining
+ * known levels are reported as unexpected.
+ */
+static void
+rdfdiff_log_handler(void *data, raptor_log_message *message)
+{
+  rdfdiff_file* file = (rdfdiff_file*)data;
+
+  if(message->level == RAPTOR_LOG_LEVEL_FATAL ||
+     message->level == RAPTOR_LOG_LEVEL_ERROR) {
+    if(!ignore_errors) {
+      fprintf(stderr, "%s: Error - ", program);
+      raptor_locator_print(message->locator, stderr);
+      fprintf(stderr, " - %s\n", message->text);
+
+      raptor_parser_parse_abort(file->parser);
+    }
+
+    file->error_count++;
+    return;
+  }
+
+  if(message->level == RAPTOR_LOG_LEVEL_WARN) {
+    if(!ignore_warnings) {
+      fprintf(stderr, "%s: Warning - ", program);
+      raptor_locator_print(message->locator, stderr);
+      fprintf(stderr, " - %s\n", message->text);
+    }
+
+    file->warning_count++;
+    return;
+  }
+
+  if(message->level == RAPTOR_LOG_LEVEL_NONE ||
+     message->level == RAPTOR_LOG_LEVEL_TRACE ||
+     message->level == RAPTOR_LOG_LEVEL_DEBUG ||
+     message->level == RAPTOR_LOG_LEVEL_INFO) {
+    fprintf(stderr, "%s: Unexpected %s message - ", program,
+            raptor_log_level_get_label(message->level));
+    raptor_locator_print(message->locator, stderr);
+    fprintf(stderr, " - %s\n", message->text);
+  }
+}
+
+
+
+/* Search the list starting at @first for the blank node whose
+ * identifier equals @blank_id.  Returns the record, or NULL when the
+ * list holds no such node.
+ */
+static rdfdiff_blank *
+rdfdiff_find_blank(rdfdiff_blank *first, char *blank_id)
+{
+  rdfdiff_blank *candidate;
+
+  for(candidate = first; candidate; candidate = candidate->next) {
+    if(!strcmp(candidate->blank_id, blank_id))
+      return candidate;
+  }
+
+  return NULL;
+}
+
+
+/* Return the record of blank node @blank_id in @file, creating it and
+ * appending it to the file's blank node list on first use.  Returns
+ * NULL only when a new record cannot be allocated.
+ */
+static rdfdiff_blank *
+rdfdiff_lookup_blank(rdfdiff_file* file, char *blank_id)
+{
+  rdfdiff_blank *blank = rdfdiff_find_blank(file->first_blank, blank_id);
+
+  if(blank)
+    return blank;
+
+  blank = rdfdiff_new_blank(file->world, blank_id);
+  if(!blank)
+    return NULL;
+
+  /* append at the tail; an empty list gets its head set as well */
+  if(file->first_blank)
+    file->last_blank->next = blank;
+  else
+    file->first_blank = blank;
+  file->last_blank = blank;
+
+  return blank;
+}
+
+
+/* Record @statement, whose subject is a blank node, on the statement
+ * list of that blank node in @file.  A copy of the statement is stored.
+ *
+ * Returns 0 on success; on allocation failure prints an internal error
+ * message and returns 1.
+ */
+static int
+rdfdiff_add_blank_statement(rdfdiff_file* file,
+                            raptor_statement *statement)
+{
+  rdfdiff_blank *blank;
+  rdfdiff_link *dlink = NULL;
+
+  blank = rdfdiff_lookup_blank(file, (char *)statement->subject->value.blank.string);
+  if(blank)
+    dlink = RAPTOR_MALLOC(rdfdiff_link*, sizeof(*dlink));
+
+  if(dlink) {
+    dlink->statement = raptor_statement_copy(statement);
+    if(!dlink->statement) {
+      RAPTOR_FREE(rdfdiff_link, dlink);
+      dlink = NULL;
+    }
+  }
+
+  if(!dlink) {
+    fprintf(stderr, "%s: Internal Error\n", program);
+    return 1;
+  }
+
+  /* append at the tail; an empty list gets its head set as well */
+  dlink->next = NULL;
+  if(blank->first)
+    blank->last->next = dlink;
+  else
+    blank->first = dlink;
+  blank->last = dlink;
+
+  return 0;
+}
+
+
+/* Record @statement, whose object is a blank node, as the "owner" of
+ * that blank node in @file.  A copy is stored; a previously recorded
+ * owner is released and replaced.
+ *
+ * Returns 0 on success; on allocation failure prints an internal error
+ * message and returns 1.
+ */
+static int
+rdfdiff_add_blank_statement_owner(rdfdiff_file* file,
+                                  raptor_statement *statement)
+{
+  rdfdiff_blank *blank;
+
+  blank = rdfdiff_lookup_blank(file,
+                               (char*)statement->object->value.blank.string);
+  if(blank) {
+    if(blank->owner)
+      raptor_free_statement(blank->owner);
+
+    blank->owner = raptor_statement_copy(statement);
+    if(blank->owner)
+      return 0;
+  }
+
+  fprintf(stderr, "%s: Internal Error\n", program);
+  return 1;
+}
+
+
+/* Append a copy of @statement to the plain statement list of @file.
+ *
+ * Returns 0 on success; on allocation failure prints an internal error
+ * message and returns 1.
+ */
+static int
+rdfdiff_add_statement(rdfdiff_file* file, raptor_statement *statement)
+{
+  rdfdiff_link *dlink = RAPTOR_MALLOC(rdfdiff_link*, sizeof(*dlink));
+
+  if(dlink) {
+    dlink->statement = raptor_statement_copy(statement);
+    if(!dlink->statement) {
+      RAPTOR_FREE(rdfdiff_link, dlink);
+      dlink = NULL;
+    }
+  }
+
+  if(!dlink) {
+    fprintf(stderr, "%s: Internal Error\n", program);
+    return 1;
+  }
+
+  /* append at the tail; an empty list gets its head set as well */
+  dlink->next = NULL;
+  if(file->first)
+    file->last->next = dlink;
+  else
+    file->first = dlink;
+  file->last = dlink;
+
+  return 0;
+}
+
+
+/* Find the first node of @file's plain statement list whose statement
+ * equals @statement according to rdfdiff_statement_equals().
+ *
+ * When a node is found and @prev_p is not NULL, *@prev_p receives the
+ * node preceding it (NULL when the match is the head of the list); it
+ * is left untouched when nothing is found.
+ *
+ * Returns the matching node or NULL.
+ */
+static rdfdiff_link*
+rdfdiff_statement_find(rdfdiff_file* file, const raptor_statement *statement,
+                       rdfdiff_link** prev_p)
+{
+  rdfdiff_link* before = NULL;
+  rdfdiff_link* node;
+
+  for(node = file->first; node; before = node, node = node->next) {
+    if(!rdfdiff_statement_equals(file->world, node->statement, statement))
+      continue;
+
+    if(prev_p)
+      *prev_p = before;
+    return node;
+  }
+
+  return NULL;
+}
+
+
+/* Return 1 when @file's plain statement list already holds a statement
+ * equal to @statement, 0 otherwise.
+ */
+static int
+rdfdiff_statement_exists(rdfdiff_file* file, const raptor_statement *statement)
+{
+  /* the predecessor is of no interest here */
+  return rdfdiff_statement_find(file, statement, NULL) != NULL;
+}
+
+
+/*
+ * rdfdiff_collect_statements - Statement handler installed on the
+ * parser of each file.  @user_data is the rdfdiff_file being filled.
+ *
+ * Statements already present on the file's plain statement list are
+ * dropped.  A statement with a blank subject is stored with that blank
+ * node; a statement with a blank object becomes the owner of that
+ * blank node; every other statement goes on the plain list.  The parse
+ * is aborted when storing fails.
+ */
+static void
+rdfdiff_collect_statements(void *user_data, raptor_statement *statement)
+{
+  rdfdiff_file* file = (rdfdiff_file*)user_data;
+  int blank_subject = (statement->subject->type == RAPTOR_TERM_TYPE_BLANK);
+  int blank_object = (statement->object->type == RAPTOR_TERM_TYPE_BLANK);
+  int rv = 0;
+
+  if(rdfdiff_statement_exists(file, statement))
+    return;
+
+  file->statement_count++;
+
+  if(!blank_subject && !blank_object) {
+    rv = rdfdiff_add_statement(file, statement);
+  } else {
+    if(blank_subject)
+      rv = rdfdiff_add_blank_statement(file, statement);
+
+    if(rv == 0 && blank_object)
+      rv = rdfdiff_add_blank_statement_owner(file, statement);
+  }
+
+  if(rv != 0)
+    raptor_parser_parse_abort(file->parser);
+}
+
+
+
+/* Program entry point: rdfdiff [OPTIONS] <from URI> <to URI>
+ *
+ * Parses both documents, then reports (on stderr) the statements and
+ * blank nodes present in one document but not in the other.
+ *
+ * Exit status: 0 when no difference was found; 1 when the documents
+ * differ, when a document fails to parse, or after printing usage or
+ * help; 2 when a URI or a file object cannot be created.
+ */
+int
+main(int argc, char *argv[])
+{
+  raptor_world *world = NULL;
+  unsigned char *from_string = NULL;
+  unsigned char *to_string = NULL;
+  raptor_uri *from_uri = NULL;
+  raptor_uri *to_uri = NULL;
+  raptor_uri *base_uri = NULL;
+  const char *from_syntax = "rdfxml";
+  const char *to_syntax = "rdfxml";
+  int free_from_string = 0;
+  int free_to_string = 0;
+  int usage = 0;
+  int help = 0;
+  char *p;
+  int rv = 0;
+  rdfdiff_blank *b1;
+  rdfdiff_blank *b2;
+  rdfdiff_link *cur;
+
+  /* reduce argv[0] to its basename for use in messages */
+  program = argv[0];
+  if((p = strrchr(program, '/')))
+    program = p+1;
+  else if((p = strrchr(program, '\\')))
+    program = p+1;
+  argv[0] = program;
+
+  world = raptor_new_world();
+  if(!world)
+    exit(1);
+  rv = raptor_world_open(world);
+  if(rv)
+    exit(1);
+
+  while(!usage && !help) {
+    int c;
+#ifdef HAVE_GETOPT_LONG
+    int option_index = 0;
+
+    c = getopt_long (argc, argv, GETOPT_STRING, long_options, &option_index);
+#else
+    c = getopt (argc, argv, GETOPT_STRING);
+#endif
+    if(c == -1)
+      break;
+
+    switch (c) {
+      case 0:
+      case '?': /* getopt() - unknown option */
+        usage = 1;
+        break;
+
+      case 'b':
+        brief = 1;
+        break;
+
+      case 'h':
+        help = 1;
+        break;
+
+      case 'f':
+        if(optarg)
+          from_syntax = optarg;
+        break;
+
+      case 't':
+        if(optarg)
+          to_syntax = optarg;
+        break;
+
+      case 'u':
+        if(optarg) {
+          /* the last -u wins; do not leak the URI of an earlier one */
+          if(base_uri)
+            raptor_free_uri(base_uri);
+          base_uri = raptor_new_uri(world, (const unsigned char*)optarg);
+        }
+        break;
+
+      default:
+        break;
+    }
+  }
+
+  /* exactly two non-option arguments are required */
+  if(optind != argc-2 && !help && !usage) {
+    usage = 2; /* Title and usage */
+  }
+
+  if(usage) {
+    if(usage > 1) {
+      fputs(title_string, stderr); putc(' ', stderr); fputs(raptor_version_string, stderr); putc('\n', stderr);
+      fputs(raptor_short_copyright_string, stderr);
+      fputc('\n', stderr);
+    }
+    fprintf(stderr, "Try `%s " HELP_ARG(h, help) "' for more information.\n",
+            program);
+    rv = 1;
+    goto exit;
+  }
+
+  if(help) {
+    printf("Usage: %s [OPTIONS] <from URI> <to URI>\n", program);
+    /* title and version share one line, as in the usage banner above;
+     * puts() would have put a newline between them */
+    fputs(title_string, stdout); putchar(' '); puts(raptor_version_string);
+    puts(raptor_short_copyright_string);
+    puts("Find differences between two RDF files.");
+    puts("\nOPTIONS:");
+    puts(HELP_TEXT("h", "help ", "Print this help, then exit"));
+    puts(HELP_TEXT("b", "brief ", "Report only whether files differ"));
+    puts(HELP_TEXT("u BASE-URI", "base-uri BASE-URI ", "Set the base URI for the files"));
+    puts(HELP_TEXT("f FORMAT", "from-format FORMAT ", "Format of <from URI> (default is rdfxml)"));
+    puts(HELP_TEXT("t FORMAT", "to-format FORMAT ", "Format of <to URI> (default is rdfxml)"));
+    rv = 1;
+    goto exit;
+  }
+
+  from_string = (unsigned char *)argv[optind++];
+  to_string = (unsigned char *)argv[optind];
+
+  /* an argument naming a readable local file is turned into a URI
+   * string; that string is owned here and freed at exit */
+  if(!access((const char *)from_string, R_OK)) {
+    char *filename = (char *)from_string;
+    from_string = raptor_uri_filename_to_uri_string(filename);
+    if(!from_string) {
+      fprintf(stderr, "%s: Failed to create URI for file %s.\n", program, filename);
+      rv = 2;
+      goto exit;
+    }
+    free_from_string = 1;
+  }
+
+  if(!access((const char *)to_string, R_OK)) {
+    char *filename = (char *)to_string;
+    to_string = raptor_uri_filename_to_uri_string(filename);
+    if(!to_string) {
+      fprintf(stderr, "%s: Failed to create URI for file %s.\n", program, filename);
+      rv = 2;
+      goto exit;
+    }
+    free_to_string = 1;
+  }
+
+  from_uri = raptor_new_uri(world, from_string);
+  if(!from_uri) {
+    fprintf(stderr, "%s: Failed to create URI for %s\n", program, from_string);
+    rv = 2;
+    goto exit;
+  }
+
+  to_uri = raptor_new_uri(world, to_string);
+  if(!to_uri) {
+    /* report the argument that actually failed */
+    fprintf(stderr, "%s: Failed to create URI for %s\n", program, to_string);
+    rv = 2;
+    goto exit;
+  }
+
+  /* create and init "from" data structures */
+  from_file = rdfdiff_new_file(world, from_string, from_syntax);
+  if(!from_file) {
+    rv = 2;
+    goto exit;
+  }
+
+  /* create and init "to" data structures */
+  to_file = rdfdiff_new_file(world, to_string, to_syntax);
+  if(!to_file) {
+    rv = 2;
+    goto exit;
+  }
+
+  /* parse the files */
+  raptor_parser_set_statement_handler(from_file->parser, from_file,
+                                      rdfdiff_collect_statements);
+
+  if(raptor_parser_parse_uri(from_file->parser, from_uri, base_uri)) {
+    fprintf(stderr, "%s: Failed to parse URI %s as %s content\n", program,
+            from_string, from_syntax);
+    rv = 1;
+    goto exit;
+  }
+
+  /* Note intentional from_uri as base_uri */
+  raptor_parser_set_statement_handler(to_file->parser, to_file,
+                                      rdfdiff_collect_statements);
+  if(raptor_parser_parse_uri(to_file->parser, to_uri, base_uri ? base_uri: from_uri)) {
+    fprintf(stderr, "%s: Failed to parse URI %s as %s content\n", program,
+            to_string, to_syntax);
+    rv = 1;
+    goto exit;
+  }
+
+
+  /* Compare triples with no blank nodes */
+  for(cur = to_file->first; cur; cur = cur->next) {
+    rdfdiff_link* prev = NULL;
+    rdfdiff_link* node = rdfdiff_statement_find(from_file, cur->statement, &prev);
+
+    if(node) {
+      /* exists in from file - remove it from the list; prev is NULL
+       * when node is the head */
+      if(prev)
+        prev->next = node->next;
+      else
+        from_file->first = node->next;
+
+      /* keep the tail pointer valid when the tail is removed */
+      if(from_file->last == node)
+        from_file->last = prev;
+
+      raptor_free_statement(node->statement);
+      RAPTOR_FREE(rdfdiff_link, node);
+      continue;
+    }
+
+    if(!brief) {
+      if(emit_from_header) {
+        fprintf(stderr, "Statements in %s but not in %s\n",
+                to_file->name, from_file->name);
+        emit_from_header = 0;
+      }
+
+      fprintf(stderr, "< ");
+      raptor_statement_print_as_ntriples(cur->statement, stderr);
+      fprintf(stderr, "\n");
+    }
+
+    to_file->difference_count++;
+  }
+
+
+  /* Now compare the blank nodes: pair every blank node of the "to"
+   * file with a not yet matched, equal blank node of the "from" file */
+  for(b1 = to_file->first_blank; b1; b1 = b1->next) {
+
+    for(b2 = from_file->first_blank; b2; b2 = b2->next) {
+      if(!b2->matched && rdfdiff_blank_equals(b1, b2, to_file, from_file)) {
+        b1->matched = 1;
+        b2->matched = 1;
+        break;
+      }
+    }
+
+    if(b2)
+      continue;
+
+    if(!brief) {
+      if(emit_from_header) {
+        fprintf(stderr, "Statements in %s but not in %s\n", to_file->name, from_file->name);
+        emit_from_header = 0;
+      }
+
+      fprintf(stderr, "< anonymous node %s\n", b1->blank_id);
+    }
+
+    to_file->difference_count++;
+  }
+
+  /* The entries left in from_file have not been found in to_file.
+   * They are counted in brief mode too, otherwise documents differing
+   * only by such statements would be reported as identical. */
+  if(from_file->first && !brief && emit_to_header) {
+    fprintf(stderr, "Statements in %s but not in %s\n", from_file->name,
+            to_file->name);
+    emit_to_header = 0;
+  }
+
+  for(cur = from_file->first; cur; cur = cur->next) {
+    if(!brief) {
+      fprintf(stderr, "> ");
+      raptor_statement_print_as_ntriples(cur->statement, stderr);
+      fprintf(stderr, "\n");
+    }
+
+    from_file->difference_count++;
+  }
+
+  /* blank nodes of the "from" file that found no partner */
+  for(b2 = from_file->first_blank; b2; b2 = b2->next) {
+    if(b2->matched)
+      continue;
+
+    if(!brief) {
+      if(emit_to_header) {
+        fprintf(stderr, "Statements in %s but not in %s\n", from_file->name, to_file->name);
+        emit_to_header = 0;
+      }
+      fprintf(stderr, "> anonymous node %s\n", b2->blank_id);
+    }
+
+    from_file->difference_count++;
+  }
+
+  if(from_file->difference_count != 0 || to_file->difference_count != 0) {
+    if(brief)
+      fprintf(stderr, "Files differ\n");
+
+    rv = 1;
+  }
+
+exit:
+
+  if(base_uri)
+    raptor_free_uri(base_uri);
+
+  if(from_file)
+    rdfdiff_free_file(from_file);
+
+  if(to_file)
+    rdfdiff_free_file(to_file);
+
+  if(free_from_string)
+    raptor_free_memory(from_string);
+
+  if(free_to_string)
+    raptor_free_memory(to_string);
+
+  if(from_uri)
+    raptor_free_uri(from_uri);
+
+  if(to_uri)
+    raptor_free_uri(to_uri);
+
+  raptor_free_world(world);
+
+  return rv;
+}
+