summaryrefslogtreecommitdiffstats
path: root/src/raptor_parse.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/raptor_parse.c')
-rw-r--r--src/raptor_parse.c1832
1 files changed, 1832 insertions, 0 deletions
diff --git a/src/raptor_parse.c b/src/raptor_parse.c
new file mode 100644
index 0000000..9bc0a39
--- /dev/null
+++ b/src/raptor_parse.c
@@ -0,0 +1,1832 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * raptor_parse.c - Raptor Parser API
+ *
+ * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ */
+
+
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+#ifdef HAVE_SYS_STAT_H
+#include <sys/stat.h>
+#endif
+#ifdef HAVE_FCNTL_H
+#include <fcntl.h>
+#endif
+
+/* Raptor includes */
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+
+#ifndef STANDALONE
+
+/* prototypes for helper functions */
+static void raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict);
+
+/* helper methods */
+
+static void
+raptor_free_parser_factory(raptor_parser_factory* factory)
+{
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_parser_factory);
+
+ if(factory->finish_factory)
+ factory->finish_factory(factory);
+
+ RAPTOR_FREE(raptor_parser_factory, factory);
+}
+
+
+/* class methods */
+
+int
+raptor_parsers_init(raptor_world *world)
+{
+ int rc = 0;
+
+ world->parsers = raptor_new_sequence((raptor_data_free_handler)raptor_free_parser_factory, NULL);
+ if(!world->parsers)
+ return 1;
+
+#ifdef RAPTOR_PARSER_RDFXML
+ rc+= raptor_init_parser_rdfxml(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_NTRIPLES
+ rc+= raptor_init_parser_ntriples(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_N3
+ rc+= raptor_init_parser_n3(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_TURTLE
+ rc+= raptor_init_parser_turtle(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_TRIG
+ rc+= raptor_init_parser_trig(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_RSS
+ rc+= raptor_init_parser_rss(world) != 0;
+#endif
+
+#if defined(RAPTOR_PARSER_GRDDL)
+ rc+= raptor_init_parser_grddl_common(world) != 0;
+
+#ifdef RAPTOR_PARSER_GRDDL
+ rc+= raptor_init_parser_grddl(world) != 0;
+#endif
+
+#endif
+
+#ifdef RAPTOR_PARSER_GUESS
+ rc+= raptor_init_parser_guess(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_RDFA
+ rc+= raptor_init_parser_rdfa(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_JSON
+ rc+= raptor_init_parser_json(world) != 0;
+#endif
+
+#ifdef RAPTOR_PARSER_NQUADS
+ rc+= raptor_init_parser_nquads(world) != 0;
+#endif
+
+ return rc;
+}
+
+
+/*
+ * raptor_finish_parsers - delete all the registered parsers
+ */
+void
+raptor_parsers_finish(raptor_world *world)
+{
+ if(world->parsers) {
+ raptor_free_sequence(world->parsers);
+ world->parsers = NULL;
+ }
+#if defined(RAPTOR_PARSER_GRDDL)
+ raptor_terminate_parser_grddl_common(world);
+#endif
+}
+
+
+/*
+ * raptor_world_register_parser_factory:
+ * @world: raptor world
+ * @factory: pointer to function to call to register the factory
+ *
+ * Internal - Register a parser via parser factory.
+ *
+ * All strings set in the @factory method are shared with the
+ * #raptor_parser_factory
+ *
+ * Return value: new factory object or NULL on failure
+ **/
+RAPTOR_EXTERN_C
+raptor_parser_factory*
+raptor_world_register_parser_factory(raptor_world* world,
+ int (*factory) (raptor_parser_factory*))
+{
+ raptor_parser_factory *parser = NULL;
+
+ parser = RAPTOR_CALLOC(raptor_parser_factory*, 1, sizeof(*parser));
+ if(!parser)
+ return NULL;
+
+ parser->world = world;
+
+ parser->desc.mime_types = NULL;
+
+ if(raptor_sequence_push(world->parsers, parser))
+ return NULL; /* on error, parser is already freed by the sequence */
+
+ /* Call the parser registration function on the new object */
+ if(factory(parser))
+ return NULL; /* parser is owned and freed by the parsers sequence */
+
+ if(raptor_syntax_description_validate(&parser->desc)) {
+ raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL,
+ "Parser description failed to validate\n");
+ goto tidy;
+ }
+
+
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ RAPTOR_DEBUG2("Registered parser %s\n", parser->desc.names[0]);
+#endif
+
+ return parser;
+
+ /* Clean up on failure */
+ tidy:
+ raptor_free_parser_factory(parser);
+ return NULL;
+}
+
+
+/*
+ * raptor_world_get_parser_factory:
+ * @world: world object
+ * @name: the factory name or NULL for the default factory
+ *
+ * INTERNAL - Get a parser factory by name.
+ *
+ * Return value: the factory object or NULL if there is no such factory
+ **/
+raptor_parser_factory*
+raptor_world_get_parser_factory(raptor_world *world, const char *name)
+{
+ raptor_parser_factory *factory = NULL;
+
+ /* return 1st parser if no particular one wanted - why? */
+ if(!name) {
+ factory = (raptor_parser_factory *)raptor_sequence_get_at(world->parsers, 0);
+ if(!factory) {
+ RAPTOR_DEBUG1("No (default) parsers registered\n");
+ return NULL;
+ }
+ } else {
+ int i;
+
+ for(i = 0;
+ (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
+ i++) {
+ int namei;
+ const char* fname;
+
+ for(namei = 0; (fname = factory->desc.names[namei]); namei++) {
+ if(!strcmp(fname, name))
+ break;
+ }
+ if(fname)
+ break;
+ }
+ }
+
+ return factory;
+}
+
+
+/**
+ * raptor_world_get_parsers_count:
+ * @world: world object
+ *
+ * Get number of parsers
+ *
+ * Return value: number of parsers or <0 on failure
+ **/
+int
+raptor_world_get_parsers_count(raptor_world* world)
+{
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1);
+
+ raptor_world_open(world);
+
+ return raptor_sequence_size(world->parsers);
+}
+
+
+/**
+ * raptor_world_get_parser_description:
+ * @world: world object
+ * @counter: index into the list of parsers
+ *
+ * Get parser descriptive syntax information
+ *
+ * Return value: description or NULL if counter is out of range
+ **/
+const raptor_syntax_description*
+raptor_world_get_parser_description(raptor_world* world,
+ unsigned int counter)
+{
+ raptor_parser_factory *factory;
+
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
+
+ raptor_world_open(world);
+
+ factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers,
+ counter);
+
+ if(!factory)
+ return NULL;
+
+ return &factory->desc;
+}
+
+
+/**
+ * raptor_world_is_parser_name:
+ * @world: world object
+ * @name: the syntax name
+ *
+ * Check the name of a parser is known.
+ *
+ * Return value: non 0 if name is a known syntax name
+ */
+int
+raptor_world_is_parser_name(raptor_world* world, const char *name)
+{
+ if(!name)
+ return 0;
+
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0);
+
+ raptor_world_open(world);
+
+ return (raptor_world_get_parser_factory(world, name) != NULL);
+}
+
+
+/**
+ * raptor_new_parser:
+ * @world: world object
+ * @name: the parser name or NULL for default parser
+ *
+ * Constructor - create a new raptor_parser object.
+ *
+ * Return value: a new #raptor_parser object or NULL on failure
+ */
+raptor_parser*
+raptor_new_parser(raptor_world* world, const char *name)
+{
+ raptor_parser_factory* factory;
+ raptor_parser* rdf_parser;
+
+ RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
+
+ raptor_world_open(world);
+
+ factory = raptor_world_get_parser_factory(world, name);
+ if(!factory)
+ return NULL;
+
+ rdf_parser = RAPTOR_CALLOC(raptor_parser*, 1, sizeof(*rdf_parser));
+ if(!rdf_parser)
+ return NULL;
+
+ rdf_parser->world = world;
+ raptor_statement_init(&rdf_parser->statement, world);
+
+ rdf_parser->context = RAPTOR_CALLOC(void*, 1, factory->context_length);
+ if(!rdf_parser->context) {
+ raptor_free_parser(rdf_parser);
+ return NULL;
+ }
+
+#ifdef RAPTOR_XML_LIBXML
+ rdf_parser->magic = RAPTOR_LIBXML_MAGIC;
+#endif
+ rdf_parser->factory = factory;
+
+ /* Bit flags */
+ rdf_parser->failed = 0;
+ rdf_parser->emit_graph_marks = 1;
+ rdf_parser->emitted_default_graph = 0;
+
+ raptor_object_options_init(&rdf_parser->options, RAPTOR_OPTION_AREA_PARSER);
+
+ /* set parsing strictness from default value */
+ raptor_parser_set_strict(rdf_parser,
+ RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_STRICT));
+
+ if(factory->init(rdf_parser, name)) {
+ raptor_free_parser(rdf_parser);
+ return NULL;
+ }
+
+ return rdf_parser;
+}
+
+
+/**
+ * raptor_new_parser_for_content:
+ * @world: world object
+ * @uri: URI identifying the syntax (or NULL)
+ * @mime_type: mime type identifying the content (or NULL)
+ * @buffer: buffer of content to guess (or NULL)
+ * @len: length of buffer
+ * @identifier: identifier of content (or NULL)
+ *
+ * Constructor - create a new raptor_parser.
+ *
+ * Uses raptor_world_guess_parser_name() to find a parser by scoring
+ * recognition of the syntax by a block of characters, the content
+ * identifier or a mime type. The content identifier is typically a
+ * filename or URI or some other identifier.
+ *
+ * Return value: a new #raptor_parser object or NULL on failure
+ **/
+raptor_parser*
+raptor_new_parser_for_content(raptor_world* world,
+ raptor_uri *uri, const char *mime_type,
+ const unsigned char *buffer, size_t len,
+ const unsigned char *identifier)
+{
+ const char* name;
+
+ RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);
+
+ raptor_world_open(world);
+
+ name = raptor_world_guess_parser_name(world, uri, mime_type,
+ buffer, len, identifier);
+ return name ? raptor_new_parser(world, name) : NULL;
+}
+
+
+/**
+ * raptor_parser_parse_start:
+ * @rdf_parser: RDF parser
+ * @uri: base URI or may be NULL if no base URI is required
+ *
+ * Start a parse of content with base URI.
+ *
+ * Parsers that need a base URI can be identified using a syntax
+ * description returned by raptor_world_get_parser_description()
+ * statically or raptor_parser_get_description() on a constructed
+ * parser.
+ *
+ * Return value: non-0 on failure, <0 if a required base URI was missing
+ **/
+int
+raptor_parser_parse_start(raptor_parser *rdf_parser, raptor_uri *uri)
+{
+ if((rdf_parser->factory->desc.flags & RAPTOR_SYNTAX_NEED_BASE_URI) && !uri) {
+ raptor_parser_error(rdf_parser, "Missing base URI for %s parser.",
+ rdf_parser->factory->desc.names[0]);
+ return -1;
+ }
+
+ if(uri)
+ uri = raptor_uri_copy(uri);
+
+ if(rdf_parser->base_uri)
+ raptor_free_uri(rdf_parser->base_uri);
+ rdf_parser->base_uri = uri;
+
+ rdf_parser->locator.uri = uri;
+ rdf_parser->locator.line = -1;
+ rdf_parser->locator.column = -1;
+ rdf_parser->locator.byte = -1;
+
+ if(rdf_parser->factory->start)
+ return rdf_parser->factory->start(rdf_parser);
+ else
+ return 0;
+}
+
+
+
+
+/**
+ * raptor_parser_parse_chunk:
+ * @rdf_parser: RDF parser
+ * @buffer: content to parse
+ * @len: length of buffer
+ * @is_end: non-0 if this is the end of the content (such as EOF)
+ *
+ * Parse a block of content into triples.
+ *
+ * This method can only be called after raptor_parser_parse_start() has
+ * initialised the parser.
+ *
+ * Return value: non-0 on failure.
+ **/
+int
+raptor_parser_parse_chunk(raptor_parser* rdf_parser,
+ const unsigned char *buffer, size_t len, int is_end)
+{
+ if(rdf_parser->sb)
+ raptor_stringbuffer_append_counted_string(rdf_parser->sb, buffer, len, 1);
+
+ return rdf_parser->factory->chunk(rdf_parser, buffer, len, is_end);
+}
+
+
+/**
+ * raptor_free_parser:
+ * @parser: #raptor_parser object
+ *
+ * Destructor - destroy a raptor_parser object.
+ *
+ **/
+void
+raptor_free_parser(raptor_parser* rdf_parser)
+{
+ if(!rdf_parser)
+ return;
+
+ if(rdf_parser->factory)
+ rdf_parser->factory->terminate(rdf_parser);
+
+ if(rdf_parser->www)
+ raptor_free_www(rdf_parser->www);
+
+ if(rdf_parser->context)
+ RAPTOR_FREE(raptor_parser_context, rdf_parser->context);
+
+ if(rdf_parser->base_uri)
+ raptor_free_uri(rdf_parser->base_uri);
+
+ if(rdf_parser->sb)
+ raptor_free_stringbuffer(rdf_parser->sb);
+
+ raptor_object_options_clear(&rdf_parser->options);
+
+ RAPTOR_FREE(raptor_parser, rdf_parser);
+}
+
+
+/**
+ * raptor_parser_parse_file_stream:
+ * @rdf_parser: parser
+ * @stream: FILE* of RDF content
+ * @filename: filename of content or NULL if it has no name
+ * @base_uri: the base URI to use
+ *
+ * Parse RDF content from a FILE*.
+ *
+ * After draining the FILE* stream (EOF), fclose is not called on it.
+ *
+ * Return value: non 0 on failure
+ **/
+int
+raptor_parser_parse_file_stream(raptor_parser* rdf_parser,
+ FILE *stream, const char* filename,
+ raptor_uri *base_uri)
+{
+ int rc = 0;
+ raptor_locator *locator = &rdf_parser->locator;
+
+ if(!stream || !base_uri)
+ return 1;
+
+ locator->line= locator->column = -1;
+ locator->file= filename;
+
+ if(raptor_parser_parse_start(rdf_parser, base_uri))
+ return 1;
+
+ while(!feof(stream)) {
+ size_t len = fread(rdf_parser->buffer, 1, RAPTOR_READ_BUFFER_SIZE, stream);
+ int is_end = (len < RAPTOR_READ_BUFFER_SIZE);
+ rdf_parser->buffer[len] = '\0';
+ rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end);
+ if(rc || is_end)
+ break;
+ }
+
+ return (rc != 0);
+}
+
+
+/**
+ * raptor_parser_parse_file:
+ * @rdf_parser: parser
+ * @uri: URI of RDF content or NULL to read from standard input
+ * @base_uri: the base URI to use (or NULL if the same)
+ *
+ * Parse RDF content at a file URI.
+ *
+ * If @uri is NULL (source is stdin), then the @base_uri is required.
+ *
+ * Return value: non 0 on failure
+ **/
+int
+raptor_parser_parse_file(raptor_parser* rdf_parser, raptor_uri *uri,
+ raptor_uri *base_uri)
+{
+ int rc = 0;
+ int free_base_uri = 0;
+ const char *filename = NULL;
+ FILE *fh = NULL;
+#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
+ struct stat buf;
+#endif
+
+ if(uri) {
+ filename = raptor_uri_uri_string_to_filename(raptor_uri_as_string(uri));
+ if(!filename)
+ return 1;
+
+#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H)
+ if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) {
+ raptor_parser_error(rdf_parser, "Cannot read from a directory '%s'",
+ filename);
+ goto cleanup;
+ }
+#endif
+
+ fh = fopen(filename, "r");
+ if(!fh) {
+ raptor_parser_error(rdf_parser, "file '%s' open failed - %s",
+ filename, strerror(errno));
+ goto cleanup;
+ }
+ if(!base_uri) {
+ base_uri = raptor_uri_copy(uri);
+ free_base_uri = 1;
+ }
+ } else {
+ if(!base_uri)
+ return 1;
+ fh = stdin;
+ }
+
+ rc = raptor_parser_parse_file_stream(rdf_parser, fh, filename, base_uri);
+
+ cleanup:
+ if(uri) {
+ if(fh)
+ fclose(fh);
+ RAPTOR_FREE(char*, filename);
+ }
+ if(free_base_uri)
+ raptor_free_uri(base_uri);
+
+ return rc;
+}
+
+
+void
+raptor_parser_parse_uri_write_bytes(raptor_www* www,
+ void *userdata, const void *ptr,
+ size_t size, size_t nmemb)
+{
+ raptor_parse_bytes_context* rpbc = (raptor_parse_bytes_context*)userdata;
+ size_t len = size * nmemb;
+
+ if(!rpbc->started) {
+ raptor_uri* base_uri = rpbc->base_uri;
+
+ if(!base_uri) {
+ rpbc->final_uri = raptor_www_get_final_uri(www);
+ /* base URI after URI resolution is finally chosen */
+ base_uri = rpbc->final_uri ? rpbc->final_uri : www->uri;
+ }
+
+ if(raptor_parser_parse_start(rpbc->rdf_parser, base_uri))
+ raptor_www_abort(www, "Parsing failed");
+ rpbc->started = 1;
+ }
+
+ if(raptor_parser_parse_chunk(rpbc->rdf_parser, (unsigned char*)ptr, len, 0))
+ raptor_www_abort(www, "Parsing failed");
+}
+
+
+static void
+raptor_parser_parse_uri_content_type_handler(raptor_www* www, void* userdata,
+ const char* content_type)
+{
+ raptor_parser* rdf_parser = (raptor_parser*)userdata;
+ if(rdf_parser->factory->content_type_handler)
+ rdf_parser->factory->content_type_handler(rdf_parser, content_type);
+}
+
+
+int
+raptor_parser_set_uri_filter_no_net(void *user_data, raptor_uri* uri)
+{
+ unsigned char* uri_string = raptor_uri_as_string(uri);
+
+ if(raptor_uri_uri_string_is_file_uri(uri_string))
+ return 0;
+
+ raptor_parser_error((raptor_parser*)user_data,
+ "Network fetch of URI '%s' denied", uri_string);
+ return 1;
+}
+
+
+/**
+ * raptor_parser_parse_uri:
+ * @rdf_parser: parser
+ * @uri: URI of RDF content
+ * @base_uri: the base URI to use (or NULL if the same)
+ *
+ * Parse the RDF content at URI.
+ *
+ * Sends an HTTP Accept: header whent the URI is of the HTTP protocol,
+ * see raptor_parser_parse_uri_with_connection() for details including
+ * how the @base_uri is used.
+ *
+ * Return value: non 0 on failure
+ **/
+int
+raptor_parser_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri,
+ raptor_uri *base_uri)
+{
+ return raptor_parser_parse_uri_with_connection(rdf_parser, uri, base_uri,
+ NULL);
+}
+
+
+/**
+ * raptor_parser_parse_uri_with_connection:
+ * @rdf_parser: parser
+ * @uri: URI of RDF content
+ * @base_uri: the base URI to use (or NULL if the same)
+ * @connection: connection object pointer or NULL to create a new one
+ *
+ * Parse RDF content at URI using existing WWW connection.
+ *
+ * If @base_uri is not given and during resolution of the URI, a
+ * protocol redirection occurs, the final resolved URI will be
+ * used as the base URI. If redirection does not occur, the
+ * base URI will be @uri.
+ *
+ * If @base_uri is given, it overrides the process above.
+ *
+ * When @connection is NULL and a MIME Type exists for the parser
+ * type, this type is sent in an HTTP Accept: header in the form
+ * Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is
+ * prefered rather than the sole answer. The latter part may not be
+ * necessary but should ensure an HTTP 200 response.
+ *
+ * Return value: non 0 on failure
+ **/
+int
+raptor_parser_parse_uri_with_connection(raptor_parser* rdf_parser,
+ raptor_uri *uri,
+ raptor_uri *base_uri, void *connection)
+{
+ int ret = 0;
+ raptor_parse_bytes_context rpbc;
+ char* ua = NULL;
+ char* cert_filename = NULL;
+ char* cert_type = NULL;
+ char* cert_passphrase = NULL;
+ int ssl_verify_peer;
+ int ssl_verify_host;
+
+ if(connection) {
+ if(rdf_parser->www)
+ raptor_free_www(rdf_parser->www);
+ rdf_parser->www = raptor_new_www_with_connection(rdf_parser->world,
+ connection);
+ if(!rdf_parser->www)
+ return 1;
+ } else {
+ const char *accept_h;
+
+ if(rdf_parser->www)
+ raptor_free_www(rdf_parser->www);
+ rdf_parser->www = raptor_new_www(rdf_parser->world);
+ if(!rdf_parser->www)
+ return 1;
+
+ accept_h = raptor_parser_get_accept_header(rdf_parser);
+ if(accept_h) {
+ ret = raptor_www_set_http_accept2(rdf_parser->www, accept_h, 0);
+ RAPTOR_FREE(char*, accept_h);
+ if(ret)
+ return 1;
+ }
+ }
+
+ rpbc.rdf_parser = rdf_parser;
+ rpbc.base_uri = base_uri;
+ rpbc.final_uri = NULL;
+ rpbc.started = 0;
+
+ if(rdf_parser->uri_filter)
+ raptor_www_set_uri_filter(rdf_parser->www, rdf_parser->uri_filter,
+ rdf_parser->uri_filter_user_data);
+ else if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET))
+ raptor_www_set_uri_filter(rdf_parser->www,
+ raptor_parser_set_uri_filter_no_net, rdf_parser);
+
+ raptor_www_set_write_bytes_handler(rdf_parser->www,
+ raptor_parser_parse_uri_write_bytes,
+ &rpbc);
+
+ raptor_www_set_content_type_handler(rdf_parser->www,
+ raptor_parser_parse_uri_content_type_handler,
+ rdf_parser);
+
+ raptor_www_set_http_cache_control(rdf_parser->www,
+ RAPTOR_OPTIONS_GET_STRING(rdf_parser,
+ RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL));
+
+ ua = RAPTOR_OPTIONS_GET_STRING(rdf_parser, RAPTOR_OPTION_WWW_HTTP_USER_AGENT);
+ if(ua) {
+ if(raptor_www_set_user_agent2(rdf_parser->www, ua, 0))
+ return 1;
+ }
+
+ cert_filename = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
+ RAPTOR_OPTION_WWW_CERT_FILENAME);
+ cert_type = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
+ RAPTOR_OPTION_WWW_CERT_TYPE);
+ cert_passphrase = RAPTOR_OPTIONS_GET_STRING(rdf_parser,
+ RAPTOR_OPTION_WWW_CERT_PASSPHRASE);
+ if(cert_filename || cert_type || cert_passphrase)
+ raptor_www_set_ssl_cert_options(rdf_parser->www, cert_filename,
+ cert_type, cert_passphrase);
+
+ ssl_verify_peer = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser,
+ RAPTOR_OPTION_WWW_SSL_VERIFY_PEER);
+ ssl_verify_host = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser,
+ RAPTOR_OPTION_WWW_SSL_VERIFY_HOST);
+ raptor_www_set_ssl_verify_options(rdf_parser->www, ssl_verify_peer,
+ ssl_verify_host);
+
+ ret = raptor_www_fetch(rdf_parser->www, uri);
+
+ if(!rpbc.started && !ret)
+ ret = raptor_parser_parse_start(rdf_parser, base_uri);
+
+ if(rpbc.final_uri)
+ raptor_free_uri(rpbc.final_uri);
+
+ if(ret) {
+ raptor_free_www(rdf_parser->www);
+ rdf_parser->www = NULL;
+ return 1;
+ }
+
+ if(raptor_parser_parse_chunk(rdf_parser, NULL, 0, 1))
+ rdf_parser->failed = 1;
+
+ raptor_free_www(rdf_parser->www);
+ rdf_parser->www = NULL;
+
+ return rdf_parser->failed;
+}
+
+
+/*
+ * raptor_parser_fatal_error - Fatal Error from a parser - Internal
+ */
+void
+raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...)
+{
+ va_list arguments;
+
+ va_start(arguments, message);
+ if(parser) {
+ parser->failed = 1;
+ raptor_log_error_varargs(parser->world,
+ RAPTOR_LOG_LEVEL_FATAL,
+ &parser->locator,
+ message, arguments);
+ } else
+ raptor_log_error_varargs(NULL,
+ RAPTOR_LOG_LEVEL_FATAL, NULL,
+ message, arguments);
+ va_end(arguments);
+}
+
+
+/*
+ * raptor_parser_error - Error from a parser - Internal
+ */
+void
+raptor_parser_error(raptor_parser* parser, const char *message, ...)
+{
+ va_list arguments;
+
+ va_start(arguments, message);
+
+ raptor_parser_log_error_varargs(parser, RAPTOR_LOG_LEVEL_ERROR,
+ message, arguments);
+
+ va_end(arguments);
+}
+
+
+/**
+ * raptor_parser_log_error_varargs:
+ * @parser: parser (or NULL)
+ * @level: log level
+ * @message: error format message
+ * @arguments: varargs for message
+ *
+ * Error from a parser - Internal.
+ */
+void
+raptor_parser_log_error_varargs(raptor_parser* parser,
+ raptor_log_level level,
+ const char *message, va_list arguments)
+{
+ if(parser)
+ raptor_log_error_varargs(parser->world,
+ level,
+ &parser->locator,
+ message, arguments);
+ else
+ raptor_log_error_varargs(NULL,
+ level,
+ NULL,
+ message, arguments);
+}
+
+
+/**
+ * raptor_parser_log_error:
+ * @parser: parser (or NULL)
+ * @level: log level
+ * @message: error format message
+ *
+ * Error from a parser - Internal.
+ */
+void
+raptor_parser_log_error(raptor_parser* parser,
+ raptor_log_level level,
+ const char *message, ...)
+{
+ va_list arguments;
+
+ va_start(arguments, message);
+
+ if(parser)
+ raptor_log_error_varargs(parser->world,
+ level,
+ &parser->locator,
+ message, arguments);
+ else
+ raptor_log_error_varargs(NULL,
+ level,
+ NULL,
+ message, arguments);
+
+ va_end(arguments);
+}
+
+
+/*
+ * raptor_parser_warning - Warning from a parser - Internal
+ */
+void
+raptor_parser_warning(raptor_parser* parser, const char *message, ...)
+{
+ va_list arguments;
+
+ va_start(arguments, message);
+
+ if(parser)
+ raptor_log_error_varargs(parser->world,
+ RAPTOR_LOG_LEVEL_WARN,
+ &parser->locator,
+ message, arguments);
+ else
+ raptor_log_error_varargs(NULL,
+ RAPTOR_LOG_LEVEL_WARN,
+ NULL,
+ message, arguments);
+
+ va_end(arguments);
+}
+
+
+
+/* PUBLIC FUNCTIONS */
+
+/**
+ * raptor_parser_set_statement_handler:
+ * @parser: #raptor_parser parser object
+ * @user_data: user data pointer for callback
+ * @handler: new statement callback function
+ *
+ * Set the statement handler function for the parser.
+ *
+ * Use this to set the function to receive statements as the parsing
+ * proceeds. The statement argument to @handler is shared and must be
+ * copied by the caller with raptor_statement_copy().
+ **/
+void
+raptor_parser_set_statement_handler(raptor_parser* parser,
+ void *user_data,
+ raptor_statement_handler handler)
+{
+ parser->user_data = user_data;
+ parser->statement_handler = handler;
+}
+
+
+/**
+ * raptor_parser_set_graph_mark_handler:
+ * @parser: #raptor_parser parser object
+ * @user_data: user data pointer for callback
+ * @handler: new graph callback function
+ *
+ * Set the graph mark handler function for the parser.
+ *
+ * See #raptor_graph_mark_handler and #raptor_graph_mark_flags for
+ * the marks that may be returned by the handler.
+ *
+ **/
+void
+raptor_parser_set_graph_mark_handler(raptor_parser* parser,
+ void *user_data,
+ raptor_graph_mark_handler handler)
+{
+ parser->user_data = user_data;
+ parser->graph_mark_handler = handler;
+}
+
+
+/**
+ * raptor_parser_set_namespace_handler:
+ * @parser: #raptor_parser parser object
+ * @user_data: user data pointer for callback
+ * @handler: new namespace callback function
+ *
+ * Set the namespace handler function for the parser.
+ *
+ * When a prefix/namespace is seen in a parser, call the given
+ * @handler with the prefix string and the #raptor_uri namespace URI.
+ * Either can be NULL for the default prefix or default namespace.
+ *
+ * The handler function does not deal with duplicates so any
+ * namespace may be declared multiple times.
+ *
+ **/
+void
+raptor_parser_set_namespace_handler(raptor_parser* parser,
+ void *user_data,
+ raptor_namespace_handler handler)
+{
+ parser->namespace_handler = handler;
+ parser->namespace_handler_user_data = user_data;
+}
+
+
+/**
+ * raptor_parser_set_uri_filter:
+ * @parser: parser object
+ * @filter: URI filter function
+ * @user_data: User data to pass to filter function
+ *
+ * Set URI filter function for WWW retrieval.
+ **/
+void
+raptor_parser_set_uri_filter(raptor_parser* parser,
+ raptor_uri_filter_func filter,
+ void *user_data)
+{
+ parser->uri_filter = filter;
+ parser->uri_filter_user_data = user_data;
+}
+
+
+/**
+ * raptor_parser_set_option:
+ * @parser: #raptor_parser parser object
+ * @option: option to set from enumerated #raptor_option values
+ * @string: string option value (or NULL)
+ * @integer: integer option value
+ *
+ * Set parser option.
+ *
+ * If @string is not NULL and the option type is numeric, the string
+ * value is converted to an integer and used in preference to @integer.
+ *
+ * If @string is NULL and the option type is not numeric, an error is
+ * returned.
+ *
+ * The @string values used are copied.
+ *
+ * The allowed options are available via
+ * raptor_world_get_option_description().
+ *
+ * Return value: non 0 on failure or if the option is unknown
+ **/
+int
+raptor_parser_set_option(raptor_parser *parser, raptor_option option,
+ const char* string, int integer)
+{
+ int rc;
+
+ rc = raptor_object_options_set_option(&parser->options, option,
+ string, integer);
+ if(option == RAPTOR_OPTION_STRICT && !rc) {
+ int is_strict = RAPTOR_OPTIONS_GET_NUMERIC(parser, RAPTOR_OPTION_STRICT);
+ raptor_parser_set_strict(parser, is_strict);
+ }
+
+ return rc;
+}
+
+
+/**
+ * raptor_parser_get_option:
+ * @parser: #raptor_parser parser object
+ * @option: option to get value
+ * @string_p: pointer to where to store string value
+ * @integer_p: pointer to where to store integer value
+ *
+ * Get parser option.
+ *
+ * Any string value returned in *@string_p is shared and must
+ * be copied by the caller.
+ *
+ * The allowed options are available via
+ * raptor_world_get_option_description().
+ *
+ * Return value: option value or < 0 for an illegal option
+ **/
+int
+raptor_parser_get_option(raptor_parser *parser, raptor_option option,
+ char** string_p, int* integer_p)
+{
+ return raptor_object_options_get_option(&parser->options, option,
+ string_p, integer_p);
+}
+
+
+/**
+ * raptor_parser_set_strict:
+ * @rdf_parser: #raptor_parser object
+ * @is_strict: Non 0 for strict parsing
+ *
+ * INTERNAL - Set parser to strict / lax mode.
+ *
+ **/
+static void
+raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict)
+{
+ is_strict = (is_strict) ? 1 : 0;
+
+ /* Initialise default parser mode */
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING, 0);
+
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES, !is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES, !is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID, !is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST, 0);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE, 1);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL, is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES, !is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID, 1);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_TAG_SOUP, !is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_MICROFORMATS, !is_strict);
+ RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_LINK, !is_strict);
+}
+
+
+/**
+ * raptor_parser_get_name:
+ * @rdf_parser: #raptor_parser parser object
+ *
+ * Get the name of a parser.
+ *
+ * Use raptor_parser_get_description() to get the alternate names and
+ * aliases as well as other descriptive values.
+ *
+ * Return value: the short name for the parser.
+ **/
+const char*
+raptor_parser_get_name(raptor_parser *rdf_parser)
+{
+ if(rdf_parser->factory->get_name)
+ return rdf_parser->factory->get_name(rdf_parser);
+ else
+ return rdf_parser->factory->desc.names[0];
+}
+
+
+/**
+ * raptor_parser_get_description:
+ * @rdf_parser: #raptor_parser parser object
+ *
+ * Get description of the syntaxes of the parser.
+ *
+ * The returned description is static and lives as long as the raptor
+ * library (raptor world).
+ *
+ * Return value: description of syntax
+ **/
+const raptor_syntax_description*
+raptor_parser_get_description(raptor_parser *rdf_parser)
+{
+ if(rdf_parser->factory->get_description)
+ return rdf_parser->factory->get_description(rdf_parser);
+ else
+ return &rdf_parser->factory->desc;
+}
+
+
+
+/**
+ * raptor_parser_parse_abort:
+ * @rdf_parser: #raptor_parser parser object
+ *
+ * Abort an ongoing parsing.
+ *
+ * Causes any ongoing generation of statements by a parser to be
+ * terminated and the parser to return controlto the application
+ * as soon as draining any existing buffers.
+ *
+ * Most useful inside raptor_parser_parse_file() or
+ * raptor_parser_parse_uri() when the Raptor library is directing the
+ * parsing and when one of the callback handlers such as as set by
+ * raptor_parser_set_statement_handler() requires to return to the main
+ * application code.
+ **/
+void
+raptor_parser_parse_abort(raptor_parser *rdf_parser)
+{
+ rdf_parser->failed = 1;
+}
+
+
+/**
+ * raptor_parser_get_locator:
+ * @rdf_parser: raptor parser
+ *
+ * Get the current raptor locator object.
+ *
+ * Return value: raptor locator
+ **/
+raptor_locator*
+raptor_parser_get_locator(raptor_parser *rdf_parser)
+{
+ if(rdf_parser->factory->get_locator)
+ return rdf_parser->factory->get_locator(rdf_parser);
+ else
+ return &rdf_parser->locator;
+}
+
+
+#ifdef RAPTOR_DEBUG
+void
+raptor_stats_print(raptor_parser *rdf_parser, FILE *stream)
+{
+#ifdef RAPTOR_PARSER_RDFXML
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ if(!strcmp(rdf_parser->factory->desc.names[0], "rdfxml")) {
+ raptor_rdfxml_parser *rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context;
+ fputs("raptor parser stats\n ", stream);
+ raptor_rdfxml_parser_stats_print(rdf_xml_parser, stream);
+ }
+#endif
+#endif
+}
+#endif
+
+
+struct syntax_score
+{
+ int score;
+ raptor_parser_factory* factory;
+};
+
+
+static int
+compare_syntax_score(const void *a, const void *b) {
+ return ((struct syntax_score*)b)->score - ((struct syntax_score*)a)->score;
+}
+
+#define RAPTOR_MIN_GUESS_SCORE 2
+
+/**
+ * raptor_world_guess_parser_name:
+ * @world: world object
+ * @uri: URI identifying the syntax (or NULL)
+ * @mime_type: mime type identifying the content (or NULL)
+ * @buffer: buffer of content to guess (or NULL)
+ * @len: length of buffer
+ * @identifier: identifier of content (or NULL)
+ *
+ * Guess a parser name for content.
+ *
+ * Find a parser by scoring recognition of the syntax by a block of
+ * characters, the content identifier or a mime type. The content
+ * identifier is typically a filename or URI or some other identifier.
+ *
+ * If the guessing finds only low scores, NULL will be returned.
+ *
+ * Return value: a parser name or NULL if no guess could be made
+ **/
+const char*
+raptor_world_guess_parser_name(raptor_world* world,
+ raptor_uri *uri, const char *mime_type,
+ const unsigned char *buffer, size_t len,
+ const unsigned char *identifier)
+{
+ unsigned int i;
+ raptor_parser_factory *factory;
+ unsigned char *suffix = NULL;
+ struct syntax_score* scores;
+
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL);
+
+ raptor_world_open(world);
+
+ scores = RAPTOR_CALLOC(struct syntax_score*,
+ raptor_sequence_size(world->parsers),
+ sizeof(struct syntax_score));
+ if(!scores)
+ return NULL;
+
+ if(identifier) {
+ unsigned char *p = (unsigned char*)strrchr((const char*)identifier, '.');
+ if(p) {
+ unsigned char *from, *to;
+
+ p++;
+ suffix = RAPTOR_MALLOC(unsigned char*, strlen((const char*)p) + 1);
+ if(!suffix) {
+ RAPTOR_FREE(syntax_scores, scores);
+ return NULL;
+ }
+
+ for(from = p, to = suffix; *from; ) {
+ unsigned char c = *from++;
+ /* discard the suffix if it wasn't '\.[a-zA-Z0-9]+$' */
+ if(!isalpha(c) && !isdigit(c)) {
+ RAPTOR_FREE(char*, suffix);
+ suffix = NULL;
+ to = NULL;
+ break;
+ }
+ *to++ = isupper(c) ? (unsigned char)tolower(c): c;
+ }
+ if(to)
+ *to = '\0';
+ }
+ }
+
+ for(i = 0;
+ (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
+ i++) {
+ int score = -1;
+ const raptor_type_q* type_q = NULL;
+
+ if(mime_type && factory->desc.mime_types) {
+ int j;
+ type_q = NULL;
+ for(j = 0;
+ (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
+ j++) {
+ if(!strcmp(mime_type, type_q->mime_type))
+ break;
+ }
+ /* got an exact match mime type - score it via the Q */
+ if(type_q)
+ score = type_q->q;
+ }
+ /* mime type match has high Q - return factory as result */
+ if(score >= 10)
+ break;
+
+ if(uri && factory->desc.uri_strings) {
+ int j;
+ const char* uri_string = (const char*)raptor_uri_as_string(uri);
+ const char* factory_uri_string = NULL;
+
+ for(j = 0;
+ (factory_uri_string = factory->desc.uri_strings[j]);
+ j++) {
+ if(!strcmp(uri_string, factory_uri_string))
+ break;
+ }
+ if(factory_uri_string)
+ /* got an exact match syntax for URI - return factory as result */
+ break;
+ }
+
+ if(factory->recognise_syntax) {
+ int c = -1;
+
+ /* Only use first N bytes to avoid HTML documents that contain
+ * RDF/XML examples
+ */
+#define FIRSTN 1024
+#if FIRSTN > RAPTOR_READ_BUFFER_SIZE
+#error "RAPTOR_READ_BUFFER_SIZE is not large enough"
+#endif
+ if(buffer && len && len > FIRSTN) {
+ c = buffer[FIRSTN];
+ ((char*)buffer)[FIRSTN] = '\0';
+ }
+
+ score += factory->recognise_syntax(factory, buffer, len,
+ identifier, suffix,
+ mime_type);
+
+ if(c >= 0)
+ ((char*)buffer)[FIRSTN] = RAPTOR_GOOD_CAST(char, c);
+ }
+
+ scores[i].score = score < 10 ? score : 10;
+ scores[i].factory = factory;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+ RAPTOR_DEBUG3("Score %15s : %d\n", factory->desc.names[0], score);
+#endif
+ }
+
+ if(!factory) {
+ /* sort the scores and pick a factory if score is good enough */
+ qsort(scores, i, sizeof(struct syntax_score), compare_syntax_score);
+
+ if(scores[0].score >= RAPTOR_MIN_GUESS_SCORE)
+ factory = scores[0].factory;
+ }
+
+ if(suffix)
+ RAPTOR_FREE(char*, suffix);
+
+ RAPTOR_FREE(syntax_scores, scores);
+
+ return factory ? factory->desc.names[0] : NULL;
+}
+
+
+/*
+ * raptor_parser_copy_flags_state:
+ * @to_parser: destination parser
+ * @from_parser: source parser
+ *
+ * Copy status flags between parsers - INTERNAL.
+ **/
+void
+raptor_parser_copy_flags_state(raptor_parser *to_parser,
+ raptor_parser *from_parser)
+{
+ to_parser->failed = from_parser->failed;
+ to_parser->emit_graph_marks = from_parser->emit_graph_marks;
+ to_parser->emitted_default_graph = from_parser->emitted_default_graph;
+}
+
+
+
+/*
+ * raptor_parser_copy_user_state:
+ * @to_parser: destination parser
+ * @from_parser: source parser
+ *
+ * Copy user state between parsers - INTERNAL.
+ *
+ * Return value: non-0 on failure
+ **/
+int
+raptor_parser_copy_user_state(raptor_parser *to_parser,
+ raptor_parser *from_parser)
+{
+ int rc = 0;
+
+ to_parser->user_data = from_parser->user_data;
+ to_parser->statement_handler = from_parser->statement_handler;
+ to_parser->namespace_handler = from_parser->namespace_handler;
+ to_parser->namespace_handler_user_data = from_parser->namespace_handler_user_data;
+ to_parser->uri_filter = from_parser->uri_filter;
+ to_parser->uri_filter_user_data = from_parser->uri_filter_user_data;
+
+ /* copy bit flags */
+ raptor_parser_copy_flags_state(to_parser, from_parser);
+
+ /* copy options */
+ if(!rc)
+ rc = raptor_object_options_copy_state(&to_parser->options,
+ &from_parser->options);
+
+ return rc;
+}
+
+
+/*
+ * raptor_parser_start_namespace:
+ * @rdf_parser: parser
+ * @nspace: namespace starting
+ *
+ * Internal - Invoke start namespace handler
+ **/
+void
+raptor_parser_start_namespace(raptor_parser* rdf_parser,
+ raptor_namespace* nspace)
+{
+ if(!rdf_parser->namespace_handler)
+ return;
+
+ (*rdf_parser->namespace_handler)(rdf_parser->namespace_handler_user_data,
+ nspace);
+}
+
+
+/**
+ * raptor_parser_get_accept_header:
+ * @rdf_parser: parser
+ *
+ * Get an HTTP Accept value for the parser.
+ *
+ * The returned string must be freed by the caller such as with
+ * raptor_free_memory().
+ *
+ * Return value: a new Accept: header string or NULL on failure
+ **/
+const char*
+raptor_parser_get_accept_header(raptor_parser* rdf_parser)
+{
+ raptor_parser_factory *factory = rdf_parser->factory;
+ char *accept_header = NULL;
+ size_t len;
+ char *p;
+ int i;
+ const raptor_type_q* type_q;
+
+ if(factory->accept_header)
+ return factory->accept_header(rdf_parser);
+
+ if(!factory->desc.mime_types)
+ return NULL;
+
+ len = 0;
+ for(i = 0;
+ (type_q = &factory->desc.mime_types[i]) && type_q->mime_type;
+ i++) {
+ len += type_q->mime_type_len + 2; /* ", " */
+ if(type_q->q < 10)
+ len += 6; /* ";q=X.Y" */
+ }
+
+ /* 9 = strlen("\*\/\*;q=0.1") */
+#define ACCEPT_HEADER_LEN 9
+ accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1);
+ if(!accept_header)
+ return NULL;
+
+ p = accept_header;
+ for(i = 0;
+ (type_q = &factory->desc.mime_types[i]) && type_q->mime_type;
+ i++) {
+ memcpy(p, type_q->mime_type, type_q->mime_type_len);
+ p += type_q->mime_type_len;
+ if(type_q->q < 10) {
+ *p++ = ';';
+ *p++ = 'q';
+ *p++ = '=';
+ *p++ = '0';
+ *p++ = '.';
+ *p++ = RAPTOR_GOOD_CAST(char, '0' + (type_q->q));
+ }
+
+ *p++ = ',';
+ *p++ = ' ';
+ }
+
+ memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1);
+
+ return accept_header;
+}
+
+
+const char*
+raptor_parser_get_accept_header_all(raptor_world* world)
+{
+ raptor_parser_factory *factory;
+ char *accept_header = NULL;
+ size_t len;
+ char *p;
+ int i;
+
+ len = 0;
+ for(i = 0;
+ (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
+ i++) {
+ const raptor_type_q* type_q;
+ int j;
+
+ for(j = 0;
+ (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
+ j++) {
+ len += type_q->mime_type_len + 2; /* ", " */
+ if(type_q->q < 10)
+ len += 6; /* ";q=X.Y" */
+ }
+ }
+
+ /* 9 = strlen("\*\/\*;q=0.1") */
+#define ACCEPT_HEADER_LEN 9
+ accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1);
+ if(!accept_header)
+ return NULL;
+
+ p = accept_header;
+ for(i = 0;
+ (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i));
+ i++) {
+ const raptor_type_q* type_q;
+ int j;
+
+ for(j = 0;
+ (type_q = &factory->desc.mime_types[j]) && type_q->mime_type;
+ j++) {
+ memcpy(p, type_q->mime_type, type_q->mime_type_len);
+ p+= type_q->mime_type_len;
+ if(type_q->q < 10) {
+ *p++ = ';';
+ *p++ = 'q';
+ *p++ = '=';
+ *p++ = '0';
+ *p++ = '.';
+ *p++ = RAPTOR_GOOD_CAST(char, '0' + (type_q->q));
+ }
+
+ *p++ = ',';
+ *p++ = ' ';
+ }
+
+ }
+
+ memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1);
+
+ return accept_header;
+}
+
+
+void
+raptor_parser_save_content(raptor_parser* rdf_parser, int save)
+{
+ if(rdf_parser->sb)
+ raptor_free_stringbuffer(rdf_parser->sb);
+
+ rdf_parser->sb= save ? raptor_new_stringbuffer() : NULL;
+}
+
+
+const unsigned char*
+raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p)
+{
+ unsigned char* buffer;
+ size_t len;
+
+ if(!rdf_parser->sb)
+ return NULL;
+
+ len = raptor_stringbuffer_length(rdf_parser->sb);
+ buffer = RAPTOR_MALLOC(unsigned char*, len + 1);
+ if(!buffer)
+ return NULL;
+
+ raptor_stringbuffer_copy_to_string(rdf_parser->sb, buffer, len);
+
+ if(length_p)
+ *length_p=len;
+
+ return buffer;
+}
+
+
+void
+raptor_parser_start_graph(raptor_parser* parser, raptor_uri* uri,
+ int is_declared)
+{
+ int flags = RAPTOR_GRAPH_MARK_START;
+ if(is_declared)
+ flags |= RAPTOR_GRAPH_MARK_DECLARED;
+
+ if(!parser->emit_graph_marks)
+ return;
+
+ if(parser->graph_mark_handler)
+ (*parser->graph_mark_handler)(parser->user_data, uri, flags);
+}
+
+
+void
+raptor_parser_end_graph(raptor_parser* parser, raptor_uri* uri, int is_declared)
+{
+ int flags = 0;
+ if(is_declared)
+ flags |= RAPTOR_GRAPH_MARK_DECLARED;
+
+ if(!parser->emit_graph_marks)
+ return;
+
+ if(parser->graph_mark_handler)
+ (*parser->graph_mark_handler)(parser->user_data, uri, flags);
+}
+
+
+/**
+ * raptor_parser_get_world:
+ * @rdf_parser: parser
+ *
+ * Get the #raptor_world object associated with a parser.
+ *
+ * Return value: raptor_world* pointer
+ **/
+raptor_world *
+raptor_parser_get_world(raptor_parser* rdf_parser)
+{
+ return rdf_parser->world;
+}
+
+
+/**
+ * raptor_parser_get_graph:
+ * @rdf_parser: parser
+ *
+ * Get the current graph for the parser
+ *
+ * The returned URI is owned by the caller and must be freed with
+ * raptor_free_uri()
+ *
+ * Return value: raptor_uri* graph name or NULL for the default graph
+ **/
+raptor_uri*
+raptor_parser_get_graph(raptor_parser* rdf_parser)
+{
+ if(rdf_parser->factory->get_graph)
+ return rdf_parser->factory->get_graph(rdf_parser);
+ return NULL;
+}
+
+
+/**
+ * raptor_parser_parse_iostream:
+ * @rdf_parser: parser
+ * @iostr: iostream to read from
+ * @base_uri: the base URI to use (or NULL)
+ *
+ * Parse content from an iostream
+ *
+ * If the parser requires a base URI and @base_uri is NULL, an error
+ * will be generated and the function will fail.
+ *
+ * Return value: non 0 on failure, <0 if a required base URI was missing
+ **/
+int
+raptor_parser_parse_iostream(raptor_parser* rdf_parser, raptor_iostream *iostr,
+ raptor_uri *base_uri)
+{
+ int rc = 0;
+
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(rdf_parser, raptor_parser, 1);
+ RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(iostr, raptor_iostr, 1);
+
+ rc = raptor_parser_parse_start(rdf_parser, base_uri);
+ if(rc)
+ return rc;
+
+ while(!raptor_iostream_read_eof(iostr)) {
+ int ilen;
+ size_t len;
+ int is_end;
+
+ ilen = raptor_iostream_read_bytes(rdf_parser->buffer, 1,
+ RAPTOR_READ_BUFFER_SIZE, iostr);
+ if(ilen < 0)
+ break;
+ len = RAPTOR_GOOD_CAST(size_t, ilen);
+ is_end = (len < RAPTOR_READ_BUFFER_SIZE);
+
+ rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end);
+ if(rc || is_end)
+ break;
+ }
+
+ return rc;
+}
+
+
+/* end not STANDALONE */
+#endif
+
+
+#ifdef STANDALONE
+#include <stdio.h>
+
+int main(int argc, char *argv[]);
+
+
+int
+main(int argc, char *argv[])
+{
+ raptor_world *world;
+ const char *program = raptor_basename(argv[0]);
+ int i;
+ const char *s;
+
+ world = raptor_new_world();
+ if(!world || raptor_world_open(world))
+ exit(1);
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ fprintf(stderr, "%s: Known options:\n", program);
+#endif
+
+ for(i = 0; i <= (int)raptor_option_get_count(); i++) {
+ raptor_option_description *od;
+ int fn;
+
+ od = raptor_world_get_option_description(world,
+ RAPTOR_DOMAIN_PARSER,
+ (raptor_option)i);
+ if(!od)
+ continue;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ fprintf(stderr, " %2d %-20s %s <%s>\n", i, od->name, od->label,
+ (od->uri ? (const char*)raptor_uri_as_string(od->uri) : ""));
+#endif
+ fn = raptor_world_get_option_from_uri(world, od->uri);
+ if(fn != i) {
+ fprintf(stderr,
+ "%s: raptor_option_from_uri() returned %d expected %d\n",
+ program, fn, i);
+ return 1;
+ }
+ raptor_free_option_description(od);
+ }
+
+ s = raptor_parser_get_accept_header_all(world);
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ fprintf(stderr, "Default HTTP accept header: '%s'\n", s);
+#endif
+ if(!s) {
+ fprintf(stderr, "%s: raptor_parser_get_accept_header_all() failed\n",
+ program);
+ return 1;
+ }
+ RAPTOR_FREE(char*, s);
+
+ raptor_free_world(world);
+
+ return 0;
+}
+
+#endif