summaryrefslogtreecommitdiffstats
path: root/src/turtle_parser.y
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--src/turtle_parser.y2128
1 files changed, 2128 insertions, 0 deletions
diff --git a/src/turtle_parser.y b/src/turtle_parser.y
new file mode 100644
index 0000000..1474a3d
--- /dev/null
+++ b/src/turtle_parser.y
@@ -0,0 +1,2128 @@
+/* -*- Mode: c; c-basic-offset: 2 -*-
+ *
+ * turtle_parser.y - Raptor Turtle / TRIG / N3 parsers - over tokens from turtle grammar lexer
+ *
+ * Copyright (C) 2003-2013, David Beckett http://www.dajobe.org/
+ * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/
+ *
+ * This package is Free Software and part of Redland http://librdf.org/
+ *
+ * It is licensed under the following three licenses as alternatives:
+ * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
+ * 2. GNU General Public License (GPL) V2 or any newer version
+ * 3. Apache License, V2.0 or any newer version
+ *
+ * You may not use this file except in compliance with at least one of
+ * the above three licenses.
+ *
+ * See LICENSE.html or LICENSE.txt at the top of this package for the
+ * complete terms and further detail along with the license texts for
+ * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively.
+ *
+ *
+ * Turtle is defined in http://www.dajobe.org/2004/01/turtle/
+ *
+ * Made from a subset of the terms in
+ * http://www.w3.org/DesignIssues/Notation3.html
+ *
+ * TRIG is defined in http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/
+ */
+
+%{
+#ifdef HAVE_CONFIG_H
+#include <raptor_config.h>
+#endif
+
+#include <stdio.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdarg.h>
+#ifdef HAVE_ERRNO_H
+#include <errno.h>
+#endif
+#ifdef HAVE_STDLIB_H
+#include <stdlib.h>
+#endif
+
+#include "raptor2.h"
+#include "raptor_internal.h"
+
+#include <turtle_parser.h>
+
+#define YY_NO_UNISTD_H 1
+#undef yylex
+#include <turtle_lexer.h>
+
+#include <turtle_common.h>
+
+
+/* Set RAPTOR_DEBUG to 3 for super verbose parsing - watching the shift/reduces */
+#if 0
+#undef RAPTOR_DEBUG
+#define RAPTOR_DEBUG 3
+#endif
+
+
+/* Fail with an debug error message if RAPTOR_DEBUG > 1 */
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+#define YYERROR_MSG(msg) do { fputs("** YYERROR ", RAPTOR_DEBUG_FH); fputs(msg, RAPTOR_DEBUG_FH); fputc('\n', RAPTOR_DEBUG_FH); YYERROR; } while(0)
+#else
+#define YYERROR_MSG(ignore) YYERROR
+#endif
+#define YYERR_MSG_GOTO(label,msg) do { errmsg = msg; goto label; } while(0)
+
+/* Slow down the grammar operation and watch it work */
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+#undef YYDEBUG
+#define YYDEBUG 1
+#endif
+
+#ifdef RAPTOR_DEBUG
+const char * turtle_token_print(raptor_world* world, int token,
+ TURTLE_PARSER_STYPE *lval);
+#endif
+
+
+/* the lexer does not seem to track this */
+#undef RAPTOR_TURTLE_USE_ERROR_COLUMNS
+
+/* set api.push-pull to "push" if this is defined */
+#undef TURTLE_PUSH_PARSE
+
+/* Prototypes */
+int turtle_parser_error(raptor_parser* rdf_parser, void* scanner, const char *msg);
+static void turtle_parser_error_simple(void* user_data, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3);
+
+/* Make lex/yacc interface as small as possible */
+#undef yylex
+#define yylex turtle_lexer_lex
+
+/* Prototypes for local functions */
+static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *triple);
+
+static void raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *triple);
+
+static void raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *triple);
+
+%}
+
+
+/* directives */
+
+%require "3.0"
+
+/* File prefix (-b) */
+%file-prefix "turtle_parser"
+
+/* Bison 2.6+ : Symbol prefix */
+%define api.prefix {turtle_parser_}
+/* Bison 3.4+ : Generated header file */
+%define api.header.include {<turtle_parser.h>}
+
+/* Write parser header file with macros (bison -d) */
+%defines
+
+/* Make verbose error messages for syntax errors */
+%define parse.error verbose
+
+/* Write output file with verbose descriptions of parser states */
+%verbose
+
+/* Generate code processing locations */
+ /* %locations */
+
+/* Pure parser - want a reentrant parser */
+%define api.pure full
+
+/* Push or pull parser? */
+%define api.push-pull pull
+
+/* Pure parser argument: lexer - yylex() and parser - yyparse() */
+%lex-param { yyscan_t yyscanner }
+%parse-param { raptor_parser* rdf_parser } { void* yyscanner }
+
+/* Interface between lexer and parser */
+%union {
+ unsigned char *string;
+ raptor_term *identifier;
+ raptor_sequence *sequence;
+ raptor_uri *uri;
+}
+
+
+/* others */
+
+%token A "a"
+%token HAT "^"
+%token DOT "."
+%token COMMA ","
+%token SEMICOLON ";"
+%token LEFT_SQUARE "["
+%token RIGHT_SQUARE "]"
+%token LEFT_ROUND "("
+%token RIGHT_ROUND ")"
+%token LEFT_CURLY "{"
+%token RIGHT_CURLY "}"
+%token TRUE_TOKEN "true"
+%token FALSE_TOKEN "false"
+%token PREFIX "@prefix"
+%token BASE "@base"
+%token SPARQL_PREFIX "PREFIX"
+%token SPARQL_BASE "BASE"
+
+/* literals */
+%token
+ <string>
+ STRING_LITERAL "string literal"
+ IDENTIFIER "identifier"
+ LANGTAG "langtag"
+ INTEGER_LITERAL "integer literal"
+ FLOATING_LITERAL "floating point literal"
+ DECIMAL_LITERAL "decimal literal"
+ BLANK_LITERAL "blank node"
+ <uri>
+ URI_LITERAL "URI literal"
+ GRAPH_NAME_LEFT_CURLY "Graph URI literal {"
+ QNAME_LITERAL "QName"
+
+/* syntax error */
+%token ERROR_TOKEN
+
+%type <identifier> subject predicate object verb literal resource blankNode collection blankNodePropertyList
+%type <sequence> triples objectList itemList predicateObjectList predicateObjectListOpt
+
+/* tidy up tokens after errors */
+
+%destructor {
+ if($$)
+ RAPTOR_FREE(char*, $$);
+} <string>
+
+%destructor {
+ if($$)
+ raptor_free_uri($$);
+} <uri>
+
+%destructor {
+ if($$)
+ raptor_free_term($$);
+} <identifier>
+
+%destructor {
+ if($$)
+ raptor_free_sequence($$);
+} <sequence>
+
+%%
+
+Document : statementList
+;;
+
+
+graph: GRAPH_NAME_LEFT_CURLY
+ {
+ /* action in mid-rule so this is run BEFORE the triples in graphBody */
+ raptor_turtle_parser* turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ if(!turtle_parser->trig)
+ turtle_parser_error(rdf_parser, yyscanner, "{ ... } is not allowed in Turtle");
+ else {
+ if(turtle_parser->graph_name)
+ raptor_free_term(turtle_parser->graph_name);
+ turtle_parser->graph_name = raptor_new_term_from_uri(rdf_parser->world, $1);
+ raptor_free_uri($1);
+ raptor_parser_start_graph(rdf_parser,
+ turtle_parser->graph_name->value.uri, 1);
+ }
+ }
+ graphBody RIGHT_CURLY
+{
+ raptor_turtle_parser* turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(turtle_parser->trig) {
+ raptor_parser_end_graph(rdf_parser,
+ turtle_parser->graph_name->value.uri, 1);
+ raptor_free_term(turtle_parser->graph_name);
+ turtle_parser->graph_name = NULL;
+ rdf_parser->emitted_default_graph = 0;
+ }
+}
+|
+LEFT_CURLY
+ {
+ /* action in mid-rule so this is run BEFORE the triples in graphBody */
+ raptor_turtle_parser* turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ if(!turtle_parser->trig)
+ turtle_parser_error(rdf_parser, yyscanner, "{ ... } is not allowed in Turtle");
+ else {
+ raptor_parser_start_graph(rdf_parser, NULL, 1);
+ rdf_parser->emitted_default_graph++;
+ }
+ }
+ graphBody RIGHT_CURLY
+{
+ raptor_turtle_parser* turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ if(turtle_parser->trig) {
+ raptor_parser_end_graph(rdf_parser, NULL, 1);
+ rdf_parser->emitted_default_graph = 0;
+ }
+}
+;
+
+
+graphBody: triplesList
+| %empty
+;
+
+triplesList: dotTriplesList
+| dotTriplesList DOT
+;
+
+dotTriplesList: triples
+{
+ int i;
+
+ if($1) {
+ for(i = 0; i < raptor_sequence_size($1); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($1, i);
+ raptor_turtle_generate_statement(rdf_parser, t2);
+ }
+ raptor_free_sequence($1);
+ }
+}
+| dotTriplesList DOT triples
+{
+ int i;
+
+ if($3) {
+ for(i = 0; i < raptor_sequence_size($3); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($3, i);
+ raptor_turtle_generate_statement(rdf_parser, t2);
+ }
+ raptor_free_sequence($3);
+ }
+}
+;
+
+statementList: statementList statement
+{
+ raptor_turtle_parser* turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ /* sync up consumed/processed so we know what to unwind */
+ turtle_parser->processed = turtle_parser->consumed;
+ turtle_parser->lineno_last_good = turtle_parser->lineno;
+}
+| statementList error
+| %empty
+;
+
+statement: directive
+| graph
+| triples DOT
+{
+ raptor_turtle_parser* turtle_parser;
+ int i;
+
+ /* yield deferred statements, if any */
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ if(turtle_parser->deferred) {
+ raptor_sequence* def = turtle_parser->deferred;
+
+ for(i = 0; i < raptor_sequence_size(def); i++) {
+ raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i);
+
+ raptor_turtle_handle_statement(rdf_parser, t2);
+ }
+ }
+
+ if($1) {
+ for(i = 0; i < raptor_sequence_size($1); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($1, i);
+ raptor_turtle_generate_statement(rdf_parser, t2);
+ }
+ raptor_free_sequence($1);
+ }
+
+ if(turtle_parser->deferred) {
+ /* debrief resources */
+ raptor_free_sequence(turtle_parser->deferred);
+ turtle_parser->deferred = NULL;
+ }
+}
+;
+
+triples: subject predicateObjectList
+{
+ int i;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("triples 1\n subject=");
+ if($1)
+ raptor_term_print_as_ntriples($1, stdout);
+ else
+ fputs("NULL", stdout);
+ if($2) {
+ printf("\n predicateObjectList (reverse order to syntax)=");
+ raptor_sequence_print($2, stdout);
+ printf("\n");
+ } else
+ printf("\n and empty predicateObjectList\n");
+#endif
+
+ if($1 && $2) {
+ /* have subject and non-empty property list, handle it */
+ for(i = 0; i < raptor_sequence_size($2); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i);
+ t2->subject = raptor_term_copy($1);
+ }
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after substitution predicateObjectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n\n");
+#endif
+ }
+
+ if($1)
+ raptor_free_term($1);
+
+ $$ = $2;
+}
+| blankNodePropertyList predicateObjectListOpt
+{
+ int i;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("triples 2\n blankNodePropertyList=");
+ if($1)
+ raptor_term_print_as_ntriples($1, stdout);
+ else
+ fputs("NULL", stdout);
+ if($2) {
+ printf("\n predicateObjectListOpt (reverse order to syntax)=");
+ raptor_sequence_print($2, stdout);
+ printf("\n");
+ } else
+ printf("\n and empty predicateObjectListOpt\n");
+#endif
+
+ if($1 && $2) {
+ /* have subject and non-empty predicate object list, handle it */
+ for(i = 0; i < raptor_sequence_size($2); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i);
+ t2->subject = raptor_term_copy($1);
+ }
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after substitution predicateObjectListOpt=");
+ raptor_sequence_print($2, stdout);
+ printf("\n\n");
+#endif
+ }
+
+ if($1)
+ raptor_free_term($1);
+
+ $$ = $2;
+}
+| error DOT
+{
+ $$ = NULL;
+}
+;
+
+
+objectList: objectList COMMA object
+{
+ raptor_statement *triple;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("objectList 1\n");
+ if($3) {
+ printf(" object=\n");
+ raptor_term_print_as_ntriples($3, stdout);
+ printf("\n");
+ } else
+ printf(" and empty object\n");
+ if($1) {
+ printf(" objectList=");
+ raptor_sequence_print($1, stdout);
+ printf("\n");
+ } else
+ printf(" and empty objectList\n");
+#endif
+
+ if(!$3)
+ $$ = NULL;
+ else {
+ triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $3, NULL);
+ if(!triple) {
+ raptor_free_sequence($1);
+ YYERROR;
+ }
+ if(raptor_sequence_push($1, triple)) {
+ raptor_free_sequence($1);
+ YYERROR;
+ }
+ $$ = $1;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" objectList is now ");
+ raptor_sequence_print($$, stdout);
+ printf("\n\n");
+#endif
+ }
+}
+| object
+{
+ raptor_statement *triple;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("objectList 2\n");
+ if($1) {
+ printf(" object=\n");
+ raptor_term_print_as_ntriples($1, stdout);
+ printf("\n");
+ } else
+ printf(" and empty object\n");
+#endif
+
+ if(!$1)
+ $$ = NULL;
+ else {
+ triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $1, NULL);
+ if(!triple)
+ YYERROR;
+#ifdef RAPTOR_DEBUG
+ $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement,
+ (raptor_data_print_handler)raptor_statement_print);
+#else
+ $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL);
+#endif
+ if(!$$) {
+ raptor_free_statement(triple);
+ YYERROR;
+ }
+ if(raptor_sequence_push($$, triple)) {
+ raptor_free_sequence($$);
+ $$ = NULL;
+ YYERROR;
+ }
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" objectList is now ");
+ raptor_sequence_print($$, stdout);
+ printf("\n\n");
+#endif
+ }
+}
+;
+
+itemList: itemList object
+{
+ raptor_statement *triple;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("objectList 1\n");
+ if($2) {
+ printf(" object=\n");
+ raptor_term_print_as_ntriples($2, stdout);
+ printf("\n");
+ } else
+ printf(" and empty object\n");
+ if($1) {
+ printf(" objectList=");
+ raptor_sequence_print($1, stdout);
+ printf("\n");
+ } else
+ printf(" and empty objectList\n");
+#endif
+
+ if(!$2)
+ $$ = NULL;
+ else {
+ triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $2, NULL);
+ if(!triple) {
+ raptor_free_sequence($1);
+ YYERROR;
+ }
+ if(raptor_sequence_push($1, triple)) {
+ raptor_free_sequence($1);
+ YYERROR;
+ }
+ $$ = $1;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" objectList is now ");
+ raptor_sequence_print($$, stdout);
+ printf("\n\n");
+#endif
+ }
+}
+| object
+{
+ raptor_statement *triple;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("objectList 2\n");
+ if($1) {
+ printf(" object=\n");
+ raptor_term_print_as_ntriples($1, stdout);
+ printf("\n");
+ } else
+ printf(" and empty object\n");
+#endif
+
+ if(!$1)
+ $$ = NULL;
+ else {
+ triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $1, NULL);
+ if(!triple)
+ YYERROR;
+#ifdef RAPTOR_DEBUG
+ $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement,
+ (raptor_data_print_handler)raptor_statement_print);
+#else
+ $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL);
+#endif
+ if(!$$) {
+ raptor_free_statement(triple);
+ YYERROR;
+ }
+ if(raptor_sequence_push($$, triple)) {
+ raptor_free_sequence($$);
+ $$ = NULL;
+ YYERROR;
+ }
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" objectList is now ");
+ raptor_sequence_print($$, stdout);
+ printf("\n\n");
+#endif
+ }
+}
+;
+
+verb: predicate
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("verb predicate=");
+ raptor_term_print_as_ntriples($1, stdout);
+ printf("\n");
+#endif
+
+ $$ = $1;
+}
+| A
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("verb predicate = rdf:type (a)\n");
+#endif
+
+ $$ = raptor_term_copy(RAPTOR_RDF_type_term(rdf_parser->world));
+ if(!$$)
+ YYERROR;
+}
+;
+
+
+predicateObjectList: predicateObjectList SEMICOLON verb objectList
+{
+ int i;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("predicateObjectList 1\n verb=");
+ raptor_term_print_as_ntriples($3, stdout);
+ printf("\n objectList=");
+ raptor_sequence_print($4, stdout);
+ printf("\n predicateObjectList=");
+ raptor_sequence_print($1, stdout);
+ printf("\n\n");
+#endif
+
+ if($4 == NULL) {
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" empty objectList not processed\n");
+#endif
+ } else if($3 && $4) {
+ /* non-empty property list, handle it */
+ for(i = 0; i < raptor_sequence_size($4); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($4, i);
+ t2->predicate = raptor_term_copy($3);
+ }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after substitution objectList=");
+ raptor_sequence_print($4, stdout);
+ printf("\n");
+#endif
+ }
+
+ if($1 == NULL) {
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" empty predicateObjectList not copied\n\n");
+#endif
+ } else if($3 && $4 && $1) {
+ while(raptor_sequence_size($4)) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_unshift($4);
+ if(raptor_sequence_push($1, t2)) {
+ raptor_free_sequence($1);
+ raptor_free_term($3);
+ raptor_free_sequence($4);
+ YYERROR;
+ }
+ }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after appending objectList (reverse order)=");
+ raptor_sequence_print($1, stdout);
+ printf("\n\n");
+#endif
+
+ raptor_free_sequence($4);
+ }
+
+ if($3)
+ raptor_free_term($3);
+
+ $$ = $1;
+}
+| verb objectList
+{
+ int i;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("predicateObjectList 2\n verb=");
+ raptor_term_print_as_ntriples($1, stdout);
+ if($2) {
+ printf("\n objectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n");
+ } else
+ printf("\n and empty objectList\n");
+#endif
+
+ if($1 && $2) {
+ for(i = 0; i < raptor_sequence_size($2); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i);
+ t2->predicate = raptor_term_copy($1);
+ }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after substitution objectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n\n");
+#endif
+ }
+
+ if($1)
+ raptor_free_term($1);
+
+ $$ = $2;
+}
+| predicateObjectList SEMICOLON
+{
+ $$ = $1;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("predicateObjectList 5\n trailing semicolon returning existing list ");
+ raptor_sequence_print($$, stdout);
+ printf("\n\n");
+#endif
+}
+;
+
+directive : prefix | base
+;
+
+prefix: PREFIX IDENTIFIER URI_LITERAL DOT
+{
+ unsigned char *prefix = $2;
+ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context);
+ raptor_namespace *ns;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("directive PREFIX %s %s\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3));
+#endif
+
+ if(prefix) {
+ size_t len = strlen((const char*)prefix);
+ if(prefix[len-1] == ':') {
+ if(len == 1)
+ /* declaring default namespace prefix PREFIX : ... */
+ prefix = NULL;
+ else
+ prefix[len-1]='\0';
+ }
+ }
+
+ ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, $3, 0);
+ if(ns) {
+ raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns);
+ raptor_parser_start_namespace(rdf_parser, ns);
+ }
+
+ if($2)
+ RAPTOR_FREE(char*, $2);
+ raptor_free_uri($3);
+
+ if(!ns)
+ YYERROR;
+}
+| SPARQL_PREFIX IDENTIFIER URI_LITERAL
+{
+ unsigned char *prefix = $2;
+ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context);
+ raptor_namespace *ns;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("directive @prefix %s %s.\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3));
+#endif
+
+ if(prefix) {
+ size_t len = strlen((const char*)prefix);
+ if(prefix[len-1] == ':') {
+ if(len == 1)
+ /* declaring default namespace prefix @prefix : ... */
+ prefix = NULL;
+ else
+ prefix[len-1]='\0';
+ }
+ }
+
+ ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, $3, 0);
+ if(ns) {
+ raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns);
+ raptor_parser_start_namespace(rdf_parser, ns);
+ }
+
+ if($2)
+ RAPTOR_FREE(char*, $2);
+ raptor_free_uri($3);
+
+ if(!ns)
+ YYERROR;
+}
+;
+
+
+base: BASE URI_LITERAL DOT
+{
+ raptor_uri *uri=$2;
+
+ if(rdf_parser->base_uri)
+ raptor_free_uri(rdf_parser->base_uri);
+ rdf_parser->base_uri = uri;
+}
+| SPARQL_BASE URI_LITERAL
+{
+ raptor_uri *uri=$2;
+
+ if(rdf_parser->base_uri)
+ raptor_free_uri(rdf_parser->base_uri);
+ rdf_parser->base_uri = uri;
+}
+;
+
+subject: resource
+{
+ $$ = $1;
+}
+| blankNode
+{
+ $$ = $1;
+}
+| collection
+{
+ $$ = $1;
+}
+;
+
+
+predicate: resource
+{
+ $$ = $1;
+}
+;
+
+
+object: resource
+{
+ $$ = $1;
+}
+| blankNode
+{
+ $$ = $1;
+}
+| collection
+{
+ $$ = $1;
+}
+| blankNodePropertyList
+{
+ $$ = $1;
+}
+| literal
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("object literal=");
+ raptor_term_print_as_ntriples($1, stdout);
+ printf("\n");
+#endif
+
+ $$ = $1;
+}
+;
+
+
+literal: STRING_LITERAL LANGTAG
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("literal + language string=\"%s\"\n", $1);
+#endif
+
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, NULL, $2);
+ RAPTOR_FREE(char*, $1);
+ RAPTOR_FREE(char*, $2);
+ if(!$$)
+ YYERROR;
+}
+| STRING_LITERAL LANGTAG HAT URI_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("literal + language=\"%s\" datatype string=\"%s\" uri=\"%s\"\n", $1, $2, raptor_uri_as_string($4));
+#endif
+
+ if($4) {
+ if($2) {
+ raptor_parser_error(rdf_parser,
+ "Language not allowed with datatyped literal");
+ RAPTOR_FREE(char*, $2);
+ $2 = NULL;
+ }
+
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $4, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri($4);
+ if(!$$)
+ YYERROR;
+ } else
+ $$ = NULL;
+
+}
+| STRING_LITERAL LANGTAG HAT QNAME_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("literal + language=\"%s\" datatype string=\"%s\" qname URI=<%s>\n", $1, $2, raptor_uri_as_string($4));
+#endif
+
+ if($4) {
+ if($2) {
+ raptor_parser_error(rdf_parser,
+ "Language not allowed with datatyped literal");
+ RAPTOR_FREE(char*, $2);
+ $2 = NULL;
+ }
+
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $4, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri($4);
+ if(!$$)
+ YYERROR;
+ } else
+ $$ = NULL;
+
+}
+| STRING_LITERAL HAT URI_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("literal + datatype string=\"%s\" uri=\"%s\"\n", $1, raptor_uri_as_string($3));
+#endif
+
+ if($3) {
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $3, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri($3);
+ if(!$$)
+ YYERROR;
+ } else
+ $$ = NULL;
+
+}
+| STRING_LITERAL HAT QNAME_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("literal + datatype string=\"%s\" qname URI=<%s>\n", $1, raptor_uri_as_string($3));
+#endif
+
+ if($3) {
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $3, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri($3);
+ if(!$$)
+ YYERROR;
+ } else
+ $$ = NULL;
+}
+| STRING_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("literal string=\"%s\"\n", $1);
+#endif
+
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, NULL, NULL);
+ RAPTOR_FREE(char*, $1);
+ if(!$$)
+ YYERROR;
+}
+| INTEGER_LITERAL
+{
+ raptor_uri *uri;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource integer=%s\n", $1);
+#endif
+ uri = raptor_uri_copy(rdf_parser->world->xsd_integer_uri);
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri(uri);
+ if(!$$)
+ YYERROR;
+}
+| FLOATING_LITERAL
+{
+ raptor_uri *uri;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource double=%s\n", $1);
+#endif
+ uri = raptor_uri_copy(rdf_parser->world->xsd_double_uri);
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri(uri);
+ if(!$$)
+ YYERROR;
+}
+| DECIMAL_LITERAL
+{
+ raptor_uri *uri;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource decimal=%s\n", $1);
+#endif
+ uri = raptor_uri_copy(rdf_parser->world->xsd_decimal_uri);
+ if(!uri) {
+ RAPTOR_FREE(char*, $1);
+ YYERROR;
+ }
+ $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL);
+ RAPTOR_FREE(char*, $1);
+ raptor_free_uri(uri);
+ if(!$$)
+ YYERROR;
+}
+| TRUE_TOKEN
+{
+ raptor_uri *uri;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ fputs("resource boolean true\n", stderr);
+#endif
+ uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri);
+ $$ = raptor_new_term_from_literal(rdf_parser->world,
+ (const unsigned char*)"true", uri, NULL);
+ raptor_free_uri(uri);
+ if(!$$)
+ YYERROR;
+}
+| FALSE_TOKEN
+{
+ raptor_uri *uri;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ fputs("resource boolean false\n", stderr);
+#endif
+ uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri);
+ $$ = raptor_new_term_from_literal(rdf_parser->world,
+ (const unsigned char*)"false", uri, NULL);
+ raptor_free_uri(uri);
+ if(!$$)
+ YYERROR;
+}
+;
+
+
+resource: URI_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource URI=<%s>\n", raptor_uri_as_string($1));
+#endif
+
+ if($1) {
+ $$ = raptor_new_term_from_uri(rdf_parser->world, $1);
+ raptor_free_uri($1);
+ if(!$$)
+ YYERROR;
+ } else
+ $$ = NULL;
+}
+| QNAME_LITERAL
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource qname URI=<%s>\n", raptor_uri_as_string($1));
+#endif
+
+ if($1) {
+ $$ = raptor_new_term_from_uri(rdf_parser->world, $1);
+ raptor_free_uri($1);
+ if(!$$)
+ YYERROR;
+ } else
+ $$ = NULL;
+}
+;
+
+
+predicateObjectListOpt: predicateObjectList
+{
+ $$ = $1;
+}
+| %empty
+{
+ $$ = NULL;
+}
+;
+
+
+blankNode: BLANK_LITERAL
+{
+ const unsigned char *id;
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("subject blank=\"%s\"\n", $1);
+#endif
+ id = raptor_world_internal_generate_id(rdf_parser->world, $1);
+ if(!id)
+ YYERROR;
+
+ $$ = raptor_new_term_from_blank(rdf_parser->world, id);
+ RAPTOR_FREE(char*, id);
+
+ if(!$$)
+ YYERROR;
+}
+;
+
+blankNodePropertyList: LEFT_SQUARE predicateObjectListOpt RIGHT_SQUARE
+{
+ int i;
+ const unsigned char *id;
+
+ id = raptor_world_generate_bnodeid(rdf_parser->world);
+ if(!id) {
+ if($2)
+ raptor_free_sequence($2);
+ YYERROR;
+ }
+
+ $$ = raptor_new_term_from_blank(rdf_parser->world, id);
+ RAPTOR_FREE(char*, id);
+ if(!$$) {
+ if($2)
+ raptor_free_sequence($2);
+ YYERROR;
+ }
+
+ if($2 == NULL) {
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource\n predicateObjectList=");
+ raptor_term_print_as_ntriples($$, stdout);
+ printf("\n");
+#endif
+ } else {
+ /* non-empty property list, handle it */
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource\n predicateObjectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n");
+#endif
+
+ for(i = 0; i < raptor_sequence_size($2); i++) {
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i);
+ t2->subject = raptor_term_copy($$);
+ raptor_turtle_defer_statement(rdf_parser, t2);
+ }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after substitution objectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n\n");
+#endif
+
+ raptor_free_sequence($2);
+
+ }
+
+}
+;
+
+
+collection: LEFT_ROUND itemList RIGHT_ROUND
+{
+ int i;
+ raptor_world* world = rdf_parser->world;
+ raptor_term* first_identifier = NULL;
+ raptor_term* rest_identifier = NULL;
+ raptor_term* object = NULL;
+ raptor_term* blank = NULL;
+ char const *errmsg = NULL;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("collection\n objectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n");
+#endif
+
+ first_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_first_URI(world));
+ if(!first_identifier)
+ YYERR_MSG_GOTO(err_collection, "Cannot create rdf:first term");
+ rest_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_rest_URI(world));
+ if(!rest_identifier)
+ YYERR_MSG_GOTO(err_collection, "Cannot create rdf:rest term");
+
+ /* non-empty property list, handle it */
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("resource\n predicateObjectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n");
+#endif
+
+ object = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world));
+ if(!object)
+ YYERR_MSG_GOTO(err_collection, "Cannot create rdf:nil term");
+
+ for(i = raptor_sequence_size($2)-1; i>=0; i--) {
+ raptor_term* temp;
+ raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i);
+ const unsigned char *blank_id;
+
+ blank_id = raptor_world_generate_bnodeid(rdf_parser->world);
+ if(!blank_id)
+ YYERR_MSG_GOTO(err_collection, "Cannot create bnodeid");
+
+ blank = raptor_new_term_from_blank(rdf_parser->world,
+ blank_id);
+ RAPTOR_FREE(char*, blank_id);
+ if(!blank)
+ YYERR_MSG_GOTO(err_collection, "Cannot create bnode");
+
+ t2->subject = blank;
+ t2->predicate = first_identifier;
+ /* t2->object already set to the value we want */
+ raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2);
+
+ temp = t2->object;
+
+ t2->subject = blank;
+ t2->predicate = rest_identifier;
+ t2->object = object;
+ raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2);
+
+ t2->subject = NULL;
+ t2->predicate = NULL;
+ t2->object = temp;
+
+ raptor_free_term(object);
+ object = blank;
+ blank = NULL;
+ }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf(" after substitution objectList=");
+ raptor_sequence_print($2, stdout);
+ printf("\n\n");
+#endif
+
+ raptor_free_sequence($2);
+
+ raptor_free_term(first_identifier);
+ raptor_free_term(rest_identifier);
+
+ $$=object;
+
+ err_collection:
+ if(errmsg) {
+ if(blank)
+ raptor_free_term(blank);
+
+ if(object)
+ raptor_free_term(object);
+
+ if(rest_identifier)
+ raptor_free_term(rest_identifier);
+
+ if(first_identifier)
+ raptor_free_term(first_identifier);
+
+ raptor_free_sequence($2);
+
+ YYERROR_MSG(errmsg);
+ }
+}
+| LEFT_ROUND RIGHT_ROUND
+{
+ raptor_world* world = rdf_parser->world;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("collection\n empty\n");
+#endif
+
+ $$ = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world));
+ if(!$$)
+ YYERROR;
+}
+;
+
+
+%%
+
+
+/* Support functions */
+
+/* Error handler with scanner context, during parsing */
+int
+turtle_parser_error(raptor_parser* rdf_parser, void* scanner,
+ const char *msg)
+{
+ raptor_turtle_parser* turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(turtle_parser->consumed == turtle_parser->consumable &&
+ turtle_parser->processed < turtle_parser->consumed &&
+ !turtle_parser->is_end) {
+ /* we encountered an error on or around the last byte of the buffer
+ * sorting it in the next run aye? */
+ return 0;
+ }
+
+ if(turtle_parser->error_count++)
+ return 0;
+
+ rdf_parser->locator.line = turtle_parser->lineno;
+#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS
+ rdf_parser->locator.column = turtle_lexer_get_column(yyscanner);
+#endif
+
+ raptor_log_error(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR,
+ &rdf_parser->locator, msg);
+
+ return 0;
+}
+
+
+/* Error handler within raptor functions and callbacks */
+static void
+turtle_parser_error_simple(void* user_data, const char *msg, ...)
+{
+ raptor_parser* rdf_parser = (raptor_parser*)user_data;
+ raptor_turtle_parser* turtle_parser;
+ va_list args;
+
+ va_start(args, msg);
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(turtle_parser->consumed == turtle_parser->consumable &&
+ turtle_parser->processed < turtle_parser->consumed &&
+ !turtle_parser->is_end) {
+ /* we encountered an error on or around the last byte of the buffer
+ * sorting it in the next run aye? */
+ goto tidy;
+ }
+
+ if(turtle_parser->error_count++)
+ goto tidy;
+
+ rdf_parser->locator.line = turtle_parser->lineno;
+#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS
+ rdf_parser->locator.column = turtle_lexer_get_column(yyscanner);
+#endif
+
+ raptor_log_error_varargs(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR,
+ &rdf_parser->locator, msg,
+ args);
+
+tidy:
+ va_end(args);
+}
+
+
+int
+turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...)
+{
+ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ va_list arguments;
+
+ if(!turtle_parser)
+ return 1;
+
+ if(turtle_parser->error_count++)
+ return 0;
+
+ rdf_parser->locator.line = turtle_parser->lineno;
+#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS
+ rdf_parser->locator.column = turtle_lexer_get_column(yyscanner);
+#endif
+
+ va_start(arguments, message);
+
+ raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser),
+ RAPTOR_LOG_LEVEL_ERROR, message, arguments);
+
+ va_end(arguments);
+
+ return 0;
+}
+
+
+raptor_uri*
+turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len)
+{
+ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(!turtle_parser)
+ return NULL;
+
+ rdf_parser->locator.line = turtle_parser->lineno;
+#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS
+ rdf_parser->locator.column = turtle_lexer_get_column(yyscanner);
+#endif
+
+ name_len = raptor_turtle_expand_qname_escapes(name, name_len,
+ (raptor_simple_message_handler)turtle_parser_error_simple, rdf_parser);
+ if(!name_len)
+ return NULL;
+
+ return raptor_qname_string_to_uri(&turtle_parser->namespaces, name, name_len);
+}
+
+
+
+#ifndef TURTLE_PUSH_PARSE
+static int
+turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length)
+{
+ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ int rc;
+
+ if(!string || !*string)
+ return 0;
+
+ if(turtle_lexer_lex_init(&turtle_parser->scanner))
+ return 1;
+ turtle_parser->scanner_set = 1;
+
+#if defined(YYDEBUG) && YYDEBUG > 0
+ turtle_lexer_set_debug(1 ,&turtle_parser->scanner);
+ turtle_parser_debug = 1;
+#endif
+
+ turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner);
+ (void)turtle_lexer__scan_bytes((char *)string, (yy_size_t)length, turtle_parser->scanner);
+
+ rc = turtle_parser_parse(rdf_parser, turtle_parser->scanner);
+
+ turtle_lexer_lex_destroy(turtle_parser->scanner);
+ turtle_parser->scanner_set = 0;
+
+ return rc;
+}
+#endif
+
+
+#ifdef TURTLE_PUSH_PARSE
+static int
+turtle_push_parse(raptor_parser *rdf_parser,
+ const char *string, size_t length)
+{
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ raptor_world* world = rdf_parser->world;
+#endif
+ raptor_turtle_parser* turtle_parser;
+ void *buffer;
+ int status;
+ yypstate *ps;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(!string || !*string)
+ return 0;
+
+ if(turtle_lexer_lex_init(&turtle_parser->scanner))
+ return 1;
+ turtle_parser->scanner_set = 1;
+
+#if defined(YYDEBUG) && YYDEBUG > 0
+ turtle_lexer_set_debug(1 ,&turtle_parser->scanner);
+ turtle_parser_debug = 1;
+#endif
+
+ turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner);
+ buffer = turtle_lexer__scan_bytes(string, (yy_size_t)length, turtle_parser->scanner);
+
+ /* returns a parser instance or 0 on out of memory */
+ ps = yypstate_new();
+ if(!ps)
+ return 1;
+
+ do {
+ TURTLE_PARSER_YYSTYPE lval;
+ int token;
+
+ memset(&lval, 0, sizeof(TURTLE_PARSER_YYSTYPE));
+
+ token = turtle_lexer_lex(&lval, turtle_parser->scanner);
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ printf("token %s\n", turtle_token_print(world, token, &lval));
+#endif
+
+ status = yypush_parse(ps, token, &lval, rdf_parser, turtle_parser->scanner);
+
+ /* turtle_token_free(world, token, &lval); */
+
+ if(!token || token == EOF || token == ERROR_TOKEN)
+ break;
+ } while (status == YYPUSH_MORE);
+ yypstate_delete(ps);
+
+ turtle_lexer_lex_destroy(turtle_parser->scanner);
+ turtle_parser->scanner_set = 0;
+
+ return 0;
+}
+#endif
+
+
+/**
+ * raptor_turtle_parse_init - Initialise the Raptor Turtle parser
+ *
+ * Return value: non 0 on failure
+ **/
+
+static int
+raptor_turtle_parse_init(raptor_parser* rdf_parser, const char *name) {
+ raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ if(raptor_namespaces_init(rdf_parser->world, &turtle_parser->namespaces, 0))
+ return 1;
+
+ turtle_parser->trig = !strcmp(name, "trig");
+
+ return 0;
+}
+
+
+/* PUBLIC FUNCTIONS */
+
+
+/*
+ * raptor_turtle_parse_terminate - Free the Raptor Turtle parser
+ * @rdf_parser: parser object
+ *
+ **/
+static void
+raptor_turtle_parse_terminate(raptor_parser *rdf_parser) {
+ raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ raptor_namespaces_clear(&turtle_parser->namespaces);
+
+ if(turtle_parser->scanner_set) {
+ turtle_lexer_lex_destroy(turtle_parser->scanner);
+ turtle_parser->scanner_set = 0;
+ }
+
+ if(turtle_parser->buffer)
+ RAPTOR_FREE(cdata, turtle_parser->buffer);
+
+ if(turtle_parser->graph_name) {
+ raptor_free_term(turtle_parser->graph_name);
+ turtle_parser->graph_name = NULL;
+ }
+}
+
+
+static void
+raptor_turtle_clone_statement(raptor_parser *parser, raptor_statement *t)
+{
+ raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)parser->context;
+ raptor_statement *statement = &parser->statement;
+
+ if(!t->subject || !t->predicate || !t->object)
+ return;
+
+ if(turtle_parser->trig && turtle_parser->graph_name)
+ statement->graph = raptor_term_copy(turtle_parser->graph_name);
+
+ if(!parser->emitted_default_graph && !turtle_parser->graph_name) {
+ /* for non-TRIG - start default graph at first triple */
+ raptor_parser_start_graph(parser, NULL, 0);
+ parser->emitted_default_graph++;
+ }
+
+ /* Two choices for subject for Turtle */
+ if(t->subject->type == RAPTOR_TERM_TYPE_BLANK) {
+ statement->subject = raptor_new_term_from_blank(parser->world,
+ t->subject->value.blank.string);
+ } else {
+ /* RAPTOR_TERM_TYPE_URI */
+ RAPTOR_ASSERT(t->subject->type != RAPTOR_TERM_TYPE_URI,
+ "subject type is not resource");
+ statement->subject = raptor_new_term_from_uri(parser->world,
+ t->subject->value.uri);
+ }
+
+ /* Predicates are URIs but check for bad ordinals */
+ if(!strncmp((const char*)raptor_uri_as_string(t->predicate->value.uri),
+ "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) {
+ unsigned char* predicate_uri_string = raptor_uri_as_string(t->predicate->value.uri);
+ int predicate_ordinal = raptor_check_ordinal(predicate_uri_string+44);
+ if(predicate_ordinal <= 0)
+ raptor_parser_error(parser, "Illegal ordinal value %d in property '%s'.", predicate_ordinal, predicate_uri_string);
+ }
+
+ statement->predicate = raptor_new_term_from_uri(parser->world,
+ t->predicate->value.uri);
+
+
+ /* Three choices for object for Turtle */
+ if(t->object->type == RAPTOR_TERM_TYPE_URI) {
+ statement->object = raptor_new_term_from_uri(parser->world,
+ t->object->value.uri);
+ } else if(t->object->type == RAPTOR_TERM_TYPE_BLANK) {
+ statement->object = raptor_new_term_from_blank(parser->world,
+ t->object->value.blank.string);
+ } else {
+ /* RAPTOR_TERM_TYPE_LITERAL */
+ RAPTOR_ASSERT(t->object->type != RAPTOR_TERM_TYPE_LITERAL,
+ "object type is not literal");
+ statement->object = raptor_new_term_from_literal(parser->world,
+ t->object->value.literal.string,
+ t->object->value.literal.datatype,
+ t->object->value.literal.language);
+ }
+}
+
+static void
+raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *t)
+{
+ if(!t->subject || !t->predicate || !t->object)
+ return;
+
+ if(!parser->statement_handler)
+ return;
+
+ /* Generate the statement */
+ (*parser->statement_handler)(parser->user_data, t);
+}
+
+static void
+raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *t)
+{
+ raptor_turtle_clone_statement(parser, t);
+ raptor_turtle_handle_statement(parser, &parser->statement);
+ /* clear resources */
+ raptor_statement_clear(&parser->statement);
+}
+
+static void
+raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *t)
+{
+ raptor_statement* st;
+ raptor_turtle_parser* turtle_parser;
+
+ raptor_turtle_clone_statement(parser, t);
+ st = raptor_new_statement(parser->world);
+ if(!st) {
+ return;
+ }
+ /* copy static to dynamic statement, it's a move really */
+ st->subject = parser->statement.subject, parser->statement.subject = NULL;
+ st->predicate = parser->statement.predicate, parser->statement.predicate = NULL;
+ st->object = parser->statement.object, parser->statement.object = NULL;
+ st->graph = parser->statement.graph, parser->statement.graph = NULL;
+
+ /* prep deferred list */
+ turtle_parser = (raptor_turtle_parser*)parser->context;
+ if(!turtle_parser->deferred) {
+ turtle_parser->deferred = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL);
+ if(!turtle_parser->deferred) {
+ goto free_seq;
+ }
+ }
+ /* append to deferred list */
+ if(raptor_sequence_push(turtle_parser->deferred, st)) {
+ free_seq:
+ raptor_free_statement(st);
+ }
+}
+
+
+
+static int
+raptor_turtle_parse_chunk(raptor_parser* rdf_parser,
+ const unsigned char *s, size_t len,
+ int is_end)
+{
+ raptor_turtle_parser *turtle_parser;
+ char *ptr;
+ int rc;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ RAPTOR_DEBUG2("adding %d bytes to line buffer\n", (int)len);
+#endif
+
+ if(!len && !is_end) {
+ /* nothing to do */
+ return 0;
+ }
+
+ /* the actual buffer will contained unprocessed characters from
+ * the last run plus the chunk passed here */
+ turtle_parser->end_of_buffer = turtle_parser->consumed + len;
+ if(turtle_parser->end_of_buffer > turtle_parser->buffer_length) {
+ /* resize */
+ size_t new_buffer_length = turtle_parser->end_of_buffer;
+
+ turtle_parser->buffer = RAPTOR_REALLOC(char*, turtle_parser->buffer,
+ new_buffer_length + 1);
+
+ /* adjust stored length */
+ turtle_parser->buffer_length = new_buffer_length;
+ }
+ if(!turtle_parser->buffer && turtle_parser->buffer_length) {
+ /* we tried to alloc a buffer but we failed */
+ raptor_parser_fatal_error(rdf_parser, "Out of memory");
+ return 1;
+ }
+ if(is_end && !turtle_parser->end_of_buffer) {
+ /* Nothing to do */
+ return 0;
+ }
+
+ /* move pointer to end of cdata buffer */
+ ptr = turtle_parser->buffer + turtle_parser->consumed;
+
+ /* now write new stuff at end of cdata buffer */
+ memcpy(ptr, s, len);
+ ptr += len;
+ *ptr = '\0';
+
+ /* reset processed counter */
+ turtle_parser->processed = 0U;
+ /* unconsume */
+ turtle_parser->consumed = 0U;
+ /* reset line numbers */
+ turtle_parser->lineno = turtle_parser->lineno_last_good;
+
+ /* let everyone know if this is the last chunk */
+ turtle_parser->is_end = is_end;
+ if(!is_end) {
+ /* it's safer not to pass the very last line to the lexer
+ * just in case we end up with EOB-in-the-middle-of-X situations */
+ size_t i = turtle_parser->end_of_buffer;
+ while(i > 0U && turtle_parser->buffer[--i] != '\n');
+ /* either i == 0U or i points to the last \n before the end-of-buffer */
+ turtle_parser->consumable = i;
+ } else {
+ /* otherwise the consumable number of bytes coincides with the EOB */
+ turtle_parser->consumable = turtle_parser->end_of_buffer;
+ }
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
+ RAPTOR_DEBUG3("buffer buffer now '%s' (%ld bytes)\n",
+ turtle_parser->buffer, turtle_parser->buffer_length);
+#endif
+
+#ifdef TURTLE_PUSH_PARSE
+ rc = turtle_push_parse(rdf_parser,
+ turtle_parser->buffer, turtle_parser->consumable);
+#else
+ rc = turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->consumable);
+#endif
+
+ if(turtle_parser->error_count) {
+ rc = 1;
+ } else if(!is_end) {
+ /* move stuff to the beginning of the buffer */
+ turtle_parser->consumed = turtle_parser->end_of_buffer - turtle_parser->processed;
+ if(turtle_parser->consumed && turtle_parser->processed) {
+ memmove(turtle_parser->buffer,
+ turtle_parser->buffer + turtle_parser->processed,
+ turtle_parser->consumed);
+ /* cancel all deferred eval's */
+ if(turtle_parser->deferred) {
+ raptor_free_sequence(turtle_parser->deferred);
+ turtle_parser->deferred = NULL;
+ }
+ }
+ } else {
+ /* this was the last chunk, finalise */
+ if(turtle_parser->deferred) {
+ raptor_sequence* def = turtle_parser->deferred;
+ int i;
+ for(i = 0; i < raptor_sequence_size(def); i++) {
+ raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i);
+
+ raptor_turtle_handle_statement(rdf_parser, t2);
+ }
+ }
+ if(rdf_parser->emitted_default_graph) {
+ /* for non-TRIG - end default graph after last triple */
+ raptor_parser_end_graph(rdf_parser, NULL, 0);
+ rdf_parser->emitted_default_graph--;
+ }
+ if(turtle_parser->deferred) {
+ /* clear resources */
+ raptor_free_sequence(turtle_parser->deferred);
+ turtle_parser->deferred = NULL;
+ }
+ }
+ return rc;
+}
+
+
+static int
+raptor_turtle_parse_start(raptor_parser *rdf_parser)
+{
+ raptor_locator *locator=&rdf_parser->locator;
+ raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+
+ /* base URI required for Turtle */
+ if(!rdf_parser->base_uri)
+ return 1;
+
+ locator->line = 1;
+ locator->column= -1; /* No column info */
+ locator->byte= -1; /* No bytes info */
+
+ if(turtle_parser->buffer_length) {
+ RAPTOR_FREE(cdata, turtle_parser->buffer);
+ turtle_parser->buffer = NULL;
+ turtle_parser->buffer_length = 0;
+ }
+
+ turtle_parser->lineno = 1;
+
+ return 0;
+}
+
+
+static int
+raptor_turtle_parse_recognise_syntax(raptor_parser_factory* factory,
+ const unsigned char *buffer, size_t len,
+ const unsigned char *identifier,
+ const unsigned char *suffix,
+ const char *mime_type)
+{
+ int score= 0;
+
+ if(suffix) {
+ if(!strcmp((const char*)suffix, "ttl"))
+ score = 8;
+ if(!strcmp((const char*)suffix, "n3"))
+ score = 3;
+ }
+
+ if(mime_type) {
+ if(strstr((const char*)mime_type, "turtle"))
+ score += 6;
+ if(strstr((const char*)mime_type, "n3"))
+ score += 3;
+ }
+
+ /* Do this as long as N3 is not supported since it shares the same syntax */
+ if(buffer && len) {
+#define HAS_TURTLE_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL)
+/* The following could also be found with N-Triples but not with @prefix */
+#define HAS_TURTLE_RDF_URI (raptor_memstr((const char*)buffer, len, ": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL)
+
+ if(HAS_TURTLE_PREFIX) {
+ score = 6;
+ if(HAS_TURTLE_RDF_URI)
+ score += 2;
+ }
+ }
+
+ return score;
+}
+
+
+static raptor_uri*
+raptor_turtle_get_graph(raptor_parser* rdf_parser)
+{
+ raptor_turtle_parser *turtle_parser;
+
+ turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
+ if(turtle_parser->graph_name)
+ return raptor_uri_copy(turtle_parser->graph_name->value.uri);
+
+ return NULL;
+}
+
+
+#ifdef RAPTOR_PARSER_TRIG
+static int
+raptor_trig_parse_recognise_syntax(raptor_parser_factory* factory,
+ const unsigned char *buffer, size_t len,
+ const unsigned char *identifier,
+ const unsigned char *suffix,
+ const char *mime_type)
+{
+ int score= 0;
+
+ if(suffix) {
+ if(!strcmp((const char*)suffix, "trig"))
+ score = 9;
+#ifndef RAPTOR_PARSER_TURTLE
+ if(!strcmp((const char*)suffix, "ttl"))
+ score = 8;
+ if(!strcmp((const char*)suffix, "n3"))
+ score = 3;
+#endif
+ }
+
+ if(mime_type) {
+ if(strstr((const char*)mime_type, "trig"))
+ score = 6;
+#ifndef RAPTOR_PARSER_TURTLE
+ if(strstr((const char*)mime_type, "turtle"))
+ score += 6;
+ if(strstr((const char*)mime_type, "n3"))
+ score += 3;
+#endif
+ }
+
+#ifndef RAPTOR_PARSER_TURTLE
+ /* Do this as long as N3 is not supported since it shares the same syntax */
+ if(buffer && len) {
+#define HAS_TRIG_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL)
+/* The following could also be found with N-Triples but not with @prefix */
+#define HAS_TRIG_RDF_URI (raptor_memstr((const char*)buffer, len, ": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL)
+
+ if(HAS_TRIG_PREFIX) {
+ score = 6;
+ if(HAS_TRIG_RDF_URI)
+ score += 2;
+ }
+ }
+#endif
+
+ return score;
+}
+#endif
+
+
+#ifdef RAPTOR_PARSER_TURTLE
+static const char* const turtle_names[4] = { "turtle", "ntriples-plus", "n3", NULL };
+
+static const char* const turtle_uri_strings[3] = {
+ "http://www.w3.org/ns/formats/Turtle",
+ "http://www.dajobe.org/2004/01/turtle/",
+ NULL
+};
+
+#define TURTLE_TYPES_COUNT 6
+static const raptor_type_q turtle_types[TURTLE_TYPES_COUNT + 1] = {
+ /* first one is the default */
+ { "text/turtle", 11, 10},
+ { "application/x-turtle", 20, 10},
+ { "application/turtle", 18, 10},
+ { "text/n3", 7, 3},
+ { "text/rdf+n3", 11, 3},
+ { "application/rdf+n3", 18, 3},
+ { NULL, 0}
+};
+
+static int
+raptor_turtle_parser_register_factory(raptor_parser_factory *factory)
+{
+ int rc = 0;
+
+ factory->desc.names = turtle_names;
+
+ factory->desc.mime_types = turtle_types;
+
+ factory->desc.label = "Turtle Terse RDF Triple Language";
+ factory->desc.uri_strings = turtle_uri_strings;
+
+ factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI;
+
+ factory->context_length = sizeof(raptor_turtle_parser);
+
+ factory->init = raptor_turtle_parse_init;
+ factory->terminate = raptor_turtle_parse_terminate;
+ factory->start = raptor_turtle_parse_start;
+ factory->chunk = raptor_turtle_parse_chunk;
+ factory->recognise_syntax = raptor_turtle_parse_recognise_syntax;
+ factory->get_graph = raptor_turtle_get_graph;
+
+ return rc;
+}
+#endif
+
+
+#ifdef RAPTOR_PARSER_TRIG
+static const char* const trig_names[2] = { "trig", NULL };
+
+static const char* const trig_uri_strings[2] = {
+ "http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/",
+ NULL
+};
+
+#define TRIG_TYPES_COUNT 1
+static const raptor_type_q trig_types[TRIG_TYPES_COUNT + 1] = {
+ /* first one is the default */
+ { "application/x-trig", 18, 10},
+ { NULL, 0, 0}
+};
+
+static int
+raptor_trig_parser_register_factory(raptor_parser_factory *factory)
+{
+ int rc = 0;
+
+ factory->desc.names = trig_names;
+
+ factory->desc.mime_types = trig_types;
+
+ factory->desc.label = "TriG - Turtle with Named Graphs";
+ factory->desc.uri_strings = trig_uri_strings;
+
+ factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI;
+
+ factory->context_length = sizeof(raptor_turtle_parser);
+
+ factory->init = raptor_turtle_parse_init;
+ factory->terminate = raptor_turtle_parse_terminate;
+ factory->start = raptor_turtle_parse_start;
+ factory->chunk = raptor_turtle_parse_chunk;
+ factory->recognise_syntax = raptor_trig_parse_recognise_syntax;
+ factory->get_graph = raptor_turtle_get_graph;
+
+ return rc;
+}
+#endif
+
+
+#ifdef RAPTOR_PARSER_TURTLE
+int
+raptor_init_parser_turtle(raptor_world* world)
+{
+ return !raptor_world_register_parser_factory(world,
+ &raptor_turtle_parser_register_factory);
+}
+#endif
+
+#ifdef RAPTOR_PARSER_TRIG
+int
+raptor_init_parser_trig(raptor_world* world)
+{
+ return !raptor_world_register_parser_factory(world,
+ &raptor_trig_parser_register_factory);
+}
+#endif
+
+
+#ifdef STANDALONE
+#include <stdio.h>
+#include <locale.h>
+
+#define TURTLE_FILE_BUF_SIZE 2048
+
+static void
+turtle_parser_print_statement(void *user,
+ raptor_statement *statement)
+{
+ FILE* stream = (FILE*)user;
+ raptor_statement_print(statement, stream);
+ putc('\n', stream);
+}
+
+
+
+int
+main(int argc, char *argv[])
+{
+ char string[TURTLE_FILE_BUF_SIZE];
+ raptor_parser rdf_parser; /* static */
+ raptor_turtle_parser turtle_parser; /* static */
+ raptor_locator *locator = &rdf_parser.locator;
+ FILE *fh;
+ const char *filename;
+ size_t nobj;
+
+#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
+ turtle_parser_debug = 1;
+#endif
+
+ if(argc > 1) {
+ filename = argv[1];
+ fh = fopen(filename, "r");
+ if(!fh) {
+ fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename,
+ strerror(errno));
+ exit(1);
+ }
+ } else {
+ filename="<stdin>";
+ fh = stdin;
+ }
+
+ memset(string, 0, TURTLE_FILE_BUF_SIZE);
+ nobj = fread(string, TURTLE_FILE_BUF_SIZE, 1, fh);
+ if(nobj < TURTLE_FILE_BUF_SIZE) {
+ if(ferror(fh)) {
+ fprintf(stderr, "%s: file '%s' read failed - %s\n",
+ argv[0], filename, strerror(errno));
+ fclose(fh);
+ return(1);
+ }
+ }
+
+ if(argc > 1)
+ fclose(fh);
+
+ memset(&rdf_parser, 0, sizeof(rdf_parser));
+ memset(&turtle_parser, 0, sizeof(turtle_parser));
+
+ locator->line= locator->column = -1;
+ locator->file= filename;
+
+ turtle_parser.lineno= 1;
+
+ rdf_parser.world = raptor_new_world();
+ rdf_parser.context = &turtle_parser;
+ rdf_parser.base_uri = raptor_new_uri(rdf_parser.world,
+ (const unsigned char*)"http://example.org/fake-base-uri/");
+
+ raptor_parser_set_statement_handler(&rdf_parser, stdout,
+ turtle_parser_print_statement);
+ raptor_turtle_parse_init(&rdf_parser, "turtle");
+
+ turtle_parser.error_count = 0;
+
+#ifdef TURTLE_PUSH_PARSE
+ turtle_push_parse(&rdf_parser, string, strlen(string));
+#else
+ turtle_parse(&rdf_parser, string, strlen(string));
+#endif
+
+ raptor_turtle_parse_terminate(&rdf_parser);
+
+ raptor_free_uri(rdf_parser.base_uri);
+
+ raptor_free_world(rdf_parser.world);
+
+ return (0);
+}
+#endif