diff options
Diffstat (limited to 'src/turtle_parser.y')
-rw-r--r-- | src/turtle_parser.y | 2128 |
1 files changed, 2128 insertions, 0 deletions
diff --git a/src/turtle_parser.y b/src/turtle_parser.y new file mode 100644 index 0000000..1474a3d --- /dev/null +++ b/src/turtle_parser.y @@ -0,0 +1,2128 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_parser.y - Raptor Turtle / TRIG / N3 parsers - over tokens from turtle grammar lexer + * + * Copyright (C) 2003-2013, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + * Turtle is defined in http://www.dajobe.org/2004/01/turtle/ + * + * Made from a subset of the terms in + * http://www.w3.org/DesignIssues/Notation3.html + * + * TRIG is defined in http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/ + */ + +%{ +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> + +#define YY_NO_UNISTD_H 1 +#undef yylex +#include <turtle_lexer.h> + +#include <turtle_common.h> + + +/* Set RAPTOR_DEBUG to 3 for super verbose parsing - watching the shift/reduces */ +#if 0 +#undef RAPTOR_DEBUG +#define RAPTOR_DEBUG 3 +#endif + + +/* Fail with an debug error message if RAPTOR_DEBUG > 1 */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +#define YYERROR_MSG(msg) do { fputs("** YYERROR ", RAPTOR_DEBUG_FH); fputs(msg, RAPTOR_DEBUG_FH); fputc('\n', RAPTOR_DEBUG_FH); YYERROR; } while(0) +#else +#define YYERROR_MSG(ignore) YYERROR +#endif +#define YYERR_MSG_GOTO(label,msg) do { errmsg = msg; goto label; } while(0) + +/* Slow down the grammar operation and watch it work */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 +#undef YYDEBUG +#define YYDEBUG 1 +#endif + +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, + TURTLE_PARSER_STYPE *lval); +#endif + + +/* the lexer does not seem to track this */ +#undef RAPTOR_TURTLE_USE_ERROR_COLUMNS + +/* set api.push-pull to "push" if this is defined */ +#undef TURTLE_PUSH_PARSE + +/* Prototypes */ +int turtle_parser_error(raptor_parser* rdf_parser, void* scanner, const char *msg); +static void turtle_parser_error_simple(void* user_data, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +/* Make lex/yacc interface as small as possible */ +#undef yylex +#define yylex turtle_lexer_lex + +/* Prototypes for local functions */ +static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *triple); + +static void raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *triple); + +static void raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *triple); + +%} + + +/* directives */ + +%require "3.0" + +/* File prefix (-b) */ +%file-prefix "turtle_parser" + +/* Bison 2.6+ : Symbol prefix */ +%define api.prefix {turtle_parser_} +/* Bison 3.4+ : Generated header file */ +%define api.header.include {<turtle_parser.h>} + +/* Write parser header file with macros (bison -d) */ +%defines + +/* Make verbose error messages for syntax errors */ +%define parse.error verbose + +/* Write output file with verbose descriptions of parser states */ +%verbose + +/* Generate code processing locations */ + /* %locations */ + +/* Pure parser - want a reentrant parser */ +%define api.pure full + +/* Push or pull parser? */ +%define api.push-pull pull + +/* Pure parser argument: lexer - yylex() and parser - yyparse() */ +%lex-param { yyscan_t yyscanner } +%parse-param { raptor_parser* rdf_parser } { void* yyscanner } + +/* Interface between lexer and parser */ +%union { + unsigned char *string; + raptor_term *identifier; + raptor_sequence *sequence; + raptor_uri *uri; +} + + +/* others */ + +%token A "a" +%token HAT "^" +%token DOT "." +%token COMMA "," +%token SEMICOLON ";" +%token LEFT_SQUARE "[" +%token RIGHT_SQUARE "]" +%token LEFT_ROUND "(" +%token RIGHT_ROUND ")" +%token LEFT_CURLY "{" +%token RIGHT_CURLY "}" +%token TRUE_TOKEN "true" +%token FALSE_TOKEN "false" +%token PREFIX "@prefix" +%token BASE "@base" +%token SPARQL_PREFIX "PREFIX" +%token SPARQL_BASE "BASE" + +/* literals */ +%token + <string> + STRING_LITERAL "string literal" + IDENTIFIER "identifier" + LANGTAG "langtag" + INTEGER_LITERAL "integer literal" + FLOATING_LITERAL "floating point literal" + DECIMAL_LITERAL "decimal literal" + BLANK_LITERAL "blank node" + <uri> + URI_LITERAL "URI literal" + GRAPH_NAME_LEFT_CURLY "Graph URI literal {" + QNAME_LITERAL "QName" + +/* syntax error */ +%token ERROR_TOKEN + +%type <identifier> subject predicate object verb literal resource blankNode collection blankNodePropertyList +%type <sequence> triples objectList itemList predicateObjectList predicateObjectListOpt + +/* tidy up tokens after errors */ + +%destructor { + if($$) + RAPTOR_FREE(char*, $$); +} <string> + +%destructor { + if($$) + raptor_free_uri($$); +} <uri> + +%destructor { + if($$) + raptor_free_term($$); +} <identifier> + +%destructor { + if($$) + raptor_free_sequence($$); +} <sequence> + +%% + +Document : statementList +;; + + +graph: GRAPH_NAME_LEFT_CURLY + { + /* action in mid-rule so this is run BEFORE the triples in graphBody */ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(!turtle_parser->trig) + turtle_parser_error(rdf_parser, yyscanner, "{ ... } is not allowed in Turtle"); + else { + if(turtle_parser->graph_name) + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = raptor_new_term_from_uri(rdf_parser->world, $1); + raptor_free_uri($1); + raptor_parser_start_graph(rdf_parser, + turtle_parser->graph_name->value.uri, 1); + } + } + graphBody RIGHT_CURLY +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->trig) { + raptor_parser_end_graph(rdf_parser, + turtle_parser->graph_name->value.uri, 1); + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = NULL; + rdf_parser->emitted_default_graph = 0; + } +} +| +LEFT_CURLY + { + /* action in mid-rule so this is run BEFORE the triples in graphBody */ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(!turtle_parser->trig) + turtle_parser_error(rdf_parser, yyscanner, "{ ... } is not allowed in Turtle"); + else { + raptor_parser_start_graph(rdf_parser, NULL, 1); + rdf_parser->emitted_default_graph++; + } + } + graphBody RIGHT_CURLY +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->trig) { + raptor_parser_end_graph(rdf_parser, NULL, 1); + rdf_parser->emitted_default_graph = 0; + } +} +; + + +graphBody: triplesList +| %empty +; + +triplesList: dotTriplesList +| dotTriplesList DOT +; + +dotTriplesList: triples +{ + int i; + + if($1) { + for(i = 0; i < raptor_sequence_size($1); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($1, i); + raptor_turtle_generate_statement(rdf_parser, t2); + } + raptor_free_sequence($1); + } +} +| dotTriplesList DOT triples +{ + int i; + + if($3) { + for(i = 0; i < raptor_sequence_size($3); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($3, i); + raptor_turtle_generate_statement(rdf_parser, t2); + } + raptor_free_sequence($3); + } +} +; + +statementList: statementList statement +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + /* sync up consumed/processed so we know what to unwind */ + turtle_parser->processed = turtle_parser->consumed; + turtle_parser->lineno_last_good = turtle_parser->lineno; +} +| statementList error +| %empty +; + +statement: directive +| graph +| triples DOT +{ + raptor_turtle_parser* turtle_parser; + int i; + + /* yield deferred statements, if any */ + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->deferred) { + raptor_sequence* def = turtle_parser->deferred; + + for(i = 0; i < raptor_sequence_size(def); i++) { + raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i); + + raptor_turtle_handle_statement(rdf_parser, t2); + } + } + + if($1) { + for(i = 0; i < raptor_sequence_size($1); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($1, i); + raptor_turtle_generate_statement(rdf_parser, t2); + } + raptor_free_sequence($1); + } + + if(turtle_parser->deferred) { + /* debrief resources */ + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } +} +; + +triples: subject predicateObjectList +{ + int i; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("triples 1\n subject="); + if($1) + raptor_term_print_as_ntriples($1, stdout); + else + fputs("NULL", stdout); + if($2) { + printf("\n predicateObjectList (reverse order to syntax)="); + raptor_sequence_print($2, stdout); + printf("\n"); + } else + printf("\n and empty predicateObjectList\n"); +#endif + + if($1 && $2) { + /* have subject and non-empty property list, handle it */ + for(i = 0; i < raptor_sequence_size($2); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i); + t2->subject = raptor_term_copy($1); + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution predicateObjectList="); + raptor_sequence_print($2, stdout); + printf("\n\n"); +#endif + } + + if($1) + raptor_free_term($1); + + $$ = $2; +} +| blankNodePropertyList predicateObjectListOpt +{ + int i; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("triples 2\n blankNodePropertyList="); + if($1) + raptor_term_print_as_ntriples($1, stdout); + else + fputs("NULL", stdout); + if($2) { + printf("\n predicateObjectListOpt (reverse order to syntax)="); + raptor_sequence_print($2, stdout); + printf("\n"); + } else + printf("\n and empty predicateObjectListOpt\n"); +#endif + + if($1 && $2) { + /* have subject and non-empty predicate object list, handle it */ + for(i = 0; i < raptor_sequence_size($2); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i); + t2->subject = raptor_term_copy($1); + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution predicateObjectListOpt="); + raptor_sequence_print($2, stdout); + printf("\n\n"); +#endif + } + + if($1) + raptor_free_term($1); + + $$ = $2; +} +| error DOT +{ + $$ = NULL; +} +; + + +objectList: objectList COMMA object +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 1\n"); + if($3) { + printf(" object=\n"); + raptor_term_print_as_ntriples($3, stdout); + printf("\n"); + } else + printf(" and empty object\n"); + if($1) { + printf(" objectList="); + raptor_sequence_print($1, stdout); + printf("\n"); + } else + printf(" and empty objectList\n"); +#endif + + if(!$3) + $$ = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $3, NULL); + if(!triple) { + raptor_free_sequence($1); + YYERROR; + } + if(raptor_sequence_push($1, triple)) { + raptor_free_sequence($1); + YYERROR; + } + $$ = $1; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print($$, stdout); + printf("\n\n"); +#endif + } +} +| object +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 2\n"); + if($1) { + printf(" object=\n"); + raptor_term_print_as_ntriples($1, stdout); + printf("\n"); + } else + printf(" and empty object\n"); +#endif + + if(!$1) + $$ = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $1, NULL); + if(!triple) + YYERROR; +#ifdef RAPTOR_DEBUG + $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, + (raptor_data_print_handler)raptor_statement_print); +#else + $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); +#endif + if(!$$) { + raptor_free_statement(triple); + YYERROR; + } + if(raptor_sequence_push($$, triple)) { + raptor_free_sequence($$); + $$ = NULL; + YYERROR; + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print($$, stdout); + printf("\n\n"); +#endif + } +} +; + +itemList: itemList object +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 1\n"); + if($2) { + printf(" object=\n"); + raptor_term_print_as_ntriples($2, stdout); + printf("\n"); + } else + printf(" and empty object\n"); + if($1) { + printf(" objectList="); + raptor_sequence_print($1, stdout); + printf("\n"); + } else + printf(" and empty objectList\n"); +#endif + + if(!$2) + $$ = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $2, NULL); + if(!triple) { + raptor_free_sequence($1); + YYERROR; + } + if(raptor_sequence_push($1, triple)) { + raptor_free_sequence($1); + YYERROR; + } + $$ = $1; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print($$, stdout); + printf("\n\n"); +#endif + } +} +| object +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 2\n"); + if($1) { + printf(" object=\n"); + raptor_term_print_as_ntriples($1, stdout); + printf("\n"); + } else + printf(" and empty object\n"); +#endif + + if(!$1) + $$ = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $1, NULL); + if(!triple) + YYERROR; +#ifdef RAPTOR_DEBUG + $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, + (raptor_data_print_handler)raptor_statement_print); +#else + $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); +#endif + if(!$$) { + raptor_free_statement(triple); + YYERROR; + } + if(raptor_sequence_push($$, triple)) { + raptor_free_sequence($$); + $$ = NULL; + YYERROR; + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print($$, stdout); + printf("\n\n"); +#endif + } +} +; + +verb: predicate +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("verb predicate="); + raptor_term_print_as_ntriples($1, stdout); + printf("\n"); +#endif + + $$ = $1; +} +| A +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("verb predicate = rdf:type (a)\n"); +#endif + + $$ = raptor_term_copy(RAPTOR_RDF_type_term(rdf_parser->world)); + if(!$$) + YYERROR; +} +; + + +predicateObjectList: predicateObjectList SEMICOLON verb objectList +{ + int i; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("predicateObjectList 1\n verb="); + raptor_term_print_as_ntriples($3, stdout); + printf("\n objectList="); + raptor_sequence_print($4, stdout); + printf("\n predicateObjectList="); + raptor_sequence_print($1, stdout); + printf("\n\n"); +#endif + + if($4 == NULL) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" empty objectList not processed\n"); +#endif + } else if($3 && $4) { + /* non-empty property list, handle it */ + for(i = 0; i < raptor_sequence_size($4); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($4, i); + t2->predicate = raptor_term_copy($3); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print($4, stdout); + printf("\n"); +#endif + } + + if($1 == NULL) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" empty predicateObjectList not copied\n\n"); +#endif + } else if($3 && $4 && $1) { + while(raptor_sequence_size($4)) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_unshift($4); + if(raptor_sequence_push($1, t2)) { + raptor_free_sequence($1); + raptor_free_term($3); + raptor_free_sequence($4); + YYERROR; + } + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after appending objectList (reverse order)="); + raptor_sequence_print($1, stdout); + printf("\n\n"); +#endif + + raptor_free_sequence($4); + } + + if($3) + raptor_free_term($3); + + $$ = $1; +} +| verb objectList +{ + int i; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("predicateObjectList 2\n verb="); + raptor_term_print_as_ntriples($1, stdout); + if($2) { + printf("\n objectList="); + raptor_sequence_print($2, stdout); + printf("\n"); + } else + printf("\n and empty objectList\n"); +#endif + + if($1 && $2) { + for(i = 0; i < raptor_sequence_size($2); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i); + t2->predicate = raptor_term_copy($1); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print($2, stdout); + printf("\n\n"); +#endif + } + + if($1) + raptor_free_term($1); + + $$ = $2; +} +| predicateObjectList SEMICOLON +{ + $$ = $1; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("predicateObjectList 5\n trailing semicolon returning existing list "); + raptor_sequence_print($$, stdout); + printf("\n\n"); +#endif +} +; + +directive : prefix | base +; + +prefix: PREFIX IDENTIFIER URI_LITERAL DOT +{ + unsigned char *prefix = $2; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context); + raptor_namespace *ns; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("directive PREFIX %s %s\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3)); +#endif + + if(prefix) { + size_t len = strlen((const char*)prefix); + if(prefix[len-1] == ':') { + if(len == 1) + /* declaring default namespace prefix PREFIX : ... */ + prefix = NULL; + else + prefix[len-1]='\0'; + } + } + + ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, $3, 0); + if(ns) { + raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns); + raptor_parser_start_namespace(rdf_parser, ns); + } + + if($2) + RAPTOR_FREE(char*, $2); + raptor_free_uri($3); + + if(!ns) + YYERROR; +} +| SPARQL_PREFIX IDENTIFIER URI_LITERAL +{ + unsigned char *prefix = $2; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context); + raptor_namespace *ns; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("directive @prefix %s %s.\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3)); +#endif + + if(prefix) { + size_t len = strlen((const char*)prefix); + if(prefix[len-1] == ':') { + if(len == 1) + /* declaring default namespace prefix @prefix : ... */ + prefix = NULL; + else + prefix[len-1]='\0'; + } + } + + ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, $3, 0); + if(ns) { + raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns); + raptor_parser_start_namespace(rdf_parser, ns); + } + + if($2) + RAPTOR_FREE(char*, $2); + raptor_free_uri($3); + + if(!ns) + YYERROR; +} +; + + +base: BASE URI_LITERAL DOT +{ + raptor_uri *uri=$2; + + if(rdf_parser->base_uri) + raptor_free_uri(rdf_parser->base_uri); + rdf_parser->base_uri = uri; +} +| SPARQL_BASE URI_LITERAL +{ + raptor_uri *uri=$2; + + if(rdf_parser->base_uri) + raptor_free_uri(rdf_parser->base_uri); + rdf_parser->base_uri = uri; +} +; + +subject: resource +{ + $$ = $1; +} +| blankNode +{ + $$ = $1; +} +| collection +{ + $$ = $1; +} +; + + +predicate: resource +{ + $$ = $1; +} +; + + +object: resource +{ + $$ = $1; +} +| blankNode +{ + $$ = $1; +} +| collection +{ + $$ = $1; +} +| blankNodePropertyList +{ + $$ = $1; +} +| literal +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("object literal="); + raptor_term_print_as_ntriples($1, stdout); + printf("\n"); +#endif + + $$ = $1; +} +; + + +literal: STRING_LITERAL LANGTAG +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + language string=\"%s\"\n", $1); +#endif + + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, NULL, $2); + RAPTOR_FREE(char*, $1); + RAPTOR_FREE(char*, $2); + if(!$$) + YYERROR; +} +| STRING_LITERAL LANGTAG HAT URI_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + language=\"%s\" datatype string=\"%s\" uri=\"%s\"\n", $1, $2, raptor_uri_as_string($4)); +#endif + + if($4) { + if($2) { + raptor_parser_error(rdf_parser, + "Language not allowed with datatyped literal"); + RAPTOR_FREE(char*, $2); + $2 = NULL; + } + + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $4, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri($4); + if(!$$) + YYERROR; + } else + $$ = NULL; + +} +| STRING_LITERAL LANGTAG HAT QNAME_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + language=\"%s\" datatype string=\"%s\" qname URI=<%s>\n", $1, $2, raptor_uri_as_string($4)); +#endif + + if($4) { + if($2) { + raptor_parser_error(rdf_parser, + "Language not allowed with datatyped literal"); + RAPTOR_FREE(char*, $2); + $2 = NULL; + } + + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $4, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri($4); + if(!$$) + YYERROR; + } else + $$ = NULL; + +} +| STRING_LITERAL HAT URI_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + datatype string=\"%s\" uri=\"%s\"\n", $1, raptor_uri_as_string($3)); +#endif + + if($3) { + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $3, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri($3); + if(!$$) + YYERROR; + } else + $$ = NULL; + +} +| STRING_LITERAL HAT QNAME_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + datatype string=\"%s\" qname URI=<%s>\n", $1, raptor_uri_as_string($3)); +#endif + + if($3) { + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $3, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri($3); + if(!$$) + YYERROR; + } else + $$ = NULL; +} +| STRING_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal string=\"%s\"\n", $1); +#endif + + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, NULL, NULL); + RAPTOR_FREE(char*, $1); + if(!$$) + YYERROR; +} +| INTEGER_LITERAL +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource integer=%s\n", $1); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_integer_uri); + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri(uri); + if(!$$) + YYERROR; +} +| FLOATING_LITERAL +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource double=%s\n", $1); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_double_uri); + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri(uri); + if(!$$) + YYERROR; +} +| DECIMAL_LITERAL +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource decimal=%s\n", $1); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_decimal_uri); + if(!uri) { + RAPTOR_FREE(char*, $1); + YYERROR; + } + $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL); + RAPTOR_FREE(char*, $1); + raptor_free_uri(uri); + if(!$$) + YYERROR; +} +| TRUE_TOKEN +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fputs("resource boolean true\n", stderr); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri); + $$ = raptor_new_term_from_literal(rdf_parser->world, + (const unsigned char*)"true", uri, NULL); + raptor_free_uri(uri); + if(!$$) + YYERROR; +} +| FALSE_TOKEN +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fputs("resource boolean false\n", stderr); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri); + $$ = raptor_new_term_from_literal(rdf_parser->world, + (const unsigned char*)"false", uri, NULL); + raptor_free_uri(uri); + if(!$$) + YYERROR; +} +; + + +resource: URI_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource URI=<%s>\n", raptor_uri_as_string($1)); +#endif + + if($1) { + $$ = raptor_new_term_from_uri(rdf_parser->world, $1); + raptor_free_uri($1); + if(!$$) + YYERROR; + } else + $$ = NULL; +} +| QNAME_LITERAL +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource qname URI=<%s>\n", raptor_uri_as_string($1)); +#endif + + if($1) { + $$ = raptor_new_term_from_uri(rdf_parser->world, $1); + raptor_free_uri($1); + if(!$$) + YYERROR; + } else + $$ = NULL; +} +; + + +predicateObjectListOpt: predicateObjectList +{ + $$ = $1; +} +| %empty +{ + $$ = NULL; +} +; + + +blankNode: BLANK_LITERAL +{ + const unsigned char *id; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("subject blank=\"%s\"\n", $1); +#endif + id = raptor_world_internal_generate_id(rdf_parser->world, $1); + if(!id) + YYERROR; + + $$ = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + + if(!$$) + YYERROR; +} +; + +blankNodePropertyList: LEFT_SQUARE predicateObjectListOpt RIGHT_SQUARE +{ + int i; + const unsigned char *id; + + id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!id) { + if($2) + raptor_free_sequence($2); + YYERROR; + } + + $$ = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + if(!$$) { + if($2) + raptor_free_sequence($2); + YYERROR; + } + + if($2 == NULL) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_term_print_as_ntriples($$, stdout); + printf("\n"); +#endif + } else { + /* non-empty property list, handle it */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_sequence_print($2, stdout); + printf("\n"); +#endif + + for(i = 0; i < raptor_sequence_size($2); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i); + t2->subject = raptor_term_copy($$); + raptor_turtle_defer_statement(rdf_parser, t2); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print($2, stdout); + printf("\n\n"); +#endif + + raptor_free_sequence($2); + + } + +} +; + + +collection: LEFT_ROUND itemList RIGHT_ROUND +{ + int i; + raptor_world* world = rdf_parser->world; + raptor_term* first_identifier = NULL; + raptor_term* rest_identifier = NULL; + raptor_term* object = NULL; + raptor_term* blank = NULL; + char const *errmsg = NULL; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("collection\n objectList="); + raptor_sequence_print($2, stdout); + printf("\n"); +#endif + + first_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_first_URI(world)); + if(!first_identifier) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:first term"); + rest_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_rest_URI(world)); + if(!rest_identifier) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:rest term"); + + /* non-empty property list, handle it */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_sequence_print($2, stdout); + printf("\n"); +#endif + + object = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world)); + if(!object) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:nil term"); + + for(i = raptor_sequence_size($2)-1; i>=0; i--) { + raptor_term* temp; + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i); + const unsigned char *blank_id; + + blank_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!blank_id) + YYERR_MSG_GOTO(err_collection, "Cannot create bnodeid"); + + blank = raptor_new_term_from_blank(rdf_parser->world, + blank_id); + RAPTOR_FREE(char*, blank_id); + if(!blank) + YYERR_MSG_GOTO(err_collection, "Cannot create bnode"); + + t2->subject = blank; + t2->predicate = first_identifier; + /* t2->object already set to the value we want */ + raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2); + + temp = t2->object; + + t2->subject = blank; + t2->predicate = rest_identifier; + t2->object = object; + raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2); + + t2->subject = NULL; + t2->predicate = NULL; + t2->object = temp; + + raptor_free_term(object); + object = blank; + blank = NULL; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print($2, stdout); + printf("\n\n"); +#endif + + raptor_free_sequence($2); + + raptor_free_term(first_identifier); + raptor_free_term(rest_identifier); + + $$=object; + + err_collection: + if(errmsg) { + if(blank) + raptor_free_term(blank); + + if(object) + raptor_free_term(object); + + if(rest_identifier) + raptor_free_term(rest_identifier); + + if(first_identifier) + raptor_free_term(first_identifier); + + raptor_free_sequence($2); + + YYERROR_MSG(errmsg); + } +} +| LEFT_ROUND RIGHT_ROUND +{ + raptor_world* world = rdf_parser->world; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("collection\n empty\n"); +#endif + + $$ = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world)); + if(!$$) + YYERROR; +} +; + + +%% + + +/* Support functions */ + +/* Error handler with scanner context, during parsing */ +int +turtle_parser_error(raptor_parser* rdf_parser, void* scanner, + const char *msg) +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->consumed == turtle_parser->consumable && + turtle_parser->processed < turtle_parser->consumed && + !turtle_parser->is_end) { + /* we encountered an error on or around the last byte of the buffer + * sorting it in the next run aye? */ + return 0; + } + + if(turtle_parser->error_count++) + return 0; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + raptor_log_error(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, + &rdf_parser->locator, msg); + + return 0; +} + + +/* Error handler within raptor functions and callbacks */ +static void +turtle_parser_error_simple(void* user_data, const char *msg, ...) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + raptor_turtle_parser* turtle_parser; + va_list args; + + va_start(args, msg); + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->consumed == turtle_parser->consumable && + turtle_parser->processed < turtle_parser->consumed && + !turtle_parser->is_end) { + /* we encountered an error on or around the last byte of the buffer + * sorting it in the next run aye? */ + goto tidy; + } + + if(turtle_parser->error_count++) + goto tidy; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + raptor_log_error_varargs(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, + &rdf_parser->locator, msg, + args); + +tidy: + va_end(args); +} + + +int +turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + va_list arguments; + + if(!turtle_parser) + return 1; + + if(turtle_parser->error_count++) + return 0; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + va_start(arguments, message); + + raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser), + RAPTOR_LOG_LEVEL_ERROR, message, arguments); + + va_end(arguments); + + return 0; +} + + +raptor_uri* +turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!turtle_parser) + return NULL; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + name_len = raptor_turtle_expand_qname_escapes(name, name_len, + (raptor_simple_message_handler)turtle_parser_error_simple, rdf_parser); + if(!name_len) + return NULL; + + return raptor_qname_string_to_uri(&turtle_parser->namespaces, name, name_len); +} + + + +#ifndef TURTLE_PUSH_PARSE +static int +turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + int rc; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + turtle_parser->scanner_set = 1; + +#if defined(YYDEBUG) && YYDEBUG > 0 + turtle_lexer_set_debug(1 ,&turtle_parser->scanner); + turtle_parser_debug = 1; +#endif + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + (void)turtle_lexer__scan_bytes((char *)string, (yy_size_t)length, turtle_parser->scanner); + + rc = turtle_parser_parse(rdf_parser, turtle_parser->scanner); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + + return rc; +} +#endif + + +#ifdef TURTLE_PUSH_PARSE +static int +turtle_push_parse(raptor_parser *rdf_parser, + const char *string, size_t length) +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + raptor_world* world = rdf_parser->world; +#endif + raptor_turtle_parser* turtle_parser; + void *buffer; + int status; + yypstate *ps; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + turtle_parser->scanner_set = 1; + +#if defined(YYDEBUG) && YYDEBUG > 0 + turtle_lexer_set_debug(1 ,&turtle_parser->scanner); + turtle_parser_debug = 1; +#endif + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + buffer = turtle_lexer__scan_bytes(string, (yy_size_t)length, turtle_parser->scanner); + + /* returns a parser instance or 0 on out of memory */ + ps = yypstate_new(); + if(!ps) + return 1; + + do { + TURTLE_PARSER_YYSTYPE lval; + int token; + + memset(&lval, 0, sizeof(TURTLE_PARSER_YYSTYPE)); + + token = turtle_lexer_lex(&lval, turtle_parser->scanner); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("token %s\n", turtle_token_print(world, token, &lval)); +#endif + + status = yypush_parse(ps, token, &lval, rdf_parser, turtle_parser->scanner); + + /* turtle_token_free(world, token, &lval); */ + + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } while (status == YYPUSH_MORE); + yypstate_delete(ps); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + + return 0; +} +#endif + + +/** + * raptor_turtle_parse_init - Initialise the Raptor Turtle parser + * + * Return value: non 0 on failure + **/ + +static int +raptor_turtle_parse_init(raptor_parser* rdf_parser, const char *name) { + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(raptor_namespaces_init(rdf_parser->world, &turtle_parser->namespaces, 0)) + return 1; + + turtle_parser->trig = !strcmp(name, "trig"); + + return 0; +} + + +/* PUBLIC FUNCTIONS */ + + +/* + * raptor_turtle_parse_terminate - Free the Raptor Turtle parser + * @rdf_parser: parser object + * + **/ +static void +raptor_turtle_parse_terminate(raptor_parser *rdf_parser) { + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + raptor_namespaces_clear(&turtle_parser->namespaces); + + if(turtle_parser->scanner_set) { + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + } + + if(turtle_parser->buffer) + RAPTOR_FREE(cdata, turtle_parser->buffer); + + if(turtle_parser->graph_name) { + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = NULL; + } +} + + +static void +raptor_turtle_clone_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)parser->context; + raptor_statement *statement = &parser->statement; + + if(!t->subject || !t->predicate || !t->object) + return; + + if(turtle_parser->trig && turtle_parser->graph_name) + statement->graph = raptor_term_copy(turtle_parser->graph_name); + + if(!parser->emitted_default_graph && !turtle_parser->graph_name) { + /* for non-TRIG - start default graph at first triple */ + raptor_parser_start_graph(parser, NULL, 0); + parser->emitted_default_graph++; + } + + /* Two choices for subject for Turtle */ + if(t->subject->type == RAPTOR_TERM_TYPE_BLANK) { + statement->subject = raptor_new_term_from_blank(parser->world, + t->subject->value.blank.string); + } else { + /* RAPTOR_TERM_TYPE_URI */ + RAPTOR_ASSERT(t->subject->type != RAPTOR_TERM_TYPE_URI, + "subject type is not resource"); + statement->subject = raptor_new_term_from_uri(parser->world, + t->subject->value.uri); + } + + /* Predicates are URIs but check for bad ordinals */ + if(!strncmp((const char*)raptor_uri_as_string(t->predicate->value.uri), + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) { + unsigned char* predicate_uri_string = raptor_uri_as_string(t->predicate->value.uri); + int predicate_ordinal = raptor_check_ordinal(predicate_uri_string+44); + if(predicate_ordinal <= 0) + raptor_parser_error(parser, "Illegal ordinal value %d in property '%s'.", predicate_ordinal, predicate_uri_string); + } + + statement->predicate = raptor_new_term_from_uri(parser->world, + t->predicate->value.uri); + + + /* Three choices for object for Turtle */ + if(t->object->type == RAPTOR_TERM_TYPE_URI) { + statement->object = raptor_new_term_from_uri(parser->world, + t->object->value.uri); + } else if(t->object->type == RAPTOR_TERM_TYPE_BLANK) { + statement->object = raptor_new_term_from_blank(parser->world, + t->object->value.blank.string); + } else { + /* RAPTOR_TERM_TYPE_LITERAL */ + RAPTOR_ASSERT(t->object->type != RAPTOR_TERM_TYPE_LITERAL, + "object type is not literal"); + statement->object = raptor_new_term_from_literal(parser->world, + t->object->value.literal.string, + t->object->value.literal.datatype, + t->object->value.literal.language); + } +} + +static void +raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *t) +{ + if(!t->subject || !t->predicate || !t->object) + return; + + if(!parser->statement_handler) + return; + + /* Generate the statement */ + (*parser->statement_handler)(parser->user_data, t); +} + +static void +raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_turtle_clone_statement(parser, t); + raptor_turtle_handle_statement(parser, &parser->statement); + /* clear resources */ + raptor_statement_clear(&parser->statement); +} + +static void +raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_statement* st; + raptor_turtle_parser* turtle_parser; + + raptor_turtle_clone_statement(parser, t); + st = raptor_new_statement(parser->world); + if(!st) { + return; + } + /* copy static to dynamic statement, it's a move really */ + st->subject = parser->statement.subject, parser->statement.subject = NULL; + st->predicate = parser->statement.predicate, parser->statement.predicate = NULL; + st->object = parser->statement.object, parser->statement.object = NULL; + st->graph = parser->statement.graph, parser->statement.graph = NULL; + + /* prep deferred list */ + turtle_parser = (raptor_turtle_parser*)parser->context; + if(!turtle_parser->deferred) { + turtle_parser->deferred = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); + if(!turtle_parser->deferred) { + goto free_seq; + } + } + /* append to deferred list */ + if(raptor_sequence_push(turtle_parser->deferred, st)) { + free_seq: + raptor_free_statement(st); + } +} + + + +static int +raptor_turtle_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_turtle_parser *turtle_parser; + char *ptr; + int rc; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("adding %d bytes to line buffer\n", (int)len); +#endif + + if(!len && !is_end) { + /* nothing to do */ + return 0; + } + + /* the actual buffer will contained unprocessed characters from + * the last run plus the chunk passed here */ + turtle_parser->end_of_buffer = turtle_parser->consumed + len; + if(turtle_parser->end_of_buffer > turtle_parser->buffer_length) { + /* resize */ + size_t new_buffer_length = turtle_parser->end_of_buffer; + + turtle_parser->buffer = RAPTOR_REALLOC(char*, turtle_parser->buffer, + new_buffer_length + 1); + + /* adjust stored length */ + turtle_parser->buffer_length = new_buffer_length; + } + if(!turtle_parser->buffer && turtle_parser->buffer_length) { + /* we tried to alloc a buffer but we failed */ + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return 1; + } + if(is_end && !turtle_parser->end_of_buffer) { + /* Nothing to do */ + return 0; + } + + /* move pointer to end of cdata buffer */ + ptr = turtle_parser->buffer + turtle_parser->consumed; + + /* now write new stuff at end of cdata buffer */ + memcpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + + /* reset processed counter */ + turtle_parser->processed = 0U; + /* unconsume */ + turtle_parser->consumed = 0U; + /* reset line numbers */ + turtle_parser->lineno = turtle_parser->lineno_last_good; + + /* let everyone know if this is the last chunk */ + turtle_parser->is_end = is_end; + if(!is_end) { + /* it's safer not to pass the very last line to the lexer + * just in case we end up with EOB-in-the-middle-of-X situations */ + size_t i = turtle_parser->end_of_buffer; + while(i > 0U && turtle_parser->buffer[--i] != '\n'); + /* either i == 0U or i points to the last \n before the end-of-buffer */ + turtle_parser->consumable = i; + } else { + /* otherwise the consumable number of bytes coincides with the EOB */ + turtle_parser->consumable = turtle_parser->end_of_buffer; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("buffer buffer now '%s' (%ld bytes)\n", + turtle_parser->buffer, turtle_parser->buffer_length); +#endif + +#ifdef TURTLE_PUSH_PARSE + rc = turtle_push_parse(rdf_parser, + turtle_parser->buffer, turtle_parser->consumable); +#else + rc = turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->consumable); +#endif + + if(turtle_parser->error_count) { + rc = 1; + } else if(!is_end) { + /* move stuff to the beginning of the buffer */ + turtle_parser->consumed = turtle_parser->end_of_buffer - turtle_parser->processed; + if(turtle_parser->consumed && turtle_parser->processed) { + memmove(turtle_parser->buffer, + turtle_parser->buffer + turtle_parser->processed, + turtle_parser->consumed); + /* cancel all deferred eval's */ + if(turtle_parser->deferred) { + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } + } + } else { + /* this was the last chunk, finalise */ + if(turtle_parser->deferred) { + raptor_sequence* def = turtle_parser->deferred; + int i; + for(i = 0; i < raptor_sequence_size(def); i++) { + raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i); + + raptor_turtle_handle_statement(rdf_parser, t2); + } + } + if(rdf_parser->emitted_default_graph) { + /* for non-TRIG - end default graph after last triple */ + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + if(turtle_parser->deferred) { + /* clear resources */ + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } + } + return rc; +} + + +static int +raptor_turtle_parse_start(raptor_parser *rdf_parser) +{ + raptor_locator *locator=&rdf_parser->locator; + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + /* base URI required for Turtle */ + if(!rdf_parser->base_uri) + return 1; + + locator->line = 1; + locator->column= -1; /* No column info */ + locator->byte= -1; /* No bytes info */ + + if(turtle_parser->buffer_length) { + RAPTOR_FREE(cdata, turtle_parser->buffer); + turtle_parser->buffer = NULL; + turtle_parser->buffer_length = 0; + } + + turtle_parser->lineno = 1; + + return 0; +} + + +static int +raptor_turtle_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score= 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "ttl")) + score = 8; + if(!strcmp((const char*)suffix, "n3")) + score = 3; + } + + if(mime_type) { + if(strstr((const char*)mime_type, "turtle")) + score += 6; + if(strstr((const char*)mime_type, "n3")) + score += 3; + } + + /* Do this as long as N3 is not supported since it shares the same syntax */ + if(buffer && len) { +#define HAS_TURTLE_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) +/* The following could also be found with N-Triples but not with @prefix */ +#define HAS_TURTLE_RDF_URI (raptor_memstr((const char*)buffer, len, ": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL) + + if(HAS_TURTLE_PREFIX) { + score = 6; + if(HAS_TURTLE_RDF_URI) + score += 2; + } + } + + return score; +} + + +static raptor_uri* +raptor_turtle_get_graph(raptor_parser* rdf_parser) +{ + raptor_turtle_parser *turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->graph_name) + return raptor_uri_copy(turtle_parser->graph_name->value.uri); + + return NULL; +} + + +#ifdef RAPTOR_PARSER_TRIG +static int +raptor_trig_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score= 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "trig")) + score = 9; +#ifndef RAPTOR_PARSER_TURTLE + if(!strcmp((const char*)suffix, "ttl")) + score = 8; + if(!strcmp((const char*)suffix, "n3")) + score = 3; +#endif + } + + if(mime_type) { + if(strstr((const char*)mime_type, "trig")) + score = 6; +#ifndef RAPTOR_PARSER_TURTLE + if(strstr((const char*)mime_type, "turtle")) + score += 6; + if(strstr((const char*)mime_type, "n3")) + score += 3; +#endif + } + +#ifndef RAPTOR_PARSER_TURTLE + /* Do this as long as N3 is not supported since it shares the same syntax */ + if(buffer && len) { +#define HAS_TRIG_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) +/* The following could also be found with N-Triples but not with @prefix */ +#define HAS_TRIG_RDF_URI (raptor_memstr((const char*)buffer, len, ": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL) + + if(HAS_TRIG_PREFIX) { + score = 6; + if(HAS_TRIG_RDF_URI) + score += 2; + } + } +#endif + + return score; +} +#endif + + +#ifdef RAPTOR_PARSER_TURTLE +static const char* const turtle_names[4] = { "turtle", "ntriples-plus", "n3", NULL }; + +static const char* const turtle_uri_strings[3] = { + "http://www.w3.org/ns/formats/Turtle", + "http://www.dajobe.org/2004/01/turtle/", + NULL +}; + +#define TURTLE_TYPES_COUNT 6 +static const raptor_type_q turtle_types[TURTLE_TYPES_COUNT + 1] = { + /* first one is the default */ + { "text/turtle", 11, 10}, + { "application/x-turtle", 20, 10}, + { "application/turtle", 18, 10}, + { "text/n3", 7, 3}, + { "text/rdf+n3", 11, 3}, + { "application/rdf+n3", 18, 3}, + { NULL, 0} +}; + +static int +raptor_turtle_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = turtle_names; + + factory->desc.mime_types = turtle_types; + + factory->desc.label = "Turtle Terse RDF Triple Language"; + factory->desc.uri_strings = turtle_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_turtle_parser); + + factory->init = raptor_turtle_parse_init; + factory->terminate = raptor_turtle_parse_terminate; + factory->start = raptor_turtle_parse_start; + factory->chunk = raptor_turtle_parse_chunk; + factory->recognise_syntax = raptor_turtle_parse_recognise_syntax; + factory->get_graph = raptor_turtle_get_graph; + + return rc; +} +#endif + + +#ifdef RAPTOR_PARSER_TRIG +static const char* const trig_names[2] = { "trig", NULL }; + +static const char* const trig_uri_strings[2] = { + "http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/", + NULL +}; + +#define TRIG_TYPES_COUNT 1 +static const raptor_type_q trig_types[TRIG_TYPES_COUNT + 1] = { + /* first one is the default */ + { "application/x-trig", 18, 10}, + { NULL, 0, 0} +}; + +static int +raptor_trig_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = trig_names; + + factory->desc.mime_types = trig_types; + + factory->desc.label = "TriG - Turtle with Named Graphs"; + factory->desc.uri_strings = trig_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_turtle_parser); + + factory->init = raptor_turtle_parse_init; + factory->terminate = raptor_turtle_parse_terminate; + factory->start = raptor_turtle_parse_start; + factory->chunk = raptor_turtle_parse_chunk; + factory->recognise_syntax = raptor_trig_parse_recognise_syntax; + factory->get_graph = raptor_turtle_get_graph; + + return rc; +} +#endif + + +#ifdef RAPTOR_PARSER_TURTLE +int +raptor_init_parser_turtle(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_turtle_parser_register_factory); +} +#endif + +#ifdef RAPTOR_PARSER_TRIG +int +raptor_init_parser_trig(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_trig_parser_register_factory); +} +#endif + + +#ifdef STANDALONE +#include <stdio.h> +#include <locale.h> + +#define TURTLE_FILE_BUF_SIZE 2048 + +static void +turtle_parser_print_statement(void *user, + raptor_statement *statement) +{ + FILE* stream = (FILE*)user; + raptor_statement_print(statement, stream); + putc('\n', stream); +} + + + +int +main(int argc, char *argv[]) +{ + char string[TURTLE_FILE_BUF_SIZE]; + raptor_parser rdf_parser; /* static */ + raptor_turtle_parser turtle_parser; /* static */ + raptor_locator *locator = &rdf_parser.locator; + FILE *fh; + const char *filename; + size_t nobj; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + turtle_parser_debug = 1; +#endif + + if(argc > 1) { + filename = argv[1]; + fh = fopen(filename, "r"); + if(!fh) { + fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename, + strerror(errno)); + exit(1); + } + } else { + filename="<stdin>"; + fh = stdin; + } + + memset(string, 0, TURTLE_FILE_BUF_SIZE); + nobj = fread(string, TURTLE_FILE_BUF_SIZE, 1, fh); + if(nobj < TURTLE_FILE_BUF_SIZE) { + if(ferror(fh)) { + fprintf(stderr, "%s: file '%s' read failed - %s\n", + argv[0], filename, strerror(errno)); + fclose(fh); + return(1); + } + } + + if(argc > 1) + fclose(fh); + + memset(&rdf_parser, 0, sizeof(rdf_parser)); + memset(&turtle_parser, 0, sizeof(turtle_parser)); + + locator->line= locator->column = -1; + locator->file= filename; + + turtle_parser.lineno= 1; + + rdf_parser.world = raptor_new_world(); + rdf_parser.context = &turtle_parser; + rdf_parser.base_uri = raptor_new_uri(rdf_parser.world, + (const unsigned char*)"http://example.org/fake-base-uri/"); + + raptor_parser_set_statement_handler(&rdf_parser, stdout, + turtle_parser_print_statement); + raptor_turtle_parse_init(&rdf_parser, "turtle"); + + turtle_parser.error_count = 0; + +#ifdef TURTLE_PUSH_PARSE + turtle_push_parse(&rdf_parser, string, strlen(string)); +#else + turtle_parse(&rdf_parser, string, strlen(string)); +#endif + + raptor_turtle_parse_terminate(&rdf_parser); + + raptor_free_uri(rdf_parser.base_uri); + + raptor_free_world(rdf_parser.world); + + return (0); +} +#endif |