diff options
Diffstat (limited to 'src/turtle_lexer.l')
-rw-r--r-- | src/turtle_lexer.l | 1124 |
1 files changed, 1124 insertions, 0 deletions
diff --git a/src/turtle_lexer.l b/src/turtle_lexer.l new file mode 100644 index 0000000..8d0c53e --- /dev/null +++ b/src/turtle_lexer.l @@ -0,0 +1,1124 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_lexer.l - Raptor Turtle lexer - making tokens for turtle grammar generator + * + * Copyright (C) 2003-2013, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + * Turtle is defined in http://www.dajobe.org/2004/01/turtle/ + * + * To generate the C files from this source, rather than use the + * shipped turtle_lexer.c/.h needs a patched version of flex 2.5.31 such + * as the one available in Debian GNU/Linux. Details below + * near the %option descriptions. + * + */ + + +/* recognise 8-bits */ +%option 8bit +%option warn nodefault + +/* all symbols prefixed by this */ +%option prefix="turtle_lexer_" + +/* This is not needed, flex is invoked -oturtle_lexer.c */ +/* %option outfile="turtle_lexer.c" */ + +/* Emit a C header file for prototypes + * Only available in flex 2.5.13 or newer. + * It was renamed to header-file in flex 2.5.19 + */ +%option header-file="turtle_lexer.h" + +/* Do not emit #include <unistd.h> + * Only available in flex 2.5.7 or newer. + * Broken in flex 2.5.31 without patches. + */ +%option nounistd + +/* Never interactive */ +/* No isatty() check */ +%option never-interactive + +/* Batch scanner */ +%option batch + +/* Never use yyunput */ +%option nounput + +/* Supply our own alloc/realloc/free functions */ +%option noyyalloc noyyrealloc noyyfree + +/* Re-entrant scanner */ +%option reentrant + +%option extra-type="raptor_parser*" + +/* Makes yyget_lval() yyset_lval() and yylval appear */ +%option bison-bridge +/* Makes yyget_lloc() yyset_lloc() and yylloc appear */ +/* %option bison-locations */ + + /* definitions */ + +%{ + +/* NOTE: These headers are NOT included here but are inserted by + * fix-flex since otherwise it appears far too late in the generated C + */ + +/* +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif +*/ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SETJMP_H +#include <setjmp.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> +#include <turtle_common.h> + +#define YYSTYPE TURTLE_PARSER_STYPE + +/* Prototypes */ +static unsigned char *turtle_copy_token(unsigned char *text, size_t len); +static unsigned char *turtle_copy_string_token(raptor_parser* rdf_parser, unsigned char *text, size_t len, int delim); +void turtle_lexer_syntax_error(void* ctx, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval); +#endif + +#ifdef __cplusplus +#define INPUT_FN yyinput +#else +#define INPUT_FN input +#endif + + +#if FLEX_VERSION_DECIMAL < 20536 +/* debian flex 2.5.35-10.1 added these column header prototypes in + * re-entrant mode. standard flex omits them + */ +void turtle_lexer_set_column(int column_no, yyscan_t yyscanner); +int turtle_lexer_get_column(yyscan_t yyscanner); +#endif + +static void turtle_lexer_cleanup(yyscan_t yyscanner); +#undef yycleanup +#define yycleanup turtle_lexer_cleanup + +#ifdef HAVE_SETJMP +static jmp_buf turtle_lexer_fatal_error_longjmp_env; + +/* fatal error handler declaration */ +#define YY_FATAL_ERROR(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, msg); \ + longjmp(turtle_lexer_fatal_error_longjmp_env, 1); \ +} while(0) +#else +#define YY_FATAL_ERROR(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, msg); \ + abort(); \ +} while(0) +#endif + +/* Remove the re-fill function since it should never be called */ +#define YY_INPUT(buf,result,max_size) { return YY_NULL; } + +static void turtle_lexer_error(yyscan_t yyscanner, raptor_log_level level, yyconst char *message, ...) RAPTOR_PRINTF_FORMAT(3, 4); + +/* Fatal error handler that returns EOF instead of abort()/longjmp() + * so that parser can clean up properly */ +#define YY_FATAL_ERROR_EOF(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, "%s", msg); \ + yyterminate(); \ +} while(0) + +/* Out-of-memory reporting macro */ +#define TURTLE_LEXER_OOM() YY_FATAL_ERROR_EOF(turtle_lexer_oom_text) +static char turtle_lexer_oom_text[]="turtle_lexer: Out of memory"; + +/* Do not need input() to to read from stdin */ +#define YY_NO_INPUT 1 + +#define YY_USER_ACTION \ + turtle_parser->consumed += yyleng; + +%} + +/* Tokens from Turtle 2013 spec - lex-ifyed to remove unicode ranges */ +PN_CHARS_BASE [A-Za-z\x80-\xff] +PN_CHARS {PN_CHARS_BASE}|"_"|"-"|[0-9] +BS_ESCAPES [-_~\.!$&\'()*+,;=/?#@%] +HEX [0-9A-Fa-f] +PLX "%"{HEX}{HEX})|("\\"{BS_ESCAPES} + +LANGTAG "@"[A-Za-z][-A-Z_a-z0-9]* + +/* flex: only 1 level of definition expansion so have to expand PLX */ +BN_LABEL ({PN_CHARS_BASE}|"_"|[0-9])(({PN_CHARS}|".")*({PN_CHARS}))* +PN_PREFIX ({PN_CHARS_BASE})(({PN_CHARS}|".")*({PN_CHARS}))* +PN_LOCAL ({PN_CHARS_BASE}|"_"|[0-9]|":"|{PLX})(({PN_CHARS}|"."|":"|{PLX})*({PN_CHARS}|":"|{PLX}))* + +QNAME {PN_PREFIX}?":"{PN_LOCAL}? + +UCHAR "\\u"{HEX}{HEX}{HEX}{HEX}|"\\U"{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX} +IRI "<"([^\x00-\x20<>\"{}\|^`\\]|{UCHAR})*">" + +INTEGER [-+]?[0-9]+ +DECIMAL [-+]?[0-9]*"."[0-9]+ +DOUBLE [-+]?([0-9]+"."[0-9]*{EXPONENT}|"."[0-9]+{EXPONENT}|[0-9]+{EXPONENT}) +EXPONENT [eE][+-]?[0-9]+ + + +%x PREF LONG_DLITERAL LONG_SLITERAL + + +%% + /* rules */ + +%{ + raptor_parser *rdf_parser = yyextra; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + +#ifdef HAVE_SETJMP + if(setjmp(turtle_lexer_fatal_error_longjmp_env)) + return 1; +#endif +%} + + +\r\n|\r|\n { turtle_parser->lineno++; } + +[\ \t\v]+ { /* empty */ } + + +"a" { return A; } + +"." { return DOT; } +"," { return COMMA; } +";" { return SEMICOLON; } +"[" { return LEFT_SQUARE; } +"]" { return RIGHT_SQUARE; } +"@prefix" { BEGIN(PREF); return PREFIX; } +[Pp][Rr][Ee][Ff][Ii][Xx] { BEGIN(PREF); + return SPARQL_PREFIX; } +"@base" { return BASE; } +[Bb][Aa][Ss][Ee] { return SPARQL_BASE; } +"^^" { return HAT; } +"(" { return LEFT_ROUND; } +")" { return RIGHT_ROUND; } +"{" { return LEFT_CURLY; } +"}" { return RIGHT_CURLY; } +"true" { return TRUE_TOKEN; } +"false" { return FALSE_TOKEN; } + + +\"([^\"\\\n\r]|\\[^\n\r])*\" { yylval->string = turtle_copy_string_token(rdf_parser, (unsigned char*)yytext+1, yyleng-2, '"'); /* ' */ + if(!yylval->string) + yyterminate(); + + return STRING_LITERAL; } + +\'([^\'\\\n\r]|\\[^\n\r])*\' { yylval->string = turtle_copy_string_token(rdf_parser, (unsigned char*)yytext+1, yyleng-2, '"'); /* ' */ + if(!yylval->string) + yyterminate(); + + return STRING_LITERAL; } + +\"\"\" { BEGIN(LONG_DLITERAL); + turtle_parser->sb = raptor_new_stringbuffer(); + if(!turtle_parser->sb) + TURTLE_LEXER_OOM(); + } + +<LONG_DLITERAL>\"\"\" { + size_t len; + + BEGIN(INITIAL); + len = raptor_stringbuffer_length(turtle_parser->sb); + yylval->string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!yylval->string) + TURTLE_LEXER_OOM(); + raptor_stringbuffer_copy_to_string(turtle_parser->sb, (unsigned char*)yylval->string, len); + yylval->string[len]='\0'; + + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return STRING_LITERAL; } + +<LONG_DLITERAL>\"|(\\.|[^\"\\]|\n)* { + char *p; + + if(*yytext == EOF) { + BEGIN(INITIAL); + turtle_syntax_error(rdf_parser, "End of file in middle of literal"); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return EOF; + } + + for(p = yytext; *p; p++) { + if(*p == '\n') + turtle_parser->lineno++; + } + + if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + + } + +<LONG_DLITERAL>\\ { + /* this should only happen if \ is at the end of the file so the Turtle doc is illegal anyway */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + turtle_syntax_error(rdf_parser, "End of file in middle of \"\"\"literal\"\"\""); + yyterminate(); +} + +<LONG_DLITERAL><<EOF>> { + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + if(!turtle_parser->is_end) { + /* next run will fix things, hopefully */ + return EOF; + } + /* otherwise abort */ + turtle_syntax_error(rdf_parser, "End of file in middle of \"\"\"literal\"\"\""); + yyterminate(); +} + +\'\'\' { BEGIN(LONG_SLITERAL); + turtle_parser->sb = raptor_new_stringbuffer(); + if(!turtle_parser->sb) + TURTLE_LEXER_OOM(); + } + +<LONG_SLITERAL>\'\'\' { + size_t len; + + BEGIN(INITIAL); + len = raptor_stringbuffer_length(turtle_parser->sb); + yylval->string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!yylval->string) + TURTLE_LEXER_OOM(); + raptor_stringbuffer_copy_to_string(turtle_parser->sb, (unsigned char*)yylval->string, len); + yylval->string[len]='\0'; + + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return STRING_LITERAL; } + +<LONG_SLITERAL>\'|(\\.|[^\'\\]|\n)* { + char *p; + + if(*yytext == EOF) { + BEGIN(INITIAL); + turtle_syntax_error(rdf_parser, "End of file in middle of \'\'\'literal\'\'\'"); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return EOF; + } + + for(p = yytext; *p; p++) { + if(*p == '\n') + turtle_parser->lineno++; + } + + if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + + } + +<LONG_SLITERAL>\\ { + /* this should only happen if \ is at the end of the file so the Turtle doc is illegal anyway */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + turtle_syntax_error(rdf_parser, "End of file in middle of '''literal'''"); + yyterminate(); +} + +<LONG_SLITERAL><<EOF>> { + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + if(!turtle_parser->is_end) { + /* next run will fix things, hopefully */ + return EOF; + } + /* otherwise abort */ + turtle_syntax_error(rdf_parser, "End of file in middle of '''literal'''"); + yyterminate(); +} + +"_:"{BN_LABEL} { yylval->string = turtle_copy_token((unsigned char*)yytext+2, yyleng-2); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return BLANK_LITERAL; } + +{QNAME} { yylval->uri = turtle_qname_to_uri(rdf_parser, (unsigned char*)yytext, yyleng); + if(!yylval->uri) { + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_ERROR, "Failed to convert qname %s to URI", yytext); + yyterminate(); + } + + return QNAME_LITERAL; } + +{DECIMAL} { yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return DECIMAL_LITERAL; +} + +{DOUBLE} { yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return FLOATING_LITERAL; +} + +{INTEGER} { yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return INTEGER_LITERAL; } + +<PREF>[\ \t\v]+ { /* eat up leading whitespace */ } +<PREF>{PN_PREFIX}":" { yylval->string=turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + BEGIN(INITIAL); + return IDENTIFIER; } +<PREF>":" { BEGIN(INITIAL); + yylval->string = turtle_copy_token((unsigned char*)yytext, 0); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return IDENTIFIER; } + +<PREF>(.|\n) { BEGIN(INITIAL); + if(*yytext == EOF) + return EOF; + + turtle_syntax_error(rdf_parser, "syntax error at '%c'", *yytext); + yyterminate(); } + + +{IRI}[\ \t\v\r\n]*("=")?[\ \t\v\r\n]*"{" { + raptor_stringbuffer* sb; + unsigned char* uri_string; + + /* make length just the IRI */ + while(yytext[yyleng - 1] != '>') + yyleng--; + + sb = raptor_new_stringbuffer(); + if(!sb) + TURTLE_LEXER_OOM(); + + /* start at yytext + 1 to skip '<' and operate over + * length-2 bytes to skip '<' and '>' + */ + if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-2, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) { + raptor_free_stringbuffer(sb); + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + uri_string = raptor_stringbuffer_as_string(sb); + + if(!*uri_string) + yylval->uri = raptor_uri_copy(rdf_parser->base_uri); + else + yylval->uri = raptor_new_uri_relative_to_base(rdf_parser->world, rdf_parser->base_uri, uri_string); + + raptor_free_stringbuffer(sb); + + if(!yylval->uri) + TURTLE_LEXER_OOM(); + return GRAPH_NAME_LEFT_CURLY; } + +{QNAME}[\ \t\v\r\n]*("=")?[\ \t\v\r\n]*"{" { + while(1) { + int c = yytext[yyleng - 1]; + if(c == '{' || c == ' ' || c=='\t' || c == '\v' || c == '\n' || + c == '=') { + yyleng--; + } else + break; + } + yytext[yyleng] = '\0'; + + yylval->uri = turtle_qname_to_uri(rdf_parser, (unsigned char*)yytext, yyleng); + if(!yylval->uri) { + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_ERROR, "Failed to convert qname %s to URI", yytext); + yyterminate(); + } + + return GRAPH_NAME_LEFT_CURLY; } + +{IRI} { if(yyleng == 2) + yylval->uri = raptor_uri_copy(rdf_parser->base_uri); + else { + raptor_stringbuffer* sb; + unsigned char* uri_string; + + yytext[yyleng-1]='\0'; + sb = raptor_new_stringbuffer(); + if(!sb) + TURTLE_LEXER_OOM(); + if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-1, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) { + raptor_free_stringbuffer(sb); + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + uri_string = raptor_stringbuffer_as_string(sb); + yylval->uri = raptor_new_uri_relative_to_base(rdf_parser->world, rdf_parser->base_uri, uri_string); + if(!yylval->uri) { + raptor_free_stringbuffer(sb); + TURTLE_LEXER_OOM(); + } + raptor_free_stringbuffer(sb); + } + return URI_LITERAL; } + +{LANGTAG} { yylval->string = turtle_copy_token((unsigned char*)yytext+1, yyleng-1); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return LANGTAG; } + +\#[^\r\n]*(\r\n|\r|\n) { /* # comment */ + turtle_parser->lineno++; + } + +\#[^\r\n]* { /* # comment on the last line with no terminating newline */ + } + +. { if(*yytext == EOF) + return EOF; + + turtle_syntax_error(rdf_parser, "syntax error at '%c'", *yytext); + yyterminate(); + } + +%% + /* user code */ + +int +yywrap (yyscan_t yyscanner) { + return 1; +} + + +static unsigned char * +turtle_copy_token(unsigned char *text, size_t len) +{ + unsigned char *s; + if(!len) + len = strlen((const char*)text); + s = RAPTOR_MALLOC(unsigned char*, len + 1); + if(s) { + memcpy(s, text, len); + s[len] = '\0'; + } + return s; +} + + +static unsigned char * +turtle_copy_string_token(raptor_parser* rdf_parser, + unsigned char *string, size_t len, int delim) +{ + raptor_stringbuffer* sb = NULL; + int rc; + + if(len) { + sb = raptor_new_stringbuffer(); + if(!sb) + return NULL; + + rc = raptor_stringbuffer_append_turtle_string(sb, string, len, delim, + (raptor_simple_message_handler)turtle_lexer_syntax_error, + rdf_parser, 0); + if(rc) { + raptor_free_stringbuffer(sb); + return NULL; + } + + len = raptor_stringbuffer_length(sb); + } + + string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(string) { + if(sb) + raptor_stringbuffer_copy_to_string(sb, string, len+1); + string[len]='\0'; + } + + if(sb) + raptor_free_stringbuffer(sb); + + return string; +} + + +void +turtle_lexer_syntax_error(void* ctx, const char *message, ...) +{ + raptor_parser* rdf_parser = (raptor_parser *)ctx; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + va_list arguments; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + va_start(arguments, message); + raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser), + RAPTOR_LOG_LEVEL_ERROR, message, arguments); + + va_end(arguments); +} + + +/* + * turtle_lexer_error: + * @yyscanner: scanner object + * @level: log level RAPTOR_LOG_LEVEL_FATAL otherwise error + * @message: erro message + * + * INTERNAL - replacement for the generated error handler. + */ +static void turtle_lexer_error(yyscan_t yyscanner, + raptor_log_level level, + yyconst char *message, ...) +{ + raptor_parser *rdf_parser = NULL; + va_list arguments; + + va_start(arguments, message); + + if(yyscanner) + rdf_parser = (raptor_parser*)turtle_lexer_get_extra(yyscanner); + + /* This handles NULL rdf_parser properly */ + raptor_parser_log_error_varargs(rdf_parser, level, message, arguments); + + va_end(arguments); +} + + +/* Define LEXER_ALLOC_TRACKING to enable allocated memory tracking + * - fixes lexer memory leak when ensure_buffer_stack fails + */ + +#ifdef LEXER_ALLOC_TRACKING +typedef struct { + /* Number of void* slots allocated */ + int lexer_allocs_size; + /* Allocted void* slots follow in memory after this header */ +} lexer_alloc_tracker_header; + +/* Initial alloc tracker slot array size - 2 seems to be enough for almost all cases */ +static const int initial_lexer_allocs_size = 2; +#endif + +/* + * turtle_lexer_cleanup: + * @yyscanner: + * + * INTERNAL - Clean up unfreed lexer allocs if LEXER_ALLOC_TRACKING is enabled. + */ +static void turtle_lexer_cleanup(yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + + if(!yyscanner) + return; + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + return; + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + return; + lexer_allocs = (void**)&tracker[1]; + + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i]) + free(lexer_allocs[i]); + lexer_allocs[i] = NULL; + } + free(rdf_parser->lexer_user_data); + rdf_parser->lexer_user_data = NULL; +#endif +} + + +/* + * turtle_lexer_alloc: + * @size + * @yyscanner + * + * INTERNAL - alloc replacement. + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void *turtle_lexer_alloc(yy_size_t size, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + void *ptr; + + /* yyscanner not initialized -> probably initializing yyscanner itself + * -> just malloc without tracking + */ + if(!yyscanner) + return malloc(size); + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + YY_FATAL_ERROR("lexer_alloc: yyscanner extra not initialized"); + + /* try to allocate tracker if it does not exist */ + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) { + /* allocate tracker header + array of void* slots */ + tracker = (lexer_alloc_tracker_header*)calloc(1, sizeof(lexer_alloc_tracker_header)+initial_lexer_allocs_size*sizeof(void*)); + if(!tracker) + YY_FATAL_ERROR("lexer_alloc: cannot allocate tracker"); + tracker->lexer_allocs_size = initial_lexer_allocs_size; + rdf_parser->lexer_user_data = (void *)tracker; + } + lexer_allocs = (void**)&tracker[1]; + + /* allocate memory */ + ptr = malloc(size); + + /* find a free slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(!lexer_allocs[i]) { + lexer_allocs[i] = ptr; + break; + } + } + + /* no free slots -> grow tracker slot array */ + if(i>=tracker->lexer_allocs_size) { + int j; + void **dest; + tracker = (lexer_alloc_tracker_header*)calloc(1, sizeof(lexer_alloc_tracker_header)+i*2*sizeof(void*)); + if(!tracker) { + if(ptr) + free(ptr); + YY_FATAL_ERROR("lexer_alloc: cannot grow tracker"); + } + tracker->lexer_allocs_size = i*2; + + /* copy data from old tracker */ + dest = (void**)&tracker[1]; + for(j = 0; j < i; ++j) { + dest[j] = lexer_allocs[j]; + } + + /* set new item to first free slot */ + dest[j] = ptr; + + /* free old tracker and replace with new one */ + free(rdf_parser->lexer_user_data); + rdf_parser->lexer_user_data = tracker; + } + + return ptr; +#else + return malloc(size); +#endif +} + + +/* + * turtle_lexer_realloc: + * + * INTERNAL - realloc replacement + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void *turtle_lexer_realloc(void *ptr, yy_size_t size, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + void *newptr; + + if(!yyscanner) + YY_FATAL_ERROR("lexer_realloc: yyscanner not initialized"); + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + YY_FATAL_ERROR("lexer_realloc: yyscanner extra not initialized"); + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + YY_FATAL_ERROR("lexer_realloc: no alloc tracker"); + lexer_allocs = (void**)&tracker[1]; + + /* find the old slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i] == ptr) + break; + } + + /* no old slot -> error */ + if(i>=tracker->lexer_allocs_size) + YY_FATAL_ERROR("lexer_realloc: cell not in tracker"); + + /* realloc */ + newptr = realloc((char*)ptr, size); + + /* replace entry in tracker */ + lexer_allocs[i] = newptr; + + return newptr; +#else + return realloc((char*)ptr, size); +#endif +} + + +/* + * turtle_lexer_free: + * + * INTERNAL - free replacement. + * Checks for NULL pointer to be freed unlike the default lexer free function. + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void turtle_lexer_free(void *ptr, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + + /* do not free NULL */ + if(!ptr) + return; + + /* free ptr even if we would encounter an error */ + free(ptr); + + /* yyscanner is allocated with turtle_lexer_alloc() but it's never stored in the tracker + * - we need yyscanner to access the tracker */ + if(!yyscanner || ptr == yyscanner) + return; + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + return; + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + return; + lexer_allocs = (void**)&tracker[1]; + + /* find the slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i] == ptr) + break; + } + + /* no slot -> error */ + if(i>=tracker->lexer_allocs_size) + YY_FATAL_ERROR("lexer_free: cell not in tracker"); + + /* remove entry from tracker */ + lexer_allocs[i] = NULL; +#else + if(ptr) + free(ptr); +#endif +} + + +#ifdef RAPTOR_DEBUG + +const char * +turtle_token_print(raptor_world* world, int token, YYSTYPE *lval) +{ + #define TTP_DEBUG_BUFFER_SIZE 2048 + static char buffer[TTP_DEBUG_BUFFER_SIZE]; + + if(!token) + return "<<EOF>>"; + + switch(token) { + case PREFIX: + return "PREFIX"; + + case BASE: + return "BASE"; + + case A: + return "A"; + + case DOT: + return "DOT"; + + case COMMA: + return "COMMA"; + + case SEMICOLON: + return "SEMICOLON"; + + case LEFT_SQUARE: + return "LEFT_SQUARE"; + + case RIGHT_SQUARE: + return "RIGHT_SQUARE"; + + case HAT: + return "HAT"; + + case STRING_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "STRING_LITERAL(%s)", + lval->string); + return buffer; + + case URI_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "URI_LITERAL(%s)", + (lval->uri ? (char*)raptor_uri_as_string(lval->uri) : "")); + return buffer; + + case BLANK_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "BLANK_LITERAL(%s)", + lval->string); + return buffer; + + case QNAME_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "QNAME_LITERAL(%s)", + (lval->uri ? (char*)raptor_uri_as_string(lval->uri) : "")); + return buffer; + + case INTEGER_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "INTEGER_LITERAL(%s)", + lval->string); + return buffer; + + case FLOATING_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "FLOATING_LITERAL(%s)", + lval->string); + return buffer; + + case IDENTIFIER: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "IDENTIFIER(%s)", + (lval->string ? (char*)lval->string : "")); + return buffer; + + case LANGTAG: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "LANGTAG(%s)", + (lval->string ? (char*)lval->string : "")); + return buffer; + + case DECIMAL_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "DECIMAL_LITERAL(%s)", + lval->string); + return buffer; + + case ERROR_TOKEN: + return "ERROR"; + + case LEFT_CURLY: + return "{"; + + case RIGHT_CURLY: + return "}"; + + case GRAPH_NAME_LEFT_CURLY: + return "GRAPH_NAME {"; + + default: + RAPTOR_DEBUG2("UNKNOWN token %d - add a new case\n", token); + return "(UNKNOWN)"; + } +} +#endif + + + +void +turtle_token_free(raptor_world* world, int token, YYSTYPE *lval) +{ + if(!token) + return; + + switch(token) { + case STRING_LITERAL: + case BLANK_LITERAL: + case IDENTIFIER: + if(lval->string) + RAPTOR_FREE(char*, lval->string); + break; + + case URI_LITERAL: + case QNAME_LITERAL: + if(lval->uri) + raptor_free_uri(lval->uri); + break; + default: + break; + } +} + + +#ifdef STANDALONE + +#define FILE_READ_BUF_SIZE 4096 + +int +main(int argc, char *argv[]) +{ + char *turtle_string = NULL; + raptor_parser rdf_parser; + raptor_turtle_parser turtle_parser; + yyscan_t scanner; + int token = EOF; + YYSTYPE lval; + const unsigned char *uri_string; + const char *filename = NULL; + char *buf = NULL; + size_t len; + raptor_world* world; + FILE *fh; + + world = raptor_new_world(); + + if(argc > 1) { + filename = argv[1]; + fh = fopen(filename, "r"); + if(!fh) { + fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename, + strerror(errno)); + exit(1); + } + } else { + filename="<stdin>"; + fh = (FILE*)stdin; + } + + turtle_string = RAPTOR_CALLOC(char*, FILE_READ_BUF_SIZE, 1); + fread(turtle_string, FILE_READ_BUF_SIZE, 1, fh); + fclose(fh); + + memset(&rdf_parser, 0, sizeof(rdf_parser)); + memset(&turtle_parser, 0, sizeof(turtle_parser)); + + rdf_parser.world = world; + + /* discard namespace errors - caused by not interpreting @prefix + * and hence causing failed qname construction + */ + raptor_namespaces_init(rdf_parser.world, &turtle_parser.namespaces, 0); + + yylex_init(&turtle_parser.scanner); + scanner = turtle_parser.scanner; + + len = strlen(RAPTOR_GOOD_CAST(const char*, turtle_string)); + buf = RAPTOR_MALLOC(char*, len + 3); + memcpy(buf, turtle_string, len); + buf[len] = ' '; + buf[len + 1] = buf[len + 2] = '\0'; /* YY_END_OF_BUFFER_CHAR; */ + (void)turtle_lexer__scan_buffer(buf, len + 3, scanner); + + turtle_lexer_set_extra(&rdf_parser, scanner); + + /* Initialise enough of the parser and locator to get error messages */ + rdf_parser.context = &turtle_parser; + turtle_parser.lineno = 1; + rdf_parser.locator.file = filename; + rdf_parser.locator.column = -1; + + uri_string = raptor_uri_filename_to_uri_string(filename); + rdf_parser.base_uri = raptor_new_uri(world, uri_string); + RAPTOR_FREE(char*, uri_string); + + while(1) { + memset(&lval, 0, sizeof(YYSTYPE)); + if(turtle_lexer_get_text(scanner) != NULL) + printf("yyinput '%s'\n", turtle_lexer_get_text(scanner)); + token = yylex(&lval, scanner); +#ifdef RAPTOR_DEBUG + printf("token %s\n", turtle_token_print(world, token, &lval)); +#else + printf("token %d\n", token); +#endif + turtle_token_free(world, token, &lval); + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } + + if(buf) + RAPTOR_FREE(char*, buf); + + yylex_destroy(scanner); + + raptor_namespaces_clear(&turtle_parser.namespaces); + + raptor_free_uri(rdf_parser.base_uri); + + RAPTOR_FREE(char*, turtle_string); + + raptor_free_world(world); + + + if(token == ERROR_TOKEN) + return 1; + + return 0; +} +#endif |