diff options
Diffstat (limited to '')
80 files changed, 69776 insertions, 0 deletions
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..41829e0 --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,413 @@ +# raptor/src/CMakeLists.txt +# +# Original listfile by Daniel Richard G. <skunk@iSKUNK.ORG> +# This file is in the public domain. +# +# Changes 01/20/15 J Kyle Medley +# + +IF(RAPTOR_PARSER_RDFA) + INCLUDE_DIRECTORIES(BEFORE ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa) +ENDIF(RAPTOR_PARSER_RDFA) + +# ** Parser sources ** + +# XML parser enabled +IF(RAPTOR_PARSER_RDFXML) + SET(raptor_parser_rdfxml_sources raptor_rdfxml.c) + SET(raptor_libxml_libs ${LIBXML2_LIBRARIES}) +ENDIF(RAPTOR_PARSER_RDFXML) + +# N triples parser enabled +IF(RAPTOR_PARSER_NTRIPLES OR RAPTOR_PARSER_NQUADS) + SET(raptor_parser_ntriples_nquads_sources ntriples_parse.c raptor_ntriples.c) +ENDIF(RAPTOR_PARSER_NTRIPLES OR RAPTOR_PARSER_NQUADS) + +# Turtle parser enabled +IF(RAPTOR_PARSER_TURTLE OR RAPTOR_PARSER_TRIG) + SET(raptor_parser_turtle_trig_sources +# turtle_common.h + ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.c +# turtle_lexer.h + ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.c +# turtle_parser.h + ) + + # Generate the turtle parser + ADD_CUSTOM_TARGET(turtle_tables_tgt DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.tab.c) + ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.tab.c + COMMAND ${BISON_EXECUTABLE} -o ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.c ${CMAKE_CURRENT_SOURCE_DIR}/turtle_parser.y + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/turtle_parser.y) + + ADD_CUSTOM_TARGET(turtle_parser_tgt DEPENDS turtle_tables_tgt ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.c ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.h) + ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.c + COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/fix-bison.pl ${CMAKE_CURRENT_BINARY_DIR}/turtle_parser.c + DEPENDS turtle_tables_tgt) + + # Generate the turtle lexer + ADD_CUSTOM_TARGET(turtle_flex_tgt DEPENDS 
${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.t) + ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.t + COMMAND ${FLEX_EXECUTABLE} -o ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.t ${CMAKE_CURRENT_SOURCE_DIR}/turtle_lexer.l + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/turtle_lexer.l) + + ADD_CUSTOM_TARGET(turtle_lexer_tgt DEPENDS turtle_flex_tgt) + ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.c + COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/fix-flex.pl ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.t > ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.c + COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/fix-flex.pl ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.h > ${CMAKE_CURRENT_BINARY_DIR}/t + COMMAND ${CMAKE_COMMAND} -E rename ${CMAKE_CURRENT_BINARY_DIR}/t ${CMAKE_CURRENT_BINARY_DIR}/turtle_lexer.h + DEPENDS turtle_flex_tgt) +ENDIF(RAPTOR_PARSER_TURTLE OR RAPTOR_PARSER_TRIG) + +# RSS feed parser enabled +IF(RAPTOR_PARSER_RSS OR RAPTOR_SERIALIZER_RSS_1_0) + SET(raptor_rss_common_sources raptor_rss_common.c raptor_rss.h) +ENDIF(RAPTOR_PARSER_RSS OR RAPTOR_SERIALIZER_RSS_1_0) +IF(RAPTOR_PARSER_RSS) + SET(raptor_parser_rss_sources raptor_rss.c) + SET(raptor_libxml_libs ${LIBXML2_LIBRARIES}) +ENDIF(RAPTOR_PARSER_RSS) + +# GRDDL parser enabled +IF(RAPTOR_PARSER_GRDDL) + SET(raptor_parser_grddl_sources raptor_grddl.c) + SET(raptor_libxml_libs ${LIBXML2_LIBRARIES}) + SET(raptor_libxslt_libs ${LIBXSLT_LIBRARIES}) +ENDIF(RAPTOR_PARSER_GRDDL) + +# Guess which parser to use based on context +IF(RAPTOR_PARSER_GUESS) + SET(raptor_parser_guess_sources raptor_guess.c) +ENDIF(RAPTOR_PARSER_GUESS) + +# RDFA parser enabled +IF(RAPTOR_PARSER_RDFA) + SET(raptor_parser_rdfa_sources raptor_librdfa.c) +ENDIF(RAPTOR_PARSER_RDFA) + +# JSON parser enabled +IF(RAPTOR_PARSER_JSON) + SET(raptor_parser_json_sources raptor_json.c) +ENDIF(RAPTOR_PARSER_JSON) + +# ** Serializers ** + +IF(RAPTOR_SERIALIZER_RDFXML) + SET(raptor_serializer_rdfxml_sources 
raptor_serialize_rdfxml.c) +ENDIF(RAPTOR_SERIALIZER_RDFXML) +IF(RAPTOR_SERIALIZER_NTRIPLES OR RAPTOR_SERIALIZER_NQUADS) + SET(raptor_serializer_ntriples_nquads_sources raptor_serialize_ntriples.c) +ENDIF(RAPTOR_SERIALIZER_NTRIPLES OR RAPTOR_SERIALIZER_NQUADS) +IF(RAPTOR_SERIALIZER_RDFXML_ABBREV OR RAPTOR_SERIALIZER_TURTLE OR RAPTOR_SERIALIZER_MKR) + SET(raptor_serializer_abbrev_sources raptor_abbrev.c) +ENDIF(RAPTOR_SERIALIZER_RDFXML_ABBREV OR RAPTOR_SERIALIZER_TURTLE OR RAPTOR_SERIALIZER_MKR) +IF(RAPTOR_SERIALIZER_RDFXML_ABBREV) + SET(raptor_serializer_rdfxml_abbrev_sources raptor_serialize_rdfxmla.c) +ENDIF(RAPTOR_SERIALIZER_RDFXML_ABBREV) +IF(RAPTOR_SERIALIZER_TURTLE OR RAPTOR_SERIALIZER_MKR) + SET(raptor_serializer_turtle_sources raptor_serialize_turtle.c) +ENDIF(RAPTOR_SERIALIZER_TURTLE OR RAPTOR_SERIALIZER_MKR) +IF(RAPTOR_SERIALIZER_RSS_1_0) + SET(raptor_serializer_rss_1_0_sources raptor_serialize_rss.c) +ENDIF(RAPTOR_SERIALIZER_RSS_1_0) +IF(RAPTOR_SERIALIZER_DOT) + SET(raptor_serializer_dot_sources raptor_serialize_dot.c) +ENDIF(RAPTOR_SERIALIZER_DOT) +IF(RAPTOR_SERIALIZER_HTML) + SET(raptor_serializer_html_sources raptor_serialize_html.c) +ENDIF(RAPTOR_SERIALIZER_HTML) +IF(RAPTOR_SERIALIZER_JSON) + SET(raptor_serializer_json_sources raptor_serialize_json.c) + SET(raptor_yajl_libs ${YAJL_LIBRARIES}) +ENDIF(RAPTOR_SERIALIZER_JSON) + +IF(RAPTOR_WWW STREQUAL "curl") + SET(raptor_www_sources raptor_www_curl.c) + SET(raptor_www_libs ${CURL_LIBRARIES}) +ELSEIF(RAPTOR_WWW STREQUAL "fetch") + SET(raptor_www_sources raptor_www_libfetch.c) + #SET(raptor_www_libs ${FETCH_LIBRARIES}) +ELSEIF(RAPTOR_WWW STREQUAL "xml") + SET(raptor_www_sources raptor_www_libxml.c) + SET(raptor_www_libs ${LIBXML2_LIBRARIES}) +ENDIF(RAPTOR_WWW STREQUAL "curl") + +IF(RAPTOR_XML STREQUAL "libxml") + SET(raptor_libxml_sources raptor_libxml.c) + SET(raptor_libxml_libs ${LIBXML2_LIBRARIES}) +ENDIF(RAPTOR_XML STREQUAL "libxml") +IF(RAPTOR_PARSER_RDFA) + SET(raptor_librdfa_sources + 
${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/context.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/curie.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/iri.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/language.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/lists.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/namespace.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/rdfa.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/rdfa_utils.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/strtok_r.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/subject.c + ${CMAKE_CURRENT_SOURCE_DIR}/../librdfa/triple.c +# ${CMAKE_SOURCE_DIR}/librdfa/rdfa.h +# ${CMAKE_SOURCE_DIR}/librdfa/rdfa_utils.h +# ${CMAKE_SOURCE_DIR}/librdfa/strtok_r.h + ) +ENDIF(RAPTOR_PARSER_RDFA) + +IF(NOT HAVE_STRCASECMP AND NOT HAVE_STRICMP) + SET(raptor_strcasecmp_sources strcasecmp.c) +ENDIF(NOT HAVE_STRCASECMP AND NOT HAVE_STRICMP) + +IF(RAPTOR_PARSEDATE) + SET(raptor_parsedate_sources ${CMAKE_CURRENT_BINARY_DIR}/parsedate.c) + + # Generate the date/time parser + ADD_CUSTOM_TARGET(parsedate_tables_tgt DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/parsedate.tab.c) + ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/parsedate.tab.c + COMMAND ${BISON_EXECUTABLE} -o ${CMAKE_CURRENT_BINARY_DIR}/parsedate.c ${CMAKE_CURRENT_SOURCE_DIR}/parsedate.y + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/parsedate.y) + + ADD_CUSTOM_TARGET(parsedate_tgt DEPENDS parsedate_tables_tgt ${CMAKE_CURRENT_BINARY_DIR}/parsedate.c ${CMAKE_CURRENT_BINARY_DIR}/parsedate.h) + ADD_CUSTOM_COMMAND(OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/parsedate.c + COMMAND ${PERL_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/../scripts/fix-bison.pl ${CMAKE_CURRENT_BINARY_DIR}/parsedate.c + DEPENDS parsedate_tables_tgt) +ENDIF(RAPTOR_PARSEDATE) + +IF(BUILD_SHARED_LIBS) + SET(LIB_TYPE SHARED) +ELSE(BUILD_SHARED_LIBS) + SET(LIB_TYPE STATIC) +ENDIF(BUILD_SHARED_LIBS) + +ADD_LIBRARY(raptor2 ${LIB_TYPE} + raptor_avltree.c + raptor_concepts.c + raptor_escaped.c + raptor_general.c + raptor_iostream.c + raptor_json_writer.c + 
raptor_locator.c + raptor_log.c + raptor_memstr.c + raptor_namespace.c + raptor_option.c + raptor_parse.c + raptor_qname.c + raptor_rfc2396.c + raptor_sax2.c + raptor_sequence.c + raptor_serialize.c + raptor_set.c + raptor_statement.c + raptor_stringbuffer.c + raptor_syntax_description.c + raptor_term.c + raptor_turtle_writer.c + raptor_unicode.c + raptor_uri.c + raptor_www.c + raptor_xml.c + raptor_xml_writer.c + snprintf.c + sort_r.c + turtle_common.c + ${raptor_parser_rdfxml_sources} + ${raptor_parser_ntriples_nquads_sources} + ${raptor_parser_turtle_trig_sources} + ${raptor_rss_common_sources} + ${raptor_parser_rss_sources} + ${raptor_parser_grddl_sources} + ${raptor_parser_guess_sources} + ${raptor_parser_rdfa_sources} + ${raptor_parser_json_sources} + ${raptor_serializer_rdfxml_sources} + ${raptor_serializer_ntriples_nquads_sources} + ${raptor_serializer_abbrev_sources} + ${raptor_serializer_rdfxml_abbrev_sources} + ${raptor_serializer_turtle_sources} + ${raptor_serializer_rss_1_0_sources} + ${raptor_serializer_dot_sources} + ${raptor_serializer_html_sources} + ${raptor_serializer_json_sources} + ${raptor_www_sources} + ${raptor_libxml_sources} + ${raptor_librdfa_sources} + ${raptor_strcasecmp_sources} + ${raptor_parsedate_sources} +) +IF(RAPTOR_PARSER_TURTLE OR RAPTOR_PARSER_TRIG) + add_dependencies(raptor2 turtle_parser_tgt turtle_lexer_tgt) +ENDIF() +IF(RAPTOR_PARSEDATE) + add_dependencies(raptor2 parsedate_tgt) +ENDIF() + +TARGET_LINK_LIBRARIES(raptor2 + ${raptor_libxslt_libs} + ${raptor_libxml_libs} + ${raptor_yajl_libs} + ${raptor_www_libs} +) + +SET_TARGET_PROPERTIES( + raptor2 + PROPERTIES + COMPILE_DEFINITIONS "LIBRDFA_IN_RAPTOR;RAPTOR_INTERNAL;${LIBXML2_DEFINITIONS};${LIBXSLT_DEFINITIONS}" +) + +ADD_EXECUTABLE(turtle_lexer_test turtle_lexer.c) +TARGET_LINK_LIBRARIES(turtle_lexer_test raptor2) + +IF(NOT WIN32 OR NOT BUILD_SHARED_LIBS) + # This currently cannot be built on Windows with DLL linkage + ADD_EXECUTABLE(turtle_parser_test turtle_parser.c) + 
TARGET_LINK_LIBRARIES(turtle_parser_test raptor2) + SET_TARGET_PROPERTIES( + turtle_parser_test + PROPERTIES + COMPILE_DEFINITIONS "RAPTOR_INTERNAL;STANDALONE" + ) +ENDIF(NOT WIN32 OR NOT BUILD_SHARED_LIBS) + +ADD_EXECUTABLE(raptor_parse_test raptor_parse.c) +TARGET_LINK_LIBRARIES(raptor_parse_test raptor2) +ADD_TEST(raptor_parse_test raptor_parse_test) + +ADD_EXECUTABLE(raptor_rfc2396_test raptor_rfc2396.c) +TARGET_LINK_LIBRARIES(raptor_rfc2396_test raptor2) +ADD_TEST(raptor_rfc2396_test raptor_rfc2396_test) + +ADD_EXECUTABLE(raptor_uri_test raptor_uri.c) +TARGET_LINK_LIBRARIES(raptor_uri_test raptor2) +ADD_TEST(raptor_uri_test raptor_uri_test) + +ADD_EXECUTABLE(raptor_namespace_test raptor_namespace.c) +TARGET_LINK_LIBRARIES(raptor_namespace_test raptor2) +ADD_TEST(raptor_namespace_test raptor_namespace_test) + +ADD_EXECUTABLE(strcasecmp_test strcasecmp.c) +TARGET_LINK_LIBRARIES(strcasecmp_test raptor2) +ADD_TEST(strcasecmp_test strcasecmp_test) + +ADD_EXECUTABLE(raptor_www_test raptor_www_test.c) +TARGET_LINK_LIBRARIES(raptor_www_test raptor2) +ADD_TEST(raptor_www_test raptor_www_test) + +ADD_EXECUTABLE(raptor_sequence_test raptor_sequence.c) +TARGET_LINK_LIBRARIES(raptor_sequence_test raptor2) +ADD_TEST(raptor_sequence_test raptor_sequence_test) + +ADD_EXECUTABLE(raptor_stringbuffer_test raptor_stringbuffer.c) +TARGET_LINK_LIBRARIES(raptor_stringbuffer_test raptor2) +ADD_TEST(raptor_stringbuffer_test raptor_stringbuffer_test) + +ADD_EXECUTABLE(raptor_iostream_test raptor_iostream.c) +TARGET_LINK_LIBRARIES(raptor_iostream_test raptor2) +ADD_TEST(raptor_iostream_test raptor_iostream_test) + +ADD_EXECUTABLE(raptor_xml_writer_test raptor_xml_writer.c) +TARGET_LINK_LIBRARIES(raptor_xml_writer_test raptor2) +ADD_TEST(raptor_xml_writer_test raptor_xml_writer_test) + +ADD_EXECUTABLE(raptor_turtle_writer_test raptor_turtle_writer.c) +TARGET_LINK_LIBRARIES(raptor_turtle_writer_test raptor2) +ADD_TEST(raptor_turtle_writer_test raptor_turtle_writer_test) + 
+ADD_EXECUTABLE(raptor_avltree_test raptor_avltree.c) +TARGET_LINK_LIBRARIES(raptor_avltree_test raptor2) +ADD_TEST(raptor_avltree_test raptor_avltree_test) + +ADD_EXECUTABLE(raptor_term_test raptor_term.c) +TARGET_LINK_LIBRARIES(raptor_term_test raptor2) +ADD_TEST(raptor_term_test raptor_term_test) + +ADD_EXECUTABLE(raptor_permute_test raptor_permute_test.c) +TARGET_LINK_LIBRARIES(raptor_permute_test raptor2) +ADD_TEST(raptor_permute_test raptor_permute_test) + +ADD_EXECUTABLE(raptor_snprintf_test snprintf.c) +TARGET_LINK_LIBRARIES(raptor_snprintf_test raptor2) +ADD_TEST(raptor_snprintf_test raptor_snprintf_test) + +ADD_EXECUTABLE(raptor_sort_r_test sort_r.c) +TARGET_LINK_LIBRARIES(raptor_sort_r_test raptor2) +ADD_TEST(raptor_sort_r_test raptor_sort_r_test) + +SET_TARGET_PROPERTIES( + turtle_lexer_test + #turtle_parser_test + raptor_parse_test + raptor_rfc2396_test + raptor_uri_test + raptor_namespace_test + strcasecmp_test + raptor_www_test + raptor_sequence_test + raptor_stringbuffer_test + raptor_iostream_test + raptor_xml_writer_test + raptor_turtle_writer_test + raptor_avltree_test + raptor_term_test + raptor_permute_test + raptor_snprintf_test + raptor_sort_r_test + PROPERTIES + COMPILE_DEFINITIONS "RAPTOR_INTERNAL;STANDALONE" +) + +IF(RAPTOR_PARSER_RDFXML) + ADD_EXECUTABLE(raptor_set_test raptor_set.c) + TARGET_LINK_LIBRARIES(raptor_set_test raptor2) + ADD_TEST(raptor_set_test raptor_set_test) + + ADD_EXECUTABLE(raptor_xml_test raptor_xml.c) + TARGET_LINK_LIBRARIES(raptor_xml_test raptor2) + ADD_TEST(raptor_xml_test raptor_xml_test) + + SET_TARGET_PROPERTIES( + raptor_set_test + raptor_xml_test + PROPERTIES + COMPILE_DEFINITIONS "RAPTOR_INTERNAL;STANDALONE" + ) +ENDIF(RAPTOR_PARSER_RDFXML) + +# Generate pkg-config metadata file +# +FILE(WRITE ${CMAKE_CURRENT_BINARY_DIR}/raptor2.pc +"prefix=${CMAKE_INSTALL_PREFIX} +exec_prefix=\${prefix} +libdir=${CMAKE_INSTALL_FULL_LIBDIR} +includedir=${CMAKE_INSTALL_FULL_INCLUDEDIR}/raptor2 + +Name: Raptor RDF Parsing 
Library +Description: RDF Parser Toolkit Library +Version: ${VERSION} +Libs: -L\${libdir} -lraptor2 +Libs.private: ${raptor_libxslt_libs} ${raptor_libxml_libs} +Cflags: -I\${includedir} +") + +INSTALL(FILES + raptor.h + ${CMAKE_CURRENT_BINARY_DIR}/raptor2.h + DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/raptor2 +) + +INSTALL(FILES + ${CMAKE_CURRENT_BINARY_DIR}/raptor2.pc + DESTINATION ${CMAKE_INSTALL_LIBDIR}/pkgconfig +) + +INSTALL( + TARGETS raptor2 + EXPORT Raptor2Config + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR} +) + +INSTALL(EXPORT Raptor2Config DESTINATION ${CMAKE_INSTALL_LIBDIR}/cmake) + +# end raptor/src/CMakeLists.txt diff --git a/src/Makefile.am b/src/Makefile.am new file mode 100644 index 0000000..15ea200 --- /dev/null +++ b/src/Makefile.am @@ -0,0 +1,350 @@ +# -*- Mode: Makefile -*- +# +# Makefile.am - automake file for Raptor libraptor +# +# Copyright (C) 2000-2011, David Beckett http://www.dajobe.org/ +# Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ +# +# This package is Free Software and part of Redland http://librdf.org/ +# +# It is licensed under the following three licenses as alternatives: +# 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version +# 2. GNU General Public License (GPL) V2 or any newer version +# 3. Apache License, V2.0 or any newer version +# +# You may not use this file except in compliance with at least one of +# the above three licenses. +# +# See LICENSE.html or LICENSE.txt at the top of this package for the +# complete terms and further detail along with the license texts for +# the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+# +# + + +lib_LTLIBRARIES = libraptor2.la + +pkginclude_HEADERS = raptor.h +nodist_pkginclude_HEADERS = raptor2.h + +noinst_HEADERS = raptor_internal.h + +TESTS=raptor_parse_test raptor_rfc2396_test raptor_uri_test \ +raptor_namespace_test strcasecmp_test raptor_www_test \ +raptor_sequence_test raptor_stringbuffer_test \ +raptor_uri_win32_test raptor_iostream_test raptor_xml_writer_test \ +raptor_turtle_writer_test raptor_avltree_test raptor_term_test \ +raptor_permute_test raptor_snprintf_test raptor_sort_r_test +if RAPTOR_PARSER_RDFXML +TESTS += raptor_set_test raptor_xml_test +endif + +CLEANFILES=$(TESTS) \ +turtle_lexer_test turtle_parser_test \ +*.plist \ +git-version.h + +MAINTAINERCLEANFILES=turtle_lexer.c turtle_lexer.h \ +turtle_parser.c turtle_parser.h turtle_parser.output + +# Memory debugging +MEM=@MEM@ +MEM_LIBS=@MEM_LIBS@ + +AM_CPPFLAGS = $(MEM) + +ANALYZE = clang +ANALYZE_FLAGS = "--analyze" +# Based on COMPILE target +ANALYZE_COMMAND = $(ANALYZE) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) \ + $(ANALYZE_FLAGS) + +BUILT_SOURCES = turtle_lexer.c turtle_lexer.h turtle_parser.c turtle_parser.h + +libraptor2_la_SOURCES = raptor_parse.c raptor_serialize.c \ +raptor_rfc2396.c raptor_uri.c raptor_log.c raptor_locator.c \ +raptor_namespace.c raptor_qname.c \ +raptor_option.c raptor_general.c raptor_unicode.c \ +raptor_www.c \ +raptor_statement.c \ +raptor_term.c \ +raptor_sequence.c raptor_stringbuffer.c raptor_iostream.c \ +raptor_xml.c raptor_xml_writer.c raptor_set.c turtle_common.c \ +raptor_turtle_writer.c raptor_avltree.c snprintf.c \ +raptor_json_writer.c raptor_memstr.c raptor_concepts.c \ +raptor_syntax_description.c \ +raptor_sax2.c raptor_escaped.c \ +raptor_ntriples.c \ +sort_r.c sort_r.h ssort.h +if RAPTOR_XML_LIBXML +libraptor2_la_SOURCES += raptor_libxml.c +endif +if RAPTOR_PARSER_RDFXML +libraptor2_la_SOURCES += raptor_rdfxml.c +endif +if RAPTOR_PARSER_TURTLE +libraptor2_la_SOURCES += 
turtle_lexer.c turtle_lexer.h turtle_parser.c turtle_parser.h turtle_common.h +else +if RAPTOR_PARSER_TRIG +libraptor2_la_SOURCES += turtle_lexer.c turtle_lexer.h turtle_parser.c turtle_parser.h turtle_common.h +endif +endif +if RAPTOR_PARSER_NTRIPLES +libraptor2_la_SOURCES += ntriples_parse.c +else +if RAPTOR_PARSER_NQUADS +libraptor2_la_SOURCES += ntriples_parse.c +endif +endif +if RAPTOR_RSS_COMMON +libraptor2_la_SOURCES += raptor_rss_common.c raptor_rss.h +endif +if RAPTOR_PARSER_RSS +libraptor2_la_SOURCES += raptor_rss.c +endif +if RAPTOR_PARSER_GRDDL +libraptor2_la_SOURCES += raptor_grddl.c +endif +if RAPTOR_PARSER_GUESS +libraptor2_la_SOURCES += raptor_guess.c +endif +if RAPTOR_PARSER_RDFA +libraptor2_la_SOURCES += raptor_librdfa.c +endif +if RAPTOR_PARSER_JSON +libraptor2_la_SOURCES += raptor_json.c +endif +if RAPTOR_SERIALIZER_RDFXML +libraptor2_la_SOURCES += raptor_serialize_rdfxml.c +endif + +if RAPTOR_SERIALIZER_NTRIPLES +libraptor2_la_SOURCES += raptor_serialize_ntriples.c +else +if RAPTOR_SERIALIZER_NQUADS +libraptor2_la_SOURCES += raptor_serialize_ntriples.c +endif +endif + +#raptor_abbrev.c required by both turtle and xml-abbrev +if RAPTOR_SERIALIZER_RDFXML_ABBREV +libraptor2_la_SOURCES += raptor_abbrev.c +else +if RAPTOR_SERIALIZER_TURTLE +libraptor2_la_SOURCES += raptor_abbrev.c +else +if RAPTOR_SERIALIZER_MKR +libraptor2_la_SOURCES += raptor_abbrev.c +endif +endif +endif + +if RAPTOR_SERIALIZER_RDFXML_ABBREV +libraptor2_la_SOURCES += raptor_serialize_rdfxmla.c +endif +if RAPTOR_SERIALIZER_TURTLE +libraptor2_la_SOURCES += raptor_serialize_turtle.c +else +if RAPTOR_SERIALIZER_MKR +libraptor2_la_SOURCES += raptor_serialize_turtle.c +endif +endif +if RAPTOR_SERIALIZER_RSS_1_0 +libraptor2_la_SOURCES += raptor_serialize_rss.c +endif +if RAPTOR_SERIALIZER_DOT +libraptor2_la_SOURCES += raptor_serialize_dot.c +endif +if RAPTOR_SERIALIZER_HTML +libraptor2_la_SOURCES += raptor_serialize_html.c +endif +if RAPTOR_SERIALIZER_JSON +libraptor2_la_SOURCES += 
raptor_serialize_json.c +endif +if STRCASECMP +libraptor2_la_SOURCES += strcasecmp.c +endif +if PARSEDATE +libraptor2_la_SOURCES += parsedate.c parsedate.h +BUILT_SOURCES += parsedate.c parsedate.h +endif + + +libraptor2_la_LIBADD = + +if LIBRDFA +AM_CPPFLAGS += -DLIBRDFA_IN_RAPTOR -I$(top_srcdir)/librdfa +libraptor2_la_LIBADD += $(top_builddir)/librdfa/librdfa.la +endif + +libraptor2_la_LDFLAGS = -version-info @RAPTOR_LIBTOOL_VERSION@ \ +@RAPTOR_LDFLAGS@ $(MEM_LIBS) +libraptor2_la_LIBADD += @LTLIBOBJS@ + + +EXTRA_DIST=\ +CMakeLists.txt \ +raptor_config_cmake.h.in \ +raptor_permute_test.c \ +raptor_www_test.c \ +raptor_nfc_test.c \ +raptor_win32.c \ +$(man_MANS) \ +turtle_lexer.l turtle_parser.y \ +parsedate.y + + +nodist_libraptor2_la_SOURCES = + +if MAINTAINER_MODE +nodist_libraptor2_la_SOURCES += git-version.h + +# raptor_parse.c is the first source file so this ensures +# git-version.h is created before it and before the actual file +# dependencies are calculated as a side-effect of compilation +raptor_parse.c: git-version.h + +# Actually it needs turtle_parser.h but nevermind +turtle_lexer.c: $(srcdir)/turtle_lexer.l turtle_parser.c $(top_srcdir)/scripts/fix-flex.pl + $(AM_V_GEN) \ + $(LEX) -o$@ $(srcdir)/turtle_lexer.l; \ + for file in turtle_lexer.c turtle_lexer.h; do \ + $(PERL) $(top_srcdir)/scripts/fix-flex.pl $$file > turtle_lexer.t || rm -f $$file; \ + mv -f turtle_lexer.t $$file; \ + done + +turtle_lexer.h: turtle_lexer.c ; @exit 0 + +turtle_parser.c: $(srcdir)/turtle_parser.y $(top_srcdir)/scripts/fix-bison.pl + $(AM_V_GEN) \ + $(BISON) -o $@ $(srcdir)/turtle_parser.y; \ + $(PERL) $(top_srcdir)/scripts/fix-bison.pl $@ + +turtle_parser.h: turtle_parser.c ; @exit 0 + +parsedate.c: $(srcdir)/parsedate.y $(top_srcdir)/scripts/fix-bison.pl + $(AM_V_GEN) \ + $(BISON) -o $@ $(srcdir)/parsedate.y; \ + $(PERL) $(top_srcdir)/scripts/fix-bison.pl $@ + +parsedate.h: parsedate.c ; @exit 0 + +endif + +# Actually it needs turtle_parser.h but nevermind 
+turtle_lexer_test: $(srcdir)/turtle_lexer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/turtle_lexer.c libraptor2.la $(LIBS) + +turtle_parser_test: $(srcdir)/turtle_parser.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/turtle_parser.c libraptor2.la $(LIBS) + +raptor_parse_test: $(srcdir)/raptor_parse.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_parse.c libraptor2.la $(LIBS) + +raptor_rfc2396_test: $(srcdir)/raptor_rfc2396.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_rfc2396.c libraptor2.la $(LIBS) + +raptor_uri_test: $(srcdir)/raptor_uri.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_uri.c libraptor2.la $(LIBS) + +raptor_uri_win32_test: $(srcdir)/raptor_uri.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE -DWIN32_URI_TEST $(srcdir)/raptor_uri.c libraptor2.la $(LIBS) + +raptor_namespace_test: $(srcdir)/raptor_namespace.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_namespace.c libraptor2.la $(LIBS) + +strcasecmp_test: $(srcdir)/strcasecmp.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/strcasecmp.c libraptor2.la $(LIBS) + +raptor_www_test: $(srcdir)/raptor_www_test.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_www_test.c libraptor2.la $(LIBS) + +raptor_set_test: $(srcdir)/raptor_set.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_set.c libraptor2.la $(LIBS) + +raptor_xml_test: $(srcdir)/raptor_xml.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_xml.c libraptor2.la $(LIBS) + +raptor_sequence_test: $(srcdir)/raptor_sequence.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. 
-DSTANDALONE $(srcdir)/raptor_sequence.c libraptor2.la $(LIBS) + +raptor_stringbuffer_test: $(srcdir)/raptor_stringbuffer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_stringbuffer.c libraptor2.la $(LIBS) + +raptor_nfc_test: $(srcdir)/raptor_nfc_test.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_nfc_test.c libraptor2.la $(LIBS) + +raptor_iostream_test: $(srcdir)/raptor_iostream.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_iostream.c libraptor2.la $(LIBS) + +raptor_xml_writer_test: $(srcdir)/raptor_xml_writer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_xml_writer.c libraptor2.la $(LIBS) + +raptor_turtle_writer_test: $(srcdir)/raptor_turtle_writer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_turtle_writer.c libraptor2.la $(LIBS) + +raptor_avltree_test: $(srcdir)/raptor_avltree.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_avltree.c libraptor2.la $(LIBS) + +raptor_term_test: $(srcdir)/raptor_term.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_term.c libraptor2.la $(LIBS) + +raptor_permute_test: $(srcdir)/raptor_permute_test.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_permute_test.c libraptor2.la $(LIBS) + +raptor_snprintf_test: $(srcdir)/snprintf.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/snprintf.c libraptor2.la $(LIBS) + +raptor_sort_r_test: $(srcdir)/sort_r.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. 
-DSTANDALONE $(srcdir)/sort_r.c libraptor2.la $(LIBS) + +$(top_builddir)/librdfa/librdfa.la: + cd $(top_builddir)/librdfa && $(MAKE) librdfa.la + +# Some people need a little help ;-) +test: check + + +if MAINTAINER_MODE +git-version.h: check-version + +# Always run this in maintainer mode but do not always change git-version.h +.PHONY: check-version +check-version: + @file="git-version.h" ; \ + if test -d ../.git; then \ + git_version=`cd .. && git rev-parse HEAD`; \ + else \ + git_version="unknown"; \ + fi; \ + $(RECHO) "GIT version $$git_version"; \ + tmp="$$file.tmp"; \ + $(RECHO) "#define GIT_VERSION \"$$git_version\"" > $$tmp; \ + if test -f $$file; then \ + if cmp $$file $$tmp >/dev/null 2>&1; then \ + rm $$tmp; \ + else \ + mv $$tmp $$file; \ + fi; \ + else \ + mv $$tmp $$file; \ + fi + +# Run Clang static analyzer over sources. +analyze: $(SOURCES) + @list='$(SOURCES)'; \ + result=0; \ + for file in $$list; do \ + if echo $$file | grep '\.c$$' >/dev/null 2>&1; then \ + $(RECHO) "Analyzing $$file"; \ + $(ANALYZE_COMMAND) $(srcdir)/$$file; \ + status=$$?; \ + if test $$status != 0; then \ + result=1; \ + fi; \ + fi; \ + done; \ + set -e; exit $$result +endif diff --git a/src/Makefile.in b/src/Makefile.in new file mode 100644 index 0000000..5c6cac9 --- /dev/null +++ b/src/Makefile.in @@ -0,0 +1,1807 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. + +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. 
+ +@SET_MAKE@ + +# -*- Mode: Makefile -*- +# +# Makefile.am - automake file for Raptor libraptor +# +# Copyright (C) 2000-2011, David Beckett http://www.dajobe.org/ +# Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ +# +# This package is Free Software and part of Redland http://librdf.org/ +# +# It is licensed under the following three licenses as alternatives: +# 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version +# 2. GNU General Public License (GPL) V2 or any newer version +# 3. Apache License, V2.0 or any newer version +# +# You may not use this file except in compliance with at least one of +# the above three licenses. +# +# See LICENSE.html or LICENSE.txt at the top of this package for the +# complete terms and further detail along with the license texts for +# the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. +# +# + + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) 
;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = (target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@RAPTOR_PARSER_RDFXML_TRUE@am__append_1 = raptor_set_test raptor_xml_test +@RAPTOR_XML_LIBXML_TRUE@am__append_2 = raptor_libxml.c +@RAPTOR_PARSER_RDFXML_TRUE@am__append_3 = raptor_rdfxml.c 
+@RAPTOR_PARSER_TURTLE_TRUE@am__append_4 = turtle_lexer.c turtle_lexer.h turtle_parser.c turtle_parser.h turtle_common.h +@RAPTOR_PARSER_TRIG_TRUE@@RAPTOR_PARSER_TURTLE_FALSE@am__append_5 = turtle_lexer.c turtle_lexer.h turtle_parser.c turtle_parser.h turtle_common.h +@RAPTOR_PARSER_NTRIPLES_TRUE@am__append_6 = ntriples_parse.c +@RAPTOR_PARSER_NQUADS_TRUE@@RAPTOR_PARSER_NTRIPLES_FALSE@am__append_7 = ntriples_parse.c +@RAPTOR_RSS_COMMON_TRUE@am__append_8 = raptor_rss_common.c raptor_rss.h +@RAPTOR_PARSER_RSS_TRUE@am__append_9 = raptor_rss.c +@RAPTOR_PARSER_GRDDL_TRUE@am__append_10 = raptor_grddl.c +@RAPTOR_PARSER_GUESS_TRUE@am__append_11 = raptor_guess.c +@RAPTOR_PARSER_RDFA_TRUE@am__append_12 = raptor_librdfa.c +@RAPTOR_PARSER_JSON_TRUE@am__append_13 = raptor_json.c +@RAPTOR_SERIALIZER_RDFXML_TRUE@am__append_14 = raptor_serialize_rdfxml.c +@RAPTOR_SERIALIZER_NTRIPLES_TRUE@am__append_15 = raptor_serialize_ntriples.c +@RAPTOR_SERIALIZER_NQUADS_TRUE@@RAPTOR_SERIALIZER_NTRIPLES_FALSE@am__append_16 = raptor_serialize_ntriples.c + +#raptor_abbrev.c required by both turtle and xml-abbrev +@RAPTOR_SERIALIZER_RDFXML_ABBREV_TRUE@am__append_17 = raptor_abbrev.c +@RAPTOR_SERIALIZER_RDFXML_ABBREV_FALSE@@RAPTOR_SERIALIZER_TURTLE_TRUE@am__append_18 = raptor_abbrev.c +@RAPTOR_SERIALIZER_MKR_TRUE@@RAPTOR_SERIALIZER_RDFXML_ABBREV_FALSE@@RAPTOR_SERIALIZER_TURTLE_FALSE@am__append_19 = raptor_abbrev.c +@RAPTOR_SERIALIZER_RDFXML_ABBREV_TRUE@am__append_20 = raptor_serialize_rdfxmla.c +@RAPTOR_SERIALIZER_TURTLE_TRUE@am__append_21 = raptor_serialize_turtle.c +@RAPTOR_SERIALIZER_MKR_TRUE@@RAPTOR_SERIALIZER_TURTLE_FALSE@am__append_22 = raptor_serialize_turtle.c +@RAPTOR_SERIALIZER_RSS_1_0_TRUE@am__append_23 = raptor_serialize_rss.c +@RAPTOR_SERIALIZER_DOT_TRUE@am__append_24 = raptor_serialize_dot.c +@RAPTOR_SERIALIZER_HTML_TRUE@am__append_25 = raptor_serialize_html.c +@RAPTOR_SERIALIZER_JSON_TRUE@am__append_26 = raptor_serialize_json.c +@STRCASECMP_TRUE@am__append_27 = strcasecmp.c 
+@PARSEDATE_TRUE@am__append_28 = parsedate.c parsedate.h +@PARSEDATE_TRUE@am__append_29 = parsedate.c parsedate.h +@LIBRDFA_TRUE@am__append_30 = -DLIBRDFA_IN_RAPTOR -I$(top_srcdir)/librdfa +@LIBRDFA_TRUE@am__append_31 = $(top_builddir)/librdfa/librdfa.la +@MAINTAINER_MODE_TRUE@am__append_32 = git-version.h +subdir = src +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/build/gtk-doc.m4 \ + $(top_srcdir)/build/libtool.m4 \ + $(top_srcdir)/build/ltoptions.m4 \ + $(top_srcdir)/build/ltsugar.m4 \ + $(top_srcdir)/build/ltversion.m4 \ + $(top_srcdir)/build/lt~obsolete.m4 $(top_srcdir)/build/pkg.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(noinst_HEADERS) \ + $(pkginclude_HEADERS) $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = raptor_config.h +CONFIG_CLEAN_FILES = raptor2.h +CONFIG_CLEAN_VPATH_FILES = +am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`; +am__vpath_adj = case $$p in \ + $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \ + *) f=$$p;; \ + esac; +am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`; +am__install_max = 40 +am__nobase_strip_setup = \ + srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'` +am__nobase_strip = \ + for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||" +am__nobase_list = $(am__nobase_strip_setup); \ + for p in $$list; do echo "$$p $$p"; done | \ + sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \ + $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \ + if (++n[$$2] == $(am__install_max)) \ + { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \ + END { for (dir in files) print dir, files[dir] }' +am__base_list = \ + sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \ + sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g' +am__uninstall_files_from_dir = { \ + test -z "$$files" \ + || { test ! 
-d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \ + || { echo " ( cd '$$dir' && rm -f" $$files ")"; \ + $(am__cd) "$$dir" && rm -f $$files; }; \ + } +am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgincludedir)" \ + "$(DESTDIR)$(pkgincludedir)" +LTLIBRARIES = $(lib_LTLIBRARIES) +libraptor2_la_DEPENDENCIES = $(am__append_31) @LTLIBOBJS@ +am__libraptor2_la_SOURCES_DIST = raptor_parse.c raptor_serialize.c \ + raptor_rfc2396.c raptor_uri.c raptor_log.c raptor_locator.c \ + raptor_namespace.c raptor_qname.c raptor_option.c \ + raptor_general.c raptor_unicode.c raptor_www.c \ + raptor_statement.c raptor_term.c raptor_sequence.c \ + raptor_stringbuffer.c raptor_iostream.c raptor_xml.c \ + raptor_xml_writer.c raptor_set.c turtle_common.c \ + raptor_turtle_writer.c raptor_avltree.c snprintf.c \ + raptor_json_writer.c raptor_memstr.c raptor_concepts.c \ + raptor_syntax_description.c raptor_sax2.c raptor_escaped.c \ + raptor_ntriples.c sort_r.c sort_r.h ssort.h raptor_libxml.c \ + raptor_rdfxml.c turtle_lexer.c turtle_lexer.h turtle_parser.c \ + turtle_parser.h turtle_common.h ntriples_parse.c \ + raptor_rss_common.c raptor_rss.h raptor_rss.c raptor_grddl.c \ + raptor_guess.c raptor_librdfa.c raptor_json.c \ + raptor_serialize_rdfxml.c raptor_serialize_ntriples.c \ + raptor_abbrev.c raptor_serialize_rdfxmla.c \ + raptor_serialize_turtle.c raptor_serialize_rss.c \ + raptor_serialize_dot.c raptor_serialize_html.c \ + raptor_serialize_json.c strcasecmp.c parsedate.c parsedate.h +@RAPTOR_XML_LIBXML_TRUE@am__objects_1 = raptor_libxml.lo +@RAPTOR_PARSER_RDFXML_TRUE@am__objects_2 = raptor_rdfxml.lo +@RAPTOR_PARSER_TURTLE_TRUE@am__objects_3 = turtle_lexer.lo \ +@RAPTOR_PARSER_TURTLE_TRUE@ turtle_parser.lo +@RAPTOR_PARSER_TRIG_TRUE@@RAPTOR_PARSER_TURTLE_FALSE@am__objects_4 = turtle_lexer.lo \ +@RAPTOR_PARSER_TRIG_TRUE@@RAPTOR_PARSER_TURTLE_FALSE@ turtle_parser.lo +@RAPTOR_PARSER_NTRIPLES_TRUE@am__objects_5 = ntriples_parse.lo 
+@RAPTOR_PARSER_NQUADS_TRUE@@RAPTOR_PARSER_NTRIPLES_FALSE@am__objects_6 = ntriples_parse.lo +@RAPTOR_RSS_COMMON_TRUE@am__objects_7 = raptor_rss_common.lo +@RAPTOR_PARSER_RSS_TRUE@am__objects_8 = raptor_rss.lo +@RAPTOR_PARSER_GRDDL_TRUE@am__objects_9 = raptor_grddl.lo +@RAPTOR_PARSER_GUESS_TRUE@am__objects_10 = raptor_guess.lo +@RAPTOR_PARSER_RDFA_TRUE@am__objects_11 = raptor_librdfa.lo +@RAPTOR_PARSER_JSON_TRUE@am__objects_12 = raptor_json.lo +@RAPTOR_SERIALIZER_RDFXML_TRUE@am__objects_13 = \ +@RAPTOR_SERIALIZER_RDFXML_TRUE@ raptor_serialize_rdfxml.lo +@RAPTOR_SERIALIZER_NTRIPLES_TRUE@am__objects_14 = \ +@RAPTOR_SERIALIZER_NTRIPLES_TRUE@ raptor_serialize_ntriples.lo +@RAPTOR_SERIALIZER_NQUADS_TRUE@@RAPTOR_SERIALIZER_NTRIPLES_FALSE@am__objects_15 = raptor_serialize_ntriples.lo +@RAPTOR_SERIALIZER_RDFXML_ABBREV_TRUE@am__objects_16 = \ +@RAPTOR_SERIALIZER_RDFXML_ABBREV_TRUE@ raptor_abbrev.lo +@RAPTOR_SERIALIZER_RDFXML_ABBREV_FALSE@@RAPTOR_SERIALIZER_TURTLE_TRUE@am__objects_17 = raptor_abbrev.lo +@RAPTOR_SERIALIZER_MKR_TRUE@@RAPTOR_SERIALIZER_RDFXML_ABBREV_FALSE@@RAPTOR_SERIALIZER_TURTLE_FALSE@am__objects_18 = raptor_abbrev.lo +@RAPTOR_SERIALIZER_RDFXML_ABBREV_TRUE@am__objects_19 = raptor_serialize_rdfxmla.lo +@RAPTOR_SERIALIZER_TURTLE_TRUE@am__objects_20 = \ +@RAPTOR_SERIALIZER_TURTLE_TRUE@ raptor_serialize_turtle.lo +@RAPTOR_SERIALIZER_MKR_TRUE@@RAPTOR_SERIALIZER_TURTLE_FALSE@am__objects_21 = raptor_serialize_turtle.lo +@RAPTOR_SERIALIZER_RSS_1_0_TRUE@am__objects_22 = \ +@RAPTOR_SERIALIZER_RSS_1_0_TRUE@ raptor_serialize_rss.lo +@RAPTOR_SERIALIZER_DOT_TRUE@am__objects_23 = raptor_serialize_dot.lo +@RAPTOR_SERIALIZER_HTML_TRUE@am__objects_24 = \ +@RAPTOR_SERIALIZER_HTML_TRUE@ raptor_serialize_html.lo +@RAPTOR_SERIALIZER_JSON_TRUE@am__objects_25 = \ +@RAPTOR_SERIALIZER_JSON_TRUE@ raptor_serialize_json.lo +@STRCASECMP_TRUE@am__objects_26 = strcasecmp.lo +@PARSEDATE_TRUE@am__objects_27 = parsedate.lo +am_libraptor2_la_OBJECTS = raptor_parse.lo raptor_serialize.lo \ + 
raptor_rfc2396.lo raptor_uri.lo raptor_log.lo \ + raptor_locator.lo raptor_namespace.lo raptor_qname.lo \ + raptor_option.lo raptor_general.lo raptor_unicode.lo \ + raptor_www.lo raptor_statement.lo raptor_term.lo \ + raptor_sequence.lo raptor_stringbuffer.lo raptor_iostream.lo \ + raptor_xml.lo raptor_xml_writer.lo raptor_set.lo \ + turtle_common.lo raptor_turtle_writer.lo raptor_avltree.lo \ + snprintf.lo raptor_json_writer.lo raptor_memstr.lo \ + raptor_concepts.lo raptor_syntax_description.lo raptor_sax2.lo \ + raptor_escaped.lo raptor_ntriples.lo sort_r.lo \ + $(am__objects_1) $(am__objects_2) $(am__objects_3) \ + $(am__objects_4) $(am__objects_5) $(am__objects_6) \ + $(am__objects_7) $(am__objects_8) $(am__objects_9) \ + $(am__objects_10) $(am__objects_11) $(am__objects_12) \ + $(am__objects_13) $(am__objects_14) $(am__objects_15) \ + $(am__objects_16) $(am__objects_17) $(am__objects_18) \ + $(am__objects_19) $(am__objects_20) $(am__objects_21) \ + $(am__objects_22) $(am__objects_23) $(am__objects_24) \ + $(am__objects_25) $(am__objects_26) $(am__objects_27) +am__objects_28 = +nodist_libraptor2_la_OBJECTS = $(am__objects_28) +libraptor2_la_OBJECTS = $(am_libraptor2_la_OBJECTS) \ + $(nodist_libraptor2_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) +am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +libraptor2_la_LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(libraptor2_la_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ +depcomp = $(SHELL) $(top_srcdir)/build/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = 
$(DEPDIR)/raptor_nfc_icu.Plo \ + $(DEPDIR)/raptor_www_curl.Plo \ + $(DEPDIR)/raptor_www_libfetch.Plo \ + $(DEPDIR)/raptor_www_libxml.Plo ./$(DEPDIR)/ntriples_parse.Plo \ + ./$(DEPDIR)/parsedate.Plo ./$(DEPDIR)/raptor_abbrev.Plo \ + ./$(DEPDIR)/raptor_avltree.Plo ./$(DEPDIR)/raptor_concepts.Plo \ + ./$(DEPDIR)/raptor_escaped.Plo ./$(DEPDIR)/raptor_general.Plo \ + ./$(DEPDIR)/raptor_grddl.Plo ./$(DEPDIR)/raptor_guess.Plo \ + ./$(DEPDIR)/raptor_iostream.Plo ./$(DEPDIR)/raptor_json.Plo \ + ./$(DEPDIR)/raptor_json_writer.Plo \ + ./$(DEPDIR)/raptor_librdfa.Plo ./$(DEPDIR)/raptor_libxml.Plo \ + ./$(DEPDIR)/raptor_locator.Plo ./$(DEPDIR)/raptor_log.Plo \ + ./$(DEPDIR)/raptor_memstr.Plo ./$(DEPDIR)/raptor_namespace.Plo \ + ./$(DEPDIR)/raptor_ntriples.Plo ./$(DEPDIR)/raptor_option.Plo \ + ./$(DEPDIR)/raptor_parse.Plo ./$(DEPDIR)/raptor_qname.Plo \ + ./$(DEPDIR)/raptor_rdfxml.Plo ./$(DEPDIR)/raptor_rfc2396.Plo \ + ./$(DEPDIR)/raptor_rss.Plo ./$(DEPDIR)/raptor_rss_common.Plo \ + ./$(DEPDIR)/raptor_sax2.Plo ./$(DEPDIR)/raptor_sequence.Plo \ + ./$(DEPDIR)/raptor_serialize.Plo \ + ./$(DEPDIR)/raptor_serialize_dot.Plo \ + ./$(DEPDIR)/raptor_serialize_html.Plo \ + ./$(DEPDIR)/raptor_serialize_json.Plo \ + ./$(DEPDIR)/raptor_serialize_ntriples.Plo \ + ./$(DEPDIR)/raptor_serialize_rdfxml.Plo \ + ./$(DEPDIR)/raptor_serialize_rdfxmla.Plo \ + ./$(DEPDIR)/raptor_serialize_rss.Plo \ + ./$(DEPDIR)/raptor_serialize_turtle.Plo \ + ./$(DEPDIR)/raptor_set.Plo ./$(DEPDIR)/raptor_statement.Plo \ + ./$(DEPDIR)/raptor_stringbuffer.Plo \ + ./$(DEPDIR)/raptor_syntax_description.Plo \ + ./$(DEPDIR)/raptor_term.Plo \ + ./$(DEPDIR)/raptor_turtle_writer.Plo \ + ./$(DEPDIR)/raptor_unicode.Plo ./$(DEPDIR)/raptor_uri.Plo \ + ./$(DEPDIR)/raptor_www.Plo ./$(DEPDIR)/raptor_xml.Plo \ + ./$(DEPDIR)/raptor_xml_writer.Plo ./$(DEPDIR)/snprintf.Plo \ + ./$(DEPDIR)/sort_r.Plo ./$(DEPDIR)/strcasecmp.Plo \ + ./$(DEPDIR)/turtle_common.Plo ./$(DEPDIR)/turtle_lexer.Plo \ + ./$(DEPDIR)/turtle_parser.Plo +am__mv = mv -f 
+COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(libraptor2_la_SOURCES) $(nodist_libraptor2_la_SOURCES) +DIST_SOURCES = $(am__libraptor2_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +HEADERS = $(nodist_pkginclude_HEADERS) $(noinst_HEADERS) \ + $(pkginclude_HEADERS) +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) \ + raptor_config.h.in +# Read a list of newline-separated strings from the standard input, +# and print each of them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. 
+am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__tty_colors_dummy = \ + mgn= red= grn= lgn= blu= brg= std=; \ + am__color_tests=no +am__tty_colors = { \ + $(am__tty_colors_dummy); \ + if test "X$(AM_COLOR_TESTS)" = Xno; then \ + am__color_tests=no; \ + elif test "X$(AM_COLOR_TESTS)" = Xalways; then \ + am__color_tests=yes; \ + elif test "X$$TERM" != Xdumb && { test -t 1; } 2>/dev/null; then \ + am__color_tests=yes; \ + fi; \ + if test $$am__color_tests = yes; then \ + red='[0;31m'; \ + grn='[0;32m'; \ + lgn='[1;32m'; \ + blu='[1;34m'; \ + mgn='[0;35m'; \ + brg='[1m'; \ + std='[m'; \ + fi; \ +} +am__recheck_rx = ^[ ]*:recheck:[ ]* +am__global_test_result_rx = ^[ ]*:global-test-result:[ ]* +am__copy_in_global_log_rx = ^[ ]*:copy-in-global-log:[ ]* +# A command that, given a newline-separated list of test names on the +# standard input, print the name of the tests that are to be re-run +# upon "make recheck". +am__list_recheck_tests = $(AWK) '{ \ + recheck = 1; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + { \ + if ((getline line2 < ($$0 ".log")) < 0) \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[nN][Oo]/) \ + { \ + recheck = 0; \ + break; \ + } \ + else if (line ~ /$(am__recheck_rx)[yY][eE][sS]/) \ + { \ + break; \ + } \ + }; \ + if (recheck) \ + print $$0; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# A command that, given a newline-separated list of test names on the +# standard input, create the global log from their .trs and .log files. 
+am__create_global_log = $(AWK) ' \ +function fatal(msg) \ +{ \ + print "fatal: making $@: " msg | "cat >&2"; \ + exit 1; \ +} \ +function rst_section(header) \ +{ \ + print header; \ + len = length(header); \ + for (i = 1; i <= len; i = i + 1) \ + printf "="; \ + printf "\n\n"; \ +} \ +{ \ + copy_in_global_log = 1; \ + global_test_result = "RUN"; \ + while ((rc = (getline line < ($$0 ".trs"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".trs"); \ + if (line ~ /$(am__global_test_result_rx)/) \ + { \ + sub("$(am__global_test_result_rx)", "", line); \ + sub("[ ]*$$", "", line); \ + global_test_result = line; \ + } \ + else if (line ~ /$(am__copy_in_global_log_rx)[nN][oO]/) \ + copy_in_global_log = 0; \ + }; \ + if (copy_in_global_log) \ + { \ + rst_section(global_test_result ": " $$0); \ + while ((rc = (getline line < ($$0 ".log"))) != 0) \ + { \ + if (rc < 0) \ + fatal("failed to read from " $$0 ".log"); \ + print line; \ + }; \ + printf "\n"; \ + }; \ + close ($$0 ".trs"); \ + close ($$0 ".log"); \ +}' +# Restructured Text title. +am__rst_title = { sed 's/.*/ & /;h;s/./=/g;p;x;s/ *$$//;p;g' && echo; } +# Solaris 10 'make', and several other traditional 'make' implementations, +# pass "-e" to $(SHELL), and POSIX 2008 even requires this. Work around it +# by disabling -e (using the XSI extension "set +e") if it's set. +am__sh_e_setup = case $$- in *e*) set +e;; esac +# Default flags passed to test drivers. +am__common_driver_flags = \ + --color-tests "$$am__color_tests" \ + --enable-hard-errors "$$am__enable_hard_errors" \ + --expect-failure "$$am__expect_failure" +# To be inserted before the command running the test. Creates the +# directory for the log if needed. Stores in $dir the directory +# containing $f, in $tst the test, in $log the log. Executes the +# developer-defined test setup AM_TESTS_ENVIRONMENT (if any), and +# passes TESTS_ENVIRONMENT. 
Set up options for the wrapper that +# will run the test scripts (or their associated LOG_COMPILER, if +# they have one). +am__check_pre = \ +$(am__sh_e_setup); \ +$(am__vpath_adj_setup) $(am__vpath_adj) \ +$(am__tty_colors); \ +srcdir=$(srcdir); export srcdir; \ +case "$@" in \ + */*) am__odir=`echo "./$@" | sed 's|/[^/]*$$||'`;; \ + *) am__odir=.;; \ +esac; \ +test "x$$am__odir" = x"." || test -d "$$am__odir" \ + || $(MKDIR_P) "$$am__odir" || exit $$?; \ +if test -f "./$$f"; then dir=./; \ +elif test -f "$$f"; then dir=; \ +else dir="$(srcdir)/"; fi; \ +tst=$$dir$$f; log='$@'; \ +if test -n '$(DISABLE_HARD_ERRORS)'; then \ + am__enable_hard_errors=no; \ +else \ + am__enable_hard_errors=yes; \ +fi; \ +case " $(XFAIL_TESTS) " in \ + *[\ \ ]$$f[\ \ ]* | *[\ \ ]$$dir$$f[\ \ ]*) \ + am__expect_failure=yes;; \ + *) \ + am__expect_failure=no;; \ +esac; \ +$(AM_TESTS_ENVIRONMENT) $(TESTS_ENVIRONMENT) +# A shell command to get the names of the test scripts with any registered +# extension removed (i.e., equivalently, the names of the test logs, with +# the '.log' extension removed). The result is saved in the shell variable +# '$bases'. This honors runtime overriding of TESTS and TEST_LOGS. Sadly, +# we cannot use something simpler, involving e.g., "$(TEST_LOGS:.log=)", +# since that might cause problems with VPATH rewrites for suffix-less tests. +# See also 'test-harness-vpath-rewrite.sh' and 'test-trs-basic.sh'. 
+am__set_TESTS_bases = \ + bases='$(TEST_LOGS)'; \ + bases=`for i in $$bases; do echo $$i; done | sed 's/\.log$$//'`; \ + bases=`echo $$bases` +AM_TESTSUITE_SUMMARY_HEADER = ' for $(PACKAGE_STRING)' +RECHECK_LOGS = $(TEST_LOGS) +AM_RECURSIVE_TARGETS = check recheck +TEST_SUITE_LOG = test-suite.log +TEST_EXTENSIONS = @EXEEXT@ .test +LOG_DRIVER = $(SHELL) $(top_srcdir)/build/test-driver +LOG_COMPILE = $(LOG_COMPILER) $(AM_LOG_FLAGS) $(LOG_FLAGS) +am__set_b = \ + case '$@' in \ + */*) \ + case '$*' in \ + */*) b='$*';; \ + *) b=`echo '$@' | sed 's/\.log$$//'`; \ + esac;; \ + *) \ + b='$*';; \ + esac +am__test_logs1 = $(TESTS:=.log) +am__test_logs2 = $(am__test_logs1:@EXEEXT@.log=.log) +TEST_LOGS = $(am__test_logs2:.test.log=.log) +TEST_LOG_DRIVER = $(SHELL) $(top_srcdir)/build/test-driver +TEST_LOG_COMPILE = $(TEST_LOG_COMPILER) $(AM_TEST_LOG_FLAGS) \ + $(TEST_LOG_FLAGS) +am__DIST_COMMON = $(srcdir)/Makefile.in $(srcdir)/raptor2.h.in \ + $(srcdir)/raptor_config.h.in $(top_srcdir)/build/depcomp \ + $(top_srcdir)/build/test-driver raptor_nfc_icu.c \ + raptor_www_curl.c raptor_www_libfetch.c raptor_www_libxml.c +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BISON = @BISON@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CURL_CONFIG = @CURL_CONFIG@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GREP = @GREP@ +GTKDOC_CHECK = @GTKDOC_CHECK@ +GTKDOC_CHECK_PATH = @GTKDOC_CHECK_PATH@ +GTKDOC_DEPS_CFLAGS = @GTKDOC_DEPS_CFLAGS@ +GTKDOC_DEPS_LIBS = @GTKDOC_DEPS_LIBS@ +GTKDOC_MKPDF = 
@GTKDOC_MKPDF@ +GTKDOC_REBASE = @GTKDOC_REBASE@ +HTML_DIR = @HTML_DIR@ +ICU_CFLAGS = @ICU_CFLAGS@ +ICU_LIBS = @ICU_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JING = @JING@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBCURL_CFLAGS = @LIBCURL_CFLAGS@ +LIBCURL_LIBS = @LIBCURL_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIBXML_CFLAGS = @LIBXML_CFLAGS@ +LIBXML_LIBS = @LIBXML_LIBS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ + +# Memory debugging +MEM = @MEM@ +MEM_LIBS = @MEM_LIBS@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PKG_CONFIG_REQUIRES = @PKG_CONFIG_REQUIRES@ +RANLIB = @RANLIB@ +RAPTOR_LDFLAGS = @RAPTOR_LDFLAGS@ +RAPTOR_LIBTOOLLIBS = @RAPTOR_LIBTOOLLIBS@ +RAPTOR_LIBTOOL_VERSION = @RAPTOR_LIBTOOL_VERSION@ +RAPTOR_PARSERS = @RAPTOR_PARSERS@ +RAPTOR_SERIALIZERS = @RAPTOR_SERIALIZERS@ +RAPTOR_VERSION = @RAPTOR_VERSION@ +RAPTOR_VERSION_DECIMAL = @RAPTOR_VERSION_DECIMAL@ +RAPTOR_VERSION_MAJOR = @RAPTOR_VERSION_MAJOR@ +RAPTOR_VERSION_MINOR = @RAPTOR_VERSION_MINOR@ +RAPTOR_VERSION_RELEASE = @RAPTOR_VERSION_RELEASE@ +RAPTOR_WWW_LIBRARY = @RAPTOR_WWW_LIBRARY@ +RAPTOR_XML_PARSER = @RAPTOR_XML_PARSER@ +RECHO = @RECHO@ +RECHO_C = @RECHO_C@ +RECHO_N = @RECHO_N@ +RPM_RELEASE 
= @RPM_RELEASE@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TAR = @TAR@ +VERSION = @VERSION@ +XML_CONFIG = @XML_CONFIG@ +XSLT_CFLAGS = @XSLT_CFLAGS@ +XSLT_CONFIG = @XSLT_CONFIG@ +XSLT_LIBS = @XSLT_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ +am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +lib_LTLIBRARIES = libraptor2.la +pkginclude_HEADERS = raptor.h +nodist_pkginclude_HEADERS = raptor2.h +noinst_HEADERS = raptor_internal.h +TESTS = raptor_parse_test raptor_rfc2396_test raptor_uri_test \ + raptor_namespace_test strcasecmp_test raptor_www_test \ + raptor_sequence_test raptor_stringbuffer_test \ + raptor_uri_win32_test raptor_iostream_test \ + raptor_xml_writer_test 
raptor_turtle_writer_test \ + raptor_avltree_test raptor_term_test raptor_permute_test \ + raptor_snprintf_test raptor_sort_r_test $(am__append_1) +CLEANFILES = $(TESTS) \ +turtle_lexer_test turtle_parser_test \ +*.plist \ +git-version.h + +MAINTAINERCLEANFILES = turtle_lexer.c turtle_lexer.h \ +turtle_parser.c turtle_parser.h turtle_parser.output + +AM_CPPFLAGS = $(MEM) $(am__append_30) +ANALYZE = clang +ANALYZE_FLAGS = "--analyze" +# Based on COMPILE target +ANALYZE_COMMAND = $(ANALYZE) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) \ + $(ANALYZE_FLAGS) + +BUILT_SOURCES = turtle_lexer.c turtle_lexer.h turtle_parser.c \ + turtle_parser.h $(am__append_29) +libraptor2_la_SOURCES = raptor_parse.c raptor_serialize.c \ + raptor_rfc2396.c raptor_uri.c raptor_log.c raptor_locator.c \ + raptor_namespace.c raptor_qname.c raptor_option.c \ + raptor_general.c raptor_unicode.c raptor_www.c \ + raptor_statement.c raptor_term.c raptor_sequence.c \ + raptor_stringbuffer.c raptor_iostream.c raptor_xml.c \ + raptor_xml_writer.c raptor_set.c turtle_common.c \ + raptor_turtle_writer.c raptor_avltree.c snprintf.c \ + raptor_json_writer.c raptor_memstr.c raptor_concepts.c \ + raptor_syntax_description.c raptor_sax2.c raptor_escaped.c \ + raptor_ntriples.c sort_r.c sort_r.h ssort.h $(am__append_2) \ + $(am__append_3) $(am__append_4) $(am__append_5) \ + $(am__append_6) $(am__append_7) $(am__append_8) \ + $(am__append_9) $(am__append_10) $(am__append_11) \ + $(am__append_12) $(am__append_13) $(am__append_14) \ + $(am__append_15) $(am__append_16) $(am__append_17) \ + $(am__append_18) $(am__append_19) $(am__append_20) \ + $(am__append_21) $(am__append_22) $(am__append_23) \ + $(am__append_24) $(am__append_25) $(am__append_26) \ + $(am__append_27) $(am__append_28) +libraptor2_la_LIBADD = $(am__append_31) @LTLIBOBJS@ +libraptor2_la_LDFLAGS = -version-info @RAPTOR_LIBTOOL_VERSION@ \ +@RAPTOR_LDFLAGS@ $(MEM_LIBS) + +EXTRA_DIST = \ 
+CMakeLists.txt \ +raptor_config_cmake.h.in \ +raptor_permute_test.c \ +raptor_www_test.c \ +raptor_nfc_test.c \ +raptor_win32.c \ +$(man_MANS) \ +turtle_lexer.l turtle_parser.y \ +parsedate.y + +nodist_libraptor2_la_SOURCES = $(am__append_32) +all: $(BUILT_SOURCES) raptor_config.h + $(MAKE) $(AM_MAKEFLAGS) all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .log .o .obj .test .test$(EXEEXT) .trs +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu src/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu src/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +raptor_config.h: stamp-h1 + @test -f $@ || rm -f stamp-h1 + @test -f $@ || $(MAKE) $(AM_MAKEFLAGS) stamp-h1 + +stamp-h1: $(srcdir)/raptor_config.h.in $(top_builddir)/config.status + @rm -f stamp-h1 + cd $(top_builddir) && $(SHELL) ./config.status src/raptor_config.h +$(srcdir)/raptor_config.h.in: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + ($(am__cd) 
$(top_srcdir) && $(AUTOHEADER)) + rm -f stamp-h1 + touch $@ + +distclean-hdr: + -rm -f raptor_config.h stamp-h1 +raptor2.h: $(top_builddir)/config.status $(srcdir)/raptor2.h.in + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ + +install-libLTLIBRARIES: $(lib_LTLIBRARIES) + @$(NORMAL_INSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + list2=; for p in $$list; do \ + if test -f $$p; then \ + list2="$$list2 $$p"; \ + else :; fi; \ + done; \ + test -z "$$list2" || { \ + echo " $(MKDIR_P) '$(DESTDIR)$(libdir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(libdir)" || exit 1; \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 '$(DESTDIR)$(libdir)'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=install $(INSTALL) $(INSTALL_STRIP_FLAG) $$list2 "$(DESTDIR)$(libdir)"; \ + } + +uninstall-libLTLIBRARIES: + @$(NORMAL_UNINSTALL) + @list='$(lib_LTLIBRARIES)'; test -n "$(libdir)" || list=; \ + for p in $$list; do \ + $(am__strip_dir) \ + echo " $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f '$(DESTDIR)$(libdir)/$$f'"; \ + $(LIBTOOL) $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=uninstall rm -f "$(DESTDIR)$(libdir)/$$f"; \ + done + +clean-libLTLIBRARIES: + -test -z "$(lib_LTLIBRARIES)" || rm -f $(lib_LTLIBRARIES) + @list='$(lib_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +libraptor2.la: $(libraptor2_la_OBJECTS) $(libraptor2_la_DEPENDENCIES) $(EXTRA_libraptor2_la_DEPENDENCIES) + $(AM_V_CCLD)$(libraptor2_la_LINK) -rpath $(libdir) $(libraptor2_la_OBJECTS) $(libraptor2_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/raptor_nfc_icu.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@$(DEPDIR)/raptor_www_curl.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/raptor_www_libfetch.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@$(DEPDIR)/raptor_www_libxml.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/ntriples_parse.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parsedate.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_abbrev.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_avltree.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_concepts.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_escaped.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_general.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_grddl.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_guess.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_iostream.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_json.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_json_writer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_librdfa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_libxml.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_locator.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_log.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_memstr.Plo@am__quote@ # am--include-marker 
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_namespace.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_ntriples.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_option.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_parse.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_qname.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_rdfxml.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_rfc2396.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_rss.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_rss_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_sax2.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_sequence.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_dot.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_html.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_json.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_ntriples.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_rdfxml.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_rdfxmla.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_serialize_rss.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ 
@am__quote@./$(DEPDIR)/raptor_serialize_turtle.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_set.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_statement.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_stringbuffer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_syntax_description.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_term.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_turtle_writer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_unicode.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_uri.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_www.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_xml.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/raptor_xml_writer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/snprintf.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sort_r.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strcasecmp.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/turtle_common.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/turtle_lexer.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/turtle_parser.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF 
$(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs +install-nodist_pkgincludeHEADERS: $(nodist_pkginclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(nodist_pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludedir)" || exit $$?; \ + done + +uninstall-nodist_pkgincludeHEADERS: + 
@$(NORMAL_UNINSTALL) + @list='$(nodist_pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgincludedir)'; $(am__uninstall_files_from_dir) +install-pkgincludeHEADERS: $(pkginclude_HEADERS) + @$(NORMAL_INSTALL) + @list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \ + if test -n "$$list"; then \ + echo " $(MKDIR_P) '$(DESTDIR)$(pkgincludedir)'"; \ + $(MKDIR_P) "$(DESTDIR)$(pkgincludedir)" || exit 1; \ + fi; \ + for p in $$list; do \ + if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \ + echo "$$d$$p"; \ + done | $(am__base_list) | \ + while read files; do \ + echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(pkgincludedir)'"; \ + $(INSTALL_HEADER) $$files "$(DESTDIR)$(pkgincludedir)" || exit $$?; \ + done + +uninstall-pkgincludeHEADERS: + @$(NORMAL_UNINSTALL) + @list='$(pkginclude_HEADERS)'; test -n "$(pkgincludedir)" || list=; \ + files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \ + dir='$(DESTDIR)$(pkgincludedir)'; $(am__uninstall_files_from_dir) + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: 
cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ + fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags + +# Recover from deleted '.trs' file; this should ensure that +# "rm -f foo.log; make foo.trs" re-run 'foo.test', and re-create +# both 'foo.log' and 'foo.trs'. Break the recipe in two subshells +# to avoid problems with "make -n". +.log.trs: + rm -f $< $@ + $(MAKE) $(AM_MAKEFLAGS) $< + +# Leading 'am--fnord' is there to ensure the list of targets does not +# expand to empty, as could happen e.g. with make check TESTS=''. +am--fnord $(TEST_LOGS) $(TEST_LOGS:.log=.trs): $(am__force_recheck) +am--force-recheck: + @: + +$(TEST_SUITE_LOG): $(TEST_LOGS) + @$(am__set_TESTS_bases); \ + am__f_ok () { test -f "$$1" && test -r "$$1"; }; \ + redo_bases=`for i in $$bases; do \ + am__f_ok $$i.trs && am__f_ok $$i.log || echo $$i; \ + done`; \ + if test -n "$$redo_bases"; then \ + redo_logs=`for i in $$redo_bases; do echo $$i.log; done`; \ + redo_results=`for i in $$redo_bases; do echo $$i.trs; done`; \ + if $(am__make_dryrun); then :; else \ + rm -f $$redo_logs && rm -f $$redo_results || exit 1; \ + fi; \ + fi; \ + if test -n "$$am__remaking_logs"; then \ + echo "fatal: making $(TEST_SUITE_LOG): possible infinite" \ + "recursion detected" >&2; \ + elif test -n "$$redo_logs"; then \ + am__remaking_logs=yes $(MAKE) $(AM_MAKEFLAGS) $$redo_logs; \ + fi; \ + if $(am__make_dryrun); then :; else \ + st=0; \ + errmsg="fatal: making $(TEST_SUITE_LOG): failed to create"; \ + for i in $$redo_bases; do \ + test -f $$i.trs && test -r $$i.trs \ + || { echo "$$errmsg $$i.trs" >&2; st=1; }; \ + test -f $$i.log && test -r $$i.log \ + || { echo "$$errmsg $$i.log" >&2; st=1; }; \ + 
done; \ + test $$st -eq 0 || exit 1; \ + fi + @$(am__sh_e_setup); $(am__tty_colors); $(am__set_TESTS_bases); \ + ws='[ ]'; \ + results=`for b in $$bases; do echo $$b.trs; done`; \ + test -n "$$results" || results=/dev/null; \ + all=` grep "^$$ws*:test-result:" $$results | wc -l`; \ + pass=` grep "^$$ws*:test-result:$$ws*PASS" $$results | wc -l`; \ + fail=` grep "^$$ws*:test-result:$$ws*FAIL" $$results | wc -l`; \ + skip=` grep "^$$ws*:test-result:$$ws*SKIP" $$results | wc -l`; \ + xfail=`grep "^$$ws*:test-result:$$ws*XFAIL" $$results | wc -l`; \ + xpass=`grep "^$$ws*:test-result:$$ws*XPASS" $$results | wc -l`; \ + error=`grep "^$$ws*:test-result:$$ws*ERROR" $$results | wc -l`; \ + if test `expr $$fail + $$xpass + $$error` -eq 0; then \ + success=true; \ + else \ + success=false; \ + fi; \ + br='==================='; br=$$br$$br$$br$$br; \ + result_count () \ + { \ + if test x"$$1" = x"--maybe-color"; then \ + maybe_colorize=yes; \ + elif test x"$$1" = x"--no-color"; then \ + maybe_colorize=no; \ + else \ + echo "$@: invalid 'result_count' usage" >&2; exit 4; \ + fi; \ + shift; \ + desc=$$1 count=$$2; \ + if test $$maybe_colorize = yes && test $$count -gt 0; then \ + color_start=$$3 color_end=$$std; \ + else \ + color_start= color_end=; \ + fi; \ + echo "$${color_start}# $$desc $$count$${color_end}"; \ + }; \ + create_testsuite_report () \ + { \ + result_count $$1 "TOTAL:" $$all "$$brg"; \ + result_count $$1 "PASS: " $$pass "$$grn"; \ + result_count $$1 "SKIP: " $$skip "$$blu"; \ + result_count $$1 "XFAIL:" $$xfail "$$lgn"; \ + result_count $$1 "FAIL: " $$fail "$$red"; \ + result_count $$1 "XPASS:" $$xpass "$$red"; \ + result_count $$1 "ERROR:" $$error "$$mgn"; \ + }; \ + { \ + echo "$(PACKAGE_STRING): $(subdir)/$(TEST_SUITE_LOG)" | \ + $(am__rst_title); \ + create_testsuite_report --no-color; \ + echo; \ + echo ".. 
contents:: :depth: 2"; \ + echo; \ + for b in $$bases; do echo $$b; done \ + | $(am__create_global_log); \ + } >$(TEST_SUITE_LOG).tmp || exit 1; \ + mv $(TEST_SUITE_LOG).tmp $(TEST_SUITE_LOG); \ + if $$success; then \ + col="$$grn"; \ + else \ + col="$$red"; \ + test x"$$VERBOSE" = x || cat $(TEST_SUITE_LOG); \ + fi; \ + echo "$${col}$$br$${std}"; \ + echo "$${col}Testsuite summary"$(AM_TESTSUITE_SUMMARY_HEADER)"$${std}"; \ + echo "$${col}$$br$${std}"; \ + create_testsuite_report --maybe-color; \ + echo "$$col$$br$$std"; \ + if $$success; then :; else \ + echo "$${col}See $(subdir)/$(TEST_SUITE_LOG)$${std}"; \ + if test -n "$(PACKAGE_BUGREPORT)"; then \ + echo "$${col}Please report to $(PACKAGE_BUGREPORT)$${std}"; \ + fi; \ + echo "$$col$$br$$std"; \ + fi; \ + $$success || exit 1 + +check-TESTS: + @list='$(RECHECK_LOGS)'; test -z "$$list" || rm -f $$list + @list='$(RECHECK_LOGS:.log=.trs)'; test -z "$$list" || rm -f $$list + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + trs_list=`for i in $$bases; do echo $$i.trs; done`; \ + log_list=`echo $$log_list`; trs_list=`echo $$trs_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) TEST_LOGS="$$log_list"; \ + exit $$?; +recheck: all + @test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + @set +e; $(am__set_TESTS_bases); \ + bases=`for i in $$bases; do echo $$i; done \ + | $(am__list_recheck_tests)` || exit 1; \ + log_list=`for i in $$bases; do echo $$i.log; done`; \ + log_list=`echo $$log_list`; \ + $(MAKE) $(AM_MAKEFLAGS) $(TEST_SUITE_LOG) \ + am__force_recheck=am--force-recheck \ + TEST_LOGS="$$log_list"; \ + exit $$? 
+raptor_parse_test.log: raptor_parse_test + @p='raptor_parse_test'; \ + b='raptor_parse_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_rfc2396_test.log: raptor_rfc2396_test + @p='raptor_rfc2396_test'; \ + b='raptor_rfc2396_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_uri_test.log: raptor_uri_test + @p='raptor_uri_test'; \ + b='raptor_uri_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_namespace_test.log: raptor_namespace_test + @p='raptor_namespace_test'; \ + b='raptor_namespace_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +strcasecmp_test.log: strcasecmp_test + @p='strcasecmp_test'; \ + b='strcasecmp_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_www_test.log: raptor_www_test + @p='raptor_www_test'; \ + b='raptor_www_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_sequence_test.log: raptor_sequence_test + @p='raptor_sequence_test'; \ + 
b='raptor_sequence_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_stringbuffer_test.log: raptor_stringbuffer_test + @p='raptor_stringbuffer_test'; \ + b='raptor_stringbuffer_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_uri_win32_test.log: raptor_uri_win32_test + @p='raptor_uri_win32_test'; \ + b='raptor_uri_win32_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_iostream_test.log: raptor_iostream_test + @p='raptor_iostream_test'; \ + b='raptor_iostream_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_xml_writer_test.log: raptor_xml_writer_test + @p='raptor_xml_writer_test'; \ + b='raptor_xml_writer_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_turtle_writer_test.log: raptor_turtle_writer_test + @p='raptor_turtle_writer_test'; \ + b='raptor_turtle_writer_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_avltree_test.log: raptor_avltree_test + 
@p='raptor_avltree_test'; \ + b='raptor_avltree_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_term_test.log: raptor_term_test + @p='raptor_term_test'; \ + b='raptor_term_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_permute_test.log: raptor_permute_test + @p='raptor_permute_test'; \ + b='raptor_permute_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_snprintf_test.log: raptor_snprintf_test + @p='raptor_snprintf_test'; \ + b='raptor_snprintf_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_sort_r_test.log: raptor_sort_r_test + @p='raptor_sort_r_test'; \ + b='raptor_sort_r_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_set_test.log: raptor_set_test + @p='raptor_set_test'; \ + b='raptor_set_test'; \ + $(am__check_pre) $(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +raptor_xml_test.log: raptor_xml_test + @p='raptor_xml_test'; \ + b='raptor_xml_test'; \ + $(am__check_pre) 
$(LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_LOG_DRIVER_FLAGS) $(LOG_DRIVER_FLAGS) -- $(LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +.test.log: + @p='$<'; \ + $(am__set_b); \ + $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ + --log-file $$b.log --trs-file $$b.trs \ + $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ + "$$tst" $(AM_TESTS_FD_REDIRECT) +@am__EXEEXT_TRUE@.test$(EXEEXT).log: +@am__EXEEXT_TRUE@ @p='$<'; \ +@am__EXEEXT_TRUE@ $(am__set_b); \ +@am__EXEEXT_TRUE@ $(am__check_pre) $(TEST_LOG_DRIVER) --test-name "$$f" \ +@am__EXEEXT_TRUE@ --log-file $$b.log --trs-file $$b.trs \ +@am__EXEEXT_TRUE@ $(am__common_driver_flags) $(AM_TEST_LOG_DRIVER_FLAGS) $(TEST_LOG_DRIVER_FLAGS) -- $(TEST_LOG_COMPILE) \ +@am__EXEEXT_TRUE@ "$$tst" $(AM_TESTS_FD_REDIRECT) +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am + $(MAKE) $(AM_MAKEFLAGS) check-TESTS +check: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) check-am +all-am: Makefile $(LTLIBRARIES) $(HEADERS) raptor_config.h +installdirs: + for dir in "$(DESTDIR)$(libdir)" "$(DESTDIR)$(pkgincludedir)" "$(DESTDIR)$(pkgincludedir)"; do \ + test -z "$$dir" || $(MKDIR_P) "$$dir"; \ + done +install: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-am +install-exec: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + -test -z "$(TEST_LOGS)" || rm -f $(TEST_LOGS) + -test -z "$(TEST_LOGS:.log=.trs)" || rm -f $(TEST_LOGS:.log=.trs) + -test -z "$(TEST_SUITE_LOG)" || rm -f $(TEST_SUITE_LOG) + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+ -test -z "$(BUILT_SOURCES)" || rm -f $(BUILT_SOURCES) + -test -z "$(MAINTAINERCLEANFILES)" || rm -f $(MAINTAINERCLEANFILES) +clean: clean-am + +clean-am: clean-generic clean-libLTLIBRARIES clean-libtool \ + mostlyclean-am + +distclean: distclean-am + -rm -f $(DEPDIR)/raptor_nfc_icu.Plo + -rm -f $(DEPDIR)/raptor_www_curl.Plo + -rm -f $(DEPDIR)/raptor_www_libfetch.Plo + -rm -f $(DEPDIR)/raptor_www_libxml.Plo + -rm -f ./$(DEPDIR)/ntriples_parse.Plo + -rm -f ./$(DEPDIR)/parsedate.Plo + -rm -f ./$(DEPDIR)/raptor_abbrev.Plo + -rm -f ./$(DEPDIR)/raptor_avltree.Plo + -rm -f ./$(DEPDIR)/raptor_concepts.Plo + -rm -f ./$(DEPDIR)/raptor_escaped.Plo + -rm -f ./$(DEPDIR)/raptor_general.Plo + -rm -f ./$(DEPDIR)/raptor_grddl.Plo + -rm -f ./$(DEPDIR)/raptor_guess.Plo + -rm -f ./$(DEPDIR)/raptor_iostream.Plo + -rm -f ./$(DEPDIR)/raptor_json.Plo + -rm -f ./$(DEPDIR)/raptor_json_writer.Plo + -rm -f ./$(DEPDIR)/raptor_librdfa.Plo + -rm -f ./$(DEPDIR)/raptor_libxml.Plo + -rm -f ./$(DEPDIR)/raptor_locator.Plo + -rm -f ./$(DEPDIR)/raptor_log.Plo + -rm -f ./$(DEPDIR)/raptor_memstr.Plo + -rm -f ./$(DEPDIR)/raptor_namespace.Plo + -rm -f ./$(DEPDIR)/raptor_ntriples.Plo + -rm -f ./$(DEPDIR)/raptor_option.Plo + -rm -f ./$(DEPDIR)/raptor_parse.Plo + -rm -f ./$(DEPDIR)/raptor_qname.Plo + -rm -f ./$(DEPDIR)/raptor_rdfxml.Plo + -rm -f ./$(DEPDIR)/raptor_rfc2396.Plo + -rm -f ./$(DEPDIR)/raptor_rss.Plo + -rm -f ./$(DEPDIR)/raptor_rss_common.Plo + -rm -f ./$(DEPDIR)/raptor_sax2.Plo + -rm -f ./$(DEPDIR)/raptor_sequence.Plo + -rm -f ./$(DEPDIR)/raptor_serialize.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_dot.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_html.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_json.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_ntriples.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_rdfxml.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_rdfxmla.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_rss.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_turtle.Plo + -rm -f ./$(DEPDIR)/raptor_set.Plo + -rm -f 
./$(DEPDIR)/raptor_statement.Plo + -rm -f ./$(DEPDIR)/raptor_stringbuffer.Plo + -rm -f ./$(DEPDIR)/raptor_syntax_description.Plo + -rm -f ./$(DEPDIR)/raptor_term.Plo + -rm -f ./$(DEPDIR)/raptor_turtle_writer.Plo + -rm -f ./$(DEPDIR)/raptor_unicode.Plo + -rm -f ./$(DEPDIR)/raptor_uri.Plo + -rm -f ./$(DEPDIR)/raptor_www.Plo + -rm -f ./$(DEPDIR)/raptor_xml.Plo + -rm -f ./$(DEPDIR)/raptor_xml_writer.Plo + -rm -f ./$(DEPDIR)/snprintf.Plo + -rm -f ./$(DEPDIR)/sort_r.Plo + -rm -f ./$(DEPDIR)/strcasecmp.Plo + -rm -f ./$(DEPDIR)/turtle_common.Plo + -rm -f ./$(DEPDIR)/turtle_lexer.Plo + -rm -f ./$(DEPDIR)/turtle_parser.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-hdr distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: install-nodist_pkgincludeHEADERS \ + install-pkgincludeHEADERS + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: install-libLTLIBRARIES + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f $(DEPDIR)/raptor_nfc_icu.Plo + -rm -f $(DEPDIR)/raptor_www_curl.Plo + -rm -f $(DEPDIR)/raptor_www_libfetch.Plo + -rm -f $(DEPDIR)/raptor_www_libxml.Plo + -rm -f ./$(DEPDIR)/ntriples_parse.Plo + -rm -f ./$(DEPDIR)/parsedate.Plo + -rm -f ./$(DEPDIR)/raptor_abbrev.Plo + -rm -f ./$(DEPDIR)/raptor_avltree.Plo + -rm -f ./$(DEPDIR)/raptor_concepts.Plo + -rm -f ./$(DEPDIR)/raptor_escaped.Plo + -rm -f ./$(DEPDIR)/raptor_general.Plo + -rm -f ./$(DEPDIR)/raptor_grddl.Plo + -rm -f ./$(DEPDIR)/raptor_guess.Plo + -rm -f ./$(DEPDIR)/raptor_iostream.Plo + -rm -f ./$(DEPDIR)/raptor_json.Plo + -rm -f ./$(DEPDIR)/raptor_json_writer.Plo + -rm -f ./$(DEPDIR)/raptor_librdfa.Plo + -rm -f ./$(DEPDIR)/raptor_libxml.Plo + -rm -f 
./$(DEPDIR)/raptor_locator.Plo + -rm -f ./$(DEPDIR)/raptor_log.Plo + -rm -f ./$(DEPDIR)/raptor_memstr.Plo + -rm -f ./$(DEPDIR)/raptor_namespace.Plo + -rm -f ./$(DEPDIR)/raptor_ntriples.Plo + -rm -f ./$(DEPDIR)/raptor_option.Plo + -rm -f ./$(DEPDIR)/raptor_parse.Plo + -rm -f ./$(DEPDIR)/raptor_qname.Plo + -rm -f ./$(DEPDIR)/raptor_rdfxml.Plo + -rm -f ./$(DEPDIR)/raptor_rfc2396.Plo + -rm -f ./$(DEPDIR)/raptor_rss.Plo + -rm -f ./$(DEPDIR)/raptor_rss_common.Plo + -rm -f ./$(DEPDIR)/raptor_sax2.Plo + -rm -f ./$(DEPDIR)/raptor_sequence.Plo + -rm -f ./$(DEPDIR)/raptor_serialize.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_dot.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_html.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_json.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_ntriples.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_rdfxml.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_rdfxmla.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_rss.Plo + -rm -f ./$(DEPDIR)/raptor_serialize_turtle.Plo + -rm -f ./$(DEPDIR)/raptor_set.Plo + -rm -f ./$(DEPDIR)/raptor_statement.Plo + -rm -f ./$(DEPDIR)/raptor_stringbuffer.Plo + -rm -f ./$(DEPDIR)/raptor_syntax_description.Plo + -rm -f ./$(DEPDIR)/raptor_term.Plo + -rm -f ./$(DEPDIR)/raptor_turtle_writer.Plo + -rm -f ./$(DEPDIR)/raptor_unicode.Plo + -rm -f ./$(DEPDIR)/raptor_uri.Plo + -rm -f ./$(DEPDIR)/raptor_www.Plo + -rm -f ./$(DEPDIR)/raptor_xml.Plo + -rm -f ./$(DEPDIR)/raptor_xml_writer.Plo + -rm -f ./$(DEPDIR)/snprintf.Plo + -rm -f ./$(DEPDIR)/sort_r.Plo + -rm -f ./$(DEPDIR)/strcasecmp.Plo + -rm -f ./$(DEPDIR)/turtle_common.Plo + -rm -f ./$(DEPDIR)/turtle_lexer.Plo + -rm -f ./$(DEPDIR)/turtle_parser.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: uninstall-libLTLIBRARIES \ + uninstall-nodist_pkgincludeHEADERS uninstall-pkgincludeHEADERS + +.MAKE: all 
check check-am install install-am install-exec \ + install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-TESTS \ + check-am clean clean-generic clean-libLTLIBRARIES \ + clean-libtool cscopelist-am ctags ctags-am distclean \ + distclean-compile distclean-generic distclean-hdr \ + distclean-libtool distclean-tags distdir dvi dvi-am html \ + html-am info info-am install install-am install-data \ + install-data-am install-dvi install-dvi-am install-exec \ + install-exec-am install-html install-html-am install-info \ + install-info-am install-libLTLIBRARIES install-man \ + install-nodist_pkgincludeHEADERS install-pdf install-pdf-am \ + install-pkgincludeHEADERS install-ps install-ps-am \ + install-strip installcheck installcheck-am installdirs \ + maintainer-clean maintainer-clean-generic mostlyclean \ + mostlyclean-compile mostlyclean-generic mostlyclean-libtool \ + pdf pdf-am ps ps-am recheck tags tags-am uninstall \ + uninstall-am uninstall-libLTLIBRARIES \ + uninstall-nodist_pkgincludeHEADERS uninstall-pkgincludeHEADERS + +.PRECIOUS: Makefile + + +# raptor_parse.c is the first source file so this ensures +# git-version.h is created before it and before the actual file +# dependencies are calculated as a side-effect of compilation +@MAINTAINER_MODE_TRUE@raptor_parse.c: git-version.h + +# Actually it needs turtle_parser.h but nevermind +@MAINTAINER_MODE_TRUE@turtle_lexer.c: $(srcdir)/turtle_lexer.l turtle_parser.c $(top_srcdir)/scripts/fix-flex.pl +@MAINTAINER_MODE_TRUE@ $(AM_V_GEN) \ +@MAINTAINER_MODE_TRUE@ $(LEX) -o$@ $(srcdir)/turtle_lexer.l; \ +@MAINTAINER_MODE_TRUE@ for file in turtle_lexer.c turtle_lexer.h; do \ +@MAINTAINER_MODE_TRUE@ $(PERL) $(top_srcdir)/scripts/fix-flex.pl $$file > turtle_lexer.t || rm -f $$file; \ +@MAINTAINER_MODE_TRUE@ mv -f turtle_lexer.t $$file; \ +@MAINTAINER_MODE_TRUE@ done + +@MAINTAINER_MODE_TRUE@turtle_lexer.h: turtle_lexer.c ; @exit 0 + +@MAINTAINER_MODE_TRUE@turtle_parser.c: $(srcdir)/turtle_parser.y 
$(top_srcdir)/scripts/fix-bison.pl +@MAINTAINER_MODE_TRUE@ $(AM_V_GEN) \ +@MAINTAINER_MODE_TRUE@ $(BISON) -o $@ $(srcdir)/turtle_parser.y; \ +@MAINTAINER_MODE_TRUE@ $(PERL) $(top_srcdir)/scripts/fix-bison.pl $@ + +@MAINTAINER_MODE_TRUE@turtle_parser.h: turtle_parser.c ; @exit 0 + +@MAINTAINER_MODE_TRUE@parsedate.c: $(srcdir)/parsedate.y $(top_srcdir)/scripts/fix-bison.pl +@MAINTAINER_MODE_TRUE@ $(AM_V_GEN) \ +@MAINTAINER_MODE_TRUE@ $(BISON) -o $@ $(srcdir)/parsedate.y; \ +@MAINTAINER_MODE_TRUE@ $(PERL) $(top_srcdir)/scripts/fix-bison.pl $@ + +@MAINTAINER_MODE_TRUE@parsedate.h: parsedate.c ; @exit 0 + +# Actually it needs turtle_parser.h but nevermind +turtle_lexer_test: $(srcdir)/turtle_lexer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/turtle_lexer.c libraptor2.la $(LIBS) + +turtle_parser_test: $(srcdir)/turtle_parser.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/turtle_parser.c libraptor2.la $(LIBS) + +raptor_parse_test: $(srcdir)/raptor_parse.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_parse.c libraptor2.la $(LIBS) + +raptor_rfc2396_test: $(srcdir)/raptor_rfc2396.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_rfc2396.c libraptor2.la $(LIBS) + +raptor_uri_test: $(srcdir)/raptor_uri.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_uri.c libraptor2.la $(LIBS) + +raptor_uri_win32_test: $(srcdir)/raptor_uri.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE -DWIN32_URI_TEST $(srcdir)/raptor_uri.c libraptor2.la $(LIBS) + +raptor_namespace_test: $(srcdir)/raptor_namespace.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_namespace.c libraptor2.la $(LIBS) + +strcasecmp_test: $(srcdir)/strcasecmp.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. 
-DSTANDALONE $(srcdir)/strcasecmp.c libraptor2.la $(LIBS) + +raptor_www_test: $(srcdir)/raptor_www_test.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_www_test.c libraptor2.la $(LIBS) + +raptor_set_test: $(srcdir)/raptor_set.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_set.c libraptor2.la $(LIBS) + +raptor_xml_test: $(srcdir)/raptor_xml.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_xml.c libraptor2.la $(LIBS) + +raptor_sequence_test: $(srcdir)/raptor_sequence.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_sequence.c libraptor2.la $(LIBS) + +raptor_stringbuffer_test: $(srcdir)/raptor_stringbuffer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_stringbuffer.c libraptor2.la $(LIBS) + +raptor_nfc_test: $(srcdir)/raptor_nfc_test.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_nfc_test.c libraptor2.la $(LIBS) + +raptor_iostream_test: $(srcdir)/raptor_iostream.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_iostream.c libraptor2.la $(LIBS) + +raptor_xml_writer_test: $(srcdir)/raptor_xml_writer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_xml_writer.c libraptor2.la $(LIBS) + +raptor_turtle_writer_test: $(srcdir)/raptor_turtle_writer.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_turtle_writer.c libraptor2.la $(LIBS) + +raptor_avltree_test: $(srcdir)/raptor_avltree.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_avltree.c libraptor2.la $(LIBS) + +raptor_term_test: $(srcdir)/raptor_term.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. 
-DSTANDALONE $(srcdir)/raptor_term.c libraptor2.la $(LIBS) + +raptor_permute_test: $(srcdir)/raptor_permute_test.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/raptor_permute_test.c libraptor2.la $(LIBS) + +raptor_snprintf_test: $(srcdir)/snprintf.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/snprintf.c libraptor2.la $(LIBS) + +raptor_sort_r_test: $(srcdir)/sort_r.c libraptor2.la + $(LINK) $(DEFS) $(CPPFLAGS) -I$(srcdir) -I. -DSTANDALONE $(srcdir)/sort_r.c libraptor2.la $(LIBS) + +$(top_builddir)/librdfa/librdfa.la: + cd $(top_builddir)/librdfa && $(MAKE) librdfa.la + +# Some people need a little help ;-) +test: check + +@MAINTAINER_MODE_TRUE@git-version.h: check-version + +# Always run this in maintainer mode but do not always change git-version.h +@MAINTAINER_MODE_TRUE@.PHONY: check-version +@MAINTAINER_MODE_TRUE@check-version: +@MAINTAINER_MODE_TRUE@ @file="git-version.h" ; \ +@MAINTAINER_MODE_TRUE@ if test -d ../.git; then \ +@MAINTAINER_MODE_TRUE@ git_version=`cd .. && git rev-parse HEAD`; \ +@MAINTAINER_MODE_TRUE@ else \ +@MAINTAINER_MODE_TRUE@ git_version="unknown"; \ +@MAINTAINER_MODE_TRUE@ fi; \ +@MAINTAINER_MODE_TRUE@ $(RECHO) "GIT version $$git_version"; \ +@MAINTAINER_MODE_TRUE@ tmp="$$file.tmp"; \ +@MAINTAINER_MODE_TRUE@ $(RECHO) "#define GIT_VERSION \"$$git_version\"" > $$tmp; \ +@MAINTAINER_MODE_TRUE@ if test -f $$file; then \ +@MAINTAINER_MODE_TRUE@ if cmp $$file $$tmp >/dev/null 2>&1; then \ +@MAINTAINER_MODE_TRUE@ rm $$tmp; \ +@MAINTAINER_MODE_TRUE@ else \ +@MAINTAINER_MODE_TRUE@ mv $$tmp $$file; \ +@MAINTAINER_MODE_TRUE@ fi; \ +@MAINTAINER_MODE_TRUE@ else \ +@MAINTAINER_MODE_TRUE@ mv $$tmp $$file; \ +@MAINTAINER_MODE_TRUE@ fi + +# Run Clang static analyzer over sources. 
+@MAINTAINER_MODE_TRUE@analyze: $(SOURCES) +@MAINTAINER_MODE_TRUE@ @list='$(SOURCES)'; \ +@MAINTAINER_MODE_TRUE@ result=0; \ +@MAINTAINER_MODE_TRUE@ for file in $$list; do \ +@MAINTAINER_MODE_TRUE@ if echo $$file | grep '\.c$$' >/dev/null 2>&1; then \ +@MAINTAINER_MODE_TRUE@ $(RECHO) "Analyzing $$file"; \ +@MAINTAINER_MODE_TRUE@ $(ANALYZE_COMMAND) $(srcdir)/$$file; \ +@MAINTAINER_MODE_TRUE@ status=$$?; \ +@MAINTAINER_MODE_TRUE@ if test $$status != 0; then \ +@MAINTAINER_MODE_TRUE@ result=1; \ +@MAINTAINER_MODE_TRUE@ fi; \ +@MAINTAINER_MODE_TRUE@ fi; \ +@MAINTAINER_MODE_TRUE@ done; \ +@MAINTAINER_MODE_TRUE@ set -e; exit $$result + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/src/ntriples_parse.c b/src/ntriples_parse.c new file mode 100644 index 0000000..1d74431 --- /dev/null +++ b/src/ntriples_parse.c @@ -0,0 +1,799 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * ntriples_parse.c - Raptor N-Triples Parser implementation + * + * N-Triples + * http://www.w3.org/TR/rdf-testcases/#ntriples + * + * Copyright (C) 2001-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2001-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +/* Set RAPTOR_DEBUG to > 1 to get lots of buffer related debugging */ +/* +#undef RAPTOR_DEBUG +#define RAPTOR_DEBUG 2 +*/ + + +/* Prototypes for local functions */ +static void raptor_ntriples_generate_statement(raptor_parser* parser, raptor_term* subject_term, raptor_term* predicate_term, raptor_term* object_term, raptor_term* graph_term); + +/* + * NTriples parser object + */ +struct raptor_ntriples_parser_context_s { + /* current line */ + unsigned char *line; + /* current line length */ + size_t line_length; + /* current char in line buffer */ + size_t offset; + + char last_char; + + /* static statement for use in passing to user code */ + raptor_statement statement; + + /* Non-0 if N-Quads */ + int is_nquads; + + int literal_graph_warning; +}; + + +typedef struct raptor_ntriples_parser_context_s raptor_ntriples_parser_context; + + + +/** + * raptor_ntriples_parse_init: + * + * Initialise the Raptor NTriples parser. 
+ * + * Return value: non 0 on failure + **/ + +static int +raptor_ntriples_parse_init(raptor_parser* rdf_parser, const char *name) +{ + raptor_ntriples_parser_context *ntriples_parser; + ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context; + + raptor_statement_init(&ntriples_parser->statement, rdf_parser->world); + + if(!strcmp(name, "nquads")) + ntriples_parser->is_nquads = 1; + + return 0; +} + + +/* PUBLIC FUNCTIONS */ + + +/* + * raptor_ntriples_parse_terminate - Free the Raptor NTriples parser + * @rdf_parser: parser object + * + **/ +static void +raptor_ntriples_parse_terminate(raptor_parser* rdf_parser) +{ + raptor_ntriples_parser_context *ntriples_parser; + ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context; + if(ntriples_parser->line_length) + RAPTOR_FREE(cdata, ntriples_parser->line); +} + + +static void +raptor_ntriples_generate_statement(raptor_parser* parser, + raptor_term *subject, + raptor_term *predicate, + raptor_term *object, + raptor_term *graph) +{ + /* raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)parser->context; */ + raptor_statement *statement = &parser->statement; + + if(!parser->emitted_default_graph) { + raptor_parser_start_graph(parser, NULL, 0); + parser->emitted_default_graph++; + } + + statement->subject = subject; + statement->predicate = predicate; + statement->object = object; + statement->graph = graph; + + /* Do not generate a partial triple - but do clean up */ + if(!subject || !predicate || !object) + goto cleanup; + + /* If there is no statement handler - there is nothing else to do */ + if(!parser->statement_handler) + goto cleanup; + + /* Generate the statement */ + (*parser->statement_handler)(parser->user_data, statement); + + cleanup: + raptor_free_statement(statement); +} + + + +#define MAX_NTRIPLES_TERMS 4 + +static int +raptor_ntriples_parse_line(raptor_parser* rdf_parser, + unsigned char *buffer, size_t len, + int max_terms) +{ + 
raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context; + int i; + unsigned char *p; + raptor_term* terms[MAX_NTRIPLES_TERMS+1] = {NULL, NULL, NULL, NULL, NULL}; + int rc = 0; + + /* ASSERTION: + * p always points to first char we are considering + * p[len-1] always points to last char + */ + + /* Handle empty lines */ + if(!len) + return 0; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("handling line '%s' (%d bytes)\n", buffer, (unsigned int)len); +#endif + + p = buffer; + + while(len > 0 && isspace((int)*p)) { + p++; + rdf_parser->locator.column++; + rdf_parser->locator.byte++; + len--; + } + + /* Handle empty - all whitespace lines */ + if(!len) + return 0; + + /* Handle comment lines */ + if(*p == '#') + return 0; + + /* Remove trailing spaces */ + while(len > 0 && isspace((int)p[len-1])) { + p[len-1] = '\0'; + len--; + } + + /* can't be empty now - that would have been caught above */ + + /* Must be triple/quad */ + + for(i = 0; i < MAX_NTRIPLES_TERMS + 1; i++) { + size_t term_len; + + if(!len) { + if(ntriples_parser->is_nquads) { + /* context is optional in nquads */ + if(i == 3 || i ==4) + break; + } else { + if(i == 3) + break; + } + raptor_parser_error(rdf_parser, "Unexpected end of line"); + goto cleanup; + } + + + if(i == 3) { + /* graph term (3): blank node or <URI> */ + if(*p != '<' && *p != '_') { + raptor_parser_error(rdf_parser, "Saw '%c', expected Graph term <URIref>, _:bnodeID", *p); + goto cleanup; + } + } else if(i == 2) { + /* object term (2): expect either <URI> or _:name or literal */ + if(*p != '<' && *p != '_' && *p != '"') { + raptor_parser_error(rdf_parser, "Saw '%c', expected object term <URIref>, _:bnodeID or \"literal\"", *p); + goto cleanup; + } + } else if(i == 1) { + /* predicate term (1): expect URI only */ + if(*p != '<') { + raptor_parser_error(rdf_parser, "Saw '%c', expected predict term <URIref>", *p); + goto cleanup; + } + } else { + /* subject (0) or graph (3) 
terms: expect <URI> or _:name */ + if(*p != '<' && *p != '_') { + raptor_parser_error(rdf_parser, "Saw '%c', expected subject term <URIref> or _:bnodeID", *p); + goto cleanup; + } + } + + + term_len = raptor_ntriples_parse_term(rdf_parser->world, &rdf_parser->locator, + p, &len, &terms[i], 0); + if(!term_len) { + rc = 1; + goto cleanup; + } + + p += term_len; + rc = 0; + + if(terms[i] && terms[i]->type == RAPTOR_TERM_TYPE_URI) { + unsigned const char* uri_string; + + /* Check for absolute URI */ + uri_string = raptor_uri_as_string(terms[i]->value.uri); + if(!raptor_uri_uri_string_is_absolute(uri_string)) { + raptor_parser_error(rdf_parser, "URI %s is not absolute", uri_string); + goto cleanup; + } + } + + /* Skip whitespace after terms */ + while(len > 0 && isspace((int)*p)) { + p++; + len--; + rdf_parser->locator.column++; + rdf_parser->locator.byte++; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + if(terms[i]) { + unsigned char* c = raptor_term_to_string(terms[i]); + fprintf(stderr, "item %d: term '%s' type %d\n", + i, c, terms[i]->type); + raptor_free_memory(c); + } else + fprintf(stderr, "item %d: NULL term\n", i); +#endif + + /* Look for terminating '.' after 3rd (ntriples) or 3rd/4th (nquads) term */ + if(i == (ntriples_parser->is_nquads ? 4 : 3) && *p != '.') { + raptor_parser_error(rdf_parser, "Missing terminating \".\""); + return 0; + } + + /* Still may be optional so check again */ + if(*p == '.') { + p++; + len--; + rdf_parser->locator.column++; + rdf_parser->locator.byte++; + + /* Skip whitespace after '.' 
*/ + while(len > 0 && isspace((int)*p)) { + p++; + len--; + rdf_parser->locator.column++; + rdf_parser->locator.byte++; + } + + /* Only a comment is allowed here */ + if(*p && *p != '#') { + raptor_parser_error(rdf_parser, "Junk after terminating \".\""); + return 0; + } + + p += len; len = 0; + } + } + + + if(ntriples_parser->is_nquads) { + /* Check N-Quads has 3 or 4 terms */ + if(terms[4]) { + raptor_free_term(terms[4]); + terms[4] = NULL; + raptor_parser_error(rdf_parser, "N-Quads only allows 3 or 4 terms"); + goto cleanup; + } + } else { + /* Check N-Triples has only 3 terms */ + if(terms[3] || terms[4]) { + if(terms[4]) { + raptor_free_term(terms[4]); + terms[4] = NULL; + } + if(terms[3]) { + raptor_free_term(terms[3]); + terms[3] = NULL; + } + raptor_parser_error(rdf_parser, "N-Triples only allows 3 terms"); + goto cleanup; + } + } + + if(terms[3] && terms[3]->type == RAPTOR_TERM_TYPE_LITERAL) { + if(!ntriples_parser->literal_graph_warning++) + raptor_parser_warning(rdf_parser, "Ignoring N-Quad literal contexts"); + + raptor_free_term(terms[3]); + terms[3] = NULL; + } + + raptor_ntriples_generate_statement(rdf_parser, + terms[0], terms[1], terms[2], terms[3]); + + rdf_parser->locator.byte += RAPTOR_BAD_CAST(int, len); + + cleanup: + + return rc; +} + + +static int +raptor_ntriples_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + unsigned char *buffer; + unsigned char *ptr; + unsigned char *start; + raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context; + int max_terms = ntriples_parser->is_nquads ? 
4 : 3; + unsigned char* end_ptr; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("adding %d bytes to buffer\n", (unsigned int)len); +#endif + + if(len) { + buffer = RAPTOR_MALLOC(unsigned char*, ntriples_parser->line_length + len + 1); + if(!buffer) { + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return 1; + } + + if(ntriples_parser->line_length) { + memcpy(buffer, ntriples_parser->line, ntriples_parser->line_length); + RAPTOR_FREE(char*, ntriples_parser->line); + } + + ntriples_parser->line = buffer; + + /* move pointer to end of cdata buffer */ + ptr = buffer + ntriples_parser->line_length; + + /* adjust stored length */ + ntriples_parser->line_length += len; + + /* now write new stuff at end of cdata buffer */ + memcpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + } else + buffer = ntriples_parser->line; + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("buffer now %ld bytes\n", ntriples_parser->line_length); +#endif + + if(!ntriples_parser->line_length) + return 0; + + ptr = buffer + ntriples_parser->offset; + end_ptr = buffer + ntriples_parser->line_length; + while((start = ptr) < end_ptr) { + unsigned char *line_start = ptr; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("line buffer now '%s' (offset %ld)\n", ptr, ptr-(buffer+ntriples_parser->offset)); +#endif + + /* skip \n when just seen \r - i.e. 
\r\n or CR LF */ + if(ntriples_parser->last_char == '\r' && *ptr == '\n') { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG1("skipping a \\n\n"); +#endif + ptr++; + rdf_parser->locator.byte++; + rdf_parser->locator.column = 0; + start = line_start = ptr; + } + + if(1) { + int quote = '\0'; + int in_uri = '\0'; + int bq = 0; + while(ptr < end_ptr) { + if(!bq) { + if(*ptr == '\\') { + bq = 1; + ptr++; + continue; + } + + if(*ptr == '<') + in_uri = 1; + else if (in_uri && *ptr == '>') + in_uri = 0; + + if(!quote) { + if((!in_uri && *ptr == '\'') || *ptr == '"') + quote = *ptr; + if(*ptr == '\n' || *ptr == '\r') + break; + } else { + if(*ptr == quote) + quote = 0; + } + } + ptr++; + bq = 0; + } + } + + if(ptr == end_ptr) { + if(!is_end) + /* middle of line */ + break; + } else { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("found newline \\x%02x at offset %ld\n", *ptr, + ptr-line_start); +#endif + ntriples_parser->last_char = *ptr; + } + + len = ptr - line_start; + rdf_parser->locator.column = 0; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("line (%ld) : >>>", len); + fwrite(line_start, sizeof(char), len, stderr); + fputs("<<<\n", stderr); +#endif + *ptr = '\0'; + if(raptor_ntriples_parse_line(rdf_parser, line_start, len, max_terms)) + return 1; + + rdf_parser->locator.line++; + + /* go past newline */ + if(ptr < end_ptr) { + ptr++; + rdf_parser->locator.byte++; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + /* Do not peek if too far */ + if(RAPTOR_BAD_CAST(size_t, ptr - buffer) < ntriples_parser->line_length) + RAPTOR_DEBUG2("next char is \\x%02x\n", *ptr); + else + RAPTOR_DEBUG1("next char unknown - end of buffer\n"); +#endif + } + + ntriples_parser->offset = start - buffer; + + len = ntriples_parser->line_length - ntriples_parser->offset; + + if(len && ntriples_parser->line_length != len) { + /* collapse buffer */ + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("collapsing buffer from %ld 
to %ld bytes\n", ntriples_parser->line_length, len); +#endif + buffer = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!buffer) { + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return 1; + } + + memcpy(buffer, + ntriples_parser->line + ntriples_parser->line_length - len, + len); + buffer[len] = '\0'; + + RAPTOR_FREE(char*, ntriples_parser->line); + + ntriples_parser->line = buffer; + ntriples_parser->line_length -= ntriples_parser->offset; + ntriples_parser->offset = 0; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("buffer now '%s' (%ld bytes)\n", ntriples_parser->line, ntriples_parser->line_length); +#endif + } + + /* exit now, no more input */ + if(is_end) { + if(ntriples_parser->offset != ntriples_parser->line_length) { + raptor_parser_error(rdf_parser, "Junk at end of input."); + return 1; + } + + if(rdf_parser->emitted_default_graph) { + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + + return 0; + } + + return 0; +} + + +static int +raptor_ntriples_parse_start(raptor_parser* rdf_parser) +{ + raptor_locator *locator = &rdf_parser->locator; + raptor_ntriples_parser_context *ntriples_parser = (raptor_ntriples_parser_context*)rdf_parser->context; + + locator->line = 1; + locator->column = 0; + locator->byte = 0; + + ntriples_parser->last_char = '\0'; + + return 0; +} + + +#if defined RAPTOR_PARSER_NTRIPLES || defined RAPTOR_PARSER_NQUADS +static int +raptor_ntriples_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score = 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "nt")) + score = 8; + + /* Explicitly refuse to do anything with Turtle or N3 named content */ + if(!strcmp((const char*)suffix, "ttl") || + !strcmp((const char*)suffix, "n3")) { + return 0; + } + } + + if(mime_type) { + if(strstr((const char*)mime_type, "ntriples")) + score 
+= 6; + } + + if(buffer && len) { + int has_ntriples_3; + + /* recognizing N-Triples is tricky but rely that it is line based + * and that all URLs are absolute, and there are a lot of http: + * URLs + */ +#define HAS_AT_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) + +#define HAS_NTRIPLES_START_1_LEN 8 +#define HAS_NTRIPLES_START_1 (!memcmp((const char*)buffer, "<http://", HAS_NTRIPLES_START_1_LEN)) +#define HAS_NTRIPLES_START_2_LEN 2 +#define HAS_NTRIPLES_START_2 (!memcmp((const char*)buffer, "_:", HAS_NTRIPLES_START_2_LEN)) + +#define HAS_NTRIPLES_1 (raptor_memstr((const char*)buffer, len, "\n<http://") != NULL) +#define HAS_NTRIPLES_2 (raptor_memstr((const char*)buffer, len, "\r<http://") != NULL) +#define HAS_NTRIPLES_3 (raptor_memstr((const char*)buffer, len, "> <http://") != NULL) +#define HAS_NTRIPLES_4 (raptor_memstr((const char*)buffer, len, "> <") != NULL) +#define HAS_NTRIPLES_5 (raptor_memstr((const char*)buffer, len, "> \"") != NULL) + if(HAS_AT_PREFIX) + /* Turtle */ + return 0; + + has_ntriples_3 = HAS_NTRIPLES_3; + + /* Bonus if the first bytes look N-Triples-like */ + if(len >= HAS_NTRIPLES_START_1_LEN && HAS_NTRIPLES_START_1) + score++; + if(len >= HAS_NTRIPLES_START_2_LEN && HAS_NTRIPLES_START_2) + score++; + + if(HAS_NTRIPLES_1 || HAS_NTRIPLES_2) { + /* N-Triples file with newlines and HTTP subjects */ + score += 6; + if(has_ntriples_3) + score++; + } else if(has_ntriples_3) { + /* an HTTP URL predicate or object but no HTTP subject */ + score += 3; + } else if(HAS_NTRIPLES_4) { + /* non HTTP urls - weak check */ + score += 2; + if(HAS_NTRIPLES_5) + /* bonus for a literal object */ + score++; + } + } + + return score; +} + + +static const char* const ntriples_names[2] = { "ntriples", NULL }; + +static const char* const ntriples_uri_strings[3] = { + "http://www.w3.org/ns/formats/N-Triples", + "http://www.w3.org/TR/rdf-testcases/#ntriples", + NULL +}; + +#define NTRIPLES_TYPES_COUNT 2 +static const raptor_type_q 
ntriples_types[NTRIPLES_TYPES_COUNT + 1] = { + { "application/n-triples", 21, 10}, + { "text/plain", 10, 1}, + { NULL, 0, 0} +}; + +static int +raptor_ntriples_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = ntriples_names; + + factory->desc.mime_types = ntriples_types; + + factory->desc.label = "N-Triples"; + factory->desc.uri_strings = ntriples_uri_strings; + + factory->desc.flags = 0; + + factory->context_length = sizeof(raptor_ntriples_parser_context); + + factory->init = raptor_ntriples_parse_init; + factory->terminate = raptor_ntriples_parse_terminate; + factory->start = raptor_ntriples_parse_start; + factory->chunk = raptor_ntriples_parse_chunk; + factory->recognise_syntax = raptor_ntriples_parse_recognise_syntax; + + return rc; +} + + +int +raptor_init_parser_ntriples(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_ntriples_parser_register_factory); +} + +#endif + + +#ifdef RAPTOR_PARSER_NQUADS +static int +raptor_nquads_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score = 0; + int ntriples_score; + + if(suffix) { + if(!strcmp((const char*)suffix, "nq")) + score = 2; + + /* Explicitly refuse to do anything with N-Triples, Turtle or N3 + * named content + */ + if(!strcmp((const char*)suffix, "nt") || + !strcmp((const char*)suffix, "ttl") || + !strcmp((const char*)suffix, "n3")) { + return 0; + } + } + + if(mime_type) { + if(strstr((const char*)mime_type, "nquads")) + score += 2; + } + + /* ntriples is a subset of nquads, score higher than ntriples */ + ntriples_score = raptor_ntriples_parse_recognise_syntax(factory, buffer, len, identifier, suffix, mime_type); + if(ntriples_score > 0) { + score += ntriples_score + 1; + } + + return score; +} + + +static const char* const nquads_names[2] = { "nquads", NULL }; + +static 
const char* const nquads_uri_strings[2] = { + "http://sw.deri.org/2008/07/n-quads/", + NULL +}; + +#define NQUADS_TYPES_COUNT 1 +static const raptor_type_q nquads_types[NQUADS_TYPES_COUNT + 1] = { + { "text/x-nquads", 13, 10}, + { NULL, 0, 0} +}; + +static int +raptor_nquads_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = nquads_names; + + factory->desc.mime_types = nquads_types; + + factory->desc.label = "N-Quads"; + factory->desc.uri_strings = nquads_uri_strings; + + factory->desc.flags = 0; + + factory->context_length = sizeof(raptor_ntriples_parser_context); + + factory->init = raptor_ntriples_parse_init; + factory->terminate = raptor_ntriples_parse_terminate; + factory->start = raptor_ntriples_parse_start; + factory->chunk = raptor_ntriples_parse_chunk; + factory->recognise_syntax = raptor_nquads_parse_recognise_syntax; + + return rc; +} + + +int +raptor_init_parser_nquads(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_nquads_parser_register_factory); +} +#endif diff --git a/src/parsedate.c b/src/parsedate.c new file mode 100644 index 0000000..87dc1bb --- /dev/null +++ b/src/parsedate.c @@ -0,0 +1,2760 @@ +/* A Bison parser, made by GNU Bison 3.8.2. */ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output, and Bison version. */ +#define YYBISON 30802 + +/* Bison version string. */ +#define YYBISON_VERSION "3.8.2" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 1 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + +/* Substitute the type names. */ +#define YYSTYPE RAPTOR_PARSEDATE_STYPE +/* Substitute the variable and function names. 
*/ +#define yyparse raptor_parsedate_parse +#define yylex raptor_parsedate_lex +#define yyerror raptor_parsedate_error +#define yydebug raptor_parsedate_debug +#define yynerrs raptor_parsedate_nerrs + +/* First part of user prologue. */ +#line 1 "./parsedate.y" + +/* + * Imported from the public domain source in PHP 4.4 + * Fri May 20 07:14:01 2005 + * https://github.com/php/php-src/blob/379c4af44aa6456fbdbc69dba3ead166ba7ff67d/ext/standard/parsedate.y + * + * and patched from there + * + * Later versions removed this from PHP and replaced it with entirely + * new code written under the PHP license. That code is not used here + * and cannot be used. + * + */ + + +/* +** Originally written by Steven M. Bellovin <smb@research.att.com> while +** at the University of North Carolina at Chapel Hill. Later tweaked by +** a couple of people on Usenet. Completely overhauled by Rich $alz +** <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990. +** +** This code is in the public domain and has no copyright. +*/ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <sys/types.h> +#include <ctype.h> + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_TIME_H +#include <time.h> +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#if defined(_HPUX_SOURCE) +#include <alloca.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + + +#define ISDIGIT(c) ((unsigned) (c) - '0' <= 9) + +#ifdef HAVE_STRING_H +# include <string.h> +#endif + +#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED +# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif + +/* Some old versions of bison generate parsers that use bcopy. + That loses on systems that don't provide the function, so we have + to redefine it here. 
*/ +#if !defined (HAVE_BCOPY) && defined (HAVE_MEMCPY) && !defined (bcopy) +# define bcopy(from, to, len) memcpy ((to), (from), (len)) +#endif + +/* Prototypes */ +static int raptor_parsedate_error(void* parm, const char *msg); + + +#define EPOCH 1970 +#define HOUR(x) ((x) * 60) + +#define MAX_BUFF_LEN 128 /* size of buffer to read the date into */ + +/* +** An entry in the lexical lookup table. +*/ +typedef struct _TABLE { + const char *name; + int type; + int value; +} TABLE; + + +/* +** Meridian: am, pm, or 24-hour style. +*/ +typedef enum _MERIDIAN { + MERam, MERpm, MER24 +} MERIDIAN; + +struct date_yy { + const char *yyInput; + int yyDayOrdinal; + int yyDayNumber; + int yyHaveDate; + int yyHaveDay; + int yyHaveRel; + int yyHaveTime; + int yyHaveZone; + int yyTimezone; + int yyDay; + int yyHour; + int yyMinutes; + int yyMonth; + int yySeconds; + int yyYear; + MERIDIAN yyMeridian; + int yyRelDay; + int yyRelHour; + int yyRelMinutes; + int yyRelMonth; + int yyRelSeconds; + int yyRelYear; +}; + +typedef union _date_ll { + int Number; + enum _MERIDIAN Meridian; +} date_ll; + +#define YYPARSE_PARAM parm +#define YYLEX_PARAM parm +#define YYSTYPE date_ll +#define YYLTYPE void + +static int yylex (YYSTYPE *lvalp, void *parm); + +static int ToHour (int Hours, MERIDIAN Meridian); +static int ToYear (int Year); +static int LookupWord (YYSTYPE *lvalp, char *buff); + + +#line 219 "parsedate.c" + +# ifndef YY_CAST +# ifdef __cplusplus +# define YY_CAST(Type, Val) static_cast<Type> (Val) +# define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast<Type> (Val) +# else +# define YY_CAST(Type, Val) ((Type) (Val)) +# define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val)) +# endif +# endif +# ifndef YY_NULLPTR +# if defined __cplusplus +# if 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# else +# define YY_NULLPTR ((void*)0) +# endif +# endif + +#include "parsedate.h" +/* Symbol kind. 
*/ +enum yysymbol_kind_t +{ + YYSYMBOL_YYEMPTY = -2, + YYSYMBOL_YYEOF = 0, /* "end of file" */ + YYSYMBOL_YYerror = 1, /* error */ + YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ + YYSYMBOL_tAGO = 3, /* tAGO */ + YYSYMBOL_tDAY = 4, /* tDAY */ + YYSYMBOL_tDAY_UNIT = 5, /* tDAY_UNIT */ + YYSYMBOL_tDAYZONE = 6, /* tDAYZONE */ + YYSYMBOL_tDST = 7, /* tDST */ + YYSYMBOL_tHOUR_UNIT = 8, /* tHOUR_UNIT */ + YYSYMBOL_tID = 9, /* tID */ + YYSYMBOL_tTZONE = 10, /* tTZONE */ + YYSYMBOL_tWZONE = 11, /* tWZONE */ + YYSYMBOL_tZZONE = 12, /* tZZONE */ + YYSYMBOL_tMERIDIAN = 13, /* tMERIDIAN */ + YYSYMBOL_tMINUTE_UNIT = 14, /* tMINUTE_UNIT */ + YYSYMBOL_tMONTH = 15, /* tMONTH */ + YYSYMBOL_tMONTH_UNIT = 16, /* tMONTH_UNIT */ + YYSYMBOL_tSEC_UNIT = 17, /* tSEC_UNIT */ + YYSYMBOL_tSNUMBER = 18, /* tSNUMBER */ + YYSYMBOL_tUNUMBER = 19, /* tUNUMBER */ + YYSYMBOL_tYEAR_UNIT = 20, /* tYEAR_UNIT */ + YYSYMBOL_tZONE = 21, /* tZONE */ + YYSYMBOL_22_ = 22, /* '.' */ + YYSYMBOL_23_ = 23, /* ':' */ + YYSYMBOL_24_ = 24, /* ',' */ + YYSYMBOL_25_ = 25, /* '/' */ + YYSYMBOL_YYACCEPT = 26, /* $accept */ + YYSYMBOL_spec = 27, /* spec */ + YYSYMBOL_item = 28, /* item */ + YYSYMBOL_time = 29, /* time */ + YYSYMBOL_iso8601time_colon = 30, /* iso8601time_colon */ + YYSYMBOL_iso8601zonepart = 31, /* iso8601zonepart */ + YYSYMBOL_sec_fraction_part = 32, /* sec_fraction_part */ + YYSYMBOL_zonepart_numeric_without_colon = 33, /* zonepart_numeric_without_colon */ + YYSYMBOL_zonepart_numeric_with_colon = 34, /* zonepart_numeric_with_colon */ + YYSYMBOL_HMStime_with_colon = 35, /* HMStime_with_colon */ + YYSYMBOL_HMtime_with_colon = 36, /* HMtime_with_colon */ + YYSYMBOL_zone = 37, /* zone */ + YYSYMBOL_day = 38, /* day */ + YYSYMBOL_date = 39, /* date */ + YYSYMBOL_iso8601datetime = 40, /* iso8601datetime */ + YYSYMBOL_iso8601date = 41, /* iso8601date */ + YYSYMBOL_iso8601weekspec = 42, /* iso8601weekspec */ + YYSYMBOL_iso8601time = 43, /* iso8601time */ + YYSYMBOL_rel = 44, /* rel */ + YYSYMBOL_relunit = 45, 
/* relunit */ + YYSYMBOL_number = 46, /* number */ + YYSYMBOL_o_merid = 47 /* o_merid */ +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + + + + +#ifdef short +# undef short +#endif + +/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure + <limits.h> and (if available) <stdint.h> are included + so that the code can choose integer types of a good width. */ + +#ifndef __PTRDIFF_MAX__ +# include <limits.h> /* INFRINGES ON USER NAME SPACE */ +# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stdint.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_STDINT_H +# endif +#endif + +/* Narrow types that promote to a signed type and that can represent a + signed or unsigned integer of at least N bits. In tables they can + save space and decrease cache pressure. Promoting to a signed type + helps avoid bugs in integer arithmetic. */ + +#ifdef __INT_LEAST8_MAX__ +typedef __INT_LEAST8_TYPE__ yytype_int8; +#elif defined YY_STDINT_H +typedef int_least8_t yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef __INT_LEAST16_MAX__ +typedef __INT_LEAST16_TYPE__ yytype_int16; +#elif defined YY_STDINT_H +typedef int_least16_t yytype_int16; +#else +typedef short yytype_int16; +#endif + +/* Work around bug in HP-UX 11.23, which defines these macros + incorrectly for preprocessor constants. This workaround can likely + be removed in 2023, as HPE has promised support for HP-UX 11.23 + (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of + <https://h20195.www2.hpe.com/V2/getpdf.aspx/4AA4-7673ENW.pdf>. 
*/ +#ifdef __hpux +# undef UINT_LEAST8_MAX +# undef UINT_LEAST16_MAX +# define UINT_LEAST8_MAX 255 +# define UINT_LEAST16_MAX 65535 +#endif +/* Unsigned counterparts of yytype_int8/16. An unsigned type is chosen only when its maximum is known to fit in int; otherwise a signed type (short or int) is used instead. */ +#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST8_TYPE__ yytype_uint8; +#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST8_MAX <= INT_MAX) +typedef uint_least8_t yytype_uint8; +#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX +typedef unsigned char yytype_uint8; +#else +typedef short yytype_uint8; /* signed fallback: none of the unsigned candidates above qualified */ +#endif + +#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST16_TYPE__ yytype_uint16; +#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST16_MAX <= INT_MAX) +typedef uint_least16_t yytype_uint16; +#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX +typedef unsigned short yytype_uint16; +#else +typedef int yytype_uint16; /* signed fallback: none of the unsigned candidates above qualified */ +#endif +/* YYPTRDIFF_T / YYPTRDIFF_MAXIMUM -- signed type used for stack sizes and element counts, and its largest value. Left alone if YYPTRDIFF_T is already defined; long / LONG_MAX when nothing better is known. */ +#ifndef YYPTRDIFF_T +# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__ +# define YYPTRDIFF_T __PTRDIFF_TYPE__ +# define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__ +# elif defined PTRDIFF_MAX +# ifndef ptrdiff_t +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# endif +# define YYPTRDIFF_T ptrdiff_t +# define YYPTRDIFF_MAXIMUM PTRDIFF_MAX +# else +# define YYPTRDIFF_T long +# define YYPTRDIFF_MAXIMUM LONG_MAX +# endif +#endif +/* YYSIZE_T -- unsigned size type (yyparse casts the stack allocation size to it). Left alone if already defined; plain unsigned is the last resort. */ +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned +# endif +#endif +/* YYSIZE_MAXIMUM -- the smaller of YYPTRDIFF_MAXIMUM and the largest YYSIZE_T value, expressed as a YYPTRDIFF_T. YY_CAST is defined outside this part of the file; presumably a plain cast -- TODO confirm. */ +#define YYSIZE_MAXIMUM \ + YY_CAST (YYPTRDIFF_T, \ + (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \ + ? YYPTRDIFF_MAXIMUM \ + : YY_CAST (YYSIZE_T, -1))) +/* YYSIZEOF(X) -- sizeof (X) converted to the signed YYPTRDIFF_T. */ +#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + + +/* Stored state numbers (used for stacks). 
*/ +typedef yytype_int8 yy_state_t; /* 8 bits are enough here: YYNSTATES is 100 */ + +/* State numbers in computations. */ +typedef int yy_state_fast_t; +/* YY_(Msgid) -- message translation hook: dgettext in the "bison-runtime" domain when both YYENABLE_NLS and ENABLE_NLS are nonzero, otherwise Msgid unchanged. Only defined here if YY_ is not already defined. */ +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + +/* YY_ATTRIBUTE_PURE / YY_ATTRIBUTE_UNUSED -- GCC function attributes, or nothing when the compiler test fails. */ +#ifndef YY_ATTRIBUTE_PURE +# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) +# else +# define YY_ATTRIBUTE_PURE +# endif +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +# else +# define YY_ATTRIBUTE_UNUSED +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YY_USE(E) ((void) (E)) +#else +# define YY_USE(E) /* empty */ +#endif + +/* Suppress an incorrect diagnostic about yylval being uninitialized. With GCC 4.6 or newer (and not ICC) the warning is disabled by pragma around the affected statements; otherwise YY_INITIAL_VALUE is defined to pass its argument through. */ +#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__ +# if __GNUC__ * 100 + __GNUC_MINOR__ < 407 +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") +# else +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# endif +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + +#if defined __cplusplus && defined __GNUC__ && ! 
defined __ICC && 6 <= __GNUC__ +# define YY_IGNORE_USELESS_CAST_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"") +# define YY_IGNORE_USELESS_CAST_END \ + _Pragma ("GCC diagnostic pop") +#endif +#ifndef YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_END +#endif + +/* YY_ASSERT(E) -- E must compile, but the leading 0 && means it is never evaluated at run time. */ +#define YY_ASSERT(E) ((void) (0 && (E))) + +#if !defined yyoverflow + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca /* NOTE(review): like the __BUILTIN_VA_ARG_INCR branch, this branch does not define YYSTACK_ALLOC, so the #ifdef YYSTACK_ALLOC test below selects the YYMALLOC path */ +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! 
((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS /* this conditional has an empty body in this file: no malloc prototype is declared here */ +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS /* this conditional has an empty body in this file: no free prototype is declared here */ +# endif +# endif +# endif +#endif /* !defined yyoverflow */ + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined RAPTOR_PARSEDATE_STYPE_IS_TRIVIAL && RAPTOR_PARSEDATE_STYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. It has one member per parser stack: states (yyss_alloc) and semantic values (yyvs_alloc). */ +union yyalloc +{ + yy_state_t yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1) + +/* The size of an array large enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. The macro expands to code that uses yyptr, yysize and yystacksize from the enclosing scope, so it is only usable inside yyparse. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYPTRDIFF_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / YYSIZEOF (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. 
*/ +# ifndef YYCOPY /* only defined here when no YYCOPY has been provided already */ +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYPTRDIFF_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. yyparse accepts as soon as it enters this state. */ +#define YYFINAL 2 +/* YYLAST -- Last index in YYTABLE. yytable and yycheck each hold YYLAST + 1 entries. */ +#define YYLAST 110 + +/* YYNTOKENS -- Number of terminals (symbol kinds 0..25). */ +#define YYNTOKENS 26 +/* YYNNTS -- Number of nonterminals (symbol kinds 26..47). */ +#define YYNNTS 22 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 77 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 100 + +/* YYMAXUTOK -- Last valid token kind. yytranslate holds YYMAXUTOK + 1 entries. */ +#define YYMAXUTOK 276 + + +/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, with out-of-bounds checking. Anything outside 0..YYMAXUTOK yields YYSYMBOL_YYUNDEF. */ +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex. 
*/ +static const yytype_int8 yytranslate[] = /* 277 entries. Index 0 maps to 0 (end of file); character codes 44, 46, 47 and 58 map to symbols 24, 22, 25 and 23; indices 256..276 map to symbols 1..21; every other index maps to 2 (YYSYMBOL_YYUNDEF). */ +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 24, 2, 22, 25, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 23, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21 +}; + +#if RAPTOR_PARSEDATE_DEBUG +/* YYRLINE[YYN] -- Source line where rule number YYN was defined. The numbers are lines of ./parsedate.y and match the #line directives emitted in front of the rule actions in yyparse. Only compiled when RAPTOR_PARSEDATE_DEBUG is nonzero. */ +static const yytype_int16 yyrline[] = +{ + 0, 177, 177, 178, 181, 184, 187, 190, 193, 196, + 197, 200, 206, 210, 213, 217, 220, 226, 229, 232, + 235, 238, 240, 243, 253, 259, 265, 281, 284, 287, + 290, 293, 296, 301, 305, 309, 315, 319, 330, 348, + 349, 352, 358, 363, 371, 376, 384, 391, 392, 411, + 417, 423, 435, 438, 444, 445, 470, 484, 487, 490, + 493, 496, 499, 502, 505, 508, 511, 514, 517, 520, + 523, 526, 529, 532, 535, 538, 543, 578 +}; +#endif + +/** Accessing symbol of state STATE, looked up in yystos and converted to yysymbol_kind_t. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if RAPTOR_PARSEDATE_DEBUG || 0 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. Declared separately so that YY_ATTRIBUTE_UNUSED can be attached to it. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = /* 48 names (YYNTOKENS + YYNNTS) followed by a YY_NULLPTR terminator */ +{ + "\"end of file\"", "error", "\"invalid token\"", "tAGO", "tDAY", + "tDAY_UNIT", "tDAYZONE", "tDST", "tHOUR_UNIT", "tID", "tTZONE", "tWZONE", + "tZZONE", "tMERIDIAN", "tMINUTE_UNIT", "tMONTH", "tMONTH_UNIT", + "tSEC_UNIT", "tSNUMBER", "tUNUMBER", "tYEAR_UNIT", "tZONE", "'.'", "':'", + "','", "'/'", "$accept", "spec", "item", "time", "iso8601time_colon", + "iso8601zonepart", "sec_fraction_part", "zonepart_numeric_without_colon", + "zonepart_numeric_with_colon", "HMStime_with_colon", "HMtime_with_colon", + "zone", "day", "date", "iso8601datetime", "iso8601date", + "iso8601weekspec", "iso8601time", "rel", "relunit", "number", "o_merid", YY_NULLPTR +}; +/* Return yytname[yysymbol]. The index is used as-is, so YYSYMBOL_YYEMPTY (-2) must not be passed. */ +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} +#endif +/* YYPACT_NINF -- yypact entry meaning "take the default action for this state"; tested through yypact_value_is_default in yybackup. */ +#define YYPACT_NINF (-60) + +#define yypact_value_is_default(Yyn) \ + ((Yyn) == YYPACT_NINF) + +#define YYTABLE_NINF (-1) +/* Constant 0: the yytable array in this file contains no YYTABLE_NINF (-1) entry, so there is nothing to test. */ +#define yytable_value_is_error(Yyn) \ + 0 + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. One entry per state (YYNSTATES = 100). An entry equal to YYPACT_NINF sends yybackup straight to yydefault without reading a lookahead token. */ +static const yytype_int8 yypact[] = +{ + -60, 2, -60, -13, -60, -60, -60, -60, -60, -60, + -60, -60, 13, -60, -60, 69, 20, -60, 32, -60, + -60, -60, 29, 4, -60, -60, -60, -60, 44, -60, + 58, -60, -60, -60, -15, -60, -60, -60, -60, -60, + -60, -60, -60, -60, 46, 48, -60, -60, 28, -60, + -60, 37, -60, 56, 57, -60, -60, 59, 52, 61, + 52, 46, -60, 64, 62, 26, -60, -60, 66, -60, + -60, -60, -60, 68, -60, 36, 74, -60, -60, -60, + -60, -60, -60, -60, -60, -60, 76, -60, 86, -60, + 80, 81, 78, 79, -60, -60, -60, 84, 87, -60 +}; + +/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. 
*/ +static const yytype_int8 yydefact[] = /* one entry per state (YYNSTATES = 100); read by the yydefault step of yyparse */ +{ + 2, 0, 1, 33, 66, 31, 69, 27, 28, 29, + 77, 72, 0, 63, 75, 0, 76, 60, 30, 3, + 4, 12, 22, 22, 5, 7, 6, 40, 39, 8, + 57, 9, 10, 34, 43, 65, 68, 71, 62, 74, + 59, 35, 64, 67, 0, 0, 11, 70, 45, 61, + 73, 50, 58, 0, 0, 51, 32, 0, 20, 0, + 20, 0, 56, 42, 0, 22, 54, 48, 52, 41, + 46, 49, 26, 36, 21, 23, 0, 15, 17, 18, + 19, 13, 25, 16, 14, 47, 0, 44, 20, 53, + 0, 0, 0, 23, 55, 38, 24, 0, 0, 37 +}; + +/* YYPGOTO[NTERM-NUM]. One entry per nonterminal (YYNNTS = 22). NOTE(review): its consumer is not in the visible part of yyparse; presumably the goto computation that follows a reduction -- confirm. */ +static const yytype_int8 yypgoto[] = +{ + -60, -60, -60, -60, 104, -59, -23, -60, -60, -60, + -60, 107, -60, -60, -60, -60, -60, 49, -55, -60, + -60, -60 +}; + +/* YYDEFGOTO[NTERM-NUM]. One entry per nonterminal (YYNNTS = 22). NOTE(review): consumer not visible here either; presumably the default state to enter after reducing to that nonterminal -- confirm. */ +static const yytype_int8 yydefgoto[] = +{ + 0, 1, 19, 20, 66, 77, 58, 78, 79, 22, + 23, 80, 25, 26, 27, 28, 55, 67, 29, 30, + 31, 32 +}; + +/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. Holds YYLAST + 1 = 111 entries. yybackup indexes it with yypact[state] + lookahead symbol, and trusts a slot only when the same index in yycheck holds that symbol. */ +static const yytype_int8 yytable[] = +{ + 60, 83, 2, 81, 63, 84, 3, 4, 5, 64, + 6, 33, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 41, 42, 57, 59, 43, 94, + 44, 45, 34, 46, 47, 48, 49, 50, 51, 56, + 52, 35, 88, 53, 36, 54, 69, 70, 57, 53, + 37, 57, 38, 39, 61, 71, 40, 4, 5, 91, + 6, 62, 7, 8, 9, 65, 11, 68, 13, 14, + 75, 76, 17, 18, 35, 72, 73, 36, 74, 42, + 82, 87, 43, 37, 89, 38, 39, 86, 47, 40, + 49, 50, 5, 90, 52, 92, 7, 8, 9, 95, + 96, 97, 91, 98, 93, 21, 99, 18, 24, 0, + 85 +}; +/* YYCHECK -- parallel to YYTABLE (same 111 indices). yycheck[i] is the symbol for which yytable[i] is valid; on a mismatch yybackup falls back to yydefault. */ +static const yytype_int8 yycheck[] = +{ + 23, 60, 0, 58, 19, 60, 4, 5, 6, 24, + 8, 24, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 4, 5, 22, 23, 8, 88, + 10, 11, 19, 13, 14, 15, 16, 17, 18, 7, + 20, 5, 65, 23, 8, 25, 18, 19, 22, 23, + 14, 22, 16, 17, 10, 18, 20, 5, 6, 23, + 8, 3, 10, 11, 12, 19, 14, 19, 16, 17, + 18, 19, 20, 21, 5, 19, 19, 8, 19, 5, + 19, 19, 8, 14, 18, 16, 17, 23, 14, 20, + 16, 17, 6, 25, 20, 19, 10, 11, 12, 19, + 19, 23, 23, 19, 18, 1, 19, 21, 1, -1, + 61 +}; + +/* 
YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of + state STATE-NUM. */ +static const yytype_int8 yystos[] = /* one entry per state (YYNSTATES = 100); read through YY_ACCESSING_SYMBOL */ +{ + 0, 27, 0, 4, 5, 6, 8, 10, 11, 12, + 13, 14, 15, 16, 17, 18, 19, 20, 21, 28, + 29, 30, 35, 36, 37, 38, 39, 40, 41, 44, + 45, 46, 47, 24, 19, 5, 8, 14, 16, 17, + 20, 4, 5, 8, 10, 11, 13, 14, 15, 16, + 17, 18, 20, 23, 25, 42, 7, 22, 32, 23, + 32, 10, 3, 19, 24, 19, 30, 43, 19, 18, + 19, 18, 19, 19, 19, 18, 19, 31, 33, 34, + 37, 44, 19, 31, 44, 43, 23, 19, 32, 18, + 25, 23, 19, 18, 31, 19, 19, 23, 19, 19 +}; + +/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. Rule entries are nonterminal kinds (26..47). The table has YYNRULES + 1 = 78 entries; index 0 holds 0 and does not correspond to a rule. */ +static const yytype_int8 yyr1[] = +{ + 0, 26, 27, 27, 28, 28, 28, 28, 28, 28, + 28, 29, 29, 30, 30, 30, 30, 31, 31, 31, + 31, 32, 32, 33, 34, 35, 36, 37, 37, 37, + 37, 37, 37, 38, 38, 38, 39, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 40, 40, 41, + 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, + 45, 45, 45, 45, 45, 45, 45, 45, 45, 45, + 45, 45, 45, 45, 45, 45, 46, 47 +}; + +/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. 
*/ +static const yytype_int8 yyr2[] = /* yyreduce copies yyr2[yyn] into yylen; e.g. entry 37 is 8, the length of the eight-symbol tMONTH date rule */ +{ + 0, 2, 0, 2, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 3, 3, 3, 3, 1, 1, 1, + 0, 2, 0, 1, 3, 3, 3, 1, 1, 1, + 1, 1, 2, 1, 2, 2, 3, 8, 5, 1, + 1, 3, 3, 2, 4, 2, 3, 3, 3, 3, + 2, 2, 2, 3, 1, 3, 2, 1, 2, 2, + 1, 2, 2, 1, 2, 2, 1, 2, 2, 1, + 2, 2, 1, 2, 2, 1, 1, 1 +}; + + +enum { YYENOMEM = -2 }; +/* The macros below expand to code that refers to yyparse locals (yyerrstatus, yychar, yylval, yylen, yystate, yyssp) and to goto labels, so they are only meaningful inside yyparse. */ +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = RAPTOR_PARSEDATE_EMPTY) + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab +#define YYNOMEM goto yyexhaustedlab + + +#define YYRECOVERING() (!!yyerrstatus) +/* YYBACKUP(Token, Value) -- if no lookahead is pending, install Token/Value as the lookahead, pop yylen stack entries and resume at yybackup; otherwise report "syntax error: cannot back up" through yyerror and take the YYERROR path. */ +#define YYBACKUP(Token, Value) \ + do \ + if (yychar == RAPTOR_PARSEDATE_EMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (parm, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ + while (0) + +/* Backward compatibility with an undocumented macro. + Use RAPTOR_PARSEDATE_error or RAPTOR_PARSEDATE_UNDEF. */ +#define YYERRCODE RAPTOR_PARSEDATE_UNDEF + + +/* Enable debugging if requested. Everything from here to the matching #else is compiled only when RAPTOR_PARSEDATE_DEBUG is nonzero. */ +#if RAPTOR_PARSEDATE_DEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif +/* YYDPRINTF(Args) -- Args is a parenthesized YYFPRINTF argument list; it is printed only when yydebug is nonzero. */ +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + + + +/* YY_SYMBOL_PRINT -- when yydebug is nonzero, print Title, the symbol (via yy_symbol_print) and a newline on stderr. The Location argument is accepted but never expanded, and a variable named parm must be in scope at the call site. */ +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Kind, Value, parm); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*-----------------------------------. +| Print this symbol's value on YYO. 
| +`-----------------------------------*/ +/* The body contains no output call: it only marks its arguments as used (YY_USE) and returns early when yyvaluep is null. */ +static void +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, struct date_yy *parm) +{ + FILE *yyoutput = yyo; + YY_USE (yyoutput); + YY_USE (parm); + if (!yyvaluep) + return; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + +/*---------------------------. +| Print this symbol on YYO. | +`---------------------------*/ +/* Writes "token NAME (" or "nterm NAME (" -- token when yykind < YYNTOKENS -- then the value through yy_symbol_value_print, then ")". */ +static void +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, struct date_yy *parm) +{ + YYFPRINTF (yyo, "%s %s (", + yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind)); + + yy_symbol_value_print (yyo, yykind, yyvaluep, parm); + YYFPRINTF (yyo, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ +/* Output goes to stderr: "Stack now", then each state number, then a newline. */ +static void +yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ +/* Prints the rule number (as yyrule - 1) with its source line from yyrline, then each right-hand-side symbol as $1..$N. Symbol kinds come from the state stack (yyssp) and values from the value stack (yyvsp). */ +static void +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, + int yyrule, struct date_yy *parm) +{ + int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n", + yyrule - 1, yylno); + /* The symbols being reduced. 
*/ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)], parm); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule, parm); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !RAPTOR_PARSEDATE_DEBUG: the tracing macros below expand to nothing (YYDPRINTF to a void expression) */ +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !RAPTOR_PARSEDATE_DEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. yyparse declares its two on-stack arrays (yyssa and yyvsa) with this many elements. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + + + + + +/*-----------------------------------------------. +| Release the memory associated to this symbol. | +`-----------------------------------------------*/ +/* The body frees nothing: it traces the symbol through YY_SYMBOL_PRINT (the message defaults to "Deleting") and marks its arguments as used. yylocationp is not declared in this function; the call compiles only because neither definition of YY_SYMBOL_PRINT expands its Location argument. */ +static void +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep, struct date_yy *parm) +{ + YY_USE (yyvaluep); + YY_USE (parm); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (struct date_yy *parm) +{ +/* Lookahead token kind. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. 
*/ +YY_INITIAL_VALUE (static YYSTYPE yyval_default;) +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + + /* Number of syntax errors so far. */ + int yynerrs = 0; + + yy_state_fast_t yystate = 0; + /* Number of tokens to shift before error messages enabled. */ + int yyerrstatus = 0; + + /* Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ + yy_state_t yyssa[YYINITDEPTH]; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; + + /* The semantic value stack: array, bottom, top. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; + + int yyn; + /* The return value of yyparse. */ + int yyresult; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + + + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yychar = RAPTOR_PARSEDATE_EMPTY; /* Cause a token to be read. */ + + goto yysetstate; + + +/*------------------------------------------------------------. +| yynewstate -- push a new state, which is found in yystate. | +`------------------------------------------------------------*/ +yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + +/*--------------------------------------------------------------------. +| yysetstate -- set current state (the top of the stack) to yystate. 
| +`--------------------------------------------------------------------*/ +yysetstate: + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_IGNORE_USELESS_CAST_BEGIN + *yyssp = YY_CAST (yy_state_t, yystate); + YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp); + + if (yyss + yystacksize - 1 <= yyssp) +#if !defined yyoverflow && !defined YYSTACK_RELOCATE + YYNOMEM; +#else + { + /* Get the current used size of the three stacks, in elements. */ + YYPTRDIFF_T yysize = yyssp - yyss + 1; + +# if defined yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + yy_state_t *yyss1 = yyss; + YYSTYPE *yyvs1 = yyvs; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * YYSIZEOF (*yyssp), + &yyvs1, yysize * YYSIZEOF (*yyvsp), + &yystacksize); + yyss = yyss1; + yyvs = yyvs1; + } +# else /* defined YYSTACK_RELOCATE */ + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + YYNOMEM; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yy_state_t *yyss1 = yyss; + union yyalloc *yyptr = + YY_CAST (union yyalloc *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); + if (! 
yyptr) + YYNOMEM; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YY_IGNORE_USELESS_CAST_BEGIN + YYDPRINTF ((stderr, "Stack size increased to %ld\n", + YY_CAST (long, yystacksize))); + YY_IGNORE_USELESS_CAST_END + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } +#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */ + if (yychar == RAPTOR_PARSEDATE_EMPTY) + { + YYDPRINTF ((stderr, "Reading a token\n")); + yychar = yylex (&yylval, parm); + } + + if (yychar <= RAPTOR_PARSEDATE_EOF) + { + yychar = RAPTOR_PARSEDATE_EOF; + yytoken = YYSYMBOL_YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else if (yychar == RAPTOR_PARSEDATE_error) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. */ + yychar = RAPTOR_PARSEDATE_UNDEF; + yytoken = YYSYMBOL_YYerror; + goto yyerrlab1; + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. 
*/ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + /* Discard the shifted token. */ + yychar = RAPTOR_PARSEDATE_EMPTY; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. 
*/ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 4: /* item: time */ +#line 181 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveTime++; + } +#line 1328 "parsedate.c" + break; + + case 5: /* item: zone */ +#line 184 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveZone++; + } +#line 1336 "parsedate.c" + break; + + case 6: /* item: date */ +#line 187 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveDate++; + } +#line 1344 "parsedate.c" + break; + + case 7: /* item: day */ +#line 190 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveDay++; + } +#line 1352 "parsedate.c" + break; + + case 8: /* item: rel */ +#line 193 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveRel++; + } +#line 1360 "parsedate.c" + break; + + case 11: /* time: tUNUMBER tMERIDIAN */ +#line 200 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHour = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyMinutes = 0; + ((struct date_yy *)parm)->yySeconds = 0; + ((struct date_yy *)parm)->yyMeridian = (yyvsp[0].Meridian); + } +#line 1371 "parsedate.c" + break; + + case 13: /* iso8601time_colon: HMStime_with_colon sec_fraction_part rel */ +#line 210 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMeridian = MER24; + } +#line 1379 "parsedate.c" + break; + + case 14: /* iso8601time_colon: HMtime_with_colon sec_fraction_part rel */ +#line 213 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMeridian = MER24; + ((struct date_yy *)parm)->yySeconds = 0; + } +#line 1388 "parsedate.c" + break; + + case 15: /* iso8601time_colon: HMStime_with_colon sec_fraction_part iso8601zonepart */ +#line 217 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMeridian = MER24; + } +#line 1396 "parsedate.c" + break; + + case 16: /* iso8601time_colon: HMtime_with_colon sec_fraction_part iso8601zonepart */ +#line 220 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMeridian = MER24; + ((struct date_yy *)parm)->yySeconds = 0; + } +#line 1405 "parsedate.c" + break; + + case 
17: /* iso8601zonepart: zonepart_numeric_without_colon */ +#line 226 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveZone++; + } +#line 1413 "parsedate.c" + break; + + case 18: /* iso8601zonepart: zonepart_numeric_with_colon */ +#line 229 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveZone++; + } +#line 1421 "parsedate.c" + break; + + case 19: /* iso8601zonepart: zone */ +#line 232 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveZone++; + } +#line 1429 "parsedate.c" + break; + + case 21: /* sec_fraction_part: '.' tUNUMBER */ +#line 238 "./parsedate.y" + { + } +#line 1436 "parsedate.c" + break; + + case 23: /* zonepart_numeric_without_colon: tSNUMBER */ +#line 243 "./parsedate.y" + { + /* format: [+-]hhmm */ + if((yyvsp[0].Number) <= -100 || (yyvsp[0].Number) >= 100) { + ((struct date_yy *)parm)->yyTimezone = (-(yyvsp[0].Number) / 100) * 60 + (-(yyvsp[0].Number) % 100); + } else if((yyvsp[0].Number) >= -99 && (yyvsp[0].Number) <= 99) { + ((struct date_yy *)parm)->yyTimezone = -(yyvsp[0].Number) * 60; + } + } +#line 1449 "parsedate.c" + break; + + case 24: /* zonepart_numeric_with_colon: tSNUMBER ':' tUNUMBER */ +#line 253 "./parsedate.y" + { + /* format: [+-]hh:mm */ + ((struct date_yy *)parm)->yyTimezone = -(yyvsp[-2].Number) * 60 + ((yyvsp[-2].Number) > 0 ? 
-(yyvsp[0].Number): (yyvsp[0].Number)); + } +#line 1458 "parsedate.c" + break; + + case 25: /* HMStime_with_colon: HMtime_with_colon ':' tUNUMBER */ +#line 259 "./parsedate.y" + { + /* format: hh:mm:ss */ + ((struct date_yy *)parm)->yySeconds = (yyvsp[0].Number); + } +#line 1467 "parsedate.c" + break; + + case 26: /* HMtime_with_colon: tUNUMBER ':' tUNUMBER */ +#line 265 "./parsedate.y" + { + /* format: hh:mm */ + ((struct date_yy *)parm)->yyHour = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyMinutes = (yyvsp[0].Number); + } +#line 1477 "parsedate.c" + break; + + case 27: /* zone: tTZONE */ +#line 281 "./parsedate.y" + { + ((struct date_yy *)parm)->yyTimezone = (yyvsp[0].Number); + } +#line 1485 "parsedate.c" + break; + + case 28: /* zone: tWZONE */ +#line 284 "./parsedate.y" + { + ((struct date_yy *)parm)->yyTimezone = (yyvsp[0].Number); + } +#line 1493 "parsedate.c" + break; + + case 29: /* zone: tZZONE */ +#line 287 "./parsedate.y" + { + ((struct date_yy *)parm)->yyTimezone = (yyvsp[0].Number); + } +#line 1501 "parsedate.c" + break; + + case 30: /* zone: tZONE */ +#line 290 "./parsedate.y" + { + ((struct date_yy *)parm)->yyTimezone = (yyvsp[0].Number); + } +#line 1509 "parsedate.c" + break; + + case 31: /* zone: tDAYZONE */ +#line 293 "./parsedate.y" + { + ((struct date_yy *)parm)->yyTimezone = (yyvsp[0].Number) - 60; + } +#line 1517 "parsedate.c" + break; + + case 32: /* zone: tZONE tDST */ +#line 296 "./parsedate.y" + { + ((struct date_yy *)parm)->yyTimezone = (yyvsp[-1].Number) - 60; + } +#line 1525 "parsedate.c" + break; + + case 33: /* day: tDAY */ +#line 301 "./parsedate.y" + { + ((struct date_yy *)parm)->yyDayOrdinal = 1; + ((struct date_yy *)parm)->yyDayNumber = (yyvsp[0].Number); + } +#line 1534 "parsedate.c" + break; + + case 34: /* day: tDAY ',' */ +#line 305 "./parsedate.y" + { + ((struct date_yy *)parm)->yyDayOrdinal = 1; + ((struct date_yy *)parm)->yyDayNumber = (yyvsp[-1].Number); + } +#line 1543 "parsedate.c" + break; + + case 35: /* day: 
tUNUMBER tDAY */ +#line 309 "./parsedate.y" + { + ((struct date_yy *)parm)->yyDayOrdinal = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyDayNumber = (yyvsp[0].Number); + } +#line 1552 "parsedate.c" + break; + + case 36: /* date: tUNUMBER '/' tUNUMBER */ +#line 315 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[0].Number); + } +#line 1561 "parsedate.c" + break; + + case 37: /* date: tMONTH tUNUMBER tUNUMBER ':' tUNUMBER ':' tUNUMBER tUNUMBER */ +#line 319 "./parsedate.y" + { + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + ((struct date_yy *)parm)->yyMonth = (yyvsp[-7].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[-6].Number); + + ((struct date_yy *)parm)->yyHour = (yyvsp[-5].Number); + ((struct date_yy *)parm)->yyMinutes = (yyvsp[-3].Number); + ((struct date_yy *)parm)->yySeconds = (yyvsp[-1].Number); + + ((struct date_yy *)parm)->yyHaveTime = 1; + } +#line 1577 "parsedate.c" + break; + + case 38: /* date: tUNUMBER '/' tUNUMBER '/' tUNUMBER */ +#line 330 "./parsedate.y" + { + /* Interpret as YYYY/MM/DD if $1 >= 1000, otherwise as MM/DD/YY. + The goal in recognizing YYYY/MM/DD is solely to support legacy + machine-generated dates like those in an RCS log listing. If + you want portability, use the ISO 8601 format. 
*/ + if((yyvsp[-4].Number) >= 1000) + { + ((struct date_yy *)parm)->yyYear = (yyvsp[-4].Number); + ((struct date_yy *)parm)->yyMonth = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[0].Number); + } + else + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[-4].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + } + } +#line 1600 "parsedate.c" + break; + + case 40: /* date: iso8601datetime */ +#line 349 "./parsedate.y" + { + ((struct date_yy *)parm)->yyHaveTime++; + } +#line 1608 "parsedate.c" + break; + + case 41: /* date: tUNUMBER tMONTH tSNUMBER */ +#line 352 "./parsedate.y" + { + /* e.g. 17-JUN-1992. */ + ((struct date_yy *)parm)->yyDay = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyMonth = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyYear = -(yyvsp[0].Number); + } +#line 1619 "parsedate.c" + break; + + case 42: /* date: tMONTH tUNUMBER tUNUMBER */ +#line 358 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + } +#line 1629 "parsedate.c" + break; + + case 43: /* date: tMONTH tUNUMBER */ +#line 363 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[-1].Number); + if((yyvsp[0].Number) > 1000) { + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + } else { + ((struct date_yy *)parm)->yyDay = (yyvsp[0].Number); + } + } +#line 1642 "parsedate.c" + break; + + case 44: /* date: tMONTH tUNUMBER ',' tUNUMBER */ +#line 371 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[-3].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + } +#line 1652 "parsedate.c" + break; + + case 45: /* date: tUNUMBER tMONTH */ +#line 376 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[0].Number); + if((yyvsp[-1].Number) > 1000) { + ((struct 
date_yy *)parm)->yyYear = (yyvsp[-1].Number); + } else { + ((struct date_yy *)parm)->yyDay = (yyvsp[-1].Number); + } + } +#line 1665 "parsedate.c" + break; + + case 46: /* date: tUNUMBER tMONTH tUNUMBER */ +#line 384 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMonth = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyDay = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + } +#line 1675 "parsedate.c" + break; + + case 48: /* iso8601datetime: tUNUMBER tTZONE iso8601time */ +#line 392 "./parsedate.y" + { + int i = (yyvsp[-2].Number); + + if(i >= 10000) { + /* format: yyyymmdd */ + ((struct date_yy *)parm)->yyYear = i / 10000; + i %= 10000; + ((struct date_yy *)parm)->yyMonth = i / 100; + i %= 100; + ((struct date_yy *)parm)->yyDay = i; + } else if(i >= 1000 && i <= 9999) { + /* format: yyyy */ + ((struct date_yy *)parm)->yyYear = i; + ((struct date_yy *)parm)->yyDay= 1; + ((struct date_yy *)parm)->yyMonth = 1; + } + } +#line 1697 "parsedate.c" + break; + + case 49: /* iso8601date: tUNUMBER tSNUMBER tSNUMBER */ +#line 411 "./parsedate.y" + { + /* ISO 8601 format. yyyy-mm-dd. 
*/ + ((struct date_yy *)parm)->yyYear = (yyvsp[-2].Number); + ((struct date_yy *)parm)->yyMonth = -(yyvsp[-1].Number); + ((struct date_yy *)parm)->yyDay = -(yyvsp[0].Number); + } +#line 1708 "parsedate.c" + break; + + case 50: /* iso8601date: tUNUMBER tSNUMBER */ +#line 417 "./parsedate.y" + { + /* ISO 8601 format yyyy-mm */ + ((struct date_yy *)parm)->yyYear = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyMonth = -(yyvsp[0].Number); + ((struct date_yy *)parm)->yyDay = 1; + } +#line 1719 "parsedate.c" + break; + + case 51: /* iso8601date: tUNUMBER iso8601weekspec */ +#line 423 "./parsedate.y" + { + const int om = (1 + 9) % 12; /* offset month */ + const int oy = (yyvsp[-1].Number) - 1; /* offset year */ + + ((struct date_yy *)parm)->yyYear = (yyvsp[-1].Number); + ((struct date_yy *)parm)->yyMonth = 1; + /* Zeller's formula */ + ((struct date_yy *)parm)->yyDay -= ((13 * om + 12) / 5 + + oy + oy / 4 + oy / 400 - oy / 100) % 7 - 1; + } +#line 1734 "parsedate.c" + break; + + case 52: /* iso8601weekspec: tWZONE tUNUMBER */ +#line 435 "./parsedate.y" + { + ((struct date_yy *)parm)->yyDay = ((yyvsp[0].Number) / 10) * 7 + ((yyvsp[0].Number) % 10) - 8; + } +#line 1742 "parsedate.c" + break; + + case 53: /* iso8601weekspec: tWZONE tUNUMBER tSNUMBER */ +#line 438 "./parsedate.y" + { + ((struct date_yy *)parm)->yyDay = (yyvsp[-1].Number) * 7 - (yyvsp[0].Number) - 8; + } +#line 1750 "parsedate.c" + break; + + case 55: /* iso8601time: tUNUMBER sec_fraction_part iso8601zonepart */ +#line 445 "./parsedate.y" + { + int i = (yyvsp[-2].Number); + + if(i <= -100000 || i >= 100000) { + ((struct date_yy *)parm)->yyHour = i / 10000; + i %= 10000; + ((struct date_yy *)parm)->yyMinutes = i / 100; + i %= 100; + ((struct date_yy *)parm)->yySeconds = i; + } else if(i <= -1000 || i >= 1000) { + ((struct date_yy *)parm)->yyHour = i / 100; + i %= 100; + ((struct date_yy *)parm)->yyMinutes = i; + ((struct date_yy *)parm)->yySeconds = 0; + } else if(i >= -99 && i <= 99) { + ((struct date_yy 
*)parm)->yyHour = i; + ((struct date_yy *)parm)->yyMinutes = 0; + ((struct date_yy *)parm)->yySeconds = 0; + } else { + ((struct date_yy *)parm)->yyHaveTime = 0; + } + ((struct date_yy *)parm)->yyMeridian = MER24; + } +#line 1778 "parsedate.c" + break; + + case 56: /* rel: relunit tAGO */ +#line 470 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelSeconds = + -((struct date_yy *)parm)->yyRelSeconds; + ((struct date_yy *)parm)->yyRelMinutes = + -((struct date_yy *)parm)->yyRelMinutes; + ((struct date_yy *)parm)->yyRelHour = + -((struct date_yy *)parm)->yyRelHour; + ((struct date_yy *)parm)->yyRelDay = + -((struct date_yy *)parm)->yyRelDay; + ((struct date_yy *)parm)->yyRelMonth = + -((struct date_yy *)parm)->yyRelMonth; + ((struct date_yy *)parm)->yyRelYear = + -((struct date_yy *)parm)->yyRelYear; + } +#line 1797 "parsedate.c" + break; + + case 58: /* relunit: tUNUMBER tYEAR_UNIT */ +#line 487 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelYear += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1805 "parsedate.c" + break; + + case 59: /* relunit: tSNUMBER tYEAR_UNIT */ +#line 490 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelYear += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1813 "parsedate.c" + break; + + case 60: /* relunit: tYEAR_UNIT */ +#line 493 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelYear += (yyvsp[0].Number); + } +#line 1821 "parsedate.c" + break; + + case 61: /* relunit: tUNUMBER tMONTH_UNIT */ +#line 496 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelMonth += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1829 "parsedate.c" + break; + + case 62: /* relunit: tSNUMBER tMONTH_UNIT */ +#line 499 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelMonth += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1837 "parsedate.c" + break; + + case 63: /* relunit: tMONTH_UNIT */ +#line 502 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelMonth += (yyvsp[0].Number); + } +#line 1845 "parsedate.c" + break; + + 
case 64: /* relunit: tUNUMBER tDAY_UNIT */ +#line 505 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelDay += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1853 "parsedate.c" + break; + + case 65: /* relunit: tSNUMBER tDAY_UNIT */ +#line 508 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelDay += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1861 "parsedate.c" + break; + + case 66: /* relunit: tDAY_UNIT */ +#line 511 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelDay += (yyvsp[0].Number); + } +#line 1869 "parsedate.c" + break; + + case 67: /* relunit: tUNUMBER tHOUR_UNIT */ +#line 514 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelHour += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1877 "parsedate.c" + break; + + case 68: /* relunit: tSNUMBER tHOUR_UNIT */ +#line 517 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelHour += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1885 "parsedate.c" + break; + + case 69: /* relunit: tHOUR_UNIT */ +#line 520 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelHour += (yyvsp[0].Number); + } +#line 1893 "parsedate.c" + break; + + case 70: /* relunit: tUNUMBER tMINUTE_UNIT */ +#line 523 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelMinutes += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1901 "parsedate.c" + break; + + case 71: /* relunit: tSNUMBER tMINUTE_UNIT */ +#line 526 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelMinutes += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1909 "parsedate.c" + break; + + case 72: /* relunit: tMINUTE_UNIT */ +#line 529 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelMinutes += (yyvsp[0].Number); + } +#line 1917 "parsedate.c" + break; + + case 73: /* relunit: tUNUMBER tSEC_UNIT */ +#line 532 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelSeconds += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1925 "parsedate.c" + break; + + case 74: /* relunit: tSNUMBER tSEC_UNIT */ +#line 535 "./parsedate.y" + { + ((struct 
date_yy *)parm)->yyRelSeconds += (yyvsp[-1].Number) * (yyvsp[0].Number); + } +#line 1933 "parsedate.c" + break; + + case 75: /* relunit: tSEC_UNIT */ +#line 538 "./parsedate.y" + { + ((struct date_yy *)parm)->yyRelSeconds += (yyvsp[0].Number); + } +#line 1941 "parsedate.c" + break; + + case 76: /* number: tUNUMBER */ +#line 544 "./parsedate.y" + { + if(((struct date_yy *)parm)->yyHaveTime && + ((struct date_yy *)parm)->yyHaveDate && + !((struct date_yy *)parm)->yyHaveRel) + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number); + else + { + if((yyvsp[0].Number) > 10000) + { + ((struct date_yy *)parm)->yyHaveDate++; + ((struct date_yy *)parm)->yyDay= ((yyvsp[0].Number))%100; + ((struct date_yy *)parm)->yyMonth= ((yyvsp[0].Number)/100)%100; + ((struct date_yy *)parm)->yyYear = (yyvsp[0].Number)/10000; + } + else + { + ((struct date_yy *)parm)->yyHaveTime++; + if((yyvsp[0].Number) < 100) + { + ((struct date_yy *)parm)->yyHour = (yyvsp[0].Number); + ((struct date_yy *)parm)->yyMinutes = 0; + } + else + { + ((struct date_yy *)parm)->yyHour = (yyvsp[0].Number) / 100; + ((struct date_yy *)parm)->yyMinutes = (yyvsp[0].Number) % 100; + } + ((struct date_yy *)parm)->yySeconds = 0; + ((struct date_yy *)parm)->yyMeridian = MER24; + } + } + } +#line 1978 "parsedate.c" + break; + + case 77: /* o_merid: tMERIDIAN */ +#line 579 "./parsedate.y" + { + ((struct date_yy *)parm)->yyMeridian = (yyvsp[0].Meridian); + } +#line 1986 "parsedate.c" + break; + + +#line 1990 "parsedate.c" + + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. 
In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. */ + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *yyssp; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == RAPTOR_PARSEDATE_EMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; + yyerror (parm, YY_("syntax error")); + } + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= RAPTOR_PARSEDATE_EOF) + { + /* Return failure if at end of input. */ + if (yychar == RAPTOR_PARSEDATE_EOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, parm); + yychar = RAPTOR_PARSEDATE_EMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. 
| +`---------------------------------------------------*/ +yyerrorlab: + /* Pacify compilers when the user code never invokes YYERROR and the + label yyerrorlab therefore never appears in user code. */ + if (0) + YYERROR; + ++yynerrs; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + /* Pop stack until we find a state that shifts the error token. */ + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + YY_ACCESSING_SYMBOL (yystate), yyvsp, parm); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. +| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturnlab; + + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturnlab; + + +/*-----------------------------------------------------------. +| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. 
| +`-----------------------------------------------------------*/ +yyexhaustedlab: + yyerror (parm, YY_("memory exhausted")); + yyresult = 2; + goto yyreturnlab; + + +/*----------------------------------------------------------. +| yyreturnlab -- parsing is finished, clean up and return. | +`----------------------------------------------------------*/ +yyreturnlab: + if (yychar != RAPTOR_PARSEDATE_EMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, parm); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. */ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, parm); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + + return yyresult; +} + +#line 584 "./parsedate.y" + + +time_t get_date (char *p, time_t *now); + +/* Month and day table. */ +static TABLE const MonthDayTable[] = { + { "january", tMONTH, 1 }, + { "february", tMONTH, 2 }, + { "march", tMONTH, 3 }, + { "april", tMONTH, 4 }, + { "may", tMONTH, 5 }, + { "june", tMONTH, 6 }, + { "july", tMONTH, 7 }, + { "august", tMONTH, 8 }, + { "september", tMONTH, 9 }, + { "sept", tMONTH, 9 }, + { "october", tMONTH, 10 }, + { "november", tMONTH, 11 }, + { "december", tMONTH, 12 }, + { "sunday", tDAY, 0 }, + { "monday", tDAY, 1 }, + { "tuesday", tDAY, 2 }, + { "tues", tDAY, 2 }, + { "wednesday", tDAY, 3 }, + { "wednes", tDAY, 3 }, + { "thursday", tDAY, 4 }, + { "thur", tDAY, 4 }, + { "thurs", tDAY, 4 }, + { "friday", tDAY, 5 }, + { "saturday", tDAY, 6 }, + { NULL, 0, 0 } +}; + +/* Time units table. 
*/ +static TABLE const UnitsTable[] = { + { "year", tYEAR_UNIT, 1 }, + { "month", tMONTH_UNIT, 1 }, + { "fortnight", tDAY_UNIT, 14 }, + { "week", tDAY_UNIT, 7 }, + { "day", tDAY_UNIT, 1 }, + { "hour", tHOUR_UNIT, 1 }, + { "minute", tMINUTE_UNIT, 1 }, + { "min", tMINUTE_UNIT, 1 }, + { "second", tSEC_UNIT, 1 }, + { "sec", tSEC_UNIT, 1 }, + { NULL, 0, 0 } +}; + +/* Assorted relative-time words. */ +static TABLE const OtherTable[] = { + { "tomorrow", tDAY_UNIT, 1 }, + { "yesterday", tDAY_UNIT, -1 }, + { "today", tDAY_UNIT, 0 }, + { "now", tDAY_UNIT, 0 }, + { "last", tUNUMBER, -1 }, + { "this", tUNUMBER, 0 }, + { "next", tUNUMBER, 1 }, + { "first", tUNUMBER, 1 }, +/* { "second", tUNUMBER, 2 }, */ + { "third", tUNUMBER, 3 }, + { "fourth", tUNUMBER, 4 }, + { "fifth", tUNUMBER, 5 }, + { "sixth", tUNUMBER, 6 }, + { "seventh", tUNUMBER, 7 }, + { "eighth", tUNUMBER, 8 }, + { "ninth", tUNUMBER, 9 }, + { "tenth", tUNUMBER, 10 }, + { "eleventh", tUNUMBER, 11 }, + { "twelfth", tUNUMBER, 12 }, + { "ago", tAGO, 1 }, + { NULL, 0, 0 } +}; + +/* The timezone table. */ +static TABLE const TimezoneTable[] = { + { "gmt", tZONE, HOUR ( 0) }, /* Greenwich Mean */ + { "ut", tZONE, HOUR ( 0) }, /* Universal (Coordinated) */ + { "utc", tZONE, HOUR ( 0) }, + { "wet", tZONE, HOUR ( 0) }, /* Western European */ + { "bst", tDAYZONE, HOUR ( 0) }, /* British Summer */ + { "wat", tZONE, HOUR ( 1) }, /* West Africa */ + { "at", tZONE, HOUR ( 2) }, /* Azores */ +#if 0 + /* For completeness. BST is also British Summer, and GST is + * also Guam Standard. 
*/ + { "bst", tZONE, HOUR ( 3) }, /* Brazil Standard */ + { "gst", tZONE, HOUR ( 3) }, /* Greenland Standard */ +#endif +#if 0 + { "nft", tZONE, HOUR (3.5) }, /* Newfoundland */ + { "nst", tZONE, HOUR (3.5) }, /* Newfoundland Standard */ + { "ndt", tDAYZONE, HOUR (3.5) }, /* Newfoundland Daylight */ +#endif + { "ast", tZONE, HOUR ( 4) }, /* Atlantic Standard */ + { "adt", tDAYZONE, HOUR ( 4) }, /* Atlantic Daylight */ + { "est", tZONE, HOUR ( 5) }, /* Eastern Standard */ + { "edt", tDAYZONE, HOUR ( 5) }, /* Eastern Daylight */ + { "cst", tZONE, HOUR ( 6) }, /* Central Standard */ + { "cdt", tDAYZONE, HOUR ( 6) }, /* Central Daylight */ + { "mst", tZONE, HOUR ( 7) }, /* Mountain Standard */ + { "mdt", tDAYZONE, HOUR ( 7) }, /* Mountain Daylight */ + { "pst", tZONE, HOUR ( 8) }, /* Pacific Standard */ + { "pdt", tDAYZONE, HOUR ( 8) }, /* Pacific Daylight */ + { "yst", tZONE, HOUR ( 9) }, /* Yukon Standard */ + { "ydt", tDAYZONE, HOUR ( 9) }, /* Yukon Daylight */ + { "hst", tZONE, HOUR (10) }, /* Hawaii Standard */ + { "hdt", tDAYZONE, HOUR (10) }, /* Hawaii Daylight */ + { "cat", tZONE, HOUR (10) }, /* Central Alaska */ + { "akst", tZONE, HOUR (10) }, /* Alaska Standard */ + { "akdt", tZONE, HOUR (10) }, /* Alaska Daylight */ + { "ahst", tZONE, HOUR (10) }, /* Alaska-Hawaii Standard */ + { "nt", tZONE, HOUR (11) }, /* Nome */ + { "idlw", tZONE, HOUR (12) }, /* International Date Line West */ + { "cet", tZONE, -HOUR (1) }, /* Central European */ + { "cest", tDAYZONE, -HOUR (1) }, /* Central European Summer */ + { "met", tZONE, -HOUR (1) }, /* Middle European */ + { "mewt", tZONE, -HOUR (1) }, /* Middle European Winter */ + { "mest", tDAYZONE, -HOUR (1) }, /* Middle European Summer */ + { "mesz", tDAYZONE, -HOUR (1) }, /* Middle European Summer */ + { "swt", tZONE, -HOUR (1) }, /* Swedish Winter */ + { "sst", tDAYZONE, -HOUR (1) }, /* Swedish Summer */ + { "fwt", tZONE, -HOUR (1) }, /* French Winter */ + { "fst", tDAYZONE, -HOUR (1) }, /* French Summer */ + { "eet", 
tZONE, -HOUR (2) }, /* Eastern Europe, USSR Zone 1 */ + { "bt", tZONE, -HOUR (3) }, /* Baghdad, USSR Zone 2 */ +#if 0 + { "it", tZONE, -HOUR (3.5) },/* Iran */ +#endif + { "zp4", tZONE, -HOUR (4) }, /* USSR Zone 3 */ + { "zp5", tZONE, -HOUR (5) }, /* USSR Zone 4 */ +#if 0 + { "ist", tZONE, -HOUR (5.5) },/* Indian Standard */ +#endif + { "zp6", tZONE, -HOUR (6) }, /* USSR Zone 5 */ +#if 0 + /* For completeness. NST is also Newfoundland Standard, and SST is + * also Swedish Summer. */ + { "nst", tZONE, -HOUR (6.5) },/* North Sumatra */ + { "sst", tZONE, -HOUR (7) }, /* South Sumatra, USSR Zone 6 */ +#endif /* 0 */ + { "wast", tZONE, -HOUR (7) }, /* West Australian Standard */ + { "wadt", tDAYZONE, -HOUR (7) }, /* West Australian Daylight */ +#if 0 + { "jt", tZONE, -HOUR (7.5) },/* Java (3pm in Cronusland!) */ +#endif + { "cct", tZONE, -HOUR (8) }, /* China Coast, USSR Zone 7 */ + { "jst", tZONE, -HOUR (9) }, /* Japan Standard, USSR Zone 8 */ +#if 0 + { "cast", tZONE, -HOUR (9.5) },/* Central Australian Standard */ + { "cadt", tDAYZONE, -HOUR (9.5) },/* Central Australian Daylight */ +#endif + { "east", tZONE, -HOUR (10) }, /* Eastern Australian Standard */ + { "eadt", tDAYZONE, -HOUR (10) }, /* Eastern Australian Daylight */ + { "gst", tZONE, -HOUR (10) }, /* Guam Standard, USSR Zone 9 */ + { "nzt", tZONE, -HOUR (12) }, /* New Zealand */ + { "nzst", tZONE, -HOUR (12) }, /* New Zealand Standard */ + { "nzdt", tDAYZONE, -HOUR (12) }, /* New Zealand Daylight */ + { "idle", tZONE, -HOUR (12) }, /* International Date Line East */ + { NULL, 0, 0 } +}; + +/* Military timezone table. 
*/ +static TABLE const MilitaryTable[] = { + { "a", tZONE, HOUR (- 1) }, + { "b", tZONE, HOUR (- 2) }, + { "c", tZONE, HOUR (- 3) }, + { "d", tZONE, HOUR (- 4) }, + { "e", tZONE, HOUR (- 5) }, + { "f", tZONE, HOUR (- 6) }, + { "g", tZONE, HOUR (- 7) }, + { "h", tZONE, HOUR (- 8) }, + { "i", tZONE, HOUR (- 9) }, + { "k", tZONE, HOUR (-10) }, + { "l", tZONE, HOUR (-11) }, + { "m", tZONE, HOUR (-12) }, + { "n", tZONE, HOUR ( 1) }, + { "o", tZONE, HOUR ( 2) }, + { "p", tZONE, HOUR ( 3) }, + { "q", tZONE, HOUR ( 4) }, + { "r", tZONE, HOUR ( 5) }, + { "s", tZONE, HOUR ( 6) }, + { "t", tTZONE, HOUR ( 7) }, + { "u", tZONE, HOUR ( 8) }, + { "v", tZONE, HOUR ( 9) }, + { "w", tWZONE, HOUR ( 10) }, + { "x", tZONE, HOUR ( 11) }, + { "y", tZONE, HOUR ( 12) }, + { "z", tZZONE, HOUR ( 0) }, + { NULL, 0, 0 } +}; + + + + +/* ARGSUSED */ +static int +yyerror(void* parm, const char *s) +{ + return 0; +} + +static int +ToHour(int Hours, MERIDIAN Meridian) +{ + switch (Meridian) + { + case MER24: + if(Hours < 0 || Hours > 23) + return -1; + return Hours; + case MERam: + if(Hours < 1 || Hours > 12) + return -1; + if(Hours == 12) + Hours = 0; + return Hours; + case MERpm: + if(Hours < 1 || Hours > 12) + return -1; + if(Hours == 12) + Hours = 0; + return Hours + 12; + default: +#ifdef RAPTOR_DEBUG + fprintf(stderr, "%s:%d:%s: UNKNOWN Meridian %d - add a new case", + __FILE__, __LINE__, __FUNCTION__, (int)Meridian); +#endif + return -1; + } + /* NOTREACHED */ +} + +static int +ToYear(int Year) +{ + if(Year < 0) + Year = -Year; + + /* XPG4 suggests that years 00-68 map to 2000-2068, and + years 69-99 map to 1969-1999. */ + if(Year < 69) + Year += 2000; + else if(Year < 100) + Year += 1900; + + return Year; +} + +static int +LookupWord (YYSTYPE *lvalp, char *buff) +{ + char *p; + char *q; + const TABLE *tp; + int i; + int abbrev; + + /* Make it lowercase. 
*/ + for(p = buff; *p; p++) + if(isupper ((unsigned char) *p)) + *p = tolower (*p); + + if(strcmp (buff, "am") == 0 || strcmp (buff, "a.m.") == 0) + { + lvalp->Meridian = MERam; + return tMERIDIAN; + } + if(strcmp (buff, "pm") == 0 || strcmp (buff, "p.m.") == 0) + { + lvalp->Meridian = MERpm; + return tMERIDIAN; + } + + /* See if we have an abbreviation for a month. */ + if(strlen (buff) == 3) + abbrev = 1; + else if(strlen (buff) == 4 && buff[3] == '.') + { + abbrev = 1; + buff[3] = '\0'; + } + else + abbrev = 0; + + for(tp = MonthDayTable; tp->name; tp++) + { + if(abbrev) + { + if(strncmp (buff, tp->name, 3) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + } + else if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + } + + for(tp = TimezoneTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + if(strcmp (buff, "dst") == 0) + return tDST; + + for(tp = UnitsTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + /* Strip off any plural and try the units table again. */ + i = RAPTOR_BAD_CAST(int, strlen(buff)) - 1; + if(buff[i] == 's') + { + buff[i] = '\0'; + for(tp = UnitsTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + buff[i] = 's'; /* Put back for "this" in OtherTable. */ + } + + for(tp = OtherTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + /* Military timezones. */ + if(buff[1] == '\0' && isalpha ((unsigned char) *buff)) + { + for(tp = MilitaryTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + } + + /* Drop out any periods and try the timezone table again. 
*/ + for(i = 0, p = q = buff; *q; q++) + if(*q != '.') + *p++ = *q; + else + i++; + *p = '\0'; + if(i) + for(tp = TimezoneTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + return tID; +} + +int yylex(YYSTYPE *lvalp, void *parm) +{ + unsigned char c; + char *p; + char buff[20]; + int Count; + int sign; + struct date_yy * date = (struct date_yy *)parm; + + for(;;) + { + while(isspace ((unsigned char) *date->yyInput)) + date->yyInput++; + + if(isdigit (c = *date->yyInput) || c == '-' || c == '+') + { + if(c == '-' || c == '+') + { + sign = c == '-' ? -1 : 1; + if(!ISDIGIT (*++date->yyInput)) + /* skip the '-' sign */ + continue; + } + else + sign = 0; + for(lvalp->Number = 0; ISDIGIT (c = *date->yyInput++);) + lvalp->Number = 10 * lvalp->Number + c - '0'; + date->yyInput--; + if(sign < 0) + lvalp->Number = -lvalp->Number; + /* Ignore ordinal suffixes on numbers */ + c = *date->yyInput; + if(c == 's' || c == 'n' || c == 'r' || c == 't') { + c = *++date->yyInput; + if(c == 't' || c == 'd' || c == 'h') { + date->yyInput++; + } else { + date->yyInput--; + } + } + return sign ? tSNUMBER : tUNUMBER; + } + if(isalpha (c)) + { + for(p = buff; (c = *date->yyInput++, isalpha (c)) || c == '.';) + if(p < &buff[sizeof buff - 1]) + *p++ = c; + *p = '\0'; + date->yyInput--; + return LookupWord (lvalp, buff); + } + if(c != '(') + return *date->yyInput++; + Count = 0; + do + { + c = *date->yyInput++; + if(c == '\0') + return c; + if(c == '(') + Count++; + else if(c == ')') + Count--; + } + while(Count > 0); + } +} + +#define TM_YEAR_ORIGIN 1900 + +/* Yield A - B, measured in seconds. 
*/ +static long +difftm (struct tm *a, struct tm *b) +{ + int ay = a->tm_year + (TM_YEAR_ORIGIN - 1); + int by = b->tm_year + (TM_YEAR_ORIGIN - 1); + long days = ( + /* difference in day of year */ + a->tm_yday - b->tm_yday + /* + intervening leap days */ + + ((ay >> 2) - (by >> 2)) + - (ay / 100 - by / 100) + + ((ay / 100 >> 2) - (by / 100 >> 2)) + /* + difference in years * 365 */ + + (long) (ay - by) * 365 + ); + return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour)) + + (a->tm_min - b->tm_min)) + + (a->tm_sec - b->tm_sec)); +} + +time_t raptor_parse_date(const char *p, time_t *now) +{ + struct tm tm, tm0, *tmp; + time_t Start; + struct date_yy date; + + date.yyInput = p; + Start = now ? *now : time ((time_t *) NULL); + tmp = localtime (&Start); + if(!tmp) + return -1; + date.yyYear = tmp->tm_year + TM_YEAR_ORIGIN; + date.yyMonth = tmp->tm_mon + 1; + date.yyDay = tmp->tm_mday; + date.yyHour = tmp->tm_hour; + date.yyMinutes = tmp->tm_min; + date.yySeconds = tmp->tm_sec; + tm.tm_isdst = tmp->tm_isdst; + date.yyMeridian = MER24; + date.yyRelSeconds = 0; + date.yyRelMinutes = 0; + date.yyRelHour = 0; + date.yyRelDay = 0; + date.yyRelMonth = 0; + date.yyRelYear = 0; + date.yyHaveDate = 0; + date.yyHaveDay = 0; + date.yyHaveRel = 0; + date.yyHaveTime = 0; + date.yyHaveZone = 0; + + if(yyparse (&date) + || date.yyHaveTime > 1 || date.yyHaveZone > 1 + || date.yyHaveDate > 1 || date.yyHaveDay > 1) { + return -1; + } + tm.tm_year = ToYear (date.yyYear) - TM_YEAR_ORIGIN + date.yyRelYear; + tm.tm_mon = date.yyMonth - 1 + date.yyRelMonth; + tm.tm_mday = date.yyDay + date.yyRelDay; + if(date.yyHaveTime || (date.yyHaveRel && !date.yyHaveDate && !date.yyHaveDay)) + { + tm.tm_hour = ToHour (date.yyHour, date.yyMeridian); + if(tm.tm_hour < 0) + return -1; + tm.tm_min = date.yyMinutes; + tm.tm_sec = date.yySeconds; + } + else + { + tm.tm_hour = tm.tm_min = tm.tm_sec = 0; + } + tm.tm_hour += date.yyRelHour; + tm.tm_min += date.yyRelMinutes; + tm.tm_sec += date.yyRelSeconds; + + 
/* Let mktime deduce tm_isdst if we have an absolute timestamp, + or if the relative timestamp mentions days, months, or years. */ + if(date.yyHaveDate | date.yyHaveDay | date.yyHaveTime | date.yyRelDay | date.yyRelMonth | date.yyRelYear) + tm.tm_isdst = -1; + + tm0 = tm; + + Start = mktime (&tm); + + if(Start == (time_t) -1) + { + + /* Guard against falsely reporting errors near the time_t boundaries + when parsing times in other time zones. For example, if the min + time_t value is 1970-01-01 00:00:00 UTC and we are 8 hours ahead + of UTC, then the min localtime value is 1970-01-01 08:00:00; if + we apply mktime to 1970-01-01 00:00:00 we will get an error, so + we apply mktime to 1970-01-02 08:00:00 instead and adjust the time + zone by 24 hours to compensate. This algorithm assumes that + there is no DST transition within a day of the time_t boundaries. */ + if(date.yyHaveZone) + { + tm = tm0; + if(tm.tm_year <= EPOCH - TM_YEAR_ORIGIN) + { + tm.tm_mday++; + date.yyTimezone -= 24 * 60; + } + else + { + tm.tm_mday--; + date.yyTimezone += 24 * 60; + } + Start = mktime (&tm); + } + + if(Start == (time_t) -1) + return Start; + } + + if(date.yyHaveDay && !date.yyHaveDate) + { + tm.tm_mday += ((date.yyDayNumber - tm.tm_wday + 7) % 7 + + 7 * (date.yyDayOrdinal - (0 < date.yyDayOrdinal))); + Start = mktime (&tm); + if(Start == (time_t) -1) + return Start; + } + + if(date.yyHaveZone) + { + long delta; + struct tm *gmt = gmtime (&Start); + if(!gmt) + return -1; + delta = date.yyTimezone * 60L + difftm (&tm, gmt); + + if((Start + delta < Start) != (delta < 0)) + return -1; /* time_t overflow */ + Start += delta; + } + + return Start; +} diff --git a/src/parsedate.h b/src/parsedate.h new file mode 100644 index 0000000..ad62b5a --- /dev/null +++ b/src/parsedate.h @@ -0,0 +1,96 @@ +/* A Bison parser, made by GNU Bison 3.8.2. */ + +/* Bison interface for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. 
+ + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +#ifndef YY_RAPTOR_PARSEDATE_PARSEDATE_H_INCLUDED +# define YY_RAPTOR_PARSEDATE_PARSEDATE_H_INCLUDED +/* Debug traces. */ +#ifndef RAPTOR_PARSEDATE_DEBUG +# if defined YYDEBUG +#if YYDEBUG +# define RAPTOR_PARSEDATE_DEBUG 1 +# else +# define RAPTOR_PARSEDATE_DEBUG 0 +# endif +# else /* ! defined YYDEBUG */ +# define RAPTOR_PARSEDATE_DEBUG 0 +# endif /* ! defined YYDEBUG */ +#endif /* ! 
defined RAPTOR_PARSEDATE_DEBUG */ +#if RAPTOR_PARSEDATE_DEBUG +extern int raptor_parsedate_debug; +#endif + +/* Token kinds. */ +#ifndef RAPTOR_PARSEDATE_TOKENTYPE +# define RAPTOR_PARSEDATE_TOKENTYPE + enum raptor_parsedate_tokentype + { + RAPTOR_PARSEDATE_EMPTY = -2, + RAPTOR_PARSEDATE_EOF = 0, /* "end of file" */ + RAPTOR_PARSEDATE_error = 256, /* error */ + RAPTOR_PARSEDATE_UNDEF = 257, /* "invalid token" */ + tAGO = 258, /* tAGO */ + tDAY = 259, /* tDAY */ + tDAY_UNIT = 260, /* tDAY_UNIT */ + tDAYZONE = 261, /* tDAYZONE */ + tDST = 262, /* tDST */ + tHOUR_UNIT = 263, /* tHOUR_UNIT */ + tID = 264, /* tID */ + tTZONE = 265, /* tTZONE */ + tWZONE = 266, /* tWZONE */ + tZZONE = 267, /* tZZONE */ + tMERIDIAN = 268, /* tMERIDIAN */ + tMINUTE_UNIT = 269, /* tMINUTE_UNIT */ + tMONTH = 270, /* tMONTH */ + tMONTH_UNIT = 271, /* tMONTH_UNIT */ + tSEC_UNIT = 272, /* tSEC_UNIT */ + tSNUMBER = 273, /* tSNUMBER */ + tUNUMBER = 274, /* tUNUMBER */ + tYEAR_UNIT = 275, /* tYEAR_UNIT */ + tZONE = 276 /* tZONE */ + }; + typedef enum raptor_parsedate_tokentype raptor_parsedate_token_kind_t; +#endif + +/* Value type. */ + + + + +int raptor_parsedate_parse (struct date_yy *parm); + + +#endif /* !YY_RAPTOR_PARSEDATE_PARSEDATE_H_INCLUDED */ diff --git a/src/parsedate.y b/src/parsedate.y new file mode 100644 index 0000000..77f5d4f --- /dev/null +++ b/src/parsedate.y @@ -0,0 +1,1161 @@ +%{ +/* + * Imported from the public domain source in PHP 4.4 + * Fri May 20 07:14:01 2005 + * https://github.com/php/php-src/blob/379c4af44aa6456fbdbc69dba3ead166ba7ff67d/ext/standard/parsedate.y + * + * and patched from there + * + * Later versions removed this from PHP and replaced it with entirely + * new code written under the PHP license. That code is not used here + * and cannot be used. + * + */ + + +/* +** Originally written by Steven M. Bellovin <smb@research.att.com> while +** at the University of North Carolina at Chapel Hill. Later tweaked by +** a couple of people on Usenet. 
Completely overhauled by Rich $alz +** <rsalz@bbn.com> and Jim Berets <jberets@bbn.com> in August, 1990. +** +** This code is in the public domain and has no copyright. +*/ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <sys/types.h> +#include <ctype.h> + +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_TIME_H +#include <time.h> +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#if defined(_HPUX_SOURCE) +#include <alloca.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + + +#define ISDIGIT(c) ((unsigned) (c) - '0' <= 9) + +#ifdef HAVE_STRING_H +# include <string.h> +#endif + +#if !defined(__GNUC__) || __GNUC__ < 2 || (__GNUC__ == 2 && __GNUC_MINOR__ < 7) +# define __attribute__(x) +#endif + +#ifndef ATTRIBUTE_UNUSED +# define ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +#endif + +/* Some old versions of bison generate parsers that use bcopy. + That loses on systems that don't provide the function, so we have + to redefine it here. */ +#if !defined (HAVE_BCOPY) && defined (HAVE_MEMCPY) && !defined (bcopy) +# define bcopy(from, to, len) memcpy ((to), (from), (len)) +#endif + +/* Prototypes */ +static int raptor_parsedate_error(void* parm, const char *msg); + + +#define EPOCH 1970 +#define HOUR(x) ((x) * 60) + +#define MAX_BUFF_LEN 128 /* size of buffer to read the date into */ + +/* +** An entry in the lexical lookup table. +*/ +typedef struct _TABLE { + const char *name; + int type; + int value; +} TABLE; + + +/* +** Meridian: am, pm, or 24-hour style. 
+*/ +typedef enum _MERIDIAN { + MERam, MERpm, MER24 +} MERIDIAN; + +struct date_yy { + const char *yyInput; + int yyDayOrdinal; + int yyDayNumber; + int yyHaveDate; + int yyHaveDay; + int yyHaveRel; + int yyHaveTime; + int yyHaveZone; + int yyTimezone; + int yyDay; + int yyHour; + int yyMinutes; + int yyMonth; + int yySeconds; + int yyYear; + MERIDIAN yyMeridian; + int yyRelDay; + int yyRelHour; + int yyRelMinutes; + int yyRelMonth; + int yyRelSeconds; + int yyRelYear; +}; + +typedef union _date_ll { + int Number; + enum _MERIDIAN Meridian; +} date_ll; + +#define YYPARSE_PARAM parm +#define YYLEX_PARAM parm +#define YYSTYPE date_ll +#define YYLTYPE void + +static int yylex (YYSTYPE *lvalp, void *parm); + +static int ToHour (int Hours, MERIDIAN Meridian); +static int ToYear (int Year); +static int LookupWord (YYSTYPE *lvalp, char *buff); + +%} + +/* directives */ + +%require "3.0" + +/* File prefix (bison -b) */ +%file-prefix "parsedate" + +/* Symbol prefix (bison -d : deprecated) */ +%define api.prefix {raptor_parsedate_} + +/* Write parser header file with macros (bison -d) */ +%defines + +/* Write output file with verbose descriptions of parser states */ +%verbose + +%define api.pure true + +/* This grammar has 56 shift/reduce conflicts. 
*/ +%expect 56 + +%param { struct date_yy *parm } + +%token tAGO tDAY tDAY_UNIT tDAYZONE tDST tHOUR_UNIT tID tTZONE tWZONE tZZONE +%token tMERIDIAN tMINUTE_UNIT tMONTH tMONTH_UNIT +%token tSEC_UNIT tSNUMBER tUNUMBER tYEAR_UNIT tZONE + +%type <Number> tDAY tDAY_UNIT tDAYZONE tHOUR_UNIT tMINUTE_UNIT +%type <Number> tMONTH tMONTH_UNIT +%type <Number> tSEC_UNIT tSNUMBER tUNUMBER tYEAR_UNIT tZONE tTZONE tWZONE tZZONE +%type <Meridian> tMERIDIAN + +%% + +spec : /* NULL */ + | spec item + ; + +item : time { + ((struct date_yy *)parm)->yyHaveTime++; + } + | zone { + ((struct date_yy *)parm)->yyHaveZone++; + } + | date { + ((struct date_yy *)parm)->yyHaveDate++; + } + | day { + ((struct date_yy *)parm)->yyHaveDay++; + } + | rel { + ((struct date_yy *)parm)->yyHaveRel++; + } + | number + | o_merid + ; + +time : tUNUMBER tMERIDIAN { + ((struct date_yy *)parm)->yyHour = $1; + ((struct date_yy *)parm)->yyMinutes = 0; + ((struct date_yy *)parm)->yySeconds = 0; + ((struct date_yy *)parm)->yyMeridian = $2; + } + | iso8601time_colon + /* | pgsqltime ... shares a common spec with ISO8601 */ + ; + +iso8601time_colon: HMStime_with_colon sec_fraction_part rel { + ((struct date_yy *)parm)->yyMeridian = MER24; + } + | HMtime_with_colon sec_fraction_part rel { + ((struct date_yy *)parm)->yyMeridian = MER24; + ((struct date_yy *)parm)->yySeconds = 0; + } + | HMStime_with_colon sec_fraction_part iso8601zonepart { + ((struct date_yy *)parm)->yyMeridian = MER24; + } + | HMtime_with_colon sec_fraction_part iso8601zonepart { + ((struct date_yy *)parm)->yyMeridian = MER24; + ((struct date_yy *)parm)->yySeconds = 0; + } + ; + +iso8601zonepart: zonepart_numeric_without_colon { + ((struct date_yy *)parm)->yyHaveZone++; + } + | zonepart_numeric_with_colon { + ((struct date_yy *)parm)->yyHaveZone++; + } + | zone { + ((struct date_yy *)parm)->yyHaveZone++; + } + | /* empty */ + ; + +sec_fraction_part: '.' 
tUNUMBER { + } + | /* empty */ + ; + +zonepart_numeric_without_colon: tSNUMBER { + /* format: [+-]hhmm */ + if($1 <= -100 || $1 >= 100) { + ((struct date_yy *)parm)->yyTimezone = (-$1 / 100) * 60 + (-$1 % 100); + } else if($1 >= -99 && $1 <= 99) { + ((struct date_yy *)parm)->yyTimezone = -$1 * 60; + } + } + ; + +zonepart_numeric_with_colon: tSNUMBER ':' tUNUMBER { + /* format: [+-]hh:mm */ + ((struct date_yy *)parm)->yyTimezone = -$1 * 60 + ($1 > 0 ? -$3: $3); + } + ; + +HMStime_with_colon: HMtime_with_colon ':' tUNUMBER { + /* format: hh:mm:ss */ + ((struct date_yy *)parm)->yySeconds = $3; + } + ; + +HMtime_with_colon: tUNUMBER ':' tUNUMBER { + /* format: hh:mm */ + ((struct date_yy *)parm)->yyHour = $1; + ((struct date_yy *)parm)->yyMinutes = $3; + } + ; + + + /* we have to deal with a special case for the datetime format + of XML Schema here: '2003-11-18T22:40:00Z' + the combination of a 'T' timezone specifier later followed + by a 'Z' is now recognized and allowed + TODO: change the grammer so that the exact positions are checked + right now '2003-11-18 22:40:00 TZ' is also accepted (hartmut) + */ + +zone : tTZONE { + ((struct date_yy *)parm)->yyTimezone = $1; + } + | tWZONE { + ((struct date_yy *)parm)->yyTimezone = $1; + } + | tZZONE { + ((struct date_yy *)parm)->yyTimezone = $1; + } + | tZONE { + ((struct date_yy *)parm)->yyTimezone = $1; + } + | tDAYZONE { + ((struct date_yy *)parm)->yyTimezone = $1 - 60; + } + | tZONE tDST { + ((struct date_yy *)parm)->yyTimezone = $1 - 60; + } + ; + +day : tDAY { + ((struct date_yy *)parm)->yyDayOrdinal = 1; + ((struct date_yy *)parm)->yyDayNumber = $1; + } + | tDAY ',' { + ((struct date_yy *)parm)->yyDayOrdinal = 1; + ((struct date_yy *)parm)->yyDayNumber = $1; + } + | tUNUMBER tDAY { + ((struct date_yy *)parm)->yyDayOrdinal = $1; + ((struct date_yy *)parm)->yyDayNumber = $2; + } + ; + +date : tUNUMBER '/' tUNUMBER { + ((struct date_yy *)parm)->yyMonth = $1; + ((struct date_yy *)parm)->yyDay = $3; + } + | tMONTH tUNUMBER 
tUNUMBER ':' tUNUMBER ':' tUNUMBER tUNUMBER { + ((struct date_yy *)parm)->yyYear = $8; + ((struct date_yy *)parm)->yyMonth = $1; + ((struct date_yy *)parm)->yyDay = $2; + + ((struct date_yy *)parm)->yyHour = $3; + ((struct date_yy *)parm)->yyMinutes = $5; + ((struct date_yy *)parm)->yySeconds = $7; + + ((struct date_yy *)parm)->yyHaveTime = 1; + } + | tUNUMBER '/' tUNUMBER '/' tUNUMBER { + /* Interpret as YYYY/MM/DD if $1 >= 1000, otherwise as MM/DD/YY. + The goal in recognizing YYYY/MM/DD is solely to support legacy + machine-generated dates like those in an RCS log listing. If + you want portability, use the ISO 8601 format. */ + if($1 >= 1000) + { + ((struct date_yy *)parm)->yyYear = $1; + ((struct date_yy *)parm)->yyMonth = $3; + ((struct date_yy *)parm)->yyDay = $5; + } + else + { + ((struct date_yy *)parm)->yyMonth = $1; + ((struct date_yy *)parm)->yyDay = $3; + ((struct date_yy *)parm)->yyYear = $5; + } + } + | iso8601date + | iso8601datetime { + ((struct date_yy *)parm)->yyHaveTime++; + } + | tUNUMBER tMONTH tSNUMBER { + /* e.g. 17-JUN-1992. 
*/ + ((struct date_yy *)parm)->yyDay = $1; + ((struct date_yy *)parm)->yyMonth = $2; + ((struct date_yy *)parm)->yyYear = -$3; + } + | tMONTH tUNUMBER tUNUMBER { + ((struct date_yy *)parm)->yyMonth = $1; + ((struct date_yy *)parm)->yyDay = $2; + ((struct date_yy *)parm)->yyYear = $3; + } + | tMONTH tUNUMBER { + ((struct date_yy *)parm)->yyMonth = $1; + if($2 > 1000) { + ((struct date_yy *)parm)->yyYear = $2; + } else { + ((struct date_yy *)parm)->yyDay = $2; + } + } + | tMONTH tUNUMBER ',' tUNUMBER { + ((struct date_yy *)parm)->yyMonth = $1; + ((struct date_yy *)parm)->yyDay = $2; + ((struct date_yy *)parm)->yyYear = $4; + } + | tUNUMBER tMONTH { + ((struct date_yy *)parm)->yyMonth = $2; + if($1 > 1000) { + ((struct date_yy *)parm)->yyYear = $1; + } else { + ((struct date_yy *)parm)->yyDay = $1; + } + } + | tUNUMBER tMONTH tUNUMBER { + ((struct date_yy *)parm)->yyMonth = $2; + ((struct date_yy *)parm)->yyDay = $1; + ((struct date_yy *)parm)->yyYear = $3; + } + ; + +iso8601datetime: iso8601date tTZONE iso8601time + | tUNUMBER tTZONE iso8601time { + int i = $1; + + if(i >= 10000) { + /* format: yyyymmdd */ + ((struct date_yy *)parm)->yyYear = i / 10000; + i %= 10000; + ((struct date_yy *)parm)->yyMonth = i / 100; + i %= 100; + ((struct date_yy *)parm)->yyDay = i; + } else if(i >= 1000 && i <= 9999) { + /* format: yyyy */ + ((struct date_yy *)parm)->yyYear = i; + ((struct date_yy *)parm)->yyDay= 1; + ((struct date_yy *)parm)->yyMonth = 1; + } + } + ; + +iso8601date: tUNUMBER tSNUMBER tSNUMBER { + /* ISO 8601 format. yyyy-mm-dd. 
*/ + ((struct date_yy *)parm)->yyYear = $1; + ((struct date_yy *)parm)->yyMonth = -$2; + ((struct date_yy *)parm)->yyDay = -$3; + } + | tUNUMBER tSNUMBER { + /* ISO 8601 format yyyy-mm */ + ((struct date_yy *)parm)->yyYear = $1; + ((struct date_yy *)parm)->yyMonth = -$2; + ((struct date_yy *)parm)->yyDay = 1; + } + | tUNUMBER iso8601weekspec { + const int om = (1 + 9) % 12; /* offset month */ + const int oy = $1 - 1; /* offset year */ + + ((struct date_yy *)parm)->yyYear = $1; + ((struct date_yy *)parm)->yyMonth = 1; + /* Zeller's formula */ + ((struct date_yy *)parm)->yyDay -= ((13 * om + 12) / 5 + + oy + oy / 4 + oy / 400 - oy / 100) % 7 - 1; + } + ; + +iso8601weekspec: tWZONE tUNUMBER { + ((struct date_yy *)parm)->yyDay = ($2 / 10) * 7 + ($2 % 10) - 8; + } + | tWZONE tUNUMBER tSNUMBER { + ((struct date_yy *)parm)->yyDay = $2 * 7 - $3 - 8; + } + ; + +iso8601time: + iso8601time_colon + | tUNUMBER sec_fraction_part iso8601zonepart { + int i = $1; + + if(i <= -100000 || i >= 100000) { + ((struct date_yy *)parm)->yyHour = i / 10000; + i %= 10000; + ((struct date_yy *)parm)->yyMinutes = i / 100; + i %= 100; + ((struct date_yy *)parm)->yySeconds = i; + } else if(i <= -1000 || i >= 1000) { + ((struct date_yy *)parm)->yyHour = i / 100; + i %= 100; + ((struct date_yy *)parm)->yyMinutes = i; + ((struct date_yy *)parm)->yySeconds = 0; + } else if(i >= -99 && i <= 99) { + ((struct date_yy *)parm)->yyHour = i; + ((struct date_yy *)parm)->yyMinutes = 0; + ((struct date_yy *)parm)->yySeconds = 0; + } else { + ((struct date_yy *)parm)->yyHaveTime = 0; + } + ((struct date_yy *)parm)->yyMeridian = MER24; + } + ; + +rel : relunit tAGO { + ((struct date_yy *)parm)->yyRelSeconds = + -((struct date_yy *)parm)->yyRelSeconds; + ((struct date_yy *)parm)->yyRelMinutes = + -((struct date_yy *)parm)->yyRelMinutes; + ((struct date_yy *)parm)->yyRelHour = + -((struct date_yy *)parm)->yyRelHour; + ((struct date_yy *)parm)->yyRelDay = + -((struct date_yy *)parm)->yyRelDay; + ((struct date_yy 
*)parm)->yyRelMonth = + -((struct date_yy *)parm)->yyRelMonth; + ((struct date_yy *)parm)->yyRelYear = + -((struct date_yy *)parm)->yyRelYear; + } + | relunit + ; + +relunit : tUNUMBER tYEAR_UNIT { + ((struct date_yy *)parm)->yyRelYear += $1 * $2; + } + | tSNUMBER tYEAR_UNIT { + ((struct date_yy *)parm)->yyRelYear += $1 * $2; + } + | tYEAR_UNIT { + ((struct date_yy *)parm)->yyRelYear += $1; + } + | tUNUMBER tMONTH_UNIT { + ((struct date_yy *)parm)->yyRelMonth += $1 * $2; + } + | tSNUMBER tMONTH_UNIT { + ((struct date_yy *)parm)->yyRelMonth += $1 * $2; + } + | tMONTH_UNIT { + ((struct date_yy *)parm)->yyRelMonth += $1; + } + | tUNUMBER tDAY_UNIT { + ((struct date_yy *)parm)->yyRelDay += $1 * $2; + } + | tSNUMBER tDAY_UNIT { + ((struct date_yy *)parm)->yyRelDay += $1 * $2; + } + | tDAY_UNIT { + ((struct date_yy *)parm)->yyRelDay += $1; + } + | tUNUMBER tHOUR_UNIT { + ((struct date_yy *)parm)->yyRelHour += $1 * $2; + } + | tSNUMBER tHOUR_UNIT { + ((struct date_yy *)parm)->yyRelHour += $1 * $2; + } + | tHOUR_UNIT { + ((struct date_yy *)parm)->yyRelHour += $1; + } + | tUNUMBER tMINUTE_UNIT { + ((struct date_yy *)parm)->yyRelMinutes += $1 * $2; + } + | tSNUMBER tMINUTE_UNIT { + ((struct date_yy *)parm)->yyRelMinutes += $1 * $2; + } + | tMINUTE_UNIT { + ((struct date_yy *)parm)->yyRelMinutes += $1; + } + | tUNUMBER tSEC_UNIT { + ((struct date_yy *)parm)->yyRelSeconds += $1 * $2; + } + | tSNUMBER tSEC_UNIT { + ((struct date_yy *)parm)->yyRelSeconds += $1 * $2; + } + | tSEC_UNIT { + ((struct date_yy *)parm)->yyRelSeconds += $1; + } + ; + +number : tUNUMBER + { + if(((struct date_yy *)parm)->yyHaveTime && + ((struct date_yy *)parm)->yyHaveDate && + !((struct date_yy *)parm)->yyHaveRel) + ((struct date_yy *)parm)->yyYear = $1; + else + { + if($1 > 10000) + { + ((struct date_yy *)parm)->yyHaveDate++; + ((struct date_yy *)parm)->yyDay= ($1)%100; + ((struct date_yy *)parm)->yyMonth= ($1/100)%100; + ((struct date_yy *)parm)->yyYear = $1/10000; + } + else + { + ((struct date_yy 
*)parm)->yyHaveTime++; + if($1 < 100) + { + ((struct date_yy *)parm)->yyHour = $1; + ((struct date_yy *)parm)->yyMinutes = 0; + } + else + { + ((struct date_yy *)parm)->yyHour = $1 / 100; + ((struct date_yy *)parm)->yyMinutes = $1 % 100; + } + ((struct date_yy *)parm)->yySeconds = 0; + ((struct date_yy *)parm)->yyMeridian = MER24; + } + } + } + ; + +o_merid : tMERIDIAN + { + ((struct date_yy *)parm)->yyMeridian = $1; + } + ; + +%% + +time_t get_date (char *p, time_t *now); + +/* Month and day table. */ +static TABLE const MonthDayTable[] = { + { "january", tMONTH, 1 }, + { "february", tMONTH, 2 }, + { "march", tMONTH, 3 }, + { "april", tMONTH, 4 }, + { "may", tMONTH, 5 }, + { "june", tMONTH, 6 }, + { "july", tMONTH, 7 }, + { "august", tMONTH, 8 }, + { "september", tMONTH, 9 }, + { "sept", tMONTH, 9 }, + { "october", tMONTH, 10 }, + { "november", tMONTH, 11 }, + { "december", tMONTH, 12 }, + { "sunday", tDAY, 0 }, + { "monday", tDAY, 1 }, + { "tuesday", tDAY, 2 }, + { "tues", tDAY, 2 }, + { "wednesday", tDAY, 3 }, + { "wednes", tDAY, 3 }, + { "thursday", tDAY, 4 }, + { "thur", tDAY, 4 }, + { "thurs", tDAY, 4 }, + { "friday", tDAY, 5 }, + { "saturday", tDAY, 6 }, + { NULL, 0, 0 } +}; + +/* Time units table. */ +static TABLE const UnitsTable[] = { + { "year", tYEAR_UNIT, 1 }, + { "month", tMONTH_UNIT, 1 }, + { "fortnight", tDAY_UNIT, 14 }, + { "week", tDAY_UNIT, 7 }, + { "day", tDAY_UNIT, 1 }, + { "hour", tHOUR_UNIT, 1 }, + { "minute", tMINUTE_UNIT, 1 }, + { "min", tMINUTE_UNIT, 1 }, + { "second", tSEC_UNIT, 1 }, + { "sec", tSEC_UNIT, 1 }, + { NULL, 0, 0 } +}; + +/* Assorted relative-time words. 
*/ +static TABLE const OtherTable[] = { + { "tomorrow", tDAY_UNIT, 1 }, + { "yesterday", tDAY_UNIT, -1 }, + { "today", tDAY_UNIT, 0 }, + { "now", tDAY_UNIT, 0 }, + { "last", tUNUMBER, -1 }, + { "this", tUNUMBER, 0 }, + { "next", tUNUMBER, 1 }, + { "first", tUNUMBER, 1 }, +/* { "second", tUNUMBER, 2 }, */ + { "third", tUNUMBER, 3 }, + { "fourth", tUNUMBER, 4 }, + { "fifth", tUNUMBER, 5 }, + { "sixth", tUNUMBER, 6 }, + { "seventh", tUNUMBER, 7 }, + { "eighth", tUNUMBER, 8 }, + { "ninth", tUNUMBER, 9 }, + { "tenth", tUNUMBER, 10 }, + { "eleventh", tUNUMBER, 11 }, + { "twelfth", tUNUMBER, 12 }, + { "ago", tAGO, 1 }, + { NULL, 0, 0 } +}; + +/* The timezone table. */ +static TABLE const TimezoneTable[] = { + { "gmt", tZONE, HOUR ( 0) }, /* Greenwich Mean */ + { "ut", tZONE, HOUR ( 0) }, /* Universal (Coordinated) */ + { "utc", tZONE, HOUR ( 0) }, + { "wet", tZONE, HOUR ( 0) }, /* Western European */ + { "bst", tDAYZONE, HOUR ( 0) }, /* British Summer */ + { "wat", tZONE, HOUR ( 1) }, /* West Africa */ + { "at", tZONE, HOUR ( 2) }, /* Azores */ +#if 0 + /* For completeness. BST is also British Summer, and GST is + * also Guam Standard. 
*/ + { "bst", tZONE, HOUR ( 3) }, /* Brazil Standard */ + { "gst", tZONE, HOUR ( 3) }, /* Greenland Standard */ +#endif +#if 0 + { "nft", tZONE, HOUR (3.5) }, /* Newfoundland */ + { "nst", tZONE, HOUR (3.5) }, /* Newfoundland Standard */ + { "ndt", tDAYZONE, HOUR (3.5) }, /* Newfoundland Daylight */ +#endif + { "ast", tZONE, HOUR ( 4) }, /* Atlantic Standard */ + { "adt", tDAYZONE, HOUR ( 4) }, /* Atlantic Daylight */ + { "est", tZONE, HOUR ( 5) }, /* Eastern Standard */ + { "edt", tDAYZONE, HOUR ( 5) }, /* Eastern Daylight */ + { "cst", tZONE, HOUR ( 6) }, /* Central Standard */ + { "cdt", tDAYZONE, HOUR ( 6) }, /* Central Daylight */ + { "mst", tZONE, HOUR ( 7) }, /* Mountain Standard */ + { "mdt", tDAYZONE, HOUR ( 7) }, /* Mountain Daylight */ + { "pst", tZONE, HOUR ( 8) }, /* Pacific Standard */ + { "pdt", tDAYZONE, HOUR ( 8) }, /* Pacific Daylight */ + { "yst", tZONE, HOUR ( 9) }, /* Yukon Standard */ + { "ydt", tDAYZONE, HOUR ( 9) }, /* Yukon Daylight */ + { "hst", tZONE, HOUR (10) }, /* Hawaii Standard */ + { "hdt", tDAYZONE, HOUR (10) }, /* Hawaii Daylight */ + { "cat", tZONE, HOUR (10) }, /* Central Alaska */ + { "akst", tZONE, HOUR (10) }, /* Alaska Standard */ + { "akdt", tZONE, HOUR (10) }, /* Alaska Daylight */ + { "ahst", tZONE, HOUR (10) }, /* Alaska-Hawaii Standard */ + { "nt", tZONE, HOUR (11) }, /* Nome */ + { "idlw", tZONE, HOUR (12) }, /* International Date Line West */ + { "cet", tZONE, -HOUR (1) }, /* Central European */ + { "cest", tDAYZONE, -HOUR (1) }, /* Central European Summer */ + { "met", tZONE, -HOUR (1) }, /* Middle European */ + { "mewt", tZONE, -HOUR (1) }, /* Middle European Winter */ + { "mest", tDAYZONE, -HOUR (1) }, /* Middle European Summer */ + { "mesz", tDAYZONE, -HOUR (1) }, /* Middle European Summer */ + { "swt", tZONE, -HOUR (1) }, /* Swedish Winter */ + { "sst", tDAYZONE, -HOUR (1) }, /* Swedish Summer */ + { "fwt", tZONE, -HOUR (1) }, /* French Winter */ + { "fst", tDAYZONE, -HOUR (1) }, /* French Summer */ + { "eet", 
tZONE, -HOUR (2) }, /* Eastern Europe, USSR Zone 1 */ + { "bt", tZONE, -HOUR (3) }, /* Baghdad, USSR Zone 2 */ +#if 0 + { "it", tZONE, -HOUR (3.5) },/* Iran */ +#endif + { "zp4", tZONE, -HOUR (4) }, /* USSR Zone 3 */ + { "zp5", tZONE, -HOUR (5) }, /* USSR Zone 4 */ +#if 0 + { "ist", tZONE, -HOUR (5.5) },/* Indian Standard */ +#endif + { "zp6", tZONE, -HOUR (6) }, /* USSR Zone 5 */ +#if 0 + /* For completeness. NST is also Newfoundland Standard, and SST is + * also Swedish Summer. */ + { "nst", tZONE, -HOUR (6.5) },/* North Sumatra */ + { "sst", tZONE, -HOUR (7) }, /* South Sumatra, USSR Zone 6 */ +#endif /* 0 */ + { "wast", tZONE, -HOUR (7) }, /* West Australian Standard */ + { "wadt", tDAYZONE, -HOUR (7) }, /* West Australian Daylight */ +#if 0 + { "jt", tZONE, -HOUR (7.5) },/* Java (3pm in Cronusland!) */ +#endif + { "cct", tZONE, -HOUR (8) }, /* China Coast, USSR Zone 7 */ + { "jst", tZONE, -HOUR (9) }, /* Japan Standard, USSR Zone 8 */ +#if 0 + { "cast", tZONE, -HOUR (9.5) },/* Central Australian Standard */ + { "cadt", tDAYZONE, -HOUR (9.5) },/* Central Australian Daylight */ +#endif + { "east", tZONE, -HOUR (10) }, /* Eastern Australian Standard */ + { "eadt", tDAYZONE, -HOUR (10) }, /* Eastern Australian Daylight */ + { "gst", tZONE, -HOUR (10) }, /* Guam Standard, USSR Zone 9 */ + { "nzt", tZONE, -HOUR (12) }, /* New Zealand */ + { "nzst", tZONE, -HOUR (12) }, /* New Zealand Standard */ + { "nzdt", tDAYZONE, -HOUR (12) }, /* New Zealand Daylight */ + { "idle", tZONE, -HOUR (12) }, /* International Date Line East */ + { NULL, 0, 0 } +}; + +/* Military timezone table. 
*/ +static TABLE const MilitaryTable[] = { + { "a", tZONE, HOUR (- 1) }, + { "b", tZONE, HOUR (- 2) }, + { "c", tZONE, HOUR (- 3) }, + { "d", tZONE, HOUR (- 4) }, + { "e", tZONE, HOUR (- 5) }, + { "f", tZONE, HOUR (- 6) }, + { "g", tZONE, HOUR (- 7) }, + { "h", tZONE, HOUR (- 8) }, + { "i", tZONE, HOUR (- 9) }, + { "k", tZONE, HOUR (-10) }, + { "l", tZONE, HOUR (-11) }, + { "m", tZONE, HOUR (-12) }, + { "n", tZONE, HOUR ( 1) }, + { "o", tZONE, HOUR ( 2) }, + { "p", tZONE, HOUR ( 3) }, + { "q", tZONE, HOUR ( 4) }, + { "r", tZONE, HOUR ( 5) }, + { "s", tZONE, HOUR ( 6) }, + { "t", tTZONE, HOUR ( 7) }, + { "u", tZONE, HOUR ( 8) }, + { "v", tZONE, HOUR ( 9) }, + { "w", tWZONE, HOUR ( 10) }, + { "x", tZONE, HOUR ( 11) }, + { "y", tZONE, HOUR ( 12) }, + { "z", tZZONE, HOUR ( 0) }, + { NULL, 0, 0 } +}; + + + + +/* ARGSUSED */ +static int +yyerror(void* parm, const char *s) +{ + return 0; +} + +static int +ToHour(int Hours, MERIDIAN Meridian) +{ + switch (Meridian) + { + case MER24: + if(Hours < 0 || Hours > 23) + return -1; + return Hours; + case MERam: + if(Hours < 1 || Hours > 12) + return -1; + if(Hours == 12) + Hours = 0; + return Hours; + case MERpm: + if(Hours < 1 || Hours > 12) + return -1; + if(Hours == 12) + Hours = 0; + return Hours + 12; + default: +#ifdef RAPTOR_DEBUG + fprintf(stderr, "%s:%d:%s: UNKNOWN Meridian %d - add a new case", + __FILE__, __LINE__, __FUNCTION__, (int)Meridian); +#endif + return -1; + } + /* NOTREACHED */ +} + +static int +ToYear(int Year) +{ + if(Year < 0) + Year = -Year; + + /* XPG4 suggests that years 00-68 map to 2000-2068, and + years 69-99 map to 1969-1999. */ + if(Year < 69) + Year += 2000; + else if(Year < 100) + Year += 1900; + + return Year; +} + +static int +LookupWord (YYSTYPE *lvalp, char *buff) +{ + char *p; + char *q; + const TABLE *tp; + int i; + int abbrev; + + /* Make it lowercase. 
*/ + for(p = buff; *p; p++) + if(isupper ((unsigned char) *p)) + *p = tolower (*p); + + if(strcmp (buff, "am") == 0 || strcmp (buff, "a.m.") == 0) + { + lvalp->Meridian = MERam; + return tMERIDIAN; + } + if(strcmp (buff, "pm") == 0 || strcmp (buff, "p.m.") == 0) + { + lvalp->Meridian = MERpm; + return tMERIDIAN; + } + + /* See if we have an abbreviation for a month. */ + if(strlen (buff) == 3) + abbrev = 1; + else if(strlen (buff) == 4 && buff[3] == '.') + { + abbrev = 1; + buff[3] = '\0'; + } + else + abbrev = 0; + + for(tp = MonthDayTable; tp->name; tp++) + { + if(abbrev) + { + if(strncmp (buff, tp->name, 3) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + } + else if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + } + + for(tp = TimezoneTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + if(strcmp (buff, "dst") == 0) + return tDST; + + for(tp = UnitsTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + /* Strip off any plural and try the units table again. */ + i = RAPTOR_BAD_CAST(int, strlen(buff)) - 1; + if(buff[i] == 's') + { + buff[i] = '\0'; + for(tp = UnitsTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + buff[i] = 's'; /* Put back for "this" in OtherTable. */ + } + + for(tp = OtherTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + /* Military timezones. */ + if(buff[1] == '\0' && isalpha ((unsigned char) *buff)) + { + for(tp = MilitaryTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + } + + /* Drop out any periods and try the timezone table again. 
*/ + for(i = 0, p = q = buff; *q; q++) + if(*q != '.') + *p++ = *q; + else + i++; + *p = '\0'; + if(i) + for(tp = TimezoneTable; tp->name; tp++) + if(strcmp (buff, tp->name) == 0) + { + lvalp->Number = tp->value; + return tp->type; + } + + return tID; +} + +int yylex(YYSTYPE *lvalp, void *parm) +{ + unsigned char c; + char *p; + char buff[20]; + int Count; + int sign; + struct date_yy * date = (struct date_yy *)parm; + + for(;;) + { + while(isspace ((unsigned char) *date->yyInput)) + date->yyInput++; + + if(isdigit (c = *date->yyInput) || c == '-' || c == '+') + { + if(c == '-' || c == '+') + { + sign = c == '-' ? -1 : 1; + if(!ISDIGIT (*++date->yyInput)) + /* skip the '-' sign */ + continue; + } + else + sign = 0; + for(lvalp->Number = 0; ISDIGIT (c = *date->yyInput++);) + lvalp->Number = 10 * lvalp->Number + c - '0'; + date->yyInput--; + if(sign < 0) + lvalp->Number = -lvalp->Number; + /* Ignore ordinal suffixes on numbers */ + c = *date->yyInput; + if(c == 's' || c == 'n' || c == 'r' || c == 't') { + c = *++date->yyInput; + if(c == 't' || c == 'd' || c == 'h') { + date->yyInput++; + } else { + date->yyInput--; + } + } + return sign ? tSNUMBER : tUNUMBER; + } + if(isalpha (c)) + { + for(p = buff; (c = *date->yyInput++, isalpha (c)) || c == '.';) + if(p < &buff[sizeof buff - 1]) + *p++ = c; + *p = '\0'; + date->yyInput--; + return LookupWord (lvalp, buff); + } + if(c != '(') + return *date->yyInput++; + Count = 0; + do + { + c = *date->yyInput++; + if(c == '\0') + return c; + if(c == '(') + Count++; + else if(c == ')') + Count--; + } + while(Count > 0); + } +} + +#define TM_YEAR_ORIGIN 1900 + +/* Yield A - B, measured in seconds. 
*/ +static long +difftm (struct tm *a, struct tm *b) +{ + int ay = a->tm_year + (TM_YEAR_ORIGIN - 1); + int by = b->tm_year + (TM_YEAR_ORIGIN - 1); + long days = ( + /* difference in day of year */ + a->tm_yday - b->tm_yday + /* + intervening leap days */ + + ((ay >> 2) - (by >> 2)) + - (ay / 100 - by / 100) + + ((ay / 100 >> 2) - (by / 100 >> 2)) + /* + difference in years * 365 */ + + (long) (ay - by) * 365 + ); + return (60 * (60 * (24 * days + (a->tm_hour - b->tm_hour)) + + (a->tm_min - b->tm_min)) + + (a->tm_sec - b->tm_sec)); +} + +time_t raptor_parse_date(const char *p, time_t *now) +{ + struct tm tm, tm0, *tmp; + time_t Start; + struct date_yy date; + + date.yyInput = p; + Start = now ? *now : time ((time_t *) NULL); + tmp = localtime (&Start); + if(!tmp) + return -1; + date.yyYear = tmp->tm_year + TM_YEAR_ORIGIN; + date.yyMonth = tmp->tm_mon + 1; + date.yyDay = tmp->tm_mday; + date.yyHour = tmp->tm_hour; + date.yyMinutes = tmp->tm_min; + date.yySeconds = tmp->tm_sec; + tm.tm_isdst = tmp->tm_isdst; + date.yyMeridian = MER24; + date.yyRelSeconds = 0; + date.yyRelMinutes = 0; + date.yyRelHour = 0; + date.yyRelDay = 0; + date.yyRelMonth = 0; + date.yyRelYear = 0; + date.yyHaveDate = 0; + date.yyHaveDay = 0; + date.yyHaveRel = 0; + date.yyHaveTime = 0; + date.yyHaveZone = 0; + + if(yyparse (&date) + || date.yyHaveTime > 1 || date.yyHaveZone > 1 + || date.yyHaveDate > 1 || date.yyHaveDay > 1) { + return -1; + } + tm.tm_year = ToYear (date.yyYear) - TM_YEAR_ORIGIN + date.yyRelYear; + tm.tm_mon = date.yyMonth - 1 + date.yyRelMonth; + tm.tm_mday = date.yyDay + date.yyRelDay; + if(date.yyHaveTime || (date.yyHaveRel && !date.yyHaveDate && !date.yyHaveDay)) + { + tm.tm_hour = ToHour (date.yyHour, date.yyMeridian); + if(tm.tm_hour < 0) + return -1; + tm.tm_min = date.yyMinutes; + tm.tm_sec = date.yySeconds; + } + else + { + tm.tm_hour = tm.tm_min = tm.tm_sec = 0; + } + tm.tm_hour += date.yyRelHour; + tm.tm_min += date.yyRelMinutes; + tm.tm_sec += date.yyRelSeconds; + + 
/* Let mktime deduce tm_isdst if we have an absolute timestamp, + or if the relative timestamp mentions days, months, or years. */ + if(date.yyHaveDate | date.yyHaveDay | date.yyHaveTime | date.yyRelDay | date.yyRelMonth | date.yyRelYear) + tm.tm_isdst = -1; + + tm0 = tm; + + Start = mktime (&tm); + + if(Start == (time_t) -1) + { + + /* Guard against falsely reporting errors near the time_t boundaries + when parsing times in other time zones. For example, if the min + time_t value is 1970-01-01 00:00:00 UTC and we are 8 hours ahead + of UTC, then the min localtime value is 1970-01-01 08:00:00; if + we apply mktime to 1970-01-01 00:00:00 we will get an error, so + we apply mktime to 1970-01-02 08:00:00 instead and adjust the time + zone by 24 hours to compensate. This algorithm assumes that + there is no DST transition within a day of the time_t boundaries. */ + if(date.yyHaveZone) + { + tm = tm0; + if(tm.tm_year <= EPOCH - TM_YEAR_ORIGIN) + { + tm.tm_mday++; + date.yyTimezone -= 24 * 60; + } + else + { + tm.tm_mday--; + date.yyTimezone += 24 * 60; + } + Start = mktime (&tm); + } + + if(Start == (time_t) -1) + return Start; + } + + if(date.yyHaveDay && !date.yyHaveDate) + { + tm.tm_mday += ((date.yyDayNumber - tm.tm_wday + 7) % 7 + + 7 * (date.yyDayOrdinal - (0 < date.yyDayOrdinal))); + Start = mktime (&tm); + if(Start == (time_t) -1) + return Start; + } + + if(date.yyHaveZone) + { + long delta; + struct tm *gmt = gmtime (&Start); + if(!gmt) + return -1; + delta = date.yyTimezone * 60L + difftm (&tm, gmt); + + if((Start + delta < Start) != (delta < 0)) + return -1; /* time_t overflow */ + Start += delta; + } + + return Start; +} diff --git a/src/raptor.h b/src/raptor.h new file mode 100644 index 0000000..91d361e --- /dev/null +++ b/src/raptor.h @@ -0,0 +1 @@ +#include <raptor2.h> diff --git a/src/raptor2.h.in b/src/raptor2.h.in new file mode 100644 index 0000000..f97326a --- /dev/null +++ b/src/raptor2.h.in @@ -0,0 +1,2203 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + 
* + * raptor.h - Redland Parser Toolkit for RDF (Raptor) - public API + * + * Copyright (C) 2000-2013, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + + +#ifndef RAPTOR_H +#define RAPTOR_H + + +#ifdef __cplusplus +extern "C" { +#endif + +#include <stdio.h> + +/* Required for va_list in raptor_vsnprintf */ +#include <stdarg.h> + + +/** + * RAPTOR_V2_AVAILABLE + * + * Flag for marking raptor2 API availability. + */ +#define RAPTOR_V2_AVAILABLE 1 + + +/** + * RAPTOR_VERSION: + * + * Raptor library version number + * + * Format: major * 10000 + minor * 100 + release + */ +#define RAPTOR_VERSION @RAPTOR_VERSION_DECIMAL@ + +/** + * RAPTOR_VERSION_STRING: + * + * Raptor library version string + */ +#define RAPTOR_VERSION_STRING "@VERSION@" + +/** + * RAPTOR_VERSION_MAJOR: + * + * Raptor library major version + */ +#define RAPTOR_VERSION_MAJOR @RAPTOR_VERSION_MAJOR@ + +/** + * RAPTOR_VERSION_MINOR: + * + * Raptor library minor version + */ +#define RAPTOR_VERSION_MINOR @RAPTOR_VERSION_MINOR@ + +/** + * RAPTOR_VERSION_RELEASE: + * + * Raptor library release + */ +#define RAPTOR_VERSION_RELEASE @RAPTOR_VERSION_RELEASE@ + +/** + * RAPTOR_API: + * + * Macro for wrapping API function call declarations. 
+ * + */ +#ifndef RAPTOR_API +# ifdef WIN32 +# ifdef __GNUC__ +# undef _declspec +# define _declspec(x) __declspec(x) +# endif +# ifdef RAPTOR_STATIC +# define RAPTOR_API +# else +# ifdef RAPTOR_INTERNAL +# define RAPTOR_API _declspec(dllexport) +# else +# define RAPTOR_API _declspec(dllimport) +# endif +# endif +# else +# define RAPTOR_API +# endif +#endif + +/* Use gcc 3.1+ feature to allow marking of deprecated API calls. + * This gives a warning during compiling. + */ +#if defined(__GNUC__) && (__GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 1)) +#define RAPTOR_DEPRECATED __attribute__((deprecated)) +#define RAPTOR_NORETURN __attribute__((__noreturn__)) +#else +#define RAPTOR_DEPRECATED +#define RAPTOR_NORETURN +#endif + +/** + * RAPTOR_PRINTF_FORMAT: + * @string_index: ignore me + * @first_to_check_index: ignore me + * + * Internal macro + */ +#if defined(__GNUC__) && (__GNUC__ > 2 || (__GNUC__ == 2 && __GNUC_MINOR__ >= 5)) +#define RAPTOR_PRINTF_FORMAT(string_index, first_to_check_index) \ + __attribute__((__format__(__printf__, string_index, first_to_check_index))) +#else +#define RAPTOR_PRINTF_FORMAT(string_index, first_to_check_index) +#endif + +/** + * raptor_uri: + * + * Raptor URI Class. + */ +typedef struct raptor_uri_s raptor_uri; + + +/* Public statics */ + +/** + * raptor_short_copyright_string: + * + * Short copyright string (one line). + */ +RAPTOR_API +extern const char * const raptor_short_copyright_string; + +/** + * raptor_copyright_string: + * + * Copyright string (multiple lines). + */ +RAPTOR_API +extern const char * const raptor_copyright_string; + +/** + * raptor_version_string: + * + * Raptor version as a string. + */ +RAPTOR_API +extern const char * const raptor_version_string; + +/** + * raptor_version_major: + * + * Raptor major version number. + */ +RAPTOR_API +extern const unsigned int raptor_version_major; + +/** + * raptor_version_minor: + * + * Raptor minor version number. 
+ */ +RAPTOR_API +extern const unsigned int raptor_version_minor; + +/** + * raptor_version_release: + * + * Raptor release version number. + */ +RAPTOR_API +extern const unsigned int raptor_version_release; + +/** + * raptor_version_decimal: + * + * Raptor version as a decimal number. + * + * Format: major * 10000 + minor * 100 + release + */ +RAPTOR_API +extern const unsigned int raptor_version_decimal; + +/** + * raptor_license_string: + * + * Raptor license string. + */ +RAPTOR_API +extern const char * const raptor_license_string; + +/** + * raptor_home_url_string: + * + * Raptor home page URL. + */ +RAPTOR_API +extern const char * const raptor_home_url_string; + +/** + * raptor_xml_namespace_uri: + * + * XML Namespace (xml:) URI string. + */ +RAPTOR_API +extern const unsigned char * const raptor_xml_namespace_uri; + + +/** + * raptor_rdf_namespace_uri: + * + * RDF Namespace (rdf:) URI string. + */ +RAPTOR_API +extern const unsigned char * const raptor_rdf_namespace_uri; + +/** + * raptor_rdf_namespace_uri_len: + * + * Length of #raptor_rdf_namespace_uri string + */ +RAPTOR_API +extern const unsigned int raptor_rdf_namespace_uri_len; + +/** + * raptor_rdf_schema_namespace_uri: + * + * RDF Schema (rdfs:) Namespace URI string. + */ +RAPTOR_API +extern const unsigned char * const raptor_rdf_schema_namespace_uri; + +/** + * raptor_rdf_schema_namespace_uri_len: + * + * Length of #raptor_rdf_schema_namespace_uri string + */ +RAPTOR_API +extern const unsigned int raptor_rdf_schema_namespace_uri_len; + +/** + * raptor_xmlschema_datatypes_namespace_uri: + * + * XML Schema datatypes (xsd:) namespace URI string. + */ +RAPTOR_API +extern const unsigned char * const raptor_xmlschema_datatypes_namespace_uri; + +/** + * raptor_owl_namespace_uri: + * + * OWL (owl:) Namespace URI string. + */ +RAPTOR_API +extern const unsigned char * const raptor_owl_namespace_uri; + +/** + * raptor_xml_literal_datatype_uri_string: + * + * XML Literal datatype (rdf:XMLLiteral) URI string. 
+ */ +RAPTOR_API +extern const unsigned char * const raptor_xml_literal_datatype_uri_string; + +/** + * raptor_xml_literal_datatype_uri_string_len: + * + * Length of #raptor_xml_literal_datatype_uri_string + */ +RAPTOR_API +extern const unsigned int raptor_xml_literal_datatype_uri_string_len; + + +/* Public structure */ +/** + * raptor_world: + * + * Raptor world class. + */ +typedef struct raptor_world_s raptor_world; +/** + * raptor_parser: + * + * Raptor Parser class + */ +typedef struct raptor_parser_s raptor_parser; +/** + * raptor_serializer: + * + * Raptor Serializer class + */ +typedef struct raptor_serializer_s raptor_serializer; + +/** + * raptor_www: + * + * Raptor WWW class + */ +typedef struct raptor_www_s raptor_www; +/** + * raptor_iostream: + * + * Raptor I/O Stream class + */ +typedef struct raptor_iostream_s raptor_iostream; +/** + * raptor_xml_element: + * + * Raptor XML Element class + */ +typedef struct raptor_xml_element_s raptor_xml_element; +/** + * raptor_xml_writer: + * + * Raptor XML Writer class + */ +typedef struct raptor_xml_writer_s raptor_xml_writer; +/** + * raptor_qname: + * + * Raptor XML qname class + */ +typedef struct raptor_qname_s raptor_qname; +/** + * raptor_namespace: + * + * Raptor XML Namespace class + */ +typedef struct raptor_namespace_s raptor_namespace; +/** + * raptor_namespace_stack: + * + * Raptor XML Namespace Stack class + */ +typedef struct raptor_namespace_stack_s raptor_namespace_stack; + +/** + * raptor_sax2: + * + * Raptor SAX2 class + */ +typedef struct raptor_sax2_s raptor_sax2; + + +/** + * raptor_type_q: + * @mime_type: MIME type string + * @mime_type_len: length of @mime_type + * @q: Q value 0-10 standing for decimal 0.0-1.0 + * + * (MIME Type, Q) pair + */ +typedef struct { + const char* mime_type; + size_t mime_type_len; + unsigned char q; +} raptor_type_q; + + +/** + * raptor_syntax_bitflags: + * @RAPTOR_SYNTAX_NEED_BASE_URI: the syntax requires a base URI + * + * Bit flags for 
#raptor_syntax_description flags field + */ +typedef enum { + RAPTOR_SYNTAX_NEED_BASE_URI = 1 +} raptor_syntax_bitflags; + + +/** + * raptor_syntax_description: + * @names: array of syntax names - the first one (required) is the public name, the rest are aliases. The array is NULL terminated. + * @names_count: size of @names array + * @label: long descriptive label for syntax + * @mime_types: Array of (MIME type, Q) values associated with the syntax (or NULL). If present the array is NULL terminated. + * @mime_types_count: size of @mime_types array + * @uri_strings: array of URIs identifying the syntax (or NULL). The first one if present is the main URI, the rest are aliases. The array is NULL terminated. + * @uri_strings_count: size of @uri_strings array + * @flags: See #raptor_syntax_bitflags for the bits + * + * Description of a syntax or file format. + * + */ +typedef struct { + const char* const* names; + unsigned int names_count; + + const char* label; + + const raptor_type_q* mime_types; + unsigned int mime_types_count; + + const char* const* uri_strings; + unsigned int uri_strings_count; + + unsigned int flags; +} raptor_syntax_description; + + +/** + * raptor_term_type: + * @RAPTOR_TERM_TYPE_URI: RDF URI + * @RAPTOR_TERM_TYPE_LITERAL: RDF literal + * @RAPTOR_TERM_TYPE_BLANK: RDF blank node + * @RAPTOR_TERM_TYPE_UNKNOWN: Internal + * + * Type of term in a #raptor_statement + * + * Node type 3 is unused but exists to preserve numeric compatibility + * with librdf_node_type values. 
+ */ +typedef enum { + RAPTOR_TERM_TYPE_UNKNOWN = 0, + RAPTOR_TERM_TYPE_URI = 1, + RAPTOR_TERM_TYPE_LITERAL = 2, + /* unused type 3 */ + RAPTOR_TERM_TYPE_BLANK = 4 +} raptor_term_type; + + +/** + * raptor_locator: + * @uri: URI of location (or NULL) + * @file: Filename of location (or NULL) + * @line: Line number of location (or <0 for no line) + * @column: Column number of location (or <0 for no column) + * @byte: Byte number of location (or <0 for no byte) + * + * Location information for an error, warning or information message. + */ +typedef struct { + raptor_uri *uri; + const char *file; + int line; + int column; + int byte; +} raptor_locator; + +/** + * raptor_option: + * @RAPTOR_OPTION_SCANNING: If true (default false), the RDF/XML + * parser will look for embedded rdf:RDF elements inside the XML + * content, and not require that the XML start with an rdf:RDF root + * element. + * @RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES: If true (default true) + * then the RDF/XML parser will allow non-XML namespaced attributes + * to be accepted as well as rdf: namespaced ones. For example, + * 'about' and 'ID' will be interpreted as if they were rdf:about + * and rdf:ID respectively. + * @RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES: If true (default true) + * then the RDF/XML parser will allow unknown parsetypes to be + * present and will pass them on to the user. Unimplemented at + * present. + * @RAPTOR_OPTION_ALLOW_BAGID: If true (default true) then the + * RDF/XML parser will support the rdf:bagID attribute that was + * removed from the RDF/XML language when it was revised. This + * support may be removed in future. + * @RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST: If true (default false) + * then the RDF/XML parser will generate the idList rdf:type + * rdf:List triple in the handling of rdf:parseType="Collection". + * This triple was removed during the revising of RDF/XML after + * collections were initially added. 
+ * @RAPTOR_OPTION_NORMALIZE_LANGUAGE: If true (default true) then + * XML language values such as from xml:lang will be normalized to + * lowercase. + * @RAPTOR_OPTION_NON_NFC_FATAL: If true (default false) then + * illegal Unicode Normal Form C in literals will give a fatal + * error, otherwise just a warning. + * @RAPTOR_OPTION_WARN_OTHER_PARSETYPES: If true (default true) then + * the RDF/XML parser will warn about unknown rdf:parseType values. + * @RAPTOR_OPTION_CHECK_RDF_ID: If true (default true) then the + * RDF/XML parser will check rdf:ID attribute values for duplicates and + * cause an error if any are found. + * @RAPTOR_OPTION_RELATIVE_URIS: If true (default true) then + * relative URIs will be used wherever possible when serializing. + * @RAPTOR_OPTION_WRITER_AUTO_INDENT: Automatically indent elements when + * serializing. + * @RAPTOR_OPTION_WRITER_AUTO_EMPTY: Automatically detect and + * abbreviate empty elements when serializing. + * @RAPTOR_OPTION_WRITER_INDENT_WIDTH: Integer number of spaces to use + * for each indent level when serializing with auto indent. + * @RAPTOR_OPTION_WRITER_XML_VERSION: Integer XML version XML 1.0 (10) or XML 1.1 (11) + * @RAPTOR_OPTION_WRITER_XML_DECLARATION: Write XML 1.0 or 1.1 declaration. + * @RAPTOR_OPTION_NO_NET: Deny network requests inside other requests. + * @RAPTOR_OPTION_RESOURCE_BORDER: Border color of resource + * nodes for GraphViz DOT serializer. + * @RAPTOR_OPTION_LITERAL_BORDER: Border color of literal nodes + * for GraphViz DOT serializer. + * @RAPTOR_OPTION_BNODE_BORDER: Border color of blank nodes for + * GraphViz DOT serializer. + * @RAPTOR_OPTION_RESOURCE_FILL: Fill color of resource nodes + * for GraphViz DOT serializer. + * @RAPTOR_OPTION_LITERAL_FILL: Fill color of literal nodes for + * GraphViz DOT serializer. + * @RAPTOR_OPTION_BNODE_FILL: Fill color of blank nodes for + * GraphViz DOT serializer. 
+ * @RAPTOR_OPTION_HTML_TAG_SOUP: Use a lax HTML parser if an XML parser + * fails when read HTML for GRDDL parser. + * @RAPTOR_OPTION_MICROFORMATS: Look for microformats for GRDDL parser. + * @RAPTOR_OPTION_HTML_LINK: Look for head <link> to type rdf/xml + * for GRDDL parser. + * @RAPTOR_OPTION_WWW_TIMEOUT: Set timeout for internal WWW URI requests + * for GRDDL parser. + * @RAPTOR_OPTION_WRITE_BASE_URI: Write @base directive for Turtle/N3. + * @RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL: HTTP Cache-Control: header + * @RAPTOR_OPTION_WWW_HTTP_USER_AGENT: HTTP User-Agent: header + * @RAPTOR_OPTION_JSON_CALLBACK: JSON serializer callback function. + * @RAPTOR_OPTION_JSON_EXTRA_DATA: JSON serializer extra top-level data + * @RAPTOR_OPTION_RSS_TRIPLES: Atom/RSS serializer writes extra RDF triples it finds (none, rdf-xml, atom-triples) + * @RAPTOR_OPTION_ATOM_ENTRY_URI: Atom entry URI. If given, generate an Atom Entry Document with the item having the given URI, otherwise generate an Atom Feed Document with any items found. + * @RAPTOR_OPTION_PREFIX_ELEMENTS: Integer. If set, generate Atom/RSS1.0 documents with prefixed elements, otherwise unprefixed. + * @RAPTOR_OPTION_STRICT: Boolean. If set, operate in strict conformance mode. + * @RAPTOR_OPTION_WWW_CERT_FILENAME: String. SSL client certificate filename + * @RAPTOR_OPTION_WWW_CERT_TYPE: String. SSL client certificate type + * @RAPTOR_OPTION_WWW_CERT_PASSPHRASE: String. SSL client certificate passphrase + * @RAPTOR_OPTION_WWW_SSL_VERIFY_PEER: Integer. SSL verify peer - non-0 to verify peer SSL certificate (default) + * @RAPTOR_OPTION_WWW_SSL_VERIFY_HOST: Integer. SSL verify host - 0 none, 1 CN match, 2 host match (default). Other values are ignored. + * @RAPTOR_OPTION_NO_FILE: Deny file reading requests inside other requests. + * @RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES: When reading XML, load external entities. + * @RAPTOR_OPTION_LAST: Internal + * + * Raptor parser, serializer or XML writer options. 
+ */ +typedef enum { + RAPTOR_OPTION_SCANNING, + RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES, + RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES, + RAPTOR_OPTION_ALLOW_BAGID, + RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST, + RAPTOR_OPTION_NORMALIZE_LANGUAGE, + RAPTOR_OPTION_NON_NFC_FATAL, + RAPTOR_OPTION_WARN_OTHER_PARSETYPES, + RAPTOR_OPTION_CHECK_RDF_ID, + RAPTOR_OPTION_RELATIVE_URIS, + RAPTOR_OPTION_WRITER_AUTO_INDENT, + RAPTOR_OPTION_WRITER_AUTO_EMPTY, + RAPTOR_OPTION_WRITER_INDENT_WIDTH, + RAPTOR_OPTION_WRITER_XML_VERSION, + RAPTOR_OPTION_WRITER_XML_DECLARATION, + RAPTOR_OPTION_NO_NET, + RAPTOR_OPTION_RESOURCE_BORDER, + RAPTOR_OPTION_LITERAL_BORDER, + RAPTOR_OPTION_BNODE_BORDER, + RAPTOR_OPTION_RESOURCE_FILL, + RAPTOR_OPTION_LITERAL_FILL, + RAPTOR_OPTION_BNODE_FILL, + RAPTOR_OPTION_HTML_TAG_SOUP, + RAPTOR_OPTION_MICROFORMATS, + RAPTOR_OPTION_HTML_LINK, + RAPTOR_OPTION_WWW_TIMEOUT, + RAPTOR_OPTION_WRITE_BASE_URI, + RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL, + RAPTOR_OPTION_WWW_HTTP_USER_AGENT, + RAPTOR_OPTION_JSON_CALLBACK, + RAPTOR_OPTION_JSON_EXTRA_DATA, + RAPTOR_OPTION_RSS_TRIPLES, + RAPTOR_OPTION_ATOM_ENTRY_URI, + RAPTOR_OPTION_PREFIX_ELEMENTS, + RAPTOR_OPTION_STRICT, + RAPTOR_OPTION_WWW_CERT_FILENAME, + RAPTOR_OPTION_WWW_CERT_TYPE, + RAPTOR_OPTION_WWW_CERT_PASSPHRASE, + RAPTOR_OPTION_NO_FILE, + RAPTOR_OPTION_WWW_SSL_VERIFY_PEER, + RAPTOR_OPTION_WWW_SSL_VERIFY_HOST, + RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, + RAPTOR_OPTION_LAST = RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES +} raptor_option; + + +/** + * raptor_term_literal_value: + * @string: literal string + * @string_len: length of string + * @datatype: datatype URI (or NULL) + * @language: literal language (or NULL) + * @language_len: length of language + * + * Literal term value - this typedef exists solely for use in #raptor_term + * + * Either @datatype or @language may be non-NULL but not both. 
+ */ +typedef struct { + unsigned char *string; + unsigned int string_len; + + raptor_uri *datatype; + + unsigned char *language; + unsigned char language_len; +} raptor_term_literal_value; + + +/** + * raptor_term_blank_value: + * @string: blank node identifier string + * @string_len: length of string + * + * Blank term value - this typedef exists solely for use in #raptor_term + * + */ +typedef struct { + unsigned char *string; + unsigned int string_len; +} raptor_term_blank_value; + + +/** + * raptor_term_value: + * @uri: uri value when term type is #RAPTOR_TERM_TYPE_URI + * @literal: literal value when term type is #RAPTOR_TERM_TYPE_LITERAL + * @blank: blank value when term type is #RAPTOR_TERM_TYPE_BLANK + * + * Term value - this typedef exists solely for use in #raptor_term + * + **/ +typedef union { + raptor_uri *uri; + + raptor_term_literal_value literal; + + raptor_term_blank_value blank; +} raptor_term_value; + + +/** + * raptor_term: + * @world: world + * @usage: usage reference count (if >0) + * @type: term type + * @value: term values per type + * + * An RDF statement term + * + */ +typedef struct { + raptor_world* world; + + int usage; + + raptor_term_type type; + + raptor_term_value value; + +} raptor_term; + + +/** + * raptor_statement: + * @world: world pointer + * @usage: usage count + * @subject: statement subject + * @predicate: statement predicate + * @object: statement object + * @graph: statement graph name (or NULL if not present) + * + * An RDF triple with optional graph name (quad) + * + * See #raptor_term for a description of how the fields may be used. + * As returned by a parser statement_handler. 
+ */ +typedef struct { + raptor_world* world; + int usage; + raptor_term* subject; + raptor_term* predicate; + raptor_term* object; + raptor_term* graph; +} raptor_statement; + + +/** + * raptor_log_level: + * @RAPTOR_LOG_LEVEL_NONE: Internal + * @RAPTOR_LOG_LEVEL_TRACE: very fine-grained tracing messages information + * @RAPTOR_LOG_LEVEL_DEBUG: fine-grained tracing messages suitable for debugging + * @RAPTOR_LOG_LEVEL_INFO: coarse-grained information messages + * @RAPTOR_LOG_LEVEL_WARN: warning messages of potentially harmful problems + * @RAPTOR_LOG_LEVEL_ERROR: error messages where the application can continue + * @RAPTOR_LOG_LEVEL_FATAL: fatal error message where the application will likely abort + * @RAPTOR_LOG_LEVEL_LAST: Internal + * + * Log levels + */ +typedef enum { + RAPTOR_LOG_LEVEL_NONE, + RAPTOR_LOG_LEVEL_TRACE, + RAPTOR_LOG_LEVEL_DEBUG, + RAPTOR_LOG_LEVEL_INFO, + RAPTOR_LOG_LEVEL_WARN, + RAPTOR_LOG_LEVEL_ERROR, + RAPTOR_LOG_LEVEL_FATAL, + RAPTOR_LOG_LEVEL_LAST = RAPTOR_LOG_LEVEL_FATAL +} raptor_log_level; + + +/** + * raptor_domain: + * @RAPTOR_DOMAIN_IOSTREAM: I/O stream + * @RAPTOR_DOMAIN_NAMESPACE: XML Namespace / namespace stack + * @RAPTOR_DOMAIN_PARSER: RDF Parser + * @RAPTOR_DOMAIN_QNAME: XML QName + * @RAPTOR_DOMAIN_SAX2: XML SAX2 + * @RAPTOR_DOMAIN_SERIALIZER: RDF Serializer + * @RAPTOR_DOMAIN_TERM: RDF Term + * @RAPTOR_DOMAIN_TURTLE_WRITER: Turtle Writer + * @RAPTOR_DOMAIN_URI: RDF Uri + * @RAPTOR_DOMAIN_WORLD: RDF world + * @RAPTOR_DOMAIN_WWW: WWW + * @RAPTOR_DOMAIN_XML_WRITER: XML Writer + * @RAPTOR_DOMAIN_NONE: Internal + * @RAPTOR_DOMAIN_LAST: Internal + * + * Log domain + */ +typedef enum { + RAPTOR_DOMAIN_NONE, + RAPTOR_DOMAIN_IOSTREAM, + RAPTOR_DOMAIN_NAMESPACE, + RAPTOR_DOMAIN_PARSER, + RAPTOR_DOMAIN_QNAME, + RAPTOR_DOMAIN_SAX2, + RAPTOR_DOMAIN_SERIALIZER, + RAPTOR_DOMAIN_TERM, + RAPTOR_DOMAIN_TURTLE_WRITER, + RAPTOR_DOMAIN_URI, + RAPTOR_DOMAIN_WORLD, + RAPTOR_DOMAIN_WWW, + RAPTOR_DOMAIN_XML_WRITER, + RAPTOR_DOMAIN_LAST = 
RAPTOR_DOMAIN_XML_WRITER +} raptor_domain; + + +/** + * raptor_log_message: + * @code: error code or < 0 if not used or known + * @domain: message domain or #RAPTOR_DOMAIN_NONE if not used or known + * @level: log message level + * @locator: location associated with message or NULL if not known + * @text: message string + * + * Log message. + */ +typedef struct { + int code; + raptor_domain domain; + raptor_log_level level; + raptor_locator *locator; + const char *text; +} raptor_log_message; + + +/** + * raptor_log_handler: + * @user_data: user data + * @message: log message + * + * Handler function for log messages with location + * + * Used during parsing and serializing for errors and warnings that + * may include location information.  Handlers may be set + * by raptor_world_set_log_handler(). + * + */ +typedef void (*raptor_log_handler)(void *user_data, raptor_log_message *message); + + +/** + * raptor_statement_handler: + * @user_data: user data + * @statement: statement to report + * + * Statement (triple) reporting handler function. + * + * This handler function set with + * raptor_parser_set_statement_handler() on a parser receives + * statements as the parsing proceeds.  The @statement argument to the + * handler is shared and must be copied by the caller with + * raptor_statement_copy(). + */ +typedef void (*raptor_statement_handler)(void *user_data, raptor_statement *statement); + +/** + * raptor_graph_mark_flags: + * @RAPTOR_GRAPH_MARK_START: mark is start of graph (otherwise is end) + * @RAPTOR_GRAPH_MARK_DECLARED: mark was declared in syntax rather than implicit + * + * Graph mark handler bitmask flags + */ +typedef enum { + RAPTOR_GRAPH_MARK_START = 1, + RAPTOR_GRAPH_MARK_DECLARED = 2 +} raptor_graph_mark_flags; + + +/** + * raptor_graph_mark_handler: + * @user_data: user data + * @graph: graph to report, NULL for the default graph + * @flags: bitmask of #raptor_graph_mark_flags flags + * + * Graph start/end mark handler function. 
+ * + * Records start and end of graphs happening in a stream of generated + * #raptor_statement via the statement handler. The callback starts a + * graph when @flags has #RAPTOR_GRAPH_MARK_START bit set. + * + * The start and ends may be either declared in the syntax via some + * keyword or mechanism such as TRiG {} syntax when @flags has bit + * #RAPTOR_GRAPH_MARK_DECLARED set, or be implied by the start/end of + * the data in other syntaxes, and the bit will be unset. + */ +typedef void (*raptor_graph_mark_handler)(void *user_data, raptor_uri *graph, int flags); + +/** + * raptor_generate_bnodeid_handler: + * @user_data: user data + * @user_bnodeid: a user-specified ID or NULL if none available. + * + * Generate a blank node identifier handler function. + * + * Return value: new blank node ID to use + */ +typedef unsigned char* (*raptor_generate_bnodeid_handler)(void *user_data, unsigned char* user_bnodeid); + +/** + * raptor_namespace_handler: + * @user_data: user data + * @nspace: #raptor_namespace declared + * + * XML Namespace declaration reporting handler set by + * raptor_parser_set_namespace_handler(). + */ +typedef void (*raptor_namespace_handler)(void* user_data, raptor_namespace *nspace); + + +/** + * raptor_www_write_bytes_handler: + * @www: WWW object + * @userdata: user data + * @ptr: data pointer + * @size: size of individual item + * @nmemb: number of items + * + * Receiving bytes of data from WWW retrieval handler. + * + * Set by raptor_www_set_write_bytes_handler(). + */ +typedef void (*raptor_www_write_bytes_handler)(raptor_www* www, void *userdata, const void *ptr, size_t size, size_t nmemb); + +/** + * raptor_www_content_type_handler: + * @www: WWW object + * @userdata: user data + * @content_type: content type seen + * + * Receiving Content-Type: header from WWW retrieval handler. + * + * Set by raptor_www_set_content_type_handler(). 
+ */ +typedef void (*raptor_www_content_type_handler)(raptor_www* www, void *userdata, const char *content_type); + +/** + * raptor_www_final_uri_handler: + * @www: WWW object + * @userdata: user data + * @final_uri: final URI seen + * + * Receiving the final resolved URI from a WWW retrieval + * + * Set by raptor_www_set_final_uri_handler(). + */ +typedef void (*raptor_www_final_uri_handler)(raptor_www* www, void *userdata, raptor_uri *final_uri); + +/** + * raptor_uri_filter_func: + * @user_data: user data + * @uri: #raptor_uri URI to check + * + * Callback function for #raptor_www_set_uri_filter + * + * Return value: non-0 to filter the URI + */ +typedef int (*raptor_uri_filter_func)(void *user_data, raptor_uri* uri); + + +/** + * raptor_world_flag: + * @RAPTOR_WORLD_FLAG_LIBXML_GENERIC_ERROR_SAVE: if set (non-0 value) - save/restore the libxml generic error handler when raptor library initializes (default set) + * @RAPTOR_WORLD_FLAG_LIBXML_STRUCTURED_ERROR_SAVE: if set (non-0 value) - save/restore the libxml structured error handler when raptor library terminates (default set) + * @RAPTOR_WORLD_FLAG_URI_INTERNING: if set (non-0 value) - each URI is saved interned in-memory and reused (default set) + * @RAPTOR_WORLD_FLAG_WWW_SKIP_INIT_FINISH: if set (non-0 value) the raptor will neither initialise nor terminate the lower level WWW library.  Usually in raptor initialising curl_global_init (for libcurl) is called and in raptor cleanup, curl_global_cleanup is called.  This flag allows the application finer control over these libraries such as setting other global options or potentially calling and terminating raptor several times.  It does mean that applications which use this call must do their own extra work in order to allocate and free all resources to the system. + * + * Raptor world flags + * + * These are used by raptor_world_set_flag() to control raptor-wide + * options across classes. 
These must be set before + * raptor_world_open() is called explicitly or implicitly (by + * creating a raptor object).  There is no enumeration function for + * these flags because they are not user options and must be set + * before the library is initialised.  For similar reasons, there is + * no get function. + * + * If any libxml handler saving/restoring is enabled, any existing + * handler and context is saved before parsing and restored + * afterwards.  Otherwise, no saving/restoring is performed. + * + */ +typedef enum { + RAPTOR_WORLD_FLAG_LIBXML_GENERIC_ERROR_SAVE = 1, + RAPTOR_WORLD_FLAG_LIBXML_STRUCTURED_ERROR_SAVE = 2, + RAPTOR_WORLD_FLAG_URI_INTERNING = 3, + RAPTOR_WORLD_FLAG_WWW_SKIP_INIT_FINISH = 4 +} raptor_world_flag; + + +/** + * raptor_data_compare_arg_handler: + * @data1: first object + * @data2: second object + * @user_data: user data argument + * + * Function to compare two data objects with a user data argument + * + * Designed to be used with raptor_sort_r() and compatible functions + * such as raptor_sequence_sort_r() which uses it. + * + * Return value: compare value <0 if @data1 is before @data2, =0 if equal, >0 if @data1 is after @data2 + */ +typedef int (*raptor_data_compare_arg_handler)(const void *data1, const void *data2, void *user_data); + + +/** + * raptor_data_compare_handler: + * @data1: first data object + * @data2: second data object + * + * Function to compare two data objects - signature like strcmp() and function passed to qsort() + * + * Designed to be passed into generic data structure constructors + * like raptor_new_avltree(). 
+ * + * Return value: compare value <0 if @data1 is before @data2, =0 if equal, >0 if @data1 is after @data2 + */ +typedef int (*raptor_data_compare_handler)(const void* data1, const void* data2); + + +/** + * raptor_data_malloc_handler: + * @size: data size + * + * Typedef for a function to allocate memory - signature like malloc() + * + * Designed to be passed into constructors + * like raptor_www_fetch_to_string + * + * Return value: pointer to newly allocated memory or NULL on failure + */ +typedef void* (*raptor_data_malloc_handler)(size_t size); + + +/** + * raptor_data_free_handler: + * @data: data object or NULL + * + * Typedef for function to free a data object - signature like free() + * + * Designed to be passed into generic data structure constructors + * like raptor_new_avltree().  If @data is NULL, nothing should be done. + */ +typedef void (*raptor_data_free_handler)(void* data); + + +/** + * raptor_data_context_free_handler: + * @context: context data for the free function + * @object: object to free + * + * Handler function for freeing a sequence item with a contextual pointer. + * + * Set by raptor_new_sequence_with_context(). +*/ +typedef void (*raptor_data_context_free_handler)(void* context, void* object); + +/** + * raptor_data_print_handler: + * @object: object to print + * @fh: FILE* to print to + * + * Handler function for printing an object to a stream. + * + * Set by raptor_new_sequence() + * + * Return value: non-0 on failure + */ +typedef int (*raptor_data_print_handler)(void *object, FILE *fh); + +/** + * raptor_data_context_print_handler: + * @context: context data for the print function + * @object: object to print + * @fh: FILE* to print to + * + * Handler function for printing an object with data context to a stream. 
+ * + * Set by raptor_new_sequence_with_context() + * + * Return value: non-0 on failure + */ +typedef int (*raptor_data_context_print_handler)(void *context, void *object, FILE *fh); + +/** + * raptor_stringbuffer: + * + * Raptor string buffer class + */ +typedef struct raptor_stringbuffer_s raptor_stringbuffer; + + +/* Public functions */ + +#define raptor_new_world() raptor_new_world_internal(RAPTOR_VERSION) +/* The real target of the raptor_new_world() macro */ +RAPTOR_API +raptor_world *raptor_new_world_internal(unsigned int version_decimal); +RAPTOR_API +int raptor_world_open(raptor_world* world); +RAPTOR_API +void raptor_free_world(raptor_world* world); +RAPTOR_API +int raptor_world_set_libxslt_security_preferences(raptor_world *world, void *security_preferences); +RAPTOR_API +int raptor_world_set_flag(raptor_world *world, raptor_world_flag flag, int value); +RAPTOR_API +int raptor_world_set_log_handler(raptor_world *world, void *user_data, raptor_log_handler handler); +RAPTOR_API +void raptor_world_set_generate_bnodeid_handler(raptor_world* world, void *user_data, raptor_generate_bnodeid_handler handler); +RAPTOR_API +unsigned char* raptor_world_generate_bnodeid(raptor_world *world); +RAPTOR_API +void raptor_world_set_generate_bnodeid_parameters(raptor_world* world, char *prefix, int base); +RAPTOR_API +const char* raptor_log_level_get_label(raptor_log_level level); +RAPTOR_API +const char* raptor_domain_get_label(raptor_domain domain); + +/* Names */ +RAPTOR_API +int raptor_world_is_parser_name(raptor_world* world, const char *name); +RAPTOR_API +const char* raptor_world_guess_parser_name(raptor_world* world, raptor_uri *uri, const char *mime_type, const unsigned char *buffer, size_t len, const unsigned char *identifier); +RAPTOR_API +int raptor_world_is_serializer_name(raptor_world* world, const char *name); + +/* Syntax descriptions */ +RAPTOR_API +int raptor_world_get_parsers_count(raptor_world* world); +RAPTOR_API +int 
raptor_world_get_serializers_count(raptor_world* world); +RAPTOR_API +const raptor_syntax_description* raptor_world_get_parser_description(raptor_world* world, unsigned int counter); +RAPTOR_API +const raptor_syntax_description* raptor_world_get_serializer_description(raptor_world* world, unsigned int counter); +RAPTOR_API +int raptor_syntax_description_validate(raptor_syntax_description* desc); + +RAPTOR_API +raptor_option raptor_world_get_option_from_uri(raptor_world* world, raptor_uri *uri); + + +/* Term Class */ +RAPTOR_API +raptor_term* raptor_new_term_from_uri(raptor_world* world, raptor_uri* uri); +RAPTOR_API +raptor_term* raptor_new_term_from_counted_uri_string(raptor_world* world, const unsigned char *uri_string, size_t length); +RAPTOR_API +raptor_term* raptor_new_term_from_uri_string(raptor_world* world, const unsigned char *uri_string); +RAPTOR_API +raptor_term* raptor_new_term_from_literal(raptor_world* world, const unsigned char* literal, raptor_uri* datatype, const unsigned char* language); +RAPTOR_API +raptor_term* raptor_new_term_from_counted_literal(raptor_world* world, const unsigned char* literal, size_t literal_len, raptor_uri* datatype, const unsigned char* language, unsigned char language_len); +RAPTOR_API +raptor_term* raptor_new_term_from_blank(raptor_world* world, const unsigned char* blank); +RAPTOR_API +raptor_term* raptor_new_term_from_counted_blank(raptor_world* world, const unsigned char* blank, size_t length); +RAPTOR_API +raptor_term* raptor_new_term_from_counted_string(raptor_world* world, unsigned char* string, size_t length); +RAPTOR_API +raptor_term* raptor_term_copy(raptor_term* term); +RAPTOR_API +int raptor_term_compare(const raptor_term *t1, const raptor_term *t2); +RAPTOR_API +int raptor_term_equals(raptor_term* t1, raptor_term* t2); +RAPTOR_API +void raptor_free_term(raptor_term *term); + +RAPTOR_API +unsigned char* raptor_term_to_counted_string(raptor_term *term, size_t* len_p); +RAPTOR_API +unsigned char* 
raptor_term_to_string(raptor_term *term); +RAPTOR_API +int raptor_term_escaped_write(const raptor_term *term, unsigned int flags, raptor_iostream* iostr); +RAPTOR_API RAPTOR_DEPRECATED +int raptor_term_ntriples_write(const raptor_term *term, raptor_iostream* iostr); +RAPTOR_API +int raptor_uri_turtle_write(raptor_world *world, raptor_iostream* iostr, raptor_uri* uri, raptor_namespace_stack *nstack, raptor_uri *base_uri); +RAPTOR_API +int raptor_term_turtle_write(raptor_iostream* iostr, raptor_term* term, raptor_namespace_stack *nstack, raptor_uri *base_uri); +RAPTOR_API +unsigned char* raptor_uri_to_turtle_counted_string(raptor_world *world, raptor_uri* uri, raptor_namespace_stack *nstack, raptor_uri *base_uri, size_t *len_p); +RAPTOR_API +unsigned char* raptor_uri_to_turtle_string(raptor_world *world, raptor_uri* uri, raptor_namespace_stack *nstack, raptor_uri *base_uri); +RAPTOR_API +unsigned char* raptor_term_to_turtle_counted_string(raptor_term* term, raptor_namespace_stack *nstack, raptor_uri *base_uri, size_t *len_p); +RAPTOR_API +unsigned char* raptor_term_to_turtle_string(raptor_term* term, raptor_namespace_stack *nstack, raptor_uri *base_uri); + + +/* Statement Class */ +RAPTOR_API +void raptor_statement_init(raptor_statement *statement, raptor_world *world); +RAPTOR_API +void raptor_statement_clear(raptor_statement *statement); +RAPTOR_API +raptor_statement* raptor_new_statement(raptor_world *world); +RAPTOR_API +raptor_statement* raptor_new_statement_from_nodes(raptor_world* world, raptor_term *subject, raptor_term *predicate, raptor_term *object, raptor_term *graph); +RAPTOR_API +raptor_statement* raptor_statement_copy(raptor_statement *statement); +RAPTOR_API +void raptor_free_statement(raptor_statement *statement); + +RAPTOR_API +int raptor_statement_print(const raptor_statement * statement, FILE *stream); +RAPTOR_API +int raptor_statement_print_as_ntriples(const raptor_statement * statement, FILE *stream); +RAPTOR_API +int 
raptor_statement_compare(const raptor_statement *s1, const raptor_statement *s2); +RAPTOR_API +int raptor_statement_equals(const raptor_statement* s1, const raptor_statement* s2); + + +/* Parser Class */ +RAPTOR_API +raptor_parser* raptor_new_parser(raptor_world* world, const char *name); +RAPTOR_API +raptor_parser* raptor_new_parser_for_content(raptor_world* world, raptor_uri *uri, const char *mime_type, const unsigned char *buffer, size_t len, const unsigned char *identifier); +RAPTOR_API +void raptor_free_parser(raptor_parser* parser); + +/* methods */ + +/* Handlers */ +RAPTOR_API +void raptor_parser_set_statement_handler(raptor_parser* parser, void *user_data, raptor_statement_handler handler); +RAPTOR_API +void raptor_parser_set_graph_mark_handler(raptor_parser* parser, void *user_data, raptor_graph_mark_handler handler); +RAPTOR_API +void raptor_parser_set_namespace_handler(raptor_parser* parser, void *user_data, raptor_namespace_handler handler); +RAPTOR_API +void raptor_parser_set_uri_filter(raptor_parser* parser, raptor_uri_filter_func filter, void* user_data); +RAPTOR_API +raptor_locator* raptor_parser_get_locator(raptor_parser* rdf_parser); + + +/* Parsing functions */ +RAPTOR_API +int raptor_parser_parse_start(raptor_parser *rdf_parser, raptor_uri *uri); +RAPTOR_API +int raptor_parser_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end); +RAPTOR_API +int raptor_parser_parse_file_stream(raptor_parser* rdf_parser, FILE *stream, const char *filename, raptor_uri *base_uri); +RAPTOR_API +int raptor_parser_parse_file(raptor_parser* rdf_parser, raptor_uri *uri, raptor_uri *base_uri); +RAPTOR_API +int raptor_parser_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri, raptor_uri *base_uri); +RAPTOR_API +int raptor_parser_parse_uri_with_connection(raptor_parser* rdf_parser, raptor_uri *uri, raptor_uri *base_uri, void *connection); +RAPTOR_API +int raptor_parser_parse_iostream(raptor_parser* rdf_parser, raptor_iostream 
*iostr, raptor_uri *base_uri); +RAPTOR_API +void raptor_parser_parse_abort(raptor_parser* rdf_parser); +RAPTOR_API +const char* raptor_parser_get_name(raptor_parser *rdf_parser); +RAPTOR_API +const raptor_syntax_description* raptor_parser_get_description(raptor_parser *rdf_parser); + +/* parser option methods */ +RAPTOR_API +int raptor_parser_set_option(raptor_parser *parser, raptor_option option, const char* string, int integer); +RAPTOR_API +int raptor_parser_get_option(raptor_parser *parser, raptor_option option, char** string_p, int* integer_p); + +/* parser utility methods */ +RAPTOR_API +const char* raptor_parser_get_accept_header(raptor_parser* rdf_parser); +RAPTOR_API +raptor_world* raptor_parser_get_world(raptor_parser* rdf_parser); +RAPTOR_API +raptor_uri* raptor_parser_get_graph(raptor_parser* rdf_parser); + + +/* Locator Class */ +/* methods */ +RAPTOR_API +int raptor_locator_print(raptor_locator* locator, FILE *stream); +RAPTOR_API +int raptor_locator_format(char *buffer, size_t length, raptor_locator* locator); +RAPTOR_API +int raptor_locator_line(raptor_locator *locator); +RAPTOR_API +int raptor_locator_column(raptor_locator *locator); +RAPTOR_API +int raptor_locator_byte(raptor_locator *locator); +RAPTOR_API +const char* raptor_locator_file(raptor_locator *locator); +RAPTOR_API +const char* raptor_locator_uri(raptor_locator *locator); + + +/* Serializer Class */ +RAPTOR_API +raptor_serializer* raptor_new_serializer(raptor_world* world, const char *name); +RAPTOR_API +void raptor_free_serializer(raptor_serializer* rdf_serializer); + +/* methods */ +RAPTOR_API +int raptor_serializer_start_to_iostream(raptor_serializer *rdf_serializer, raptor_uri *uri, raptor_iostream *iostream); +RAPTOR_API +int raptor_serializer_start_to_filename(raptor_serializer *rdf_serializer, const char *filename); +RAPTOR_API +int raptor_serializer_start_to_string(raptor_serializer *rdf_serializer, raptor_uri *uri, void **string_p, size_t *length_p); +RAPTOR_API +int 
raptor_serializer_start_to_file_handle(raptor_serializer *rdf_serializer, raptor_uri *uri, FILE *fh); +RAPTOR_API +int raptor_serializer_set_namespace(raptor_serializer* rdf_serializer, raptor_uri *uri, const unsigned char *prefix); +RAPTOR_API +int raptor_serializer_set_namespace_from_namespace(raptor_serializer* rdf_serializer, raptor_namespace *nspace); +RAPTOR_API +int raptor_serializer_serialize_statement(raptor_serializer* rdf_serializer, raptor_statement *statement); +RAPTOR_API +int raptor_serializer_serialize_end(raptor_serializer *rdf_serializer); +RAPTOR_API +raptor_iostream* raptor_serializer_get_iostream(raptor_serializer *serializer); +RAPTOR_API +raptor_locator* raptor_serializer_get_locator(raptor_serializer *rdf_serializer); +RAPTOR_API +int raptor_serializer_flush(raptor_serializer *rdf_serializer); +RAPTOR_API +const raptor_syntax_description* raptor_serializer_get_description(raptor_serializer *rdf_serializer); + +/* serializer option methods */ +RAPTOR_API +int raptor_serializer_set_option(raptor_serializer *serializer, raptor_option option, const char* string, int integer); +RAPTOR_API +int raptor_serializer_get_option(raptor_serializer *serializer, raptor_option option, char** string_p, int* integer_p); + +/* utility methods */ +RAPTOR_API +raptor_world* raptor_serializer_get_world(raptor_serializer* rdf_serializer); + + +/* memory functions */ +RAPTOR_API +void raptor_free_memory(void *ptr); +RAPTOR_API +void* raptor_alloc_memory(size_t size); +RAPTOR_API +void* raptor_calloc_memory(size_t nmemb, size_t size); + + +/* URI Class */ +RAPTOR_API +raptor_uri* raptor_new_uri_from_counted_string(raptor_world* world, const unsigned char *uri_string, size_t length); +RAPTOR_API +raptor_uri* raptor_new_uri(raptor_world* world, const unsigned char *uri_string); +RAPTOR_API +raptor_uri* raptor_new_uri_from_uri_local_name(raptor_world* world, raptor_uri *uri, const unsigned char *local_name); +RAPTOR_API +raptor_uri* 
raptor_new_uri_relative_to_base(raptor_world* world, raptor_uri *base_uri, const unsigned char *uri_string); +RAPTOR_API +raptor_uri* raptor_new_uri_relative_to_base_counted(raptor_world* world, raptor_uri *base_uri, const unsigned char *uri_string, size_t uri_len); +RAPTOR_API +raptor_uri* raptor_new_uri_from_id(raptor_world* world, raptor_uri *base_uri, const unsigned char *id); +RAPTOR_API +raptor_uri* raptor_new_uri_from_uri_or_file_string(raptor_world* world, raptor_uri* base_uri, const unsigned char* uri_or_file_string); +RAPTOR_API +raptor_uri* raptor_new_uri_for_rdf_concept(raptor_world* world, const unsigned char *name); +RAPTOR_API +raptor_uri* raptor_new_uri_for_xmlbase(raptor_uri* old_uri); +RAPTOR_API +raptor_uri* raptor_new_uri_for_retrieval(raptor_uri* old_uri); +RAPTOR_API +void raptor_free_uri(raptor_uri *uri); + +/* methods */ +RAPTOR_API +int raptor_uri_equals(raptor_uri* uri1, raptor_uri* uri2); +RAPTOR_API +int raptor_uri_compare(raptor_uri* uri1, raptor_uri* uri2); +RAPTOR_API +raptor_uri* raptor_uri_copy(raptor_uri *uri); +RAPTOR_API +unsigned char* raptor_uri_as_string(raptor_uri *uri); +RAPTOR_API +unsigned char* raptor_uri_as_counted_string(raptor_uri *uri, size_t* len_p); +RAPTOR_API +unsigned char* raptor_uri_to_relative_counted_uri_string(raptor_uri *base_uri, raptor_uri *reference_uri, size_t *length_p); +RAPTOR_API +unsigned char* raptor_uri_to_relative_uri_string(raptor_uri *base_uri, raptor_uri *reference_uri); +RAPTOR_API +int raptor_uri_print(const raptor_uri* uri, FILE *stream); +RAPTOR_API +unsigned char* raptor_uri_to_counted_string(raptor_uri *uri, size_t *len_p); +RAPTOR_API +unsigned char* raptor_uri_to_string(raptor_uri *uri); +RAPTOR_API +raptor_world* raptor_uri_get_world(raptor_uri *uri); +RAPTOR_API +int raptor_uri_file_exists(raptor_uri* uri); +RAPTOR_API +int raptor_uri_escaped_write(raptor_uri* uri, raptor_uri* base_uri, unsigned int flags, raptor_iostream *iostr); + +/* XML utility functions */ +RAPTOR_API +int 
raptor_xml_escape_string_any(raptor_world* world, const unsigned char *string, size_t len, unsigned char *buffer, size_t length, char quote, int xml_version); +RAPTOR_API +int raptor_xml_escape_string_any_write(const unsigned char *string, size_t len, char quote, int xml_version, raptor_iostream* iostr); +RAPTOR_API +int raptor_xml_escape_string(raptor_world *world, const unsigned char *string, size_t len, unsigned char *buffer, size_t length, char quote); +RAPTOR_API +int raptor_xml_escape_string_write(const unsigned char *string, size_t len, char quote, raptor_iostream* iostr); +RAPTOR_API +int raptor_xml_name_check(const unsigned char *string, size_t length, int xml_version); + + +/* portable vsnprintf utility function */ +RAPTOR_API RAPTOR_DEPRECATED +char* raptor_vsnprintf(const char *format, va_list arguments) RAPTOR_PRINTF_FORMAT(1, 0); +RAPTOR_API +int raptor_vsnprintf2(char *buffer, size_t size, const char *format, va_list arguments) RAPTOR_PRINTF_FORMAT(3, 0); +RAPTOR_API +int raptor_snprintf(char *buffer, size_t size, const char *format, ...) 
RAPTOR_PRINTF_FORMAT(3, 4); +RAPTOR_API +int raptor_vasprintf(char **ret, const char *format, va_list arguments) RAPTOR_PRINTF_FORMAT(2, 0); + +/* RFC2396 URI resolving functions */ +RAPTOR_API +size_t raptor_uri_resolve_uri_reference(const unsigned char *base_uri, const unsigned char *reference_uri, unsigned char* buffer, size_t length); + +/* URI String utility functions */ +RAPTOR_API +unsigned char* raptor_uri_counted_filename_to_uri_string(const char *filename, size_t filename_len); +RAPTOR_API +unsigned char* raptor_uri_filename_to_uri_string(const char *filename); +RAPTOR_API +int raptor_uri_filename_exists(const unsigned char* path); +RAPTOR_API +char* raptor_uri_uri_string_to_filename(const unsigned char *uri_string); +RAPTOR_API +char* raptor_uri_uri_string_to_filename_fragment(const unsigned char *uri_string, unsigned char **fragment_p); +RAPTOR_API +int raptor_uri_uri_string_is_file_uri(const unsigned char* uri_string); +RAPTOR_API +int raptor_stringbuffer_append_uri_escaped_counted_string(raptor_stringbuffer* sb, const char* string, size_t length, int space_is_plus); +RAPTOR_API +char* raptor_uri_uri_string_to_counted_filename_fragment(const unsigned char *uri_string, size_t* len_p, unsigned char **fragment_p, size_t* fragment_len_p); +RAPTOR_API +int raptor_uri_uri_string_is_absolute(const unsigned char* uri_string); + + +/** + * RAPTOR_RDF_MS_URI: + * + * RDF Namespace URI (rdf:). + * + * Copy with raptor_uri_copy() to use. + */ +#define RAPTOR_RDF_MS_URI raptor_rdf_namespace_uri + +/** + * RAPTOR_RDF_SCHEMA_URI: + * + * RDF Schema Namespace URI (rdfs:). + * + * Copy with raptor_uri_copy() to use. + */ +#define RAPTOR_RDF_SCHEMA_URI raptor_rdf_schema_namespace_uri + +/** + * RAPTOR_XMLSCHEMA_DATATYPES_URI: + * + * XML Schema Datatypes URI (xsd:). + * + * Copy with raptor_uri_copy() to use. + */ +#define RAPTOR_XMLSCHEMA_DATATYPES_URI raptor_xmlschema_datatypes_namespace_uri + +/** + * RAPTOR_OWL_URI: + * + * OWL Namespace URI (owl:). 
+ * + * Copy with raptor_uri_copy() to use. + */ +#define RAPTOR_OWL_URI raptor_owl_namespace_uri + + +/* raptor_www */ +RAPTOR_API +raptor_www* raptor_new_www(raptor_world* world); +RAPTOR_API +raptor_www* raptor_new_www_with_connection(raptor_world* world, void* connection); +RAPTOR_API +void raptor_free_www(raptor_www *www); +RAPTOR_API +int raptor_www_set_ssl_cert_options(raptor_www* www, const char* cert_filename, const char* cert_type, const char* cert_passphrase); +RAPTOR_API +int raptor_www_set_ssl_verify_options(raptor_www* www, int verify_peer, int verify_host); +RAPTOR_API +int raptor_www_set_user_agent2(raptor_www *www, const char *user_agent, size_t user_agent_len); +RAPTOR_API RAPTOR_DEPRECATED +void raptor_www_set_user_agent(raptor_www *www, const char *user_agent); +RAPTOR_API +int raptor_www_set_proxy2(raptor_www *www, const char *proxy, size_t proxy_len); +RAPTOR_API RAPTOR_DEPRECATED +void raptor_www_set_proxy(raptor_www *www, const char *proxy); +RAPTOR_API +int raptor_www_set_http_accept2(raptor_www *www, const char *value, size_t value_len); +RAPTOR_API RAPTOR_DEPRECATED +void raptor_www_set_http_accept(raptor_www *www, const char *value); +RAPTOR_API +void raptor_www_set_write_bytes_handler(raptor_www *www, raptor_www_write_bytes_handler handler, void *user_data); +RAPTOR_API +void raptor_www_set_content_type_handler(raptor_www *www, raptor_www_content_type_handler handler, void *user_data); +RAPTOR_API +void raptor_www_set_final_uri_handler(raptor_www* www, raptor_www_final_uri_handler handler, void *user_data); +RAPTOR_API +void raptor_www_set_uri_filter(raptor_www* www, raptor_uri_filter_func filter, void* user_data); +RAPTOR_API +void raptor_www_set_connection_timeout(raptor_www* www, int timeout); +RAPTOR_API +int raptor_www_set_http_cache_control(raptor_www* www, const char* cache_control); +RAPTOR_API +int raptor_www_fetch(raptor_www *www, raptor_uri *uri); +RAPTOR_API +int raptor_www_fetch_to_string(raptor_www *www, raptor_uri *uri, 
void **string_p, size_t *length_p, raptor_data_malloc_handler const malloc_handler); +RAPTOR_API +void* raptor_www_get_connection(raptor_www *www); +RAPTOR_API +void raptor_www_abort(raptor_www *www, const char *reason); +RAPTOR_API +raptor_uri* raptor_www_get_final_uri(raptor_www* www); + + +/* XML QNames Class */ +RAPTOR_API +raptor_qname* raptor_new_qname(raptor_namespace_stack *nstack, const unsigned char *name, const unsigned char *value); +RAPTOR_API +raptor_qname* raptor_new_qname_from_namespace_local_name(raptor_world* world, raptor_namespace *ns, const unsigned char *local_name, const unsigned char *value); + +/* methods */ +RAPTOR_API +raptor_qname* raptor_qname_copy(raptor_qname *qname); +RAPTOR_API +void raptor_free_qname(raptor_qname* name); +RAPTOR_API +int raptor_qname_equal(raptor_qname *name1, raptor_qname *name2); +RAPTOR_API +unsigned char* raptor_qname_to_counted_name(raptor_qname *qname, size_t* length_p); +RAPTOR_API +const raptor_namespace* raptor_qname_get_namespace(raptor_qname* name); +RAPTOR_API +const unsigned char* raptor_qname_get_local_name(raptor_qname* name); +RAPTOR_API +const unsigned char* raptor_qname_get_value(raptor_qname* name); +RAPTOR_API +const unsigned char* raptor_qname_get_counted_value(raptor_qname* name, size_t* length_p); +RAPTOR_API +int raptor_qname_write(raptor_qname *qname, raptor_iostream* iostr); + +/* QName String utility functions */ +RAPTOR_API +raptor_uri* raptor_qname_string_to_uri(raptor_namespace_stack *nstack, const unsigned char *name, size_t name_len); +RAPTOR_API +unsigned char* raptor_qname_format_as_xml(const raptor_qname *qname, size_t *length_p); + +/* XML Namespaces Stack class */ +RAPTOR_API +raptor_namespace* raptor_new_namespace_from_uri(raptor_namespace_stack *nstack, const unsigned char *prefix, raptor_uri* ns_uri, int depth); +RAPTOR_API +raptor_namespace_stack* raptor_new_namespaces(raptor_world* world, int defaults); +RAPTOR_API +int raptor_namespaces_init(raptor_world* world, 
raptor_namespace_stack *nstack, int defaults); +RAPTOR_API +void raptor_namespaces_clear(raptor_namespace_stack *nstack); +RAPTOR_API +void raptor_free_namespaces(raptor_namespace_stack *nstack); + +/* methods */ +RAPTOR_API +void raptor_namespaces_start_namespace(raptor_namespace_stack *nstack, raptor_namespace *nspace); +RAPTOR_API +int raptor_namespaces_start_namespace_full(raptor_namespace_stack *nstack, const unsigned char *prefix, const unsigned char *ns_uri_string, int depth); +RAPTOR_API +void raptor_namespaces_end_for_depth(raptor_namespace_stack *nstack, int depth); +RAPTOR_API +raptor_namespace* raptor_namespaces_get_default_namespace(raptor_namespace_stack *nstack); +RAPTOR_API +raptor_namespace* raptor_namespaces_find_namespace(raptor_namespace_stack *nstack, const unsigned char *prefix, int prefix_length); +RAPTOR_API +raptor_namespace* raptor_namespaces_find_namespace_by_uri(raptor_namespace_stack *nstack, raptor_uri *ns_uri); +RAPTOR_API +int raptor_namespaces_namespace_in_scope(raptor_namespace_stack *nstack, const raptor_namespace *nspace); +RAPTOR_API +raptor_qname* raptor_new_qname_from_namespace_uri(raptor_namespace_stack *nstack, raptor_uri *uri, int xml_version); + + +/* XML Namespace Class */ +RAPTOR_API +raptor_namespace* raptor_new_namespace(raptor_namespace_stack *nstack, const unsigned char *prefix, const unsigned char *ns_uri_string, int depth); +RAPTOR_API +void raptor_free_namespace(raptor_namespace *ns); +RAPTOR_API +int raptor_namespace_stack_start_namespace(raptor_namespace_stack *nstack, raptor_namespace *ns, int new_depth); +RAPTOR_API +raptor_uri* raptor_namespace_get_uri(const raptor_namespace *ns); +RAPTOR_API +const unsigned char* raptor_namespace_get_prefix(const raptor_namespace *ns); +RAPTOR_API +const unsigned char* raptor_namespace_get_counted_prefix(const raptor_namespace *ns, size_t *length_p); +RAPTOR_API +unsigned char* raptor_namespace_format_as_xml(const raptor_namespace *ns, size_t *length_p); +RAPTOR_API +int 
raptor_namespace_write(raptor_namespace *ns, raptor_iostream* iostr); + +/* namespace string utility function */ +RAPTOR_API +int raptor_xml_namespace_string_parse(const unsigned char *string, unsigned char **prefix, unsigned char **uri_string); + +/* Sequence class */ +/** + * raptor_sequence: + * + * Raptor sequence class + */ +typedef struct raptor_sequence_s raptor_sequence; + +/* Sequence Class */ +RAPTOR_API +raptor_sequence* raptor_new_sequence(raptor_data_free_handler free_handler, raptor_data_print_handler print_handler); +RAPTOR_API +raptor_sequence* raptor_new_sequence_with_context(raptor_data_context_free_handler free_handler, raptor_data_context_print_handler print_handler, void* handler_context); +RAPTOR_API +void raptor_free_sequence(raptor_sequence* seq); + +/* methods */ +RAPTOR_API +int raptor_sequence_size(raptor_sequence* seq); +RAPTOR_API +int raptor_sequence_set_at(raptor_sequence* seq, int idx, void *data); +RAPTOR_API +int raptor_sequence_push(raptor_sequence* seq, void *data); +RAPTOR_API +int raptor_sequence_shift(raptor_sequence* seq, void *data); +RAPTOR_API +void* raptor_sequence_get_at(raptor_sequence* seq, int idx); +RAPTOR_API +void* raptor_sequence_pop(raptor_sequence* seq); +RAPTOR_API +void* raptor_sequence_unshift(raptor_sequence* seq); +RAPTOR_API +void* raptor_sequence_delete_at(raptor_sequence* seq, int idx); + +RAPTOR_API +void raptor_sequence_sort(raptor_sequence* seq, raptor_data_compare_handler compare); +RAPTOR_API +void raptor_sequence_sort_r(raptor_sequence* seq, raptor_data_compare_arg_handler compare, void* user_data); +RAPTOR_API +int raptor_sequence_swap(raptor_sequence* seq, int i, int j); +RAPTOR_API +int raptor_sequence_reverse(raptor_sequence* seq, int start_index, int length); +RAPTOR_API +int raptor_sequence_next_permutation(raptor_sequence *seq, raptor_data_compare_handler compare); + +/* helper for printing sequences of strings */ +RAPTOR_API +int raptor_sequence_print(raptor_sequence* seq, FILE* fh); 
+RAPTOR_API +int raptor_sequence_join(raptor_sequence* dest, raptor_sequence *src); + + +/* Unicode and UTF8 */ + +/** + * raptor_unichar: + * + * raptor Unicode codepoint + */ +typedef unsigned long raptor_unichar; +RAPTOR_API +int raptor_unicode_utf8_string_put_char(raptor_unichar c, unsigned char *output, size_t length); +RAPTOR_API +int raptor_unicode_utf8_string_get_char(const unsigned char *input, size_t length, raptor_unichar *output); +RAPTOR_API +int raptor_unicode_is_xml11_namestartchar(raptor_unichar c); +RAPTOR_API +int raptor_unicode_is_xml10_namestartchar(raptor_unichar c); +RAPTOR_API +int raptor_unicode_is_xml11_namechar(raptor_unichar c); +RAPTOR_API +int raptor_unicode_is_xml10_namechar(raptor_unichar c); +RAPTOR_API +int raptor_unicode_check_utf8_string(const unsigned char *string, size_t length); +RAPTOR_API +int raptor_unicode_utf8_strlen(const unsigned char *string, size_t length); +RAPTOR_API +size_t raptor_unicode_utf8_substr(unsigned char* dest, size_t* dest_length_p, const unsigned char* src, size_t src_length, int startingLoc, int length); + +/* Stringbuffer Class */ +RAPTOR_API +raptor_stringbuffer* raptor_new_stringbuffer(void); +RAPTOR_API +void raptor_free_stringbuffer(raptor_stringbuffer *stringbuffer); + +/* methods */ +RAPTOR_API +int raptor_stringbuffer_append_counted_string(raptor_stringbuffer* stringbuffer, const unsigned char *string, size_t length, int do_copy); +RAPTOR_API +int raptor_stringbuffer_append_string(raptor_stringbuffer* stringbuffer, const unsigned char *string, int do_copy); +RAPTOR_API +int raptor_stringbuffer_append_decimal(raptor_stringbuffer* stringbuffer, int integer); +RAPTOR_API +int raptor_stringbuffer_append_hexadecimal(raptor_stringbuffer* stringbuffer, int hex); +RAPTOR_API +int raptor_stringbuffer_append_stringbuffer(raptor_stringbuffer* stringbuffer, raptor_stringbuffer* append); +RAPTOR_API +int raptor_stringbuffer_prepend_counted_string(raptor_stringbuffer* stringbuffer, const unsigned char 
*string, size_t length, int do_copy); +RAPTOR_API +int raptor_stringbuffer_prepend_string(raptor_stringbuffer* stringbuffer, const unsigned char *string, int do_copy); +RAPTOR_API +unsigned char* raptor_stringbuffer_as_string(raptor_stringbuffer* stringbuffer); +RAPTOR_API +size_t raptor_stringbuffer_length(raptor_stringbuffer* stringbuffer); +RAPTOR_API +int raptor_stringbuffer_copy_to_string(raptor_stringbuffer* stringbuffer, unsigned char *string, size_t length); + +/** + * raptor_iostream_init_func: + * @context: stream context data + * + * Handler function for #raptor_iostream initialising. + * + * Return value: non-0 on failure. + */ +typedef int (*raptor_iostream_init_func) (void *context); + +/** + * raptor_iostream_finish_func: + * @context: stream context data + * + * Handler function for #raptor_iostream terminating. + * + */ +typedef void (*raptor_iostream_finish_func) (void *context); + +/** + * raptor_iostream_write_byte_func: + * @context: stream context data + * @byte: byte to write + * + * Handler function for implementing raptor_iostream_write_byte(). + * + * Return value: non-0 on failure. + */ +typedef int (*raptor_iostream_write_byte_func) (void *context, const int byte); + +/** + * raptor_iostream_write_bytes_func: + * @context: stream context data + * @ptr: pointer to bytes to write + * @size: size of item + * @nmemb: number of items + * + * Handler function for implementing raptor_iostream_write_bytes(). + * + * Return value: non-0 on failure. + */ +typedef int (*raptor_iostream_write_bytes_func) (void *context, const void *ptr, size_t size, size_t nmemb); + +/** + * raptor_iostream_write_end_func: + * @context: stream context data + * + * Handler function for implementing raptor_iostream_write_end(). + * + * Return value: non-0 on failure. 
+ */ +typedef int (*raptor_iostream_write_end_func) (void *context); + +/** + * raptor_iostream_read_bytes_func: + * @context: stream context data + * @ptr: pointer to buffer to read into + * @size: size of item + * @nmemb: number of items + * + * Handler function for implementing raptor_iostream_read_bytes(). + * + * Return value: number of items read, 0 or < @nmemb on EOF, <0 on failure + */ +typedef int (*raptor_iostream_read_bytes_func) (void *context, void *ptr, size_t size, size_t nmemb); + +/** + * raptor_iostream_read_eof_func: + * @context: stream context data + * + * Handler function for implementing raptor_iostream_read_eof(). + * + * Return value: non-0 if EOF + */ +typedef int (*raptor_iostream_read_eof_func) (void *context); + +/** + * raptor_iostream_handler: + * @version: interface version. Presently 1 or 2. + * @init: initialisation handler - optional, called at most once (V1) + * @finish: finishing handler - optional, called at most once (V1) + * @write_byte: write byte handler - required (for writing) (V1) + * @write_bytes: write bytes handler - required (for writing) (V1) + * @write_end: write end handler - optional (for writing), called at most once (V1) + * @read_bytes: read bytes handler - required (for reading) (V2) + * @read_eof: read EOF handler - required (for reading) (V2) + * + * I/O stream implementation handler structure. 
+ * + */ +typedef struct { + int version; + + /* V1 functions */ + raptor_iostream_init_func init; + raptor_iostream_finish_func finish; + raptor_iostream_write_byte_func write_byte; + raptor_iostream_write_bytes_func write_bytes; + raptor_iostream_write_end_func write_end; + + /* V2 functions */ + raptor_iostream_read_bytes_func read_bytes; + raptor_iostream_read_eof_func read_eof; +} raptor_iostream_handler; + + +/* I/O Stream Class */ +RAPTOR_API +raptor_iostream* raptor_new_iostream_from_handler(raptor_world* world, void *user_data, const raptor_iostream_handler* const handler); +RAPTOR_API +raptor_iostream* raptor_new_iostream_to_sink(raptor_world* world); +RAPTOR_API +raptor_iostream* raptor_new_iostream_to_filename(raptor_world* world, const char *filename); +RAPTOR_API +raptor_iostream* raptor_new_iostream_to_file_handle(raptor_world* world, FILE *handle); +RAPTOR_API +raptor_iostream* raptor_new_iostream_to_string(raptor_world* world, void **string_p, size_t *length_p, raptor_data_malloc_handler const malloc_handler); +RAPTOR_API +raptor_iostream* raptor_new_iostream_from_sink(raptor_world* world); +RAPTOR_API +raptor_iostream* raptor_new_iostream_from_filename(raptor_world* world, const char *filename); +RAPTOR_API +raptor_iostream* raptor_new_iostream_from_file_handle(raptor_world* world, FILE *handle); +RAPTOR_API +raptor_iostream* raptor_new_iostream_from_string(raptor_world* world, void *string, size_t length); +RAPTOR_API +void raptor_free_iostream(raptor_iostream *iostr); + +RAPTOR_API +int raptor_iostream_write_bytes(const void *ptr, size_t size, size_t nmemb, raptor_iostream *iostr); +RAPTOR_API +int raptor_iostream_write_byte(const int byte, raptor_iostream *iostr); +RAPTOR_API +int raptor_iostream_write_end(raptor_iostream *iostr); +RAPTOR_API +int raptor_iostream_string_write(const void *string, raptor_iostream *iostr); +RAPTOR_API +int raptor_iostream_counted_string_write(const void *string, size_t len, raptor_iostream *iostr); +RAPTOR_API 
+unsigned long raptor_iostream_tell(raptor_iostream *iostr); +RAPTOR_API +int raptor_iostream_decimal_write(int integer, raptor_iostream* iostr); +RAPTOR_API +int raptor_iostream_hexadecimal_write(unsigned int integer, int width, raptor_iostream* iostr); +RAPTOR_API +int raptor_stringbuffer_write(raptor_stringbuffer *sb, raptor_iostream* iostr); +RAPTOR_API +int raptor_uri_write(raptor_uri *uri, raptor_iostream *iostr); +RAPTOR_API +int raptor_iostream_read_bytes(void *ptr, size_t size, size_t nmemb, raptor_iostream* iostr); +RAPTOR_API +int raptor_iostream_read_eof(raptor_iostream *iostr); + +/* I/O Stream utility functions */ + +/** + * raptor_escaped_write_bitflags: + * @RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF: Allow \b \f + * @RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU: Allow \t \n \r \u \U + * @RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8: Use UTF-8 instead of \u \U for U+0080 or larger (will always use \u for U+0000..U+001F and U+007F) + * @RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES: Must escape #x00-#x20<>\"{}|^` in URIs + * @RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL: N-Triples literal + * @RAPTOR_ESCAPED_WRITE_NTRIPLES_URI: N-Triples URI + * @RAPTOR_ESCAPED_WRITE_SPARQL_LITERAL: SPARQL literal: allows raw UTF8 for printable literals + * @RAPTOR_ESCAPED_WRITE_SPARQL_LONG_LITERAL: SPARQL long literal: no BS-escapes allowed + * @RAPTOR_ESCAPED_WRITE_SPARQL_URI: SPARQL uri: have to escape certain characters + * @RAPTOR_ESCAPED_WRITE_TURTLE_URI: Turtle 2013 URIs (like SPARQL) + * @RAPTOR_ESCAPED_WRITE_TURTLE_LITERAL: Turtle 2013 literals (like SPARQL) + * @RAPTOR_ESCAPED_WRITE_TURTLE_LONG_LITERAL: Turtle 2013 long literals (like SPARQL) + * @RAPTOR_ESCAPED_WRITE_JSON_LITERAL: JSON literals: UTF-8 plus \b \f \t \r \n, \uXXXX only, no \U + * + * Bit flags for raptor_string_escaped_write() and friends. 
+ */ +typedef enum { + RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF = 1, + RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU = 2, + RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8 = 4, + RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES = 8, + + /* N-Triples - favour writing \u, \U over UTF8 */ + RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL = RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU | RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF, + RAPTOR_ESCAPED_WRITE_NTRIPLES_URI = RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES, + + /* SPARQL literal: allows raw UTF8 for printable literals */ + RAPTOR_ESCAPED_WRITE_SPARQL_LITERAL = RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8, + + /* SPARQL long literal: no BS-escapes allowed */ + RAPTOR_ESCAPED_WRITE_SPARQL_LONG_LITERAL = RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8, + + /* SPARQL uri: have to escape certain characters */ + RAPTOR_ESCAPED_WRITE_SPARQL_URI = RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8 | RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES, + + /* Turtle (2013) escapes are like SPARQL */ + RAPTOR_ESCAPED_WRITE_TURTLE_URI = RAPTOR_ESCAPED_WRITE_SPARQL_URI, + RAPTOR_ESCAPED_WRITE_TURTLE_LITERAL = RAPTOR_ESCAPED_WRITE_SPARQL_LITERAL, + RAPTOR_ESCAPED_WRITE_TURTLE_LONG_LITERAL = RAPTOR_ESCAPED_WRITE_SPARQL_LONG_LITERAL, + + /* JSON literals: UTF-8 plus \b \f \t \r \n \uXXXX */ + RAPTOR_ESCAPED_WRITE_JSON_LITERAL = RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU | RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF | RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8 +} raptor_escaped_write_bitflags; + + +RAPTOR_API +int raptor_string_ntriples_write(const unsigned char *string, size_t len, const char delim, raptor_iostream *iostr); +RAPTOR_API +int raptor_bnodeid_ntriples_write(const unsigned char *bnodeid, size_t len, raptor_iostream *iostr); +RAPTOR_API RAPTOR_DEPRECATED +int raptor_string_python_write(const unsigned char *string, size_t len, const char delim, unsigned int mode, raptor_iostream *iostr); +RAPTOR_API +int raptor_statement_ntriples_write(const raptor_statement *statement, raptor_iostream* 
iostr, int write_graph_term); +RAPTOR_API +int raptor_string_escaped_write(const unsigned char *string, size_t len, const char delim, unsigned int flags, raptor_iostream *iostr); + + +/* Parser and Serializer options */ + +/** + * raptor_option_value_type: + * @RAPTOR_OPTION_VALUE_TYPE_BOOL: Boolean integer value. Non-0 is true + * @RAPTOR_OPTION_VALUE_TYPE_INT: Decimal integer value + * @RAPTOR_OPTION_VALUE_TYPE_STRING: String value + * @RAPTOR_OPTION_VALUE_TYPE_URI: URI String value. + * @RAPTOR_OPTION_VALUE_TYPE_LAST: internal + * + * Option value types. + */ +typedef enum { + RAPTOR_OPTION_VALUE_TYPE_BOOL, + RAPTOR_OPTION_VALUE_TYPE_INT, + RAPTOR_OPTION_VALUE_TYPE_STRING, + RAPTOR_OPTION_VALUE_TYPE_URI, + RAPTOR_OPTION_VALUE_TYPE_LAST = RAPTOR_OPTION_VALUE_TYPE_URI +} raptor_option_value_type; + + +/** + * raptor_option_description: + * @domain: domain ID + * @option: option ID + * @value_type: data type of option value + * @name: short name for option + * @name_len: length of @name + * @label: description of option + * @uri: URI identifying option + * + * Description of an option for a domain. 
+ */ +typedef struct { + raptor_domain domain; + raptor_option option; + raptor_option_value_type value_type; + const char* name; + size_t name_len; + const char* label; + raptor_uri* uri; +} raptor_option_description; + + +RAPTOR_API +unsigned int raptor_option_get_count(void); +RAPTOR_API +const char* raptor_option_get_value_type_label(const raptor_option_value_type type); +RAPTOR_API +void raptor_free_option_description(raptor_option_description* option_description); +RAPTOR_API +raptor_option_description* raptor_world_get_option_description(raptor_world* world, const raptor_domain domain, const raptor_option option); + + +/* SAX2 element Class (raptor_xml_element) */ +RAPTOR_API +raptor_xml_element* raptor_new_xml_element(raptor_qname* name, const unsigned char* xml_language, raptor_uri* xml_base); +RAPTOR_API +raptor_xml_element* raptor_new_xml_element_from_namespace_local_name(raptor_namespace *ns, const unsigned char *name, const unsigned char *xml_language, raptor_uri *xml_base); +RAPTOR_API +void raptor_free_xml_element(raptor_xml_element *element); + +/* methods */ +RAPTOR_API +raptor_qname* raptor_xml_element_get_name(raptor_xml_element *xml_element); +RAPTOR_API +void raptor_xml_element_set_attributes(raptor_xml_element* xml_element, raptor_qname **attributes, int count); +RAPTOR_API +raptor_qname** raptor_xml_element_get_attributes(raptor_xml_element* xml_element); +RAPTOR_API +int raptor_xml_element_get_attributes_count(raptor_xml_element* xml_element); +RAPTOR_API +int raptor_xml_element_declare_namespace(raptor_xml_element* xml_element, raptor_namespace *nspace); +RAPTOR_API +int raptor_xml_element_write(raptor_xml_element *element, raptor_namespace_stack *nstack, int is_empty, int is_end, int depth, raptor_iostream *iostr); +RAPTOR_API +int raptor_xml_element_is_empty(raptor_xml_element* xml_element); +RAPTOR_API +const unsigned char* raptor_xml_element_get_language(raptor_xml_element* xml_element); + + +/* XML Writer Class (raptor_xml_writer) */ 
+RAPTOR_API +raptor_xml_writer* raptor_new_xml_writer(raptor_world* world, raptor_namespace_stack *nstack, raptor_iostream* iostr); +RAPTOR_API +void raptor_free_xml_writer(raptor_xml_writer* xml_writer); + +/* methods */ +RAPTOR_API +void raptor_xml_writer_empty_element(raptor_xml_writer* xml_writer, raptor_xml_element *element); +RAPTOR_API +void raptor_xml_writer_start_element(raptor_xml_writer* xml_writer, raptor_xml_element *element); +RAPTOR_API +void raptor_xml_writer_end_element(raptor_xml_writer* xml_writer, raptor_xml_element *element); +RAPTOR_API +void raptor_xml_writer_newline(raptor_xml_writer* xml_writer); +RAPTOR_API +void raptor_xml_writer_cdata(raptor_xml_writer* xml_writer, const unsigned char *s); +RAPTOR_API +void raptor_xml_writer_cdata_counted(raptor_xml_writer* xml_writer, const unsigned char *s, unsigned int len); +RAPTOR_API +void raptor_xml_writer_raw(raptor_xml_writer* xml_writer, const unsigned char *s); +RAPTOR_API +void raptor_xml_writer_raw_counted(raptor_xml_writer* xml_writer, const unsigned char *s, unsigned int len); +RAPTOR_API +void raptor_xml_writer_comment(raptor_xml_writer* xml_writer, const unsigned char *s); +RAPTOR_API +void raptor_xml_writer_comment_counted(raptor_xml_writer* xml_writer, const unsigned char *s, unsigned int len); +RAPTOR_API +void raptor_xml_writer_flush(raptor_xml_writer* xml_writer); +RAPTOR_API +int raptor_xml_writer_set_option(raptor_xml_writer *xml_writer, raptor_option option, char* string, int integer); +RAPTOR_API +int raptor_xml_writer_get_option(raptor_xml_writer *xml_writer, raptor_option option, char** string_p, int* integer_p); +RAPTOR_API +int raptor_xml_writer_get_depth(raptor_xml_writer *xml_writer); + +/** + * raptor_sax2_start_element_handler: + * @user_data: user data + * @xml_element: XML element + * + * SAX2 start element handler + */ +typedef void (*raptor_sax2_start_element_handler)(void *user_data, raptor_xml_element *xml_element); + +/** + * raptor_sax2_end_element_handler: + * 
@user_data: user data + * @xml_element: XML element + * + * SAX2 end element handler + */ +typedef void (*raptor_sax2_end_element_handler)(void *user_data, raptor_xml_element* xml_element); + +/** + * raptor_sax2_characters_handler: + * @user_data: user data + * @xml_element: XML element + * @s: string + * @len: string len + * + * SAX2 characters handler + */ +typedef void (*raptor_sax2_characters_handler)(void *user_data, raptor_xml_element* xml_element, const unsigned char *s, int len); + +/** + * raptor_sax2_cdata_handler: + * @user_data: user data + * @xml_element: XML element + * @s: string + * @len: string len + + * SAX2 CDATA section handler + */ +typedef void (*raptor_sax2_cdata_handler)(void *user_data, raptor_xml_element* xml_element, const unsigned char *s, int len); + +/** + * raptor_sax2_comment_handler: + * @user_data: user data + * @xml_element: XML element + * @s: string + * + * SAX2 XML comment handler + */ +typedef void (*raptor_sax2_comment_handler)(void *user_data, raptor_xml_element* xml_element, const unsigned char *s); + +/** + * raptor_sax2_unparsed_entity_decl_handler: + * @user_data: user data + * @entityName: entity name + * @base: base URI + * @systemId: system ID + * @publicId: public ID + * @notationName: notation name + * + * SAX2 unparsed entity (NDATA) handler + */ +typedef void (*raptor_sax2_unparsed_entity_decl_handler)(void *user_data, const unsigned char* entityName, const unsigned char* base, const unsigned char* systemId, const unsigned char* publicId, const unsigned char* notationName); + +/** + * raptor_sax2_external_entity_ref_handler: + * @user_data: user data + * @context: context + * @base: base URI + * @systemId: system ID + * @publicId: public ID + * + * SAX2 external entity reference handler + * + * Return value: 0 if processing should not continue because of a + * fatal error in the handling of the external entity. 
+ */ +typedef int (*raptor_sax2_external_entity_ref_handler)(void *user_data, const unsigned char* context, const unsigned char* base, const unsigned char* systemId, const unsigned char* publicId); + + +/* SAX2 API */ +RAPTOR_API +raptor_sax2* raptor_new_sax2(raptor_world *world, raptor_locator *locator, void* user_data); +RAPTOR_API +void raptor_free_sax2(raptor_sax2 *sax2); + +/* methods */ +RAPTOR_API +void raptor_sax2_set_start_element_handler(raptor_sax2* sax2, raptor_sax2_start_element_handler handler); +RAPTOR_API +void raptor_sax2_set_end_element_handler(raptor_sax2* sax2, raptor_sax2_end_element_handler handler); +RAPTOR_API +void raptor_sax2_set_characters_handler(raptor_sax2* sax2, raptor_sax2_characters_handler handler); +RAPTOR_API +void raptor_sax2_set_cdata_handler(raptor_sax2* sax2, raptor_sax2_cdata_handler handler); +RAPTOR_API +void raptor_sax2_set_comment_handler(raptor_sax2* sax2, raptor_sax2_comment_handler handler); +RAPTOR_API +void raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2* sax2, raptor_sax2_unparsed_entity_decl_handler handler); +RAPTOR_API +void raptor_sax2_set_external_entity_ref_handler(raptor_sax2* sax2, raptor_sax2_external_entity_ref_handler handler); +RAPTOR_API +void raptor_sax2_set_namespace_handler(raptor_sax2* sax2, raptor_namespace_handler handler); +RAPTOR_API +void raptor_sax2_set_uri_filter(raptor_sax2* sax2, raptor_uri_filter_func filter, void *user_data); +RAPTOR_API +void raptor_sax2_parse_start(raptor_sax2 *sax2, raptor_uri *base_uri); +RAPTOR_API +int raptor_sax2_parse_chunk(raptor_sax2* sax2, const unsigned char *buffer, size_t len, int is_end); +RAPTOR_API +const unsigned char* raptor_sax2_inscope_xml_language(raptor_sax2* sax2); +RAPTOR_API +raptor_uri* raptor_sax2_inscope_base_uri(raptor_sax2* sax2); + + + +/* AVL Trees */ + +/** + * raptor_avltree: + * + * AVL Tree + */ +typedef struct raptor_avltree_s raptor_avltree; + +/** + * raptor_avltree_iterator: + * + * AVL Tree Iterator as created by 
raptor_new_avltree_iterator() + */ +typedef struct raptor_avltree_iterator_s raptor_avltree_iterator; + +/** + * raptor_avltree_visit_handler: + * @depth: depth of object in tree + * @data: data object being visited + * @user_data: user data arg to raptor_avltree_visit() + * + * AVL Tree visitor function as given to raptor_avltree_visit() + * + * Return value: non-0 to terminate visit early. + */ +typedef int (*raptor_avltree_visit_handler)(int depth, void* data, void *user_data); + + +/** + * raptor_avltree_bitflags: + * @RAPTOR_AVLTREE_FLAG_REPLACE_DUPLICATES: If set raptor_avltree_add() will replace any duplicate items. If not set, raptor_avltree_add() will not replace them and will return status >0 when adding a duplicate. (Default is not set) + * + * Bit flags for AVL Tree class constructor raptor_new_avltree() + **/ +typedef enum { + RAPTOR_AVLTREE_FLAG_REPLACE_DUPLICATES = 1 +} raptor_avltree_bitflags; + + +RAPTOR_API +raptor_avltree* raptor_new_avltree(raptor_data_compare_handler compare_handler, raptor_data_free_handler free_handler, unsigned int flags); +RAPTOR_API +void raptor_free_avltree(raptor_avltree* tree); + +/* methods */ +RAPTOR_API +int raptor_avltree_add(raptor_avltree* tree, void* p_data); +RAPTOR_API +void* raptor_avltree_remove(raptor_avltree* tree, void* p_data); +RAPTOR_API +int raptor_avltree_delete(raptor_avltree* tree, void* p_data); +RAPTOR_API +void raptor_avltree_trim(raptor_avltree* tree); +RAPTOR_API +void* raptor_avltree_search(raptor_avltree* tree, const void* p_data); +RAPTOR_API +int raptor_avltree_visit(raptor_avltree* tree, raptor_avltree_visit_handler visit_handler, void* user_data); +RAPTOR_API +int raptor_avltree_size(raptor_avltree* tree); +RAPTOR_API +void raptor_avltree_set_print_handler(raptor_avltree* tree, raptor_data_print_handler print_handler); +RAPTOR_API +int raptor_avltree_print(raptor_avltree* tree, FILE* stream); + +RAPTOR_API +raptor_avltree_iterator* raptor_new_avltree_iterator(raptor_avltree* tree, void* 
range, raptor_data_free_handler range_free_handler, int direction); +RAPTOR_API +void raptor_free_avltree_iterator(raptor_avltree_iterator* iterator); + +RAPTOR_API +int raptor_avltree_iterator_is_end(raptor_avltree_iterator* iterator); +RAPTOR_API +int raptor_avltree_iterator_next(raptor_avltree_iterator* iterator); +RAPTOR_API +void* raptor_avltree_iterator_get(raptor_avltree_iterator* iterator); + +/* utility methods */ +RAPTOR_API +void raptor_sort_r(void *base, size_t nel, size_t width, raptor_data_compare_arg_handler compar, void *user_data); + + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/raptor_abbrev.c b/src/raptor_abbrev.c new file mode 100644 index 0000000..d850e58 --- /dev/null +++ b/src/raptor_abbrev.c @@ -0,0 +1,642 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_abbrev.c - Code common to abbreviating serializers (ttl/rdfxmla) + * + * Copyright (C) 2006, Dave Robillard + * Copyright (C) 2004-2011, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * Copyright (C) 2005, Steve Shepard steveshep@gmail.com + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * raptor_abbrev_node implementation + * + */ + + +static raptor_abbrev_subject* raptor_new_abbrev_subject(raptor_abbrev_node* node); + +/** + * raptor_new_abbrev_node: + * @world: raptor world + * @term: term to use + * + * INTERNAL - Constructor for raptor_abbrev_node + * + * The @term is copied by the constructor. + * + * Return value: new raptor abbrev node or NULL on failure + **/ +raptor_abbrev_node* +raptor_new_abbrev_node(raptor_world* world, raptor_term *term) +{ + raptor_abbrev_node* node = NULL; + + if(term->type == RAPTOR_TERM_TYPE_UNKNOWN) + return NULL; + + node = RAPTOR_CALLOC(raptor_abbrev_node*, 1, sizeof(*node)); + if(node) { + node->world = world; + node->ref_count = 1; + node->term = raptor_term_copy(term); + } + + return node; +} + + +/** + * raptor_new_abbrev_node: + * @node: raptor abbrev node + * + * INTERNAL - Destructor for raptor_abbrev_node + */ +void +raptor_free_abbrev_node(raptor_abbrev_node* node) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(node, raptor_abbrev_node); + + if(--node->ref_count) + return; + + if(node->term) + raptor_free_term(node->term); + + RAPTOR_FREE(raptor_abbrev_node, node); +} + + +/** + * raptor_abbrev_node_compare: + * @node1: node 1 + * @node2: node 2 + * + * INTERNAL - compare two raptor_abbrev_nodes. + * + * This needs to be a strong ordering for use by raptor_avltree. + * This is very performance critical, anything to make it faster is worth it. 
+ * + * Return value: <0, 0 or 1 if @node1 less than, equal or greater + * than @node2 respectively + */ +int +raptor_abbrev_node_compare(raptor_abbrev_node* node1, raptor_abbrev_node* node2) +{ + if(node1 == node2) + return 0; + + return raptor_term_compare(node1->term, node2->term); +} + + +/** + * raptor_abbrev_node_equals: + * @node1: node 1 + * @node2: node 2 + * + * INTERNAL - compare two raptor_abbrev_nodes for equality + * + * Return value: non-0 if nodes are equal + */ +int +raptor_abbrev_node_equals(raptor_abbrev_node* node1, raptor_abbrev_node* node2) +{ + return raptor_term_equals(node1->term, node2->term); +} + + +/** + * raptor_abbrev_node_lookup: + * @nodes: Tree of nodes to search + * @node: Node value to search for + * + * INTERNAL - Look in an avltree of nodes for a node described by parameters + * and if present create it, add it and return it + * + * Return value: the node found/created or NULL on failure + */ +raptor_abbrev_node* +raptor_abbrev_node_lookup(raptor_avltree* nodes, raptor_term* term) +{ + raptor_abbrev_node *lookup_node; + raptor_abbrev_node *rv_node; + + /* Create a temporary node for search comparison. 
*/ + lookup_node = raptor_new_abbrev_node(term->world, term); + + if(!lookup_node) + return NULL; + + rv_node = (raptor_abbrev_node*)raptor_avltree_search(nodes, lookup_node); + + /* If not found, insert/return a new one */ + if(!rv_node) { + + if(raptor_avltree_add(nodes, lookup_node)) + return NULL; + else + return lookup_node; + + /* Found */ + } else { + raptor_free_abbrev_node(lookup_node); + return rv_node; + } +} + + +static raptor_abbrev_node** +raptor_new_abbrev_po(raptor_abbrev_node* predicate, raptor_abbrev_node* object) +{ + raptor_abbrev_node** nodes = NULL; + nodes = RAPTOR_CALLOC(raptor_abbrev_node**, 2, sizeof(raptor_abbrev_node*)); + if(!nodes) + return NULL; + + nodes[0] = predicate; + nodes[1] = object; + + return nodes; +} + + +static void +raptor_free_abbrev_po(raptor_abbrev_node** nodes) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(nodes, raptor_abbrev_node_pair); + + if(nodes[0]) + raptor_free_abbrev_node(nodes[0]); + if(nodes[1]) + raptor_free_abbrev_node(nodes[1]); + + RAPTOR_FREE(raptor_abbrev_nodes, nodes); +} + + +static int +raptor_compare_abbrev_po(raptor_abbrev_node** nodes1, + raptor_abbrev_node** nodes2) +{ + int d; + d = raptor_abbrev_node_compare(nodes1[0], nodes2[0]); + if(!d) + d = raptor_abbrev_node_compare(nodes1[1], nodes2[1]); + + return d; +} + + +#ifdef RAPTOR_DEBUG +static int +raptor_print_abbrev_po(void** object, FILE* handle) +{ + raptor_abbrev_node** nodes = (raptor_abbrev_node**)object; + raptor_abbrev_node* p = nodes[0]; + raptor_abbrev_node* o = nodes[1]; + + if(p && o) { + fputc('[', handle); + raptor_term_print_as_ntriples(p->term, handle); + fputs(" : ", handle); + raptor_term_print_as_ntriples(o->term, handle); + fputs("]\n", handle); + } + return 0; +} +#endif + + +/* + * raptor_abbrev_subject implementation + * + * The subject of triples, with all predicates and values + * linked from them. 
+ * + **/ + + +static raptor_abbrev_subject* +raptor_new_abbrev_subject(raptor_abbrev_node* node) +{ + raptor_abbrev_subject* subject; + + if(!(node->term->type == RAPTOR_TERM_TYPE_URI || + node->term->type == RAPTOR_TERM_TYPE_BLANK)) { + raptor_log_error(node->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Subject node is type %d not a uri or blank node"); + return NULL; + } + + subject = RAPTOR_CALLOC(raptor_abbrev_subject*, 1, sizeof(*subject)); + + if(subject) { + subject->node = node; + subject->node->ref_count++; + subject->node->count_as_subject++; + + subject->node_type = NULL; + + subject->valid = 1; + + subject->properties = + raptor_new_avltree((raptor_data_compare_handler)raptor_compare_abbrev_po, + (raptor_data_free_handler)raptor_free_abbrev_po, + 0); +#ifdef RAPTOR_DEBUG + if(subject->properties) + raptor_avltree_set_print_handler(subject->properties, + (raptor_data_print_handler)raptor_print_abbrev_po); +#endif + + subject->list_items = + raptor_new_sequence((raptor_data_free_handler)raptor_free_abbrev_node, NULL); + + if(!subject->properties || !subject->list_items) { + raptor_free_abbrev_subject(subject); + subject = NULL; + } + + } + + return subject; +} + + +void +raptor_free_abbrev_subject(raptor_abbrev_subject* subject) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(subject, raptor_abbrev_subject); + + if(subject->node) + raptor_free_abbrev_node(subject->node); + + if(subject->node_type) + raptor_free_abbrev_node(subject->node_type); + + if(subject->properties) + raptor_free_avltree(subject->properties); + + if(subject->list_items) + raptor_free_sequence(subject->list_items); + + RAPTOR_FREE(raptor_subject, subject); +} + + +int +raptor_abbrev_subject_valid(raptor_abbrev_subject *subject) +{ + return subject->valid; +} + + +int +raptor_abbrev_subject_invalidate(raptor_abbrev_subject *subject) +{ + subject->valid = 0; + return 0; +} + + + + +/** + * raptor_subject_add_property: + * @subject: subject node to add to + * @predicate: predicate node + * @object: 
object node + * + * INTERNAL - Add predicate/object pair into properties array of a subject node. + * + * The subject node takes ownership of the predicate/object nodes. + * On error, predicate/object are freed immediately. + * + * Return value: <0 on failure, >0 if pair is a duplicate and it was not added + **/ +int +raptor_abbrev_subject_add_property(raptor_abbrev_subject* subject, + raptor_abbrev_node* predicate, + raptor_abbrev_node* object) +{ + int err; + raptor_abbrev_node** nodes; + + nodes = raptor_new_abbrev_po(predicate, object); + if(!nodes) + return -1; + + predicate->ref_count++; + object->ref_count++; + + if(raptor_avltree_search(subject->properties, nodes)) { + /* Already present - do not add a duplicate triple (s->[p o]) */ + raptor_free_abbrev_po(nodes); + return 1; + } + +#if 0 + fprintf(stderr, "Adding P,O "); + raptor_print_abbrev_po(stderr, nodes); + + raptor_avltree_dump(subject->properties, stderr); +#endif + err = raptor_avltree_add(subject->properties, nodes); + if(err) + return -1; +#if 0 + fprintf(stderr, "Result "); + raptor_avltree_print(subject->properties, stderr); + + raptor_avltree_dump(subject->properties, stderr); + + raptor_avltree_check(subject->properties); + + fprintf(stderr, "\n\n"); +#endif + + return 0; +} + + +int +raptor_abbrev_subject_compare(raptor_abbrev_subject* subject1, + raptor_abbrev_subject* subject2) +{ + return raptor_abbrev_node_compare(subject1->node, subject2->node); +} + + +/** + * raptor_abbrev_subject_find: + * @subjects: AVL-Tree of subject nodes + * @term: node to find + * + * INTERNAL - Find a subject node in an AVL-Tree of subject nodes + * + * Return value: node or NULL if not found or failure + */ +raptor_abbrev_subject* +raptor_abbrev_subject_find(raptor_avltree *subjects, raptor_term* node) +{ + raptor_abbrev_subject* rv_subject = NULL; + raptor_abbrev_node* lookup_node = NULL; + raptor_abbrev_subject* lookup = NULL; + + /* datatype and language are both NULL for a subject node */ + + lookup_node 
= raptor_new_abbrev_node(node->world, node); + if(!lookup_node) + return NULL; + + lookup = raptor_new_abbrev_subject(lookup_node); + if(!lookup) { + raptor_free_abbrev_node(lookup_node); + return NULL; + } + + rv_subject = (raptor_abbrev_subject*) raptor_avltree_search(subjects, lookup); + + raptor_free_abbrev_subject(lookup); + raptor_free_abbrev_node(lookup_node); + + return rv_subject; +} + + +/** + * raptor_abbrev_subject_lookup: + * @nodes: AVL-Tree of subject nodes + * @subjects: AVL-Tree of URI-subject nodes + * @blanks: AVL-Tree of blank-subject nodes + * @term: node to find + * + * INTERNAL - Find a subject node in the appropriate uri/blank AVL-Tree of subject nodes or add it + * + * Return value: node or NULL on failure + */ +raptor_abbrev_subject* +raptor_abbrev_subject_lookup(raptor_avltree* nodes, + raptor_avltree* subjects, raptor_avltree* blanks, + raptor_term* term) +{ + raptor_avltree *tree; + raptor_abbrev_subject* rv_subject; + + /* Search for specified resource. */ + tree = (term->type == RAPTOR_TERM_TYPE_BLANK) ? blanks : subjects; + rv_subject = raptor_abbrev_subject_find(tree, term); + + /* If not found, create one and insert it */ + if(!rv_subject) { + raptor_abbrev_node* node = raptor_abbrev_node_lookup(nodes, term); + if(node) { + rv_subject = raptor_new_abbrev_subject(node); + if(rv_subject) { + if(raptor_avltree_add(tree, rv_subject)) { + rv_subject = NULL; + } + } + } + } + + return rv_subject; +} + + +#ifdef ABBREV_DEBUG +void +raptor_print_subject(raptor_abbrev_subject* subject) +{ + int i; + unsigned char *subj; + unsigned char *pred; + unsigned char *obj; + raptor_avltree_iterator* iter = NULL; + + /* Note: The raptor_abbrev_node field passed as the first argument for + * raptor_term_to_string() is somewhat arbitrary, since as + * the data structure is designed, the first word in the value union + * is what was passed as the subject/predicate/object of the + * statement. 
+ */ + subj = raptor_term_to_string(subject); + + if(subject->type) { + obj = raptor_term_to_string(subject); + fprintf(stderr,"[%s, http://www.w3.org/1999/02/22-rdf-syntax-ns#type, %s]\n", subj, obj); + RAPTOR_FREE(char*, obj); + } + + for(i = 0; i < raptor_sequence_size(subject->elements); i++) { + + raptor_abbrev_node* o = raptor_sequence_get_at(subject->elements, i); + if(o) { + obj = raptor_term_to_string(o); + fprintf(stderr,"[%s, [rdf:_%d], %s]\n", subj, i, obj); + RAPTOR_FREE(char*, obj); + } + + } + + + iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1); + while(iter) { + raptor_abbrev_node** nodes; + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + raptor_print_abbrev_po(stderr, nodes); + + if(raptor_avltree_iterator_next(iter)) + break; + } + if(iter) + raptor_free_avltree_iterator(iter); + + RAPTOR_FREE(char*, subj); + +} +#endif + + +/* helper functions */ + +/** + * raptor_new_qname_from_resource: + * @namespaces: sequence of namespaces (corresponding to nstack) + * @nstack: #raptor_namespace_stack to use/update + * @namespace_count: size of nstack (may be modified) + * @node: #raptor_abbrev_node to use + * + * INTERNAL - Make an XML QName from the URI associated with the node. 
+ * + * Return value: the QName or NULL on failure + **/ +raptor_qname* +raptor_new_qname_from_resource(raptor_sequence* namespaces, + raptor_namespace_stack* nstack, + int* namespace_count, + raptor_abbrev_node* node) +{ + unsigned char* name = NULL; /* where to split predicate name */ + size_t name_len = 1; + unsigned char *uri_string; + size_t uri_len; + unsigned char *p; + raptor_uri *ns_uri; + raptor_namespace *ns; + raptor_qname *qname; + unsigned char *ns_uri_string; + size_t ns_uri_string_len; + + if(node->term->type != RAPTOR_TERM_TYPE_URI) { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL1("Node must be a URI\n"); +#else + return NULL; +#endif + } + + qname = raptor_new_qname_from_namespace_uri(nstack, node->term->value.uri, 10); + if(qname) + return qname; + + uri_string = raptor_uri_as_counted_string(node->term->value.uri, &uri_len); + + p= uri_string; + name_len = uri_len; + while(name_len >0) { + if(raptor_xml_name_check(p, name_len, 10)) { + name = p; + break; + } + p++; name_len--; + } + + if(!name || (name == uri_string)) + return NULL; + + ns_uri_string_len = uri_len - name_len; + ns_uri_string = RAPTOR_MALLOC(unsigned char*, ns_uri_string_len + 1); + if(!ns_uri_string) + return NULL; + memcpy(ns_uri_string, (const char*)uri_string, ns_uri_string_len); + ns_uri_string[ns_uri_string_len] = '\0'; + + ns_uri = raptor_new_uri_from_counted_string(node->world, ns_uri_string, + ns_uri_string_len); + RAPTOR_FREE(char*, ns_uri_string); + + if(!ns_uri) + return NULL; + + ns = raptor_namespaces_find_namespace_by_uri(nstack, ns_uri); + if(!ns) { + /* The namespace was not declared, so create one */ + unsigned char prefix[2 + MAX_ASCII_INT_SIZE + 1]; + (*namespace_count)++; + prefix[0] = 'n'; + prefix[1] = 's'; + (void)raptor_format_integer(RAPTOR_GOOD_CAST(char*,&prefix[2]), + MAX_ASCII_INT_SIZE + 1, *namespace_count, + /* base */ 10, -1, '\0'); + + ns = raptor_new_namespace_from_uri(nstack, prefix, ns_uri, 0); + + /* We'll most likely need this namespace again. 
Push it on our + * stack. It will be deleted in raptor_rdfxmla_serialize_terminate() + */ + if(raptor_sequence_push(namespaces, ns)) { + /* namespaces sequence has no free handler so we have to free + * the ns ourselves on error + */ + raptor_free_namespace(ns); + raptor_free_uri(ns_uri); + return NULL; + } + } + + qname = raptor_new_qname_from_namespace_local_name(node->world, ns, name, + NULL); + + raptor_free_uri(ns_uri); + + return qname; +} diff --git a/src/raptor_avltree.c b/src/raptor_avltree.c new file mode 100644 index 0000000..4001a92 --- /dev/null +++ b/src/raptor_avltree.c @@ -0,0 +1,1791 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_avltree.c - Balanced Binary Tree / AVL Tree + * + * This file is in the public domain. + * + * Based on public domain sources posted to comp.sources.misc in 1993 + * + * From: p...@vix.com (Paul Vixie) + * Newsgroups: comp.sources.unix + * Subject: v27i034: REPOST AVL Tree subroutines (replaces v11i020 from 1987), Part01/01 + * Date: 6 Sep 1993 13:51:22 -0700 + * Message-ID: <1.747348668.4037@gw.home.vix.com> + * + * ---------------------------------------------------------------------- + * Original headers below + */ + +/* as_tree - tree library for as + * vix 14dec85 [written] + * vix 02feb86 [added tree balancing from wirth "a+ds=p" p. 220-221] + * vix 06feb86 [added tree_mung()] + * vix 20jun86 [added tree_delete per wirth a+ds (mod2 v.) p. 224] + * vix 23jun86 [added delete uar to add for replaced nodes] + * vix 22jan93 [revisited; uses RCS, ANSI, POSIX; has bug fixes] + */ + + +/* This program text was created by Paul Vixie using examples from the book: + * "Algorithms & Data Structures," Niklaus Wirth, Prentice-Hall, 1986, ISBN + * 0-13-022005-1. This code and associated documentation is hereby placed + * in the public domain. 
+ */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +#define RAPTOR_AVLTREE_DEBUG1(msg) RAPTOR_DEBUG1(msg) +#else +#define RAPTOR_AVLTREE_DEBUG1(msg) +#endif + + +#define RAPTOR_AVLTREE_ENOMEM -1 +#define RAPTOR_AVLTREE_EXISTS 1 + + +#ifndef STANDALONE + +/* raptor_avltree.c */ +typedef struct raptor_avltree_node_s raptor_avltree_node; + +/* AVL-tree */ +struct raptor_avltree_s { + /* root node of tree */ + raptor_avltree_node* root; + + /* node comparison function (optional) */ + raptor_data_compare_handler compare_handler; + + /* node deletion function (optional) */ + raptor_data_free_handler free_handler; + + /* node print function (optional) */ + raptor_data_print_handler print_handler; + + /* tree bitflags - bitmask of #raptor_avltree_bitflags flags */ + unsigned int flags; + + /* number of nodes in tree */ + unsigned int size; +}; + + +/* AVL-tree node */ +struct raptor_avltree_node_s { + /* parent tree */ + struct raptor_avltree_node_s *parent; + + /* left child tree */ + struct raptor_avltree_node_s *left; + + /* right child tree */ + struct raptor_avltree_node_s *right; + + /* balance factor = + * height of the right tree minus the height of the left tree + * i.e. 
equal: 0 left larger: -1 right larger: 1 + */ + signed char balance; + + /* actual data */ + void* data; +}; + + +#ifndef TRUE +#define TRUE 1 +#define FALSE 0 +#endif + + +/* local prototypes */ +static int raptor_avltree_sprout(raptor_avltree* tree, raptor_avltree_node* parent, raptor_avltree_node** node_pp, void* p_data, int *rebalancing_p); +static void* raptor_avltree_delete_internal(raptor_avltree* tree, raptor_avltree_node** node_pp, void* p_data, int *rebalancing_p); +static void* raptor_avltree_delete_internal2(raptor_avltree* tree, raptor_avltree_node** ppr_r, int *rebalancing_p, raptor_avltree_node** ppr_q); +static void raptor_avltree_balance_left(raptor_avltree* tree, raptor_avltree_node** node_pp, int *rebalancing_p); +static void raptor_avltree_balance_right(raptor_avltree* tree, raptor_avltree_node** node_pp, int *rebalancing_p); +static raptor_avltree_node* raptor_avltree_search_internal(raptor_avltree* tree, raptor_avltree_node* node, const void* p_data); +static int raptor_avltree_visit_internal(raptor_avltree* tree, raptor_avltree_node* node, int depth, raptor_avltree_visit_handler visit_fn, void* user_data); +static void raptor_free_avltree_internal(raptor_avltree* tree, raptor_avltree_node* node); +#ifdef RAPTOR_DEBUG +static void raptor_avltree_check_internal(raptor_avltree* tree, raptor_avltree_node* node, unsigned int* count_p); +#endif + + +/** + * raptor_new_avltree: + * @compare_handler: item comparison handler for ordering + * @free_handler: item free handler (or NULL) + * @flags: AVLTree flags - bitmask of #raptor_avltree_bitflags flags. 
+ * + * AVL Tree Constructor + * + * Return value: new AVL Tree or NULL on failure + */ +raptor_avltree* +raptor_new_avltree(raptor_data_compare_handler compare_handler, + raptor_data_free_handler free_handler, + unsigned int flags) +{ + raptor_avltree* tree; + + tree = RAPTOR_MALLOC(raptor_avltree*, sizeof(*tree)); + if(!tree) + return NULL; + + tree->root = NULL; + tree->compare_handler = compare_handler; + tree->free_handler = free_handler; + tree->print_handler = NULL; + tree->flags = flags; + tree->size = 0; + + return tree; +} + + +/** + * raptor_free_avltree: + * @tree: AVLTree object + * + * AVL Tree destructor + */ +void +raptor_free_avltree(raptor_avltree* tree) +{ + if(!tree) + return; + + raptor_free_avltree_internal(tree, tree->root); + + RAPTOR_FREE(raptor_avltree, tree); +} + + +static void +raptor_free_avltree_internal(raptor_avltree* tree, raptor_avltree_node* node) +{ + if(node) { + raptor_free_avltree_internal(tree, node->left); + + raptor_free_avltree_internal(tree, node->right); + + if(tree->free_handler) + tree->free_handler(node->data); + tree->size--; + RAPTOR_FREE(raptor_avltree_node, node); + } +} + + +/* methods */ + +static raptor_avltree_node* +raptor_avltree_search_internal(raptor_avltree* tree, raptor_avltree_node* node, + const void* p_data) +{ + if(node) { + int cmp= tree->compare_handler(p_data, node->data); + + if(cmp > 0) + return raptor_avltree_search_internal(tree, node->right, p_data); + else if(cmp < 0) + return raptor_avltree_search_internal(tree, node->left, p_data); + + /* found */ + return node; + } + + /* otherwise not found */ + return NULL; +} + + +/** + * raptor_avltree_search: + * @tree: AVL Tree object + * @p_data: pointer to data item + * + * Find an item in an AVL Tree + * + * Return value: shared pointer to item (still owned by AVL Tree) or NULL on failure or if not found + */ +void* +raptor_avltree_search(raptor_avltree* tree, const void* p_data) +{ + raptor_avltree_node* node; + node = 
raptor_avltree_search_internal(tree, tree->root, p_data); + return node ? node->data : NULL; +} + + +/** + * raptor_avltree_add: + * @tree: AVL Tree object + * @p_data: pointer to data item + * + * add an item to an AVL Tree + * + * The item added becomes owned by the AVL Tree, and will be freed by + * the free_handler argument given to raptor_new_avltree(). + * + * Return value: 0 on success, >0 if equivalent item exists (and the old element remains in the tree), <0 on failure + */ +int +raptor_avltree_add(raptor_avltree* tree, void* p_data) +{ + int rebalancing = FALSE; + int rv; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Checking tree before adding\n"); + raptor_avltree_check(tree); +#endif + + rv = raptor_avltree_sprout(tree, NULL, &tree->root, p_data, + &rebalancing); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Checking tree after adding\n"); + raptor_avltree_check(tree); +#endif + + return rv; +} + + +/** + * raptor_avltree_remove: + * @tree: AVL Tree object + * @p_data: pointer to data item + * + * Remove an item from an AVL Tree and return it + * + * The item removed is no longer owned by the AVL Tree and is + * owned by the caller. 
+ * + * Return value: object or NULL on failure or if not found + */ +void* +raptor_avltree_remove(raptor_avltree* tree, void* p_data) +{ + int rebalancing = FALSE; + void* rdata; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Checking tree before removing\n"); + raptor_avltree_dump(tree,stderr); + raptor_avltree_check(tree); +#endif + rdata = raptor_avltree_delete_internal(tree, &tree->root, p_data, + &rebalancing); + if(rdata) + tree->size--; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Checking tree after removing\n"); + raptor_avltree_dump(tree,stderr); + raptor_avltree_check(tree); +#endif + + return rdata; +} + + +/** + * raptor_avltree_delete: + * @tree: AVL Tree object + * @p_data: pointer to data item + * + * Remove an item from an AVL Tree and free it + * + * Return value: non-0 on failure + */ +int +raptor_avltree_delete(raptor_avltree* tree, void* p_data) +{ + void* rdata; + + rdata = raptor_avltree_remove(tree, p_data); + if(rdata) { + if(tree->free_handler) + tree->free_handler(rdata); + } + + return (rdata != NULL); +} + + +/** + * raptor_avltree_trim: + * @tree: AVLTree object + * + * Delete all nodes from an AVL tree but keep the shell. 
+ */ +void +raptor_avltree_trim(raptor_avltree* tree) +{ + if(!tree) + return; + + raptor_free_avltree_internal(tree, tree->root); + tree->root = NULL; +} + + +static int +raptor_avltree_visit_internal(raptor_avltree* tree, raptor_avltree_node* node, + int depth, + raptor_avltree_visit_handler visit_handler, + void* user_data) +{ + if(!node) + return TRUE; + + if(!raptor_avltree_visit_internal(tree, node->left, depth+1, + visit_handler, user_data)) + return FALSE; + + if(!visit_handler(depth, node->data, user_data)) + return FALSE; + + if(!raptor_avltree_visit_internal(tree, node->right, depth+1, + visit_handler, user_data)) + return FALSE; + + return TRUE; +} + + +/** + * raptor_avltree_visit: + * @tree: AVL Tree object + * @visit_handler: visit function to call at each item + * @user_data: user data pointer fo visit function + * + * Perform an in-order visit of the items in the AVL Tree + * + * Return value: non-0 if traversal was terminated early by @visit_handler +*/ +int +raptor_avltree_visit(raptor_avltree* tree, + raptor_avltree_visit_handler visit_handler, + void* user_data) +{ + return raptor_avltree_visit_internal(tree, tree->root, 0, + visit_handler, user_data); +} + + +#ifdef RAPTOR_DEBUG +static void +raptor_avltree_print_node(raptor_avltree_node* node) +{ + fprintf(stderr, "%p: parent %p left %p right %p data %p", + RAPTOR_VOIDP(node), + RAPTOR_VOIDP(node->parent), + RAPTOR_VOIDP(node->left), + RAPTOR_VOIDP(node->right), + RAPTOR_VOIDP(node->data)); +} + + +static void +raptor_avltree_check_node(raptor_avltree* tree, raptor_avltree_node* node, + const char* fn, const char* where) +{ + if(node->parent) { + if((node->parent == node->left) || (node->parent == node->right)) { + if(fn && where) + fprintf(stderr, "%s (%s): ", fn, where); + fputs("ERROR bad node ", stderr); + raptor_avltree_print_node(node); + fputc('\n', stderr); + fflush(stderr); + abort(); + } + + if(node->parent->left != node && node->parent->right != node) { + if(fn && where) + 
fprintf(stderr, "%s (%s): ", fn, where); + fputs("ERROR parent node ", stderr); + raptor_avltree_print_node(node->parent); + fputs(" has no reference to child node ", stderr); + raptor_avltree_print_node(node); + fputc('\n', stderr); + fflush(stderr); + abort(); + } + } + + if(node->left) { + if(node->left->parent != node) { + if(fn && where) + fprintf(stderr, "%s (%s): ", fn, where); + fputs("ERROR left child node ", stderr); + raptor_avltree_print_node(node->left); + fputs(" has no reference to this[parent] node ", stderr); + raptor_avltree_print_node(node); + fputc('\n', stderr); + fflush(stderr); + abort(); + } + } + if(node->right) { + if(node->right->parent != node) { + if(fn && where) + fprintf(stderr, "%s (%s): ", fn, where); + fputs("ERROR right child node ", stderr); + raptor_avltree_print_node(node->right); + fputs(" has no reference to this[parent] node ", stderr); + raptor_avltree_print_node(node); + fputc('\n', stderr); + fflush(stderr); + abort(); + } + } +} +#endif + + +static int +raptor_avltree_sprout_left(raptor_avltree* tree, raptor_avltree_node** node_pp, + void* p_data, int *rebalancing_p) +{ + raptor_avltree_node *p1, *p2, *p_parent; + int rc; + + RAPTOR_AVLTREE_DEBUG1("LESS. raptor_avltree_sprouting left.\n"); + + p_parent = (*node_pp)->parent; + + rc = raptor_avltree_sprout(tree, *node_pp, &(*node_pp)->left, p_data, + rebalancing_p); + if(rc) + return rc; + + if(!*rebalancing_p) + return FALSE; + + /* left branch has grown longer */ + RAPTOR_AVLTREE_DEBUG1("LESS: left branch has grown\n"); + switch((*node_pp)->balance) { + case 1: + /* right branch WAS longer; balance is ok now */ + RAPTOR_AVLTREE_DEBUG1("LESS: case 1.. balance restored implicitly\n"); + (*node_pp)->balance = 0; + *rebalancing_p = FALSE; + break; + + case 0: + /* balance WAS okay; now left branch longer */ + RAPTOR_AVLTREE_DEBUG1("LESS: case 0.. balance bad but still ok\n"); + (*node_pp)->balance = -1; + break; + + case -1: + /* left branch was already too long. 
rebalance */ + RAPTOR_AVLTREE_DEBUG1("LESS: case -1: rebalancing\n"); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Tree before rebalancing\n"); + raptor_avltree_dump(tree, stderr); +#endif + p1 = (*node_pp)->left; + + if(p1->balance == -1) { + /* LL */ + RAPTOR_AVLTREE_DEBUG1("LESS: single LL\n"); + (*node_pp)->left = p1->right; + if((*node_pp)->left) + (*node_pp)->left->parent = (*node_pp); + p1->right = *node_pp; + if(p1->right) + p1->right->parent = p1; + (*node_pp)->balance = 0; + *node_pp = p1; + (*node_pp)->parent = p_parent; + } else { + /* double LR */ + RAPTOR_AVLTREE_DEBUG1("LESS: double LR\n"); + p2 = p1->right; + p1->right= p2->left; + if(p1->right) + p1->right->parent = p1; + p2->left = p1; + if(p2->left) + p2->left->parent = p2; + + (*node_pp)->left = p2->right; + if((*node_pp)->left) + (*node_pp)->left->parent = (*node_pp); + p2->right = *node_pp; + if(p2->right) + p2->right->parent = p2; + + if(p2->balance == -1) + (*node_pp)->balance = 1; + else + (*node_pp)->balance = 0; + + if(p2->balance == 1) + p1->balance = -1; + else + p1->balance = 0; + + *node_pp = p2; + (*node_pp)->parent = p_parent; + } /* end else */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Tree after rebalancing\n"); + raptor_avltree_dump(tree, stderr); +#endif + + (*node_pp)->balance = 0; + *rebalancing_p = FALSE; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + if(1) { + unsigned int discard = 0; + raptor_avltree_check_internal(tree, *node_pp, &discard); + } +#endif + } /* end switch */ + + return FALSE; +} + + +static int +raptor_avltree_sprout_right(raptor_avltree* tree, + raptor_avltree_node** node_pp, + void* p_data, int *rebalancing_p) +{ + raptor_avltree_node *p1, *p2, *p_parent; + int rc; + + RAPTOR_AVLTREE_DEBUG1("MORE: raptor_avltree_sprouting to the right\n"); + + p_parent = (*node_pp)->parent; + + rc = raptor_avltree_sprout(tree, *node_pp, &(*node_pp)->right, p_data, + rebalancing_p); + if(rc) + return rc; + + 
if(!*rebalancing_p) + return FALSE; + + /* right branch has grown longer */ + RAPTOR_AVLTREE_DEBUG1("MORE: right branch has grown\n"); + + switch((*node_pp)->balance) { + case -1: + RAPTOR_AVLTREE_DEBUG1("MORE: balance was off, fixed implicitly\n"); + (*node_pp)->balance = 0; + *rebalancing_p = FALSE; + break; + + case 0: + RAPTOR_AVLTREE_DEBUG1("MORE: balance was okay, now off but ok\n"); + (*node_pp)->balance = 1; + break; + + case 1: + RAPTOR_AVLTREE_DEBUG1("MORE: balance was off, need to rebalance\n"); + p1 = (*node_pp)->right; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Tree before rebalancing\n"); + raptor_avltree_dump(tree, stderr); +#endif + if(p1->balance == 1) { + /* RR */ + RAPTOR_AVLTREE_DEBUG1("MORE: single RR\n"); + (*node_pp)->right = p1->left; + if((*node_pp)->right) + (*node_pp)->right->parent = (*node_pp); + p1->left = *node_pp; + if(p1->left) + p1->left->parent = p1; + (*node_pp)->balance = 0; + *node_pp = p1; + (*node_pp)->parent = p_parent; + } else { + /* double RL */ + RAPTOR_AVLTREE_DEBUG1("MORE: double RL\n"); + + p2 = p1->left; + p1->left = p2->right; + if(p1->left) + p1->left->parent = p1; + p2->right = p1; + if(p2->right) + p2->right->parent = p2; + + (*node_pp)->right = p2->left; + if((*node_pp)->right) + (*node_pp)->right->parent = (*node_pp); + p2->left = *node_pp; + if(p2->left) + p2->left->parent = p2; + + if(p2->balance == 1) + (*node_pp)->balance = -1; + else + (*node_pp)->balance = 0; + + if(p2->balance == -1) + p1->balance = 1; + else + p1->balance = 0; + + *node_pp = p2; + (*node_pp)->parent = p_parent; + } /* end else */ + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Tree after rebalancing\n"); + raptor_avltree_dump(tree, stderr); +#endif + (*node_pp)->balance = 0; + *rebalancing_p = FALSE; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + if(1) { + unsigned int discard = 0; + raptor_avltree_check_internal(tree, *node_pp, &discard); + } +#endif + } /* end switch */ + + 
return FALSE; +} + + +/* grow a tree by sprouting with a new node + * + * Return values: + * 0 on success + * >0 if equivalent item exists (and the old element remains in the tree) + * <0 if memory is exhausted. + */ +static int +raptor_avltree_sprout(raptor_avltree* tree, raptor_avltree_node* parent, + raptor_avltree_node** node_pp, void* p_data, + int *rebalancing_p) +{ + int cmp; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_AVLTREE_DEBUG1("Enter\n"); + if ( *node_pp) { + raptor_avltree_print_node(*node_pp); + RAPTOR_AVLTREE_DEBUG1("\n"); + } + else { + RAPTOR_AVLTREE_DEBUG1("Nil node\n"); + } +#endif + + /* If grounded, add the node here, set the rebalance flag and return */ + if(!*node_pp) { + RAPTOR_AVLTREE_DEBUG1("grounded. adding new node, setting rebalancing flag true\n"); + *node_pp = RAPTOR_MALLOC(raptor_avltree_node*, sizeof(**node_pp)); + if(!*node_pp) { + if(tree->free_handler) + tree->free_handler(p_data); + return RAPTOR_AVLTREE_ENOMEM; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Creating new node %p\n", RAPTOR_VOIDP(*node_pp)); +#endif + + (*node_pp)->parent = parent; + (*node_pp)->left = NULL; + (*node_pp)->right = NULL; + (*node_pp)->balance = 0; + (*node_pp)->data= p_data; + *rebalancing_p = TRUE; + + tree->size++; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + raptor_avltree_check_node(tree, *node_pp, 0, 0); + + RAPTOR_AVLTREE_DEBUG1("Tree now looks this way\n"); + raptor_avltree_dump(tree,stderr); +#endif + + return FALSE; + } + + /* check node */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + raptor_avltree_check_node(tree, *node_pp, 0, 0); +#endif + /* compare the data */ + cmp = tree->compare_handler(p_data, (*node_pp)->data); + if(cmp < 0) + /* if LESS, prepare to move to the left. */ + return raptor_avltree_sprout_left(tree, node_pp, p_data, rebalancing_p); + else if(cmp > 0) + /* if MORE, prepare to move to the right. 
*/ + return raptor_avltree_sprout_right(tree, node_pp, p_data, rebalancing_p); + + /* otherwise equivalent key */ + *rebalancing_p = FALSE; + + if(tree->flags & RAPTOR_AVLTREE_FLAG_REPLACE_DUPLICATES) { + /* replace item with equivalent key */ + if(tree->free_handler) + tree->free_handler((*node_pp)->data); + (*node_pp)->data= p_data; + + return FALSE; + } else { + /* ignore item with equivalent key */ + if(tree->free_handler) + tree->free_handler(p_data); + return RAPTOR_AVLTREE_EXISTS; + } +} + + +static void* +raptor_avltree_delete_internal(raptor_avltree* tree, + raptor_avltree_node** node_pp, + void* p_data, + int *rebalancing_p) +{ + int cmp; + void* rdata = NULL; + + RAPTOR_AVLTREE_DEBUG1("Enter\n"); + + if(*node_pp == NULL) { + RAPTOR_AVLTREE_DEBUG1("key not in tree\n"); + return rdata; + } + + if(p_data) + cmp = tree->compare_handler((*node_pp)->data, p_data); + else + cmp = 0; + + if(cmp > 0) { + RAPTOR_AVLTREE_DEBUG1("too high - scan left\n"); + rdata = raptor_avltree_delete_internal(tree, &(*node_pp)->left, p_data, + rebalancing_p); + if(*rebalancing_p) + raptor_avltree_balance_left(tree, node_pp, rebalancing_p); + + } else if(cmp < 0) { + RAPTOR_AVLTREE_DEBUG1("too low - scan right\n"); + rdata = raptor_avltree_delete_internal(tree, &(*node_pp)->right, p_data, + rebalancing_p); + if(*rebalancing_p) + raptor_avltree_balance_right(tree, node_pp, rebalancing_p); + + } else { + raptor_avltree_node *pr_q; + + RAPTOR_AVLTREE_DEBUG1("equal\n"); + pr_q = *node_pp; + + rdata = pr_q->data; + + if(pr_q->right == NULL) { + RAPTOR_AVLTREE_DEBUG1("right subtree null\n"); + *node_pp = pr_q->left; + if(*node_pp) + (*node_pp)->parent = pr_q->parent; + *rebalancing_p = TRUE; + } else if(pr_q->left == NULL) { + RAPTOR_AVLTREE_DEBUG1("right subtree non-null, left subtree null\n"); + *node_pp = pr_q->right; + if(*node_pp) + (*node_pp)->parent = pr_q->parent; + *rebalancing_p = TRUE; + } else { + RAPTOR_AVLTREE_DEBUG1("neither subtree null\n"); + rdata = 
raptor_avltree_delete_internal2(tree, &pr_q->left, rebalancing_p, + &pr_q); + if(*rebalancing_p) + raptor_avltree_balance_left(tree, node_pp, rebalancing_p); + } + + RAPTOR_FREE(raptor_avltree_node, pr_q); + } + + return rdata; +} + + +static void* +raptor_avltree_delete_internal2(raptor_avltree* tree, + raptor_avltree_node** ppr_r, + int *rebalancing_p, + raptor_avltree_node** ppr_q) +{ + void* rdata = NULL; + + RAPTOR_AVLTREE_DEBUG1("Enter\n"); + + if((*ppr_r)->right != NULL) { + rdata = raptor_avltree_delete_internal2(tree, + &(*ppr_r)->right, + rebalancing_p, + ppr_q); + if(*rebalancing_p) + raptor_avltree_balance_right(tree, ppr_r, rebalancing_p); + + } else { + raptor_avltree_node* ppr_r_left_new_parent; + rdata = (*ppr_q)->data; + + (*ppr_q)->data = (*ppr_r)->data; + *ppr_q = *ppr_r; + ppr_r_left_new_parent = (*ppr_r)->parent; + *ppr_r = (*ppr_r)->left; + if(*ppr_r) + (*ppr_r)->parent = ppr_r_left_new_parent; + *rebalancing_p = TRUE; + } + + return rdata; +} + + +static void +raptor_avltree_balance_left(raptor_avltree* tree, + raptor_avltree_node** node_pp, int *rebalancing_p) +{ + raptor_avltree_node *p1, *p2, *p_parent; + int b1, b2; + + RAPTOR_AVLTREE_DEBUG1("left branch has shrunk\n"); + + p_parent = (*node_pp)->parent; + + switch((*node_pp)->balance) { + case -1: + RAPTOR_AVLTREE_DEBUG1("was imbalanced, fixed implicitly\n"); + (*node_pp)->balance = 0; + break; + + case 0: + RAPTOR_AVLTREE_DEBUG1("was okay, is now one off\n"); + (*node_pp)->balance = 1; + *rebalancing_p = FALSE; + break; + + case 1: + RAPTOR_AVLTREE_DEBUG1("was already off, this is too much\n"); + p1 = (*node_pp)->right; + b1 = p1->balance; + + if(b1 >= 0) { + RAPTOR_AVLTREE_DEBUG1("single RR\n"); + (*node_pp)->right = p1->left; + if((*node_pp)->right) + (*node_pp)->right->parent = (*node_pp); + p1->left = *node_pp; + if(p1->left) + p1->left->parent = p1; + if(b1 == 0) { + RAPTOR_AVLTREE_DEBUG1("b1 == 0\n"); + (*node_pp)->balance = 1; + p1->balance = -1; + *rebalancing_p = FALSE; + } 
else { + RAPTOR_AVLTREE_DEBUG1("b1 != 0\n"); + (*node_pp)->balance = 0; + p1->balance = 0; + } + *node_pp = p1; + (*node_pp)->parent = p_parent; + } else { + RAPTOR_AVLTREE_DEBUG1("double RL\n"); + p2 = p1->left; + b2 = p2->balance; + p1->left = p2->right; + if(p1->left) + p1->left->parent = p1; + p2->right = p1; + if(p2->right) + p2->right->parent = p2; + (*node_pp)->right = p2->left; + if((*node_pp)->right) + (*node_pp)->right->parent = (*node_pp); + p2->left = *node_pp; + if(p2->left) + p2->left->parent = p2; + if(b2 == 1) + (*node_pp)->balance = -1; + else + (*node_pp)->balance = 0; + if(b2 == -1) + p1->balance = 1; + else + p1->balance = 0; + *node_pp = p2; + (*node_pp)->parent = p_parent; + p2->balance = 0; + } + break; + } /* end switch */ + +} + + +static void +raptor_avltree_balance_right(raptor_avltree* tree, + raptor_avltree_node** node_pp, int *rebalancing_p) +{ + raptor_avltree_node *p1, *p2, *p_parent; + int b1, b2; + + RAPTOR_AVLTREE_DEBUG1("right branch has shrunk\n"); + + p_parent = (*node_pp)->parent; + + switch((*node_pp)->balance) { + case 1: + RAPTOR_AVLTREE_DEBUG1("was imbalanced, fixed implicitly\n"); + (*node_pp)->balance = 0; + break; + + case 0: + RAPTOR_AVLTREE_DEBUG1("was okay, is now one off\n"); + (*node_pp)->balance = -1; + *rebalancing_p = FALSE; + break; + + case -1: + RAPTOR_AVLTREE_DEBUG1("was already off, this is too much\n"); + p1 = (*node_pp)->left; + b1 = p1->balance; + + if(b1 <= 0) { + RAPTOR_AVLTREE_DEBUG1("single LL\n"); + (*node_pp)->left = p1->right; + if((*node_pp)->left) + (*node_pp)->left->parent = (*node_pp); + p1->right = *node_pp; + if(p1->right) + p1->right->parent = p1; + if(b1 == 0) { + RAPTOR_AVLTREE_DEBUG1("b1 == 0\n"); + (*node_pp)->balance = -1; + p1->balance = 1; + *rebalancing_p = FALSE; + } else { + RAPTOR_AVLTREE_DEBUG1("b1 != 0\n"); + (*node_pp)->balance = 0; + p1->balance = 0; + } + *node_pp = p1; + (*node_pp)->parent = p_parent; + } else { + RAPTOR_AVLTREE_DEBUG1("double LR\n"); + p2 = p1->right; + b2 
= p2->balance; + p1->right = p2->left; + if(p1->right) + p1->right->parent = p1; + p2->left = p1; + if(p2->left) + p2->left->parent = p2; + (*node_pp)->left = p2->right; + if((*node_pp)->left) + (*node_pp)->left->parent = (*node_pp); + p2->right = *node_pp; + if(p2->right) + p2->right->parent = p2; + if(b2 == -1) + (*node_pp)->balance = 1; + else + (*node_pp)->balance = 0; + if(b2 == 1) + p1->balance = -1; + else + p1->balance = 0; + *node_pp = p2; + (*node_pp)->parent = p_parent; + p2->balance = 0; + } + } /* end switch */ + +} + + +/** + * raptor_avltree_size: + * @tree: AVL Tree object + * + * Get the number of items in the AVL Tree + * + * Return value: number of items in tree + */ +int +raptor_avltree_size(raptor_avltree* tree) +{ + return tree->size; +} + + +/** + * raptor_avltree_set_print_handler: + * @tree: AVL Tree object + * @print_handler: print function + * + * Set the handler for printing an item in a tree + * + */ +void +raptor_avltree_set_print_handler(raptor_avltree* tree, + raptor_data_print_handler print_handler) +{ + tree->print_handler = print_handler; +} + + +/* Follow left children until a match for range is found (if range not NULL) */ +static raptor_avltree_node* +raptor_avltree_node_leftmost(raptor_avltree* tree, raptor_avltree_node* node, + void* range) +{ + /*assert(node); + assert(!range || tree->compare_handler(range, node->data) == 0);*/ + if(range) + while(node && node->left && + tree->compare_handler(range, node->left->data) == 0) + node = node->left; + else + while(node && node->left) + node = node->left; + + return node; +} + + +static raptor_avltree_node* +raptor_avltree_node_rightmost(raptor_avltree* tree, raptor_avltree_node* node, + void* range) +{ + /*assert(node); + assert(!range || tree->compare_handler(range, node->data) == 0);*/ + if(range) + while(node && node->right && + tree->compare_handler(range, node->right->data) == 0) + node = node->right; + else + while(node && node->right) + node = node->right; + return node; +} 
+ + +/* Follow right children until a match for range is found (range required) */ +static raptor_avltree_node* +raptor_avltree_node_search_right(raptor_avltree* tree, + raptor_avltree_node* node, void* range) +{ + raptor_avltree_node* result; + + if(!node) + return NULL; + + result = node->right; + while(result) { + if(tree->compare_handler(range, result->data) == 0) { + return result; + } else { + result = result->right; + } + } + + return node; +} + + +/* Follow left children until a match for range is found (range required) */ +static raptor_avltree_node* +raptor_avltree_node_search_left(raptor_avltree* tree, + raptor_avltree_node* node, void* range) +{ + raptor_avltree_node* result; + + if(!node) + return NULL; + + result = node->left; + while(result) { + if(tree->compare_handler(range, result->data) == 0) { + return result; + } else { + result = result->left; + } + } + + return node; +} + + +static raptor_avltree_node* +raptor_avltree_node_prev(raptor_avltree* tree, raptor_avltree_node* node, + void* range) +{ + int up = 0; + + /*assert(!range || tree->compare_handler(range, node->data) == 0);*/ + + if(node->left) { + /* Should never go left if the current node is already < range */ + raptor_avltree_node* prev; + prev = raptor_avltree_node_rightmost(tree, node->left, NULL); + /*assert(!range ||tree->compare_handler(range, node->data) <= 0);*/ + if(range) { + if(tree->compare_handler(range, prev->data) == 0) { + up = 0; + node = prev; + } else { + up = 1; + } + } else { + node = prev; + up = 0; + } + } else { + up = 1; + } + + if(up) { + raptor_avltree_node* last = node; + /* Need to go up */ + node = node->parent; + while(node) { + + /* moving from right subtree to this node */ + if(node->right && last == node->right) { + break; + } + + /* moved up to find an unvisited left subtree */ + if(node->left && last != node->left) { + /* Should never go left if the current node is already > range */ + /*assert(!range ||tree->compare_handler(range, node->data) <= 
0);*/ + node = raptor_avltree_node_rightmost(tree, node->left, range); + break; + } + last = node; + node = node->parent; + } + } + + if(node && range) { + if(tree->compare_handler(range, node->data) == 0) + return node; + else + return NULL; + } else { + return node; + } +} + + +/* Follow right children until a match for range is found (if range not NULL) */ +static raptor_avltree_node* +raptor_avltree_node_next(raptor_avltree* tree, raptor_avltree_node* node, + void* range) +{ + int up = 0; + + /*assert(!range || tree->compare_handler(range, node->data) == 0);*/ + + if(node->right) { + /* Should never go right if the current node is already > range */ + raptor_avltree_node* next; + next = raptor_avltree_node_leftmost(tree, node->right, NULL); + /*assert(!range ||tree->compare_handler(range, node->data) <= 0);*/ + if(range) { + if(tree->compare_handler(range, next->data) == 0) { + up = 0; + node = next; + } else { + up = 1; + } + } else { + node = next; + up = 0; + } + } else { + up = 1; + } + + if(up) { + raptor_avltree_node* last = node; + /* Need to go up */ + node = node->parent; + while(node) { + + /* moving from left subtree to this node */ + if(node->left && last == node->left) { + break; + } + + /* moved up to find an unvisited right subtree */ + if(node->right && last != node->right) { + /* Should never go right if the current node is already > range */ + /*assert(!range ||tree->compare_handler(range, node->data) <= 0);*/ + node = raptor_avltree_node_leftmost(tree, node->right, range); + break; + } + last = node; + node = node->parent; + } + } + + if(node && range) { + if(tree->compare_handler(range, node->data) == 0) + return node; + else + return NULL; + } else { + return node; + } +} + + +struct raptor_avltree_iterator_s { + raptor_avltree* tree; + raptor_avltree_node* root; + raptor_avltree_node* current; + void* range; + raptor_data_free_handler range_free_handler; + int direction; + int is_finished; +}; + + +/** + * raptor_new_avltree_iterator: + * 
@tree: #raptor_avltree object + * @range: range + * @range_free_handler: function to free @range object + * @direction: <0 to go 'backwards' otherwise 'forwards' + * + * Get an in-order iterator for the start of a range, or the entire contents + * + * If range is NULL, the entire tree is walked in order. If range + * specifies a range (i.e. the tree comparison function will 'match' + * (return 0 for) range and /several/ nodes), the iterator will be + * placed at the leftmost child matching range, and + * raptor_avltree_iterator_next will iterate over all nodes (and only + * nodes) that match range. + * + * Return value: a new #raptor_avltree_iterator object or NULL on failure + **/ +raptor_avltree_iterator* +raptor_new_avltree_iterator(raptor_avltree* tree, void* range, + raptor_data_free_handler range_free_handler, + int direction) +{ + raptor_avltree_iterator* iterator; + + iterator = RAPTOR_CALLOC(raptor_avltree_iterator*, 1, sizeof(*iterator)); + if(!iterator) + return NULL; + + iterator->is_finished = 0; + iterator->current = NULL; + + iterator->tree = tree; + iterator->range = range; + iterator->range_free_handler = range_free_handler; + iterator->direction = direction; + + if(range) { + /* find the topmost match (range is contained entirely in tree + * rooted here) + */ + iterator->current = raptor_avltree_search_internal(tree, tree->root, range); + } else { + iterator->current = tree->root; + } + + iterator->root = iterator->current; + + if(iterator->current) { + if(iterator->direction < 0) { + /* go down to find END of range (or tree) */ + while(1) { + raptor_avltree_node* pred; + iterator->current = raptor_avltree_node_rightmost(tree, + iterator->current, + range); + /* move left until a match is found */ + pred = raptor_avltree_node_search_left(tree, iterator->current->right, + range); + + if(pred && tree->compare_handler(range, pred->data) == 0) + iterator->current = pred; + else + break; + } + } else { + /* go down to find START of range (or tree) */ + 
while(1) { + raptor_avltree_node* pred; + iterator->current = raptor_avltree_node_leftmost(tree, + iterator->current, + range); + /* move right until a match is found */ + pred = raptor_avltree_node_search_right(tree, iterator->current->left, + range); + + if(pred && tree->compare_handler(range, pred->data) == 0) + iterator->current = pred; + else + break; + } + } + } + + return iterator; +} + + +/** + * raptor_free_avltree_iterator: + * @iterator: AVL Tree iterator object + * + * AVL Tree Iterator destructor + */ +void +raptor_free_avltree_iterator(raptor_avltree_iterator* iterator) +{ + if(!iterator) + return; + + if(iterator->range && iterator->range_free_handler) + iterator->range_free_handler(iterator->range); + + RAPTOR_FREE(raptor_avltree_iterator, iterator); +} + + +/** + * raptor_avltree_iterator_is_end: + * @iterator: AVL Tree iterator object + * + * Test if an iteration is finished + * + * Return value: non-0 if iteration is finished + */ +int +raptor_avltree_iterator_is_end(raptor_avltree_iterator* iterator) +{ + raptor_avltree_node *node = iterator->current; + + if(iterator->is_finished) + return 1; + iterator->is_finished = (node == NULL); + + return iterator->is_finished; +} + + +/** + * raptor_avltree_iterator_next: + * @iterator: AVL Tree iterator object + * + * Move iteration to next/prev object + * + * Return value: non-0 if iteration is finished + */ +int +raptor_avltree_iterator_next(raptor_avltree_iterator* iterator) +{ + raptor_avltree_node *node = iterator->current; + + if(!node || iterator->is_finished) + return 1; + + if(iterator->direction < 0) + iterator->current = raptor_avltree_node_prev(iterator->tree, node, + iterator->range); + else + iterator->current = raptor_avltree_node_next(iterator->tree, node, + iterator->range); + /* Stay within rooted subtree */ + if(iterator->root->parent == iterator->current) + iterator->current = NULL; + + iterator->is_finished = (iterator->current == NULL); + + return iterator->is_finished; +} + + +/** 
+ * raptor_avltree_iterator_get: + * @iterator: AVL Tree iterator object + * + * Get current iteration object + * + * Return value: object or NULL if iteration is finished + */ +void* +raptor_avltree_iterator_get(raptor_avltree_iterator* iterator) +{ + raptor_avltree_node *node = iterator->current; + + if(iterator->is_finished) + return NULL; + + iterator->is_finished = (node == NULL); + if(iterator->is_finished) + return NULL; + + return node->data; +} + + +/** + * raptor_avltree_print: + * @tree: AVL Tree + * @stream: stream to print to + * + * Print the items in the tree in order to a stream (for debugging) + * + * Return value: non-0 on failure + */ +int +raptor_avltree_print(raptor_avltree* tree, FILE* stream) +{ + int i; + int rv = 0; + raptor_avltree_iterator* iter = NULL; + + fprintf(stream, "AVL Tree size %u\n", tree->size); + for(i = 0, (iter = raptor_new_avltree_iterator(tree, NULL, NULL, 1)); + iter && !rv; + i++, (rv = raptor_avltree_iterator_next(iter))) { + void* data = raptor_avltree_iterator_get(iter); + if(!data) + continue; + fprintf(stream, "%d) ", i); + if(tree->print_handler) + tree->print_handler(data, stream); + else + fprintf(stream, "Data Node %p\n", RAPTOR_VOIDP(data)); + } + /*assert(i == tree->size);*/ + + if(iter) + raptor_free_avltree_iterator(iter); + + return 0; +} + + +#ifdef RAPTOR_DEBUG + +static int +raptor_avltree_dump_internal(raptor_avltree* tree, raptor_avltree_node* node, + int depth, FILE* stream) +{ + int i; + if(!node) + return TRUE; + + for(i = 0; i < depth; i++) + fputs(" ", stream); + fprintf(stream, "Node %p: parent %p left %p right %p data %p\n", + RAPTOR_VOIDP(node), + RAPTOR_VOIDP(node->parent), + RAPTOR_VOIDP(node->left), + RAPTOR_VOIDP(node->right), + RAPTOR_VOIDP(node->data)); + if(tree->print_handler) { + for(i= 0; i < depth; i++) + fputs(" ", stream); + tree->print_handler(node->data, stream); + } + + if(!raptor_avltree_dump_internal(tree, node->left, depth+1, stream)) + return FALSE; + + 
if(!raptor_avltree_dump_internal(tree, node->right, depth+1, stream)) + return FALSE; + + return TRUE; +} + + +/* debugging tree dump with pointers and depth indenting */ +int +raptor_avltree_dump(raptor_avltree* tree, FILE* stream) +{ + fprintf(stream, "Dumping avltree %p size %u\n", RAPTOR_VOIDP(tree), + tree->size); + + return raptor_avltree_dump_internal(tree, tree->root, 0, stream); +} + + +static void +raptor_avltree_check_internal(raptor_avltree* tree, raptor_avltree_node* node, + unsigned int* count_p) +{ + if(!node) + return; + + (*count_p)++; + + raptor_avltree_check_node(tree, node, NULL, NULL); + + raptor_avltree_check_internal(tree, node->left, count_p); + + raptor_avltree_check_internal(tree, node->right, count_p); +} + + +/* debugging tree check - parent/child pointers and counts */ +void +raptor_avltree_check(raptor_avltree* tree) +{ + unsigned int count = 0; + + raptor_avltree_check_internal(tree, tree->root, &count); + if(count != tree->size) { + fprintf(stderr, "Tree %p nodes count is %u. 
actual count %u\n", + RAPTOR_VOIDP(tree), tree->size, count); + abort(); + } +} + +#endif + +#endif + + +#ifdef STANDALONE + +#include <string.h> + +typedef struct +{ + FILE *fh; + int count; + const char** results; + int failed; +} visit_state; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +static int +print_string(int depth, void* data, void *user_data) +{ + visit_state* vs = (visit_state*)user_data; + + fprintf(vs->fh, "%3d: %s\n", vs->count, (char*) data); + vs->count++; + return 1; +} +#endif + +static int +check_string(int depth, void* data, void *user_data) +{ + visit_state* vs = (visit_state*)user_data; + const char* result = vs->results[vs->count]; + + if(strcmp((const char*)data, result)) { + fprintf(vs->fh, "%3d: Expected '%s' but found '%s'\n", vs->count, + result, (char*)data); + vs->failed = 1; + } + vs->count++; + + return 1; +} + +static int +compare_strings(const void *l, const void *r) +{ + return strcmp((const char*)l, (const char*)r); +} + + +/* one more prototype */ +int main(int argc, char *argv[]); + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = raptor_basename(argv[0]); +#define ITEM_COUNT 8 + const char *items[ITEM_COUNT+1] = { "ron", "amy", "jen", "bij", "jib", "daj", "jim", "def", NULL }; +#define DELETE_COUNT 2 + const char *delete_items[DELETE_COUNT+1] = { "jen", "jim", NULL }; +#define RESULT_COUNT (ITEM_COUNT-DELETE_COUNT) + const char *results[RESULT_COUNT+1] = { "amy", "bij", "daj", "def", "jib", "ron", NULL}; + + raptor_avltree* tree; + raptor_avltree_iterator* iter; + visit_state vs; + int i; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + tree = raptor_new_avltree(compare_strings, + NULL, /* no free as they are static pointers above */ + 0); + if(!tree) { + fprintf(stderr, "%s: Failed to create tree\n", program); + exit(1); + } + for(i = 0; items[i]; i++) { + int rc; + void* node; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, 
"%s: Adding tree item '%s'\n", program, items[i]); +#endif + + rc = raptor_avltree_add(tree, (void*)items[i]); + if(rc) { + fprintf(stderr, + "%s: Adding tree item %d '%s' failed, returning error %d\n", + program, i, items[i], rc); + exit(1); + } + +#ifdef RAPTOR_DEBUG + raptor_avltree_check(tree); +#endif + + node = raptor_avltree_search(tree, (void*)items[i]); + if(!node) { + fprintf(stderr, + "%s: Tree did NOT contain item %d '%s' as expected\n", + program, i, items[i]); + exit(1); + } + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Printing tree\n", program); + vs.fh = stderr; + vs.count = 0; + raptor_avltree_visit(tree, print_string, &vs); + + fprintf(stderr, "%s: Dumping tree\n", program); + raptor_avltree_dump(tree, stderr); +#endif + + + + for(i = 0; delete_items[i]; i++) { + int rc; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Deleting tree item '%s'\n", program, delete_items[i]); +#endif + + rc = raptor_avltree_delete(tree, (void*)delete_items[i]); + if(!rc) { + fprintf(stderr, + "%s: Deleting tree item %d '%s' failed, returning error %d\n", + program, i, delete_items[i], rc); + exit(1); + } + +#ifdef RAPTOR_DEBUG + raptor_avltree_check(tree); +#endif + } + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Walking tree forwards via iterator\n", program); +#endif + iter = raptor_new_avltree_iterator(tree, NULL, NULL, 1); + for(i = 0; 1; i++) { + const char* data = (const char*)raptor_avltree_iterator_get(iter); + const char* result = results[i]; + if((!data && data != result) || (data && strcmp(data, result))) { + fprintf(stderr, "%3d: Forwards iterator expected '%s' but found '%s'\n", + i, result, data); + exit(1); + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%3d: Got '%s'\n", i, data); +#endif + if(raptor_avltree_iterator_next(iter)) + break; + if(i > RESULT_COUNT) { + fprintf(stderr, "Forward iterator did not end on result %i as expected\n", i); + 
exit(1); + } + } + raptor_free_avltree_iterator(iter); + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Checking tree\n", program); +#endif + vs.count = 0; + vs.results = results; + vs.failed = 0; + raptor_avltree_visit(tree, check_string, &vs); + if(vs.failed) { + fprintf(stderr, "%s: Checking tree failed\n", program); + exit(1); + } + + + for(i = 0; results[i]; i++) { + const char* result = results[i]; + char* data = (char*)raptor_avltree_remove(tree, (void*)result); + if(!data) { + fprintf(stderr, "%s: remove %i failed at item '%s'\n", program, i, + result); + exit(1); + } + if(strcmp(data, result)) { + fprintf(stderr, "%s: remove %i returned %s not %s as expected\n", program, + i, data, result); + exit(1); + } + } + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Freeing tree\n", program); +#endif + raptor_free_avltree(tree); + + raptor_free_world(world); + + /* keep gcc -Wall happy */ + return(0); +} + +#endif diff --git a/src/raptor_concepts.c b/src/raptor_concepts.c new file mode 100644 index 0000000..443edb0 --- /dev/null +++ b/src/raptor_concepts.c @@ -0,0 +1,199 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_concepts.c - Raptor RDF namespace concepts + * + * Copyright (C) 2010, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * http://www.w3.org/TR/rdf-syntax-grammar/#section-grammar-summary + * + * coreSyntaxTerms := rdf:RDF | rdf:ID | rdf:about | rdf:bagID | + rdf:parseType | rdf:resource | rdf:nodeID | rdf:datatype + * syntaxTerms := coreSyntaxTerms | rdf:Description | rdf:li + * oldTerms := rdf:aboutEach | rdf:aboutEachPrefix | rdf:bagID + * + * nodeElementURIs := anyURI - ( coreSyntaxTerms | rdf:li | oldTerms ) + * propertyElementURIs := anyURI - ( coreSyntaxTerms | rdf:Description | oldTerms ) + * propertyAttributeURIs := anyURI - ( coreSyntaxTerms | rdf:Description | rdf:li | oldTerms ) + * + * So, forbidden terms in the RDF namespace are: + * nodeElements + * RDF | ID | about | bagID | parseType | resource | nodeID | datatype | + * li | aboutEach | aboutEachPrefix | bagID + * + * propertyElements + * RDF | ID | about | bagID | parseType | resource | nodeID | datatype | + * Description | aboutEach | aboutEachPrefix | bagID + * + * propertyAttributes + * RDF | ID | about | bagID | parseType | resource | nodeID | datatype | + * Description | li | aboutEach | aboutEachPrefix | bagID + * + * Information about rdf attributes: + * raptor_term_type type + * Set when the attribute is a property rather than just syntax + * NOTE: raptor_rdfxml_process_property_attributes() expects only + * RAPTOR_TERM_TYPE_NONE, + * RAPTOR_TERM_TYPE_LITERAL or RAPTOR_TERM_TYPE_URI + * allowed_unprefixed_on_attribute + * If allowed for legacy reasons to be unprefixed as an attribute. 
+ * + */ + +/* (number of terms in RDF NS) + 1: for final sentinel row */ +const raptor_rdf_ns_term_info raptor_rdf_ns_terms_info[RDF_NS_LAST + 2] = { + /* term allowed boolean flags: + * node element; property element; property attr; unprefixed attr + */ + /* syntax only */ + { "RDF", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 0 }, /* just root */ + { "Description", RAPTOR_TERM_TYPE_UNKNOWN, 1, 0, 0, 0 }, + { "li", RAPTOR_TERM_TYPE_UNKNOWN, 0, 1, 0, 0 }, + { "about", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 1 }, + { "aboutEach", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 0 }, /* deprecated */ + { "aboutEachPrefix", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 0 }, /* deprecated */ + { "ID", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 1 }, + { "bagID", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 1 }, + { "resource", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 1 }, + { "parseType", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 1 }, + { "nodeID", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 0 }, + { "datatype", RAPTOR_TERM_TYPE_UNKNOWN, 0, 0, 0, 0 }, + + /* rdf:Property-s */ + { "type", RAPTOR_TERM_TYPE_URI , 1, 1, 1, 1 }, + { "value", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "subject", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "predicate", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "object", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "first", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "rest", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + + /* rdfs:Class-s */ + { "Seq", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "Bag", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "Alt", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "Statement", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "Property", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "List", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + + /* rdfs:Resource-s */ + { "nil", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + + /* datatypes */ + { "XMLLiteral", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + { "PlainLiteral", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + /* RDF 1.1 */ + /* 
http://www.w3.org/TR/2013/WD-rdf11-concepts-20130723/#section-html */ + { "HTML", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + /* http://www.w3.org/TR/2013/WD-rdf11-concepts-20130723/#section-Datatypes */ + { "langString", RAPTOR_TERM_TYPE_LITERAL, 1, 1, 1, 0 }, + + /* internal */ + { NULL , RAPTOR_TERM_TYPE_UNKNOWN, 1, 1, 1, 0 } +}; + + + +int +raptor_concepts_init(raptor_world* world) +{ + int i; + + for(i = 0; i < RDF_NS_LAST + 1; i++) { + unsigned char* name = (unsigned char*)raptor_rdf_ns_terms_info[i].name; + world->concepts[i] = raptor_new_uri_for_rdf_concept(world, name); + if(!world->concepts[i]) + return 1; + + /* only make a term for things that are not syntax-only */ + /* OR use: + raptor_rdf_ns_terms_info[i].allowed_as_nodeElement || + raptor_rdf_ns_terms_info[i].allowed_as_propertyElement || + raptor_rdf_ns_terms_info[i].allowed_as_propertyAttribute) + */ + if(i > RDF_NS_LAST_SYNTAX_TERM) { + world->terms[i] = raptor_new_term_from_uri(world, world->concepts[i]); + if(!world->terms[i]) + return 1; + } + } + + world->xsd_namespace_uri = raptor_new_uri(world, raptor_xmlschema_datatypes_namespace_uri); + if(!world->xsd_namespace_uri) + return 1; + + world->xsd_boolean_uri = raptor_new_uri_from_uri_local_name(world, world->xsd_namespace_uri, (const unsigned char*)"boolean"); + if(!world->xsd_boolean_uri) + return 1; + + world->xsd_decimal_uri = raptor_new_uri_from_uri_local_name(world, world->xsd_namespace_uri, (const unsigned char*)"decimal"); + if(!world->xsd_decimal_uri) + return 1; + + world->xsd_double_uri = raptor_new_uri_from_uri_local_name(world, world->xsd_namespace_uri, (const unsigned char*)"double"); + if(!world->xsd_double_uri) + return 1; + + world->xsd_integer_uri = raptor_new_uri_from_uri_local_name(world, world->xsd_namespace_uri, (const unsigned char*)"integer"); + if(!world->xsd_integer_uri) + return 1; + + return 0; +} + + + +void +raptor_concepts_finish(raptor_world* world) +{ + int i; + + for(i = 0; i < RDF_NS_LAST + 1; i++) { + 
raptor_uri* concept_uri = world->concepts[i]; + if(concept_uri) { + raptor_free_uri(concept_uri); + world->concepts[i] = NULL; + } + if(world->terms[i]) + raptor_free_term(world->terms[i]); + } + + if(world->xsd_boolean_uri) + raptor_free_uri(world->xsd_boolean_uri); + if(world->xsd_decimal_uri) + raptor_free_uri(world->xsd_decimal_uri); + if(world->xsd_double_uri) + raptor_free_uri(world->xsd_double_uri); + if(world->xsd_integer_uri) + raptor_free_uri(world->xsd_integer_uri); + + if(world->xsd_namespace_uri) + raptor_free_uri(world->xsd_namespace_uri); +} diff --git a/src/raptor_config.h.in b/src/raptor_config.h.in new file mode 100644 index 0000000..d587114 --- /dev/null +++ b/src/raptor_config.h.in @@ -0,0 +1,346 @@ +/* src/raptor_config.h.in. Generated from configure.ac by autoheader. */ + +/* Define if building universal (internal helper macro) */ +#undef AC_APPLE_UNIVERSAL_BUILD + +/* have to check C99 vsnprintf at runtime because cross compiling */ +#undef CHECK_VSNPRINTF_RUNTIME + +/* Flex version as a decimal */ +#undef FLEX_VERSION_DECIMAL + +/* vsnprintf has C99 compatible return value */ +#undef HAVE_C99_VSNPRINTF + +/* Have curl/curl.h */ +#undef HAVE_CURL_CURL_H + +/* Define to 1 if you have the <dlfcn.h> header file. */ +#undef HAVE_DLFCN_H + +/* Define to 1 if you have the <errno.h> header file. */ +#undef HAVE_ERRNO_H + +/* Define to 1 if you have the <fcntl.h> header file. */ +#undef HAVE_FCNTL_H + +/* Define to 1 if you have the <fetch.h> header file. */ +#undef HAVE_FETCH_H + +/* Define to 1 if you have the `getopt' function. */ +#undef HAVE_GETOPT + +/* Define to 1 if you have the <getopt.h> header file. */ +#undef HAVE_GETOPT_H + +/* Define to 1 if you have the `getopt_long' function. */ +#undef HAVE_GETOPT_LONG + +/* Define to 1 if you have the `gettimeofday' function. */ +#undef HAVE_GETTIMEOFDAY + +/* INN parsedate function present */ +#undef HAVE_INN_PARSEDATE + +/* Define to 1 if you have the <inttypes.h> header file. 
*/ +#undef HAVE_INTTYPES_H + +/* Define to 1 if you have the `isascii' function. */ +#undef HAVE_ISASCII + +/* Define to 1 if you have the <libxml/hash.h> header file. */ +#undef HAVE_LIBXML_HASH_H + +/* Define to 1 if you have the <libxml/HTMLparser.h> header file. */ +#undef HAVE_LIBXML_HTMLPARSER_H + +/* Define to 1 if you have the <libxml/nanohttp.h> header file. */ +#undef HAVE_LIBXML_NANOHTTP_H + +/* Define to 1 if you have the <libxml/parser.h> header file. */ +#undef HAVE_LIBXML_PARSER_H + +/* Define to 1 if you have the <libxml/SAX2.h> header file. */ +#undef HAVE_LIBXML_SAX2_H + +/* Define to 1 if you have the <libxslt/xslt.h> header file. */ +#undef HAVE_LIBXSLT_XSLT_H + +/* Define to 1 if you have the <limits.h> header file. */ +#undef HAVE_LIMITS_H + +/* Define to 1 if you have the `qsort_r' function. */ +#undef HAVE_QSORT_R + +/* Define to 1 if you have the `qsort_s' function. */ +#undef HAVE_QSORT_S + +/* Raptor raptor_parse_date available */ +#undef HAVE_RAPTOR_PARSE_DATE + +/* Define to 1 if you have the `setjmp' function. */ +#undef HAVE_SETJMP + +/* Define to 1 if you have the <setjmp.h> header file. */ +#undef HAVE_SETJMP_H + +/* Define to 1 if you have the `stat' function. */ +#undef HAVE_STAT + +/* Define to 1 if you have the <stddef.h> header file. */ +#undef HAVE_STDDEF_H + +/* Define to 1 if you have the <stdint.h> header file. */ +#undef HAVE_STDINT_H + +/* Define to 1 if you have the <stdio.h> header file. */ +#undef HAVE_STDIO_H + +/* Define to 1 if you have the <stdlib.h> header file. */ +#undef HAVE_STDLIB_H + +/* Define to 1 if you have the `strcasecmp' function. */ +#undef HAVE_STRCASECMP + +/* Define to 1 if you have the `stricmp' function. */ +#undef HAVE_STRICMP + +/* Define to 1 if you have the <strings.h> header file. */ +#undef HAVE_STRINGS_H + +/* Define to 1 if you have the <string.h> header file. 
*/ +#undef HAVE_STRING_H + +/* have the strtok_r function */ +#undef HAVE_STRTOK_R + +/* Define to 1 if you have the <sys/param.h> header file. */ +#undef HAVE_SYS_PARAM_H + +/* Define to 1 if you have the <sys/stat.h> header file. */ +#undef HAVE_SYS_STAT_H + +/* Define to 1 if you have the <sys/time.h> header file. */ +#undef HAVE_SYS_TIME_H + +/* Define to 1 if you have the <sys/types.h> header file. */ +#undef HAVE_SYS_TYPES_H + +/* Define to 1 if you have the <time.h> header file. */ +#undef HAVE_TIME_H + +/* Define to 1 if you have the <unistd.h> header file. */ +#undef HAVE_UNISTD_H + +/* Define to 1 if you have the `vasprintf' function. */ +#undef HAVE_VASPRINTF + +/* Define to 1 if you have the `vsnprintf' function. */ +#undef HAVE_VSNPRINTF + +/* Define to 1 if you have the `xmlCtxtUseOptions' function. */ +#undef HAVE_XMLCTXTUSEOPTIONS + +/* Define to 1 if you have the `xmlSAX2InternalSubset' function. */ +#undef HAVE_XMLSAX2INTERNALSUBSET + +/* YAJL has API version 2 */ +#undef HAVE_YAJL2 + +/* Define to 1 if you have the <yajl/yajl_parse.h> header file. */ +#undef HAVE_YAJL_YAJL_PARSE_H + +/* Is __FUNCTION__ available */ +#undef HAVE___FUNCTION__ + +/* ICU UC major version */ +#undef ICU_UC_MAJOR_VERSION + +/* Define to the sub-directory where libtool stores uninstalled libraries. */ +#undef LT_OBJDIR + +/* Define to 1 if maintainer mode is enabled. */ +#undef MAINTAINER_MODE + +/* need 'extern int optind' declaration? */ +#undef NEED_OPTIND_DECLARATION + +/* Name of package */ +#undef PACKAGE + +/* Define to the address where bug reports for this package should be sent. */ +#undef PACKAGE_BUGREPORT + +/* Define to the full name of this package. */ +#undef PACKAGE_NAME + +/* Define to the full name and version of this package. */ +#undef PACKAGE_STRING + +/* Define to the one symbol short name of this package. */ +#undef PACKAGE_TARNAME + +/* Define to the home page for this package. */ +#undef PACKAGE_URL + +/* Define to the version of this package. 
*/ +#undef PACKAGE_VERSION + +/* Define to 1 if debug messages are enabled. */ +#undef RAPTOR_DEBUG + +/* Use ICU for Unicode NFC check */ +#undef RAPTOR_ICU_NFC + +/* does libxml struct xmlEntity have a field etype */ +#undef RAPTOR_LIBXML_ENTITY_ETYPE + +/* does libxml struct xmlEntity have a field name_length */ +#undef RAPTOR_LIBXML_ENTITY_NAME_LENGTH + +/* does libxml have HTML_PARSE_NONET */ +#undef RAPTOR_LIBXML_HTML_PARSE_NONET + +/* does libxml xmlSAXHandler have externalSubset field */ +#undef RAPTOR_LIBXML_XMLSAXHANDLER_EXTERNALSUBSET + +/* does libxml xmlSAXHandler have initialized field */ +#undef RAPTOR_LIBXML_XMLSAXHANDLER_INITIALIZED + +/* does libxml have XML_PARSE_NONET */ +#undef RAPTOR_LIBXML_XML_PARSE_NONET + +/* Minimum supported package version */ +#undef RAPTOR_MIN_VERSION_DECIMAL + +/* Building GRDDL parser */ +#undef RAPTOR_PARSER_GRDDL + +/* Building guess parser */ +#undef RAPTOR_PARSER_GUESS + +/* Building JSON parser */ +#undef RAPTOR_PARSER_JSON + +/* Building N-Quads parser */ +#undef RAPTOR_PARSER_NQUADS + +/* Building N-Triples parser */ +#undef RAPTOR_PARSER_NTRIPLES + +/* Building RDFA parser */ +#undef RAPTOR_PARSER_RDFA + +/* Building RDF/XML parser */ +#undef RAPTOR_PARSER_RDFXML + +/* Building RSS Tag Soup parser */ +#undef RAPTOR_PARSER_RSS + +/* Building TRiG parser */ +#undef RAPTOR_PARSER_TRIG + +/* Building Turtle parser */ +#undef RAPTOR_PARSER_TURTLE + +/* Building Atom 1.0 serializer */ +#undef RAPTOR_SERIALIZER_ATOM + +/* Building GraphViz DOT serializer */ +#undef RAPTOR_SERIALIZER_DOT + +/* Building HTML Table serializer */ +#undef RAPTOR_SERIALIZER_HTML + +/* Building JSON serializer */ +#undef RAPTOR_SERIALIZER_JSON + +/* Building mKR serializer */ +#undef RAPTOR_SERIALIZER_MKR + +/* Building N-Quads serializer */ +#undef RAPTOR_SERIALIZER_NQUADS + +/* Building N-Triples serializer */ +#undef RAPTOR_SERIALIZER_NTRIPLES + +/* Building RDF/XML serializer */ +#undef RAPTOR_SERIALIZER_RDFXML + +/* Building 
RDF/XML-abbreviated serializer */ +#undef RAPTOR_SERIALIZER_RDFXML_ABBREV + +/* Building RSS 1.0 serializer */ +#undef RAPTOR_SERIALIZER_RSS_1_0 + +/* Building Turtle serializer */ +#undef RAPTOR_SERIALIZER_TURTLE + +/* Release version as a decimal */ +#undef RAPTOR_VERSION_DECIMAL + +/* Major version number */ +#undef RAPTOR_VERSION_MAJOR + +/* Minor version number */ +#undef RAPTOR_VERSION_MINOR + +/* Release version number */ +#undef RAPTOR_VERSION_RELEASE + +/* Have libcurl WWW library */ +#undef RAPTOR_WWW_LIBCURL + +/* Have libfetch WWW library */ +#undef RAPTOR_WWW_LIBFETCH + +/* Have libxml available as a WWW library */ +#undef RAPTOR_WWW_LIBXML + +/* No WWW library */ +#undef RAPTOR_WWW_NONE + +/* Check XML 1.1 Names */ +#undef RAPTOR_XML_1_1 + +/* Use libxml XML parser */ +#undef RAPTOR_XML_LIBXML + +/* Define to 1 if all of the C90 standard headers exist (not just the ones + required in a freestanding environment). This macro is provided for + backward compatibility; new code need not use it. */ +#undef STDC_HEADERS + +/* Version number of package */ +#undef VERSION + +/* Define WORDS_BIGENDIAN to 1 if your processor stores words with the most + significant byte first (like Motorola and SPARC, unlike Intel). */ +#if defined AC_APPLE_UNIVERSAL_BUILD +# if defined __BIG_ENDIAN__ +# define WORDS_BIGENDIAN 1 +# endif +#else +# ifndef WORDS_BIGENDIAN +# undef WORDS_BIGENDIAN +# endif +#endif + +/* Define to 1 if `lex' declares `yytext' as a `char *' by default, not a + `char[]'. */ +#undef YYTEXT_POINTER + +/* Number of bits in a file offset, on hosts where this is settable. */ +#undef _FILE_OFFSET_BITS + +/* Define for large files, on AIX-style hosts. */ +#undef _LARGE_FILES + +/* Define to empty if `const' does not conform to ANSI C. */ +#undef const + +/* Define to `__inline__' or `__inline' if that's what the C compiler + calls it, or to nothing if 'inline' is not supported under any name. 
*/ +#ifndef __cplusplus +#undef inline +#endif diff --git a/src/raptor_config_cmake.h.in b/src/raptor_config_cmake.h.in new file mode 100644 index 0000000..2e1582b --- /dev/null +++ b/src/raptor_config_cmake.h.in @@ -0,0 +1,136 @@ +/* raptor_config.h */ + +#define RAPTOR_VERSION_DECIMAL @RAPTOR_VERSION_DECIMAL@ +#define RAPTOR_MIN_VERSION_DECIMAL @RAPTOR_MIN_VERSION_DECIMAL@ + +#cmakedefine HAVE_ERRNO_H +#cmakedefine HAVE_FCNTL_H +#cmakedefine HAVE_GETOPT_H +#cmakedefine HAVE_LIMITS_H +#cmakedefine HAVE_MATH_H +#cmakedefine HAVE_SETJMP_H +#cmakedefine HAVE_STDDEF_H +#cmakedefine HAVE_STDLIB_H +#cmakedefine HAVE_STRING_H +#cmakedefine HAVE_UNISTD_H +#cmakedefine HAVE_TIME_H +#cmakedefine HAVE_SYS_PARAM_H +#cmakedefine HAVE_TIME_H +#cmakedefine HAVE_SYS_STAT_H +#cmakedefine HAVE_SYS_STAT_H +#cmakedefine HAVE_SYS_TIME_H + +#cmakedefine TIME_WITH_SYS_TIME + +#cmakedefine HAVE_ACCESS +#cmakedefine HAVE__ACCESS +#cmakedefine HAVE_GETOPT +#cmakedefine HAVE_GETOPT_LONG +#cmakedefine HAVE_GETTIMEOFDAY +#cmakedefine HAVE_ISASCII +#cmakedefine HAVE_SETJMP +#cmakedefine HAVE_SNPRINTF +#cmakedefine HAVE__SNPRINTF +#cmakedefine HAVE_STAT +#cmakedefine HAVE_STRCASECMP +#cmakedefine HAVE_STRICMP +#cmakedefine HAVE__STRICMP +#cmakedefine HAVE_STRTOK_R +#cmakedefine HAVE_VASPRINTF +#cmakedefine HAVE_VSNPRINTF +#cmakedefine HAVE__VSNPRINTF + +#cmakedefine HAVE___FUNCTION__ + +#define SIZEOF_UNSIGNED_CHAR @SIZEOF_UNSIGNED_CHAR@ +#define SIZEOF_UNSIGNED_SHORT @SIZEOF_UNSIGNED_SHORT@ +#define SIZEOF_UNSIGNED_INT @SIZEOF_UNSIGNED_INT@ +#define SIZEOF_UNSIGNED_LONG @SIZEOF_UNSIGNED_LONG@ +#define SIZEOF_UNSIGNED_LONG_LONG @SIZEOF_UNSIGNED_LONG_LONG@ + +#cmakedefine HAVE_XMLCTXTUSEOPTIONS +#cmakedefine HAVE_XMLSAX2INTERNALSUBSET +#cmakedefine RAPTOR_LIBXML_ENTITY_ETYPE +#cmakedefine RAPTOR_LIBXML_ENTITY_NAME_LENGTH +#cmakedefine RAPTOR_LIBXML_HTML_PARSE_NONET +#cmakedefine RAPTOR_LIBXML_XMLSAXHANDLER_EXTERNALSUBSET +#cmakedefine RAPTOR_LIBXML_XMLSAXHANDLER_INITIALIZED +#cmakedefine 
RAPTOR_LIBXML_XML_PARSE_NONET + +#cmakedefine RAPTOR_STATIC +#cmakedefine HAVE_RAPTOR_PARSE_DATE +#define @RAPTOR_WWW_DEFINE@ +#define @RAPTOR_XML_DEFINE@ +#cmakedefine RAPTOR_XML_1_1 + +#cmakedefine RAPTOR_PARSER_RDFXML +#cmakedefine RAPTOR_PARSER_NTRIPLES +#cmakedefine RAPTOR_PARSER_TURTLE +#cmakedefine RAPTOR_PARSER_TRIG +#cmakedefine RAPTOR_PARSER_RSS +#cmakedefine RAPTOR_PARSER_GRDDL +#cmakedefine RAPTOR_PARSER_GUESS +#cmakedefine RAPTOR_PARSER_RDFA +#cmakedefine RAPTOR_PARSER_JSON +#cmakedefine RAPTOR_PARSER_NQUADS + +#cmakedefine RAPTOR_SERIALIZER_RDFXML +#cmakedefine RAPTOR_SERIALIZER_NTRIPLES +#cmakedefine RAPTOR_SERIALIZER_RDFXML_ABBREV +#cmakedefine RAPTOR_SERIALIZER_TURTLE +#cmakedefine RAPTOR_SERIALIZER_MKR +#cmakedefine RAPTOR_SERIALIZER_RSS_1_0 +#cmakedefine RAPTOR_SERIALIZER_ATOM +#cmakedefine RAPTOR_SERIALIZER_DOT +#cmakedefine RAPTOR_SERIALIZER_HTML +#cmakedefine RAPTOR_SERIALIZER_JSON +#cmakedefine RAPTOR_SERIALIZER_NQUADS + +#ifdef WIN32 +# define WIN32_LEAN_AND_MEAN +# define _CRT_NONSTDC_NO_DEPRECATE +# define _CRT_SECURE_NO_DEPRECATE + +# ifdef _MSC_VER +# if _MSC_VER >= 1300 +# define __func__ __FUNCTION__ +# else + /* better than nothing */ +# define raptor_str(s) #s +# define __func__ "func@" __FILE__ ":" raptor_str(__LINE__) +# endif +# endif + +# define RAPTOR_INLINE __inline + +# define S_ISTYPE(mode, mask) (((mode) & _S_IFMT) == (mask)) +# define S_ISDIR(mode) S_ISTYPE((mode), _S_IFDIR) +# define S_ISREG(mode) S_ISTYPE((mode), _S_IFREG) + + /* Mode bits for access() */ +# define R_OK 04 +# define W_OK 02 + +# if !defined(HAVE_ACCESS) && defined(HAVE__ACCESS) +# define access(p,m) _access(p,m) +# endif +# ifndef HAVE_STRCASECMP +# if defined(HAVE__STRICMP) +# define strcasecmp(a,b) _stricmp(a,b) +# elif defined(HAVE_STRICMP) +# define strcasecmp(a,b) stricmp(a,b) +# endif +# endif +# if !defined(HAVE_SNPRINTF) && defined(HAVE__SNPRINTF) +# define snprintf _snprintf +# endif +# if !defined(HAVE_VSNPRINTF) && defined(HAVE__VSNPRINTF) +# 
define vsnprintf _vsnprintf +# endif + + /* These prevent parsedate.c from declaring malloc() and free() */ +# define YYMALLOC malloc +# define YYFREE free +#endif + +/* end raptor_config.h */ diff --git a/src/raptor_escaped.c b/src/raptor_escaped.c new file mode 100644 index 0000000..b6c61d8 --- /dev/null +++ b/src/raptor_escaped.c @@ -0,0 +1,289 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_escaped.c - Raptor escaped writing utilities + * + * Copyright (C) 2013, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/** + * raptor_string_escaped_write: + * @string: UTF-8 string to write + * @len: length of UTF-8 string + * @delim: Terminating delimiter character for string (such as " or >) or \0 for no escaping. + * @flags: bit flags - see #raptor_escaped_write_bitflags + * @iostr: #raptor_iostream to write to + * + * Write a UTF-8 string formatted using different escapes to a #raptor_iostream + * + * Supports writing escapes in the Python, N-Triples, Turtle, JSON, mKR, + * SPARQL styles to an iostream. + * + * Return value: non-0 on failure such as bad UTF-8 encoding. 
+ **/ +int +raptor_string_escaped_write(const unsigned char *string, + size_t len, + const char delim, + unsigned int flags, + raptor_iostream *iostr) +{ + unsigned char c; + int unichar_len; + raptor_unichar unichar; + + if(!string) + return 1; + + for(; (c=*string); string++, len--) { + if((delim && c == delim && (delim == '\'' || delim == '"')) || + c == '\\') { + raptor_iostream_write_byte('\\', iostr); + raptor_iostream_write_byte(c, iostr); + continue; + } + + if(delim && c == delim) { + raptor_iostream_counted_string_write("\\u", 2, iostr); + raptor_iostream_hexadecimal_write(c, 4, iostr); + continue; + } + + if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_SPARQL_URI_ESCAPES) { + /* Must escape #x00-#x20<>\"{}|^` */ + if(c <= 0x20 || + c == '<' || c == '>' || c == '\\' || c == '"' || + c == '{' || c == '}' || c == '|' || c == '^' || c == '`') { + raptor_iostream_counted_string_write("\\u", 2, iostr); + raptor_iostream_hexadecimal_write(c, 4, iostr); + continue; + } else if(c < 0x7f) { + raptor_iostream_write_byte(c, iostr); + continue; + } + } + + if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_TNRU) { + if(c == 0x09) { + raptor_iostream_counted_string_write("\\t", 2, iostr); + continue; + } else if(c == 0x0a) { + raptor_iostream_counted_string_write("\\n", 2, iostr); + continue; + } else if(c == 0x0d) { + raptor_iostream_counted_string_write("\\r", 2, iostr); + continue; + } else if(c < 0x20 || c == 0x7f) { + raptor_iostream_counted_string_write("\\u", 2, iostr); + raptor_iostream_hexadecimal_write(c, 4, iostr); + continue; + } + } + + if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_BS_ESCAPES_BF) { + if(c == 0x08) { + /* JSON has \b for backspace */ + raptor_iostream_counted_string_write("\\b", 2, iostr); + continue; + } else if(c == 0x0b) { + /* JSON has \f for formfeed */ + raptor_iostream_counted_string_write("\\f", 2, iostr); + continue; + } + } + + /* Just format remaining characters */ + if(c < 0x7f) { + raptor_iostream_write_byte(c, iostr); + continue; + } + + 
/* It is unicode */ + unichar_len = raptor_unicode_utf8_string_get_char(string, len, &unichar); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > len) + /* UTF-8 encoding had an error or ended in the middle of a string */ + return 1; + + if(flags & RAPTOR_ESCAPED_WRITE_BITFLAG_UTF8) { + /* UTF-8 is allowed so no need to escape */ + raptor_iostream_counted_string_write(string, unichar_len, iostr); + } else { + if(unichar < 0x10000) { + raptor_iostream_counted_string_write("\\u", 2, iostr); + raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), 4, iostr); + } else { + raptor_iostream_counted_string_write("\\U", 2, iostr); + raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), 8, iostr); + } + } + + unichar_len--; /* since loop does len-- */ + string += unichar_len; len -= unichar_len; + + } + + return 0; +} + + +/** + * raptor_string_python_write: + * @string: UTF-8 string to write + * @len: length of UTF-8 string + * @delim: Terminating delimiter character for string (such as " or >) + * or \0 for no escaping. + * @mode: mode 0=N-Triples mode, 1=Turtle (allow raw UTF-8), 2=Turtle long string (allow raw UTF-8), 3=JSON + * @iostr: #raptor_iostream to write to + * + * Write a UTF-8 string using Python-style escapes (N-Triples, Turtle, JSON, mKR) to a #raptor_iostream + * + * @Deprecated: use raptor_string_escaped_write() where the features + * requested are bits that can be individually chosen. + * + * Return value: non-0 on failure such as bad UTF-8 encoding. 
+ **/ +int +raptor_string_python_write(const unsigned char *string, + size_t len, + const char delim, + unsigned int mode, + raptor_iostream *iostr) +{ + unsigned int flags = 0; + + switch(mode) { + case 0: + flags = RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL; + break; + + case 1: + flags = RAPTOR_ESCAPED_WRITE_TURTLE_LITERAL; + break; + + case 2: + flags = RAPTOR_ESCAPED_WRITE_TURTLE_LONG_LITERAL; + break; + + case 3: + flags = RAPTOR_ESCAPED_WRITE_JSON_LITERAL; + break; + + default: + return 1; + } + + return raptor_string_escaped_write(string, len, delim, flags, iostr); +} + + + +/** + * raptor_term_escaped_write: + * @term: term to write + * @flags: bit flags - see #raptor_escaped_write_bitflags + * @iostr: raptor iostream + * + * Write a #raptor_term formatted with escapes to a #raptor_iostream + * + * Return value: non-0 on failure + **/ +int +raptor_term_escaped_write(const raptor_term *term, + unsigned int flags, + raptor_iostream* iostr) +{ + const char* quotes="\"\"\"\""; + + if(!term) + return 1; + + switch(term->type) { + case RAPTOR_TERM_TYPE_LITERAL: + if(flags == RAPTOR_ESCAPED_WRITE_TURTLE_LONG_LITERAL) + raptor_iostream_counted_string_write(quotes, 3, iostr); + else + raptor_iostream_write_byte('"', iostr); + raptor_string_escaped_write(term->value.literal.string, + term->value.literal.string_len, + '"', + flags, + iostr); + if(flags == RAPTOR_ESCAPED_WRITE_TURTLE_LONG_LITERAL) + raptor_iostream_counted_string_write(quotes, 3, iostr); + else + raptor_iostream_write_byte('"', iostr); + + if(term->value.literal.language) { + raptor_iostream_write_byte('@', iostr); + raptor_iostream_counted_string_write(term->value.literal.language, + term->value.literal.language_len, + iostr); + } + if(term->value.literal.datatype) { + if(flags == RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL) + flags = RAPTOR_ESCAPED_WRITE_NTRIPLES_URI; + else if(flags == RAPTOR_ESCAPED_WRITE_TURTLE_LITERAL) + flags = RAPTOR_ESCAPED_WRITE_TURTLE_URI; + + 
raptor_iostream_counted_string_write("^^", 2, iostr); + raptor_uri_escaped_write(term->value.literal.datatype, NULL, + flags, iostr); + } + + break; + + case RAPTOR_TERM_TYPE_BLANK: + raptor_iostream_counted_string_write("_:", 2, iostr); + + raptor_iostream_counted_string_write(term->value.blank.string, + term->value.blank.string_len, + iostr); + break; + + case RAPTOR_TERM_TYPE_URI: + if(flags == RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL) + flags = RAPTOR_ESCAPED_WRITE_NTRIPLES_URI; + else if(flags == RAPTOR_ESCAPED_WRITE_TURTLE_LITERAL) + flags = RAPTOR_ESCAPED_WRITE_TURTLE_URI; + + raptor_uri_escaped_write(term->value.uri, NULL, flags, iostr); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(term->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Triple has unsupported term type %u", + term->type); + return 1; + } + + return 0; +} diff --git a/src/raptor_general.c b/src/raptor_general.c new file mode 100644 index 0000000..bf3c3ae --- /dev/null +++ b/src/raptor_general.c @@ -0,0 +1,771 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_general.c - Raptor general routines + * + * Copyright (C) 2000-2014, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#ifdef MAINTAINER_MODE +#include <git-version.h> +#endif + +/* statics */ + +const char * const raptor_short_copyright_string = "Copyright 2000-2023 David Beckett. Copyright 2000-2005 University of Bristol"; + +const char * const raptor_copyright_string = "Copyright (C) 2000-2023 David Beckett - http://www.dajobe.org/\nCopyright (C) 2000-2005 University of Bristol - http://www.bristol.ac.uk/"; + +const char * const raptor_license_string = "LGPL 2.1 or newer, GPL 2 or newer, Apache 2.0 or newer.\nSee http://librdf.org/raptor/LICENSE.html for full terms."; + +const char * const raptor_home_url_string = "http://librdf.org/raptor/"; + +/** + * raptor_version_string: + * + * Library full version as a string. + * + * See also #raptor_version_decimal. + */ +const char * const raptor_version_string = RAPTOR_VERSION_STRING +#ifdef GIT_VERSION +" GIT " GIT_VERSION +#endif +; + +/** + * raptor_version_major: + * + * Library major version number as a decimal integer. + */ +const unsigned int raptor_version_major = RAPTOR_VERSION_MAJOR; + +/** + * raptor_version_minor: + * + * Library minor version number as a decimal integer. + */ +const unsigned int raptor_version_minor = RAPTOR_VERSION_MINOR; + +/** + * raptor_version_release: + * + * Library release version number as a decimal integer. + */ +const unsigned int raptor_version_release = RAPTOR_VERSION_RELEASE; + +/** + * raptor_version_decimal: + * + * Library full version as a decimal integer. + * + * See also #raptor_version_string. 
+ */ +const unsigned int raptor_version_decimal = RAPTOR_VERSION_DECIMAL; + + +/** + * raptor_new_world: + * @version_decimal: raptor version as a decimal integer as defined by the macro #RAPTOR_VERSION and static int #raptor_version_decimal + * + * Allocate a new raptor_world object. + * + * Allocation of the world and initialization are decoupled to allow + * changing settings on the world object before init. + * + * Settings and configuration of the world may be made after creating + * the object and before the world is initialized using methods such + * as raptor_world_set_flag(), raptor_world_set_log_handler(), + * raptor_world_set_generate_bnodeid_handler(). Some configuration + * may not be changed after initialization. + * + * The raptor_world is initialized with raptor_world_open(). + * + * Return value: uninitialized raptor_world object or NULL on failure + */ +raptor_world * +raptor_new_world_internal(unsigned int version_decimal) +{ + raptor_world *world; + + if(version_decimal < RAPTOR_MIN_VERSION_DECIMAL) { + fprintf(stderr, + "raptor_new_world() called via header from version %u but minimum supported version is %u\n", + version_decimal, RAPTOR_GOOD_CAST(unsigned int, RAPTOR_MIN_VERSION_DECIMAL)); + return NULL; + } + + world = RAPTOR_CALLOC(raptor_world*, 1, sizeof(*world)); + if(world) { + world->magic = RAPTOR2_WORLD_MAGIC; + + /* set default flags - can be updated by raptor_world_set_flag() */ + + /* set: RAPTOR_LIBXML_FLAGS_GENERIC_ERROR_SAVE + * set: RAPTOR_LIBXML_FLAGS_STRUCTURED_ERROR_SAVE + */ + world->libxml_flags = RAPTOR_WORLD_FLAG_LIBXML_GENERIC_ERROR_SAVE | + RAPTOR_WORLD_FLAG_LIBXML_STRUCTURED_ERROR_SAVE ; + /* set: URI Interning */ + world->uri_interning = 1; + + world->internal_ignore_errors = 0; + } + + return world; +} + + +/** + * raptor_world_open: + * @world: raptor_world object + * + * Initialise the raptor library. + * + * Initializes a #raptor_world object created by raptor_new_world(). 
/**
 * raptor_world_open:
 * @world: raptor_world object
 *
 * Initialise the raptor library.
 *
 * Initializes a #raptor_world object created by raptor_new_world().
 * Allocation and initialization are decoupled to allow
 * changing settings on the world object before init.
 *
 * The initialized world object is used with subsequent raptor API calls.
 *
 * Calling this on a world that is already marked opened does nothing
 * and returns 0.  A NULL @world returns -1.  Otherwise the subsystems
 * are initialised in a fixed order (URI, concepts, parsers,
 * serializers, SAX2, WWW and, when built with libxml, libxml) and the
 * first non-0 result is returned immediately without undoing the
 * earlier steps.
 *
 * Return value: non-0 on failure
 */
int
raptor_world_open(raptor_world* world)
{
  int rc;

  if(!world)
    return -1;

  if(world->opened)
    return 0; /* not an error */

  /* The flag is set before any subsystem is initialised, so it stays
   * set even when one of the steps below fails; a later call then
   * returns 0 above without retrying.
   * NOTE(review): presumably set first so that init code which
   * re-enters raptor_world_open() does not recurse - confirm before
   * reordering.
   */
  world->opened = 1;

  rc = raptor_uri_init(world);
  if(rc)
    return rc;

  rc = raptor_concepts_init(world);
  if(rc)
    return rc;

  rc = raptor_parsers_init(world);
  if(rc)
    return rc;

  rc = raptor_serializers_init(world);
  if(rc)
    return rc;

  rc = raptor_sax2_init(world);
  if(rc)
    return rc;

  rc = raptor_www_init(world);
  if(rc)
    return rc;

#ifdef RAPTOR_XML_LIBXML
  /* only compiled in when raptor is built against libxml */
  rc = raptor_libxml_init(world);
  if(rc)
    return rc;
#endif

  return 0;
}
+ * + * The final argument of the callback method is user_bnodeid, the value of + * the rdf:nodeID attribute that the user provided if any (or NULL). + * It can either be returned directly as the generated value when present or + * modified. The passed in value must be free()d if it is not used. + * + * If handler is NULL, the default method is used + * + **/ +void +raptor_world_set_generate_bnodeid_handler(raptor_world* world, + void *user_data, + raptor_generate_bnodeid_handler handler) +{ + world->generate_bnodeid_handler_user_data = user_data; + world->generate_bnodeid_handler = handler; +} + + +static unsigned char* +raptor_world_default_generate_bnodeid_handler(void *user_data, + unsigned char *user_bnodeid) +{ + raptor_world *world = (raptor_world*)user_data; + int id; + unsigned char *buffer; + const char* prefix; + unsigned int prefix_length; + size_t id_length; + + if(user_bnodeid) + return user_bnodeid; + + id = ++world->default_generate_bnodeid_handler_base; + + id_length = raptor_format_integer(NULL, 0, id, /* base */ 10, -1, '\0'); + + if(world->default_generate_bnodeid_handler_prefix) { + prefix = world->default_generate_bnodeid_handler_prefix; + prefix_length = world->default_generate_bnodeid_handler_prefix_length; + } else { + prefix = "genid"; + prefix_length = 5; /* strlen("genid") */ + } + + buffer = RAPTOR_MALLOC(unsigned char*, id_length + prefix_length + 1); + if(!buffer) + return NULL; + + memcpy(buffer, prefix, prefix_length); + (void)raptor_format_integer(RAPTOR_GOOD_CAST(char*, &buffer[prefix_length]), + id_length + 1, id, /* base */ 10,-1, '\0'); + + return buffer; +} + + +/** + * raptor_world_generate_bnodeid: + * @world: raptor_world object + * + * Generate an new blank node ID + * + * Return value: newly allocated generated ID or NULL on failure + **/ +unsigned char* +raptor_world_generate_bnodeid(raptor_world *world) +{ + return raptor_world_internal_generate_id(world, NULL); +} + + +unsigned char* 
+raptor_world_internal_generate_id(raptor_world *world, + unsigned char *user_bnodeid) +{ + if(world->generate_bnodeid_handler) + return world->generate_bnodeid_handler(world->generate_bnodeid_handler_user_data, + user_bnodeid); + else + return raptor_world_default_generate_bnodeid_handler(world, user_bnodeid); +} + + +/** + * raptor_world_set_generate_bnodeid_parameters: + * @world: #raptor_world object + * @prefix: prefix string + * @base: integer base identifier + * + * Set default ID generation parameters. + * + * Sets the parameters for the default algorithm used to generate IDs. + * The default algorithm uses both @prefix and @base to generate a new + * identifier. The exact identifier generated is not guaranteed to + * be a strict concatenation of @prefix and @base but will use both + * parts. The @prefix parameter is copied to generate an ID. + * + * For finer control of the generated identifiers, use + * raptor_world_set_generate_bnodeid_handler(). + * + * If @prefix is NULL, the default prefix is used (currently "genid") + * If @base is less than 1, it is initialised to 1. 
+ * + **/ +void +raptor_world_set_generate_bnodeid_parameters(raptor_world* world, + char *prefix, int base) +{ + char *prefix_copy = NULL; + unsigned int length = 0; + + if(--base < 0) + base = 0; + + if(prefix) { + length = RAPTOR_BAD_CAST(unsigned int, strlen(prefix)); + + prefix_copy = RAPTOR_MALLOC(char*, length + 1); + if(!prefix_copy) + return; + + memcpy(prefix_copy, prefix, length+1); + } + + if(world->default_generate_bnodeid_handler_prefix) + RAPTOR_FREE(char*, world->default_generate_bnodeid_handler_prefix); + + world->default_generate_bnodeid_handler_prefix = prefix_copy; + world->default_generate_bnodeid_handler_prefix_length = length; + world->default_generate_bnodeid_handler_base = base; +} + + +/** + * raptor_world_set_libxslt_security_preferences: + * @world: world + * @security_preferences: security preferences (an #xsltSecurityPrefsPtr) or NULL + * + * Set libxslt security preferences policy object + * + * The @security_preferences object will NOT become owned by + * #raptor_world. + * + * If libxslt is compiled into the library, @security_preferences + * should be an #xsltSecurityPrefsPtr and will be used to call + * xsltSetCtxtSecurityPrefs() when an XSLT engine is initialised. + * If @security_preferences is NULL, this will disable all raptor's + * calls to xsltSetCtxtSecurityPrefs(). + * + * If libxslt is not compiled in, the object set here is not used. 
+ * + * Return value: 0 on success, non-0 on failure: <0 on errors and >0 if world is already opened + */ +int +raptor_world_set_libxslt_security_preferences(raptor_world *world, + void *security_preferences) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); + + if(world->opened) + return 1; + + world->xslt_security_preferences = security_preferences; + world->xslt_security_preferences_policy = 1; + + return 0; +} + + +/** + * raptor_world_set_flag: + * @world: world + * @flag: flag + * @value: value + * + * Set library-wide configuration + * + * This function is used to control raptor-wide options across + * classes. These options must be set before raptor_world_open() is + * called explicitly or implicitly (by creating a raptor object). + * There is no enumeration function for these flags because they are + * not user options and must be set before the library is + * initialised. For similar reasons, there is no get function. + * + * See the #raptor_world_flags documentation for full details of + * what the flags mean. 
+ * + * Return value: 0 on success, non-0 on failure: <0 on errors (-1 if flag is unknown, -2 if value is illegal) and >0 if world is already opened + */ +int +raptor_world_set_flag(raptor_world *world, raptor_world_flag flag, int value) +{ + int rc = 0; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); + + if(world->opened) + return 1; + + switch(flag) { + case RAPTOR_WORLD_FLAG_LIBXML_GENERIC_ERROR_SAVE: + case RAPTOR_WORLD_FLAG_LIBXML_STRUCTURED_ERROR_SAVE: + if(value) + world->libxml_flags |= (int)flag; + else + world->libxml_flags &= ~(int)flag; + break; + + case RAPTOR_WORLD_FLAG_URI_INTERNING: + world->uri_interning = value; + break; + + case RAPTOR_WORLD_FLAG_WWW_SKIP_INIT_FINISH: + world->www_skip_www_init_finish = value; + break; + } + + return rc; +} + + +/** + * raptor_world_set_log_handler: + * @world: world object + * @user_data: user data to pass to function + * @handler: pointer to the function + * + * Set the message (error, warning, info) handling function. 
/**
 * raptor_check_ordinal:
 * @name: ordinal string
 *
 * Check an RDF property ordinal, the n in rdf:_n
 *
 * The string must consist only of the ASCII digits 0-9 and contain at
 * least one digit.  A NULL or empty string, a string with any other
 * character, or a number too large to be represented as an int are
 * all rejected.
 *
 * Return value: ordinal integer or <0 if string is not a valid ordinal
 */
int
raptor_check_ordinal(const unsigned char *name)
{
  /* largest int value, computed here so no extra header is required */
  const int ordinal_max = (int)(((unsigned int)-1) >> 1);
  int ordinal= -1;
  unsigned char c;

  if(!name)
    return -1;

  while((c=*name++)) {
    int digit;

    if(c < '0' || c > '9')
      return -1;

    digit = c - '0';

    /* first digit seen: switch from "nothing parsed" to a real value */
    if(ordinal <0)
      ordinal = 0;

    /* reject before ordinal * 10 + digit could overflow a signed int */
    if(ordinal > (ordinal_max - digit) / 10)
      return -1;

    ordinal = ordinal * 10 + digit;
  }

  return ordinal;
}
raptor_domain_labels[domain] : NULL; +} + + + +/* internal */ +void +raptor_world_internal_set_ignore_errors(raptor_world* world, int flag) +{ + world->internal_ignore_errors = flag; +} + + +/** + * raptor_free_memory: + * @ptr: memory pointer + * + * Free memory allocated inside raptor. + * + * Some systems require memory allocated in a library to + * be deallocated in that library. This function allows + * memory allocated by raptor to be freed. + * + * Examples include the result of the '_to_' methods that returns + * allocated memory such as raptor_uri_filename_to_uri_string, + * raptor_uri_filename_to_uri_string + * and raptor_uri_uri_string_to_filename_fragment + * + **/ +void +raptor_free_memory(void *ptr) +{ + if(!ptr) + return; + + RAPTOR_FREE(void, ptr); +} + + +/** + * raptor_alloc_memory: + * @size: size of memory to allocate + * + * Allocate memory inside raptor. + * + * Some systems require memory allocated in a library to + * be deallocated in that library. This function allows + * memory to be allocated inside the raptor shared library + * that can be freed inside raptor either internally or via + * raptor_free_memory. + * + * Examples include using this in the raptor_world_generate_bnodeid() handler + * code to create new strings that will be used internally + * as short identifiers and freed later on by the parsers. + * + * Return value: the address of the allocated memory or NULL on failure + * + **/ +void* +raptor_alloc_memory(size_t size) +{ + return RAPTOR_MALLOC(void*, size); +} + + +/** + * raptor_calloc_memory: + * @nmemb: number of members + * @size: size of item + * + * Allocate zeroed array of items inside raptor. + * + * Some systems require memory allocated in a library to + * be deallocated in that library. This function allows + * memory to be allocated inside the raptor shared library + * that can be freed inside raptor either internally or via + * raptor_free_memory. 
#if defined (RAPTOR_DEBUG) && defined(RAPTOR_MEMORY_SIGN)
/*
 * Debug allocator wrappers: each block is allocated with one extra
 * leading int that holds RAPTOR_SIGN_KEY, and the caller is handed the
 * address just after it.  realloc and free step back to that int and
 * abort via RAPTOR_FATAL3 if the signature does not match.
 *
 * All of the allocating functions return NULL when the underlying
 * allocation fails or when the requested size plus the signature would
 * not fit in a size_t.
 */

/* Allocate @size bytes preceded by a signature word */
void*
raptor_sign_malloc(size_t size)
{
  int *p;

  /* room for the signature must not wrap the size */
  if(size > (size_t)-1 - sizeof(int))
    return NULL;

  p = (int*)malloc(size + sizeof(int));
  if(!p)
    return NULL;

  *p++ = RAPTOR_SIGN_KEY;
  return p;
}

/* Allocate a zeroed array of @nmemb items of @size bytes, signed */
void*
raptor_sign_calloc(size_t nmemb, size_t size)
{
  int *p;

  /* nmemb * size + sizeof(int) must be representable */
  if(size && nmemb > ((size_t)-1 - sizeof(int)) / size)
    return NULL;

  /* turn into bytes */
  size = nmemb*size + sizeof(int);

  p = (int*)calloc(1, size);
  if(!p)
    return NULL;

  *p++ = RAPTOR_SIGN_KEY;
  return p;
}

/* Resize a signed block; a NULL @ptr behaves like raptor_sign_malloc() */
void*
raptor_sign_realloc(void *ptr, size_t size)
{
  int *p;
  int *resized;

  if(!ptr)
    return raptor_sign_malloc(size);

  p = (int*)ptr;
  p--;

  if(*p != RAPTOR_SIGN_KEY)
    RAPTOR_FATAL3("memory signature %08X != %08X", *p, RAPTOR_SIGN_KEY);

  if(size > (size_t)-1 - sizeof(int))
    return NULL;

  /* keep the old pointer until realloc is known to have succeeded;
   * on failure the caller's block is still valid
   */
  resized = (int*)realloc(p, size + sizeof(int));
  if(!resized)
    return NULL;

  *resized++ = RAPTOR_SIGN_KEY;
  return resized;
}

/* Check the signature of a signed block and free it; NULL is ignored */
void
raptor_sign_free(void *ptr)
{
  int *p;

  if(!ptr)
    return;

  p = (int*)ptr;
  p--;

  if(*p != RAPTOR_SIGN_KEY)
    RAPTOR_FATAL3("memory signature %08X != %08X", *p, RAPTOR_SIGN_KEY);

  free(p);
}
#endif
name); + return 1; + } + + return 0; +} diff --git a/src/raptor_grddl.c b/src/raptor_grddl.c new file mode 100644 index 0000000..b54d8c8 --- /dev/null +++ b/src/raptor_grddl.c @@ -0,0 +1,2131 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_grddl.c - Raptor GRDDL (+microformats) Parser implementation + * + * Copyright (C) 2005-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +/* + * Specifications: + * Gleaning Resource Descriptions from Dialects of Languages (GRDDL) + * W3C Recommendation 11 September 2007 + * http://www.w3.org/TR/2007/REC-grddl-20070911/ + * http://www.w3.org/TR/grddl/ + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#include <libxml/xpath.h> +/* for xmlXPathRegisterNs() */ +#include <libxml/xpathInternals.h> +#include <libxml/xinclude.h> +#include <libxml/HTMLparser.h> + +#include <libxslt/xslt.h> +#include <libxslt/transform.h> +#include <libxslt/xsltutils.h> +#include <libxslt/security.h> + + +/* + * libxslt API notes + * + * Inputs to an XSLT transformation process with libxslt are: + * 1. A set of (key:value) parameters. + * 2. An xsltStylesheetPtr for the XSLT sheet + * Which could be made from a file or an xmlDoc; and the xmlDoc. + * made from a file or memory buffer. + * 3. An xmlDoc for the XML source + * Which could be made from a file or a memory buffer. + * + */ + + +static void raptor_grddl_filter_triples(void *user_data, raptor_statement *statement); + +static void raptor_grddl_xsltGenericError_handler(void *user_data, const char *msg, ...) 
/*
 * A URI discovered in a document (an XSLT transformation or a profile)
 * paired with the base URI that applied where it was found.
 *
 * Either field may be NULL.
 */
typedef struct
{
  /* transformation (XSLT) or profile URI */
  raptor_uri* uri;
  /* base URI in effect when the above was found */
  raptor_uri* base_uri;
} grddl_xml_context;
*/ + int process_this_as_rdfxml; + + /* non-0 to perform GRDL processing on document */ + int grddl_processing; + + /* non-0 to perform XML Include processing on document */ + int xinclude_processing; + + /* non-0 to perform HTML Base processing on document */ + int html_base_processing; + + /* non-0 to perform HTML <link> processing on document */ + int html_link_processing; + + xmlGenericErrorFunc saved_xsltGenericError; + void *saved_xsltGenericErrorContext; + + xsltSecurityPrefsPtr saved_xsltSecurityPrefs; +}; + + +typedef struct raptor_grddl_parser_context_s raptor_grddl_parser_context; + + +static void +raptor_grddl_xsltGenericError_handler(void *user_data, const char *msg, ...) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + va_list arguments; + size_t msg_len; + size_t length; + char *nmsg; + + if(!msg || *msg == '\n') + return; + + va_start(arguments, msg); + + msg_len = strlen(msg); + +#define PREFIX "libxslt error: " +#define PREFIX_LENGTH 15 + length = PREFIX_LENGTH + msg_len + 1; + nmsg = RAPTOR_MALLOC(char*, length); + if(nmsg) { + memcpy(nmsg, PREFIX, PREFIX_LENGTH); + memcpy(nmsg + PREFIX_LENGTH, msg, msg_len + 1); + if(nmsg[length-1] == '\n') + nmsg[length-1] = '\0'; + } + + PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START + raptor_parser_log_error_varargs(rdf_parser, RAPTOR_LOG_LEVEL_ERROR, + nmsg ? 
nmsg : msg, arguments); + PRAGMA_IGNORE_WARNING_END + + if(nmsg) + RAPTOR_FREE(char*, nmsg); + + va_end(arguments); +} + + +static grddl_xml_context* +raptor_new_xml_context(raptor_world* world, raptor_uri* uri, + raptor_uri* base_uri) +{ + grddl_xml_context* xml_context; + + xml_context = RAPTOR_MALLOC(grddl_xml_context*, sizeof(*xml_context)); + if(uri) + uri = raptor_uri_copy(uri); + if(base_uri) + base_uri = raptor_uri_copy(base_uri); + xml_context->uri = uri; + xml_context->base_uri = base_uri; + + return xml_context; +} + + +static void +grddl_free_xml_context(void* userdata) +{ + grddl_xml_context* xml_context = (grddl_xml_context*)userdata; + + if(xml_context->uri) + raptor_free_uri(xml_context->uri); + if(xml_context->base_uri) + raptor_free_uri(xml_context->base_uri); + RAPTOR_FREE(grddl_xml_context, xml_context); +} + + +static int +raptor_grddl_parse_init_common(raptor_parser* rdf_parser, const char *name) +{ + raptor_grddl_parser_context* grddl_parser; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + grddl_parser->world = rdf_parser->world; + grddl_parser->rdf_parser = rdf_parser; + + /* Sequence of grddl_xml_context* URIs of XSLT sheets to transform + * the document */ + grddl_parser->doc_transform_uris = raptor_new_sequence((raptor_data_free_handler)grddl_free_xml_context, NULL); + + grddl_parser->grddl_processing = 1; + grddl_parser->xinclude_processing = 1; + grddl_parser->html_base_processing = 0; + grddl_parser->html_link_processing = 1; + + return 0; +} + + +/* 58 == strlen(grddl_namespaceTransformation_uri_string) */ +#define GRDDL_NAMESPACETRANSFORMATION_URI_STRING_LEN 58 +static const unsigned char * const grddl_namespaceTransformation_uri_string = (const unsigned char*)"http://www.w3.org/2003/g/data-view#namespaceTransformation"; + +/* 56 == strlen(grddl_profileTransformation_uri_string) */ +#define GRDDL_PROFILETRANSFORMATION_URI_STRING_LEN 56 +static const unsigned char * const 
grddl_profileTransformation_uri_string = (const unsigned char*)"http://www.w3.org/2003/g/data-view#profileTransformation"; + + +static int +raptor_grddl_parse_init(raptor_parser* rdf_parser, const char *name) +{ + raptor_grddl_parser_context* grddl_parser; + raptor_world* world = rdf_parser->world; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + raptor_grddl_parse_init_common(rdf_parser, name); + + /* Sequence of URIs from <head profile> */ + grddl_parser->profile_uris = raptor_new_sequence((raptor_data_free_handler)grddl_free_xml_context, NULL); + + grddl_parser->namespace_transformation_uri = raptor_new_uri_from_counted_string(world, grddl_namespaceTransformation_uri_string, GRDDL_NAMESPACETRANSFORMATION_URI_STRING_LEN); + grddl_parser->profile_transformation_uri = raptor_new_uri_from_counted_string(world, grddl_profileTransformation_uri_string, GRDDL_PROFILETRANSFORMATION_URI_STRING_LEN); + + /* Sequence of URIs visited - may be overwritten if this is not + * the depth 0 grddl parser + */ + grddl_parser->visited_uris = raptor_new_sequence((raptor_data_free_handler)raptor_free_uri, (raptor_data_print_handler)raptor_uri_print); + + return 0; +} + + +static void +raptor_grddl_parse_terminate(raptor_parser *rdf_parser) +{ + raptor_grddl_parser_context *grddl_parser; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + if(grddl_parser->xml_ctxt) { + if(grddl_parser->xml_ctxt->myDoc) { + xmlFreeDoc(grddl_parser->xml_ctxt->myDoc); + grddl_parser->xml_ctxt->myDoc = NULL; + } + xmlFreeParserCtxt(grddl_parser->xml_ctxt); + } + + if(grddl_parser->html_ctxt) { + if(grddl_parser->html_ctxt->myDoc) { + xmlFreeDoc(grddl_parser->html_ctxt->myDoc); + grddl_parser->html_ctxt->myDoc = NULL; + } + htmlFreeParserCtxt(grddl_parser->html_ctxt); + } + + if(grddl_parser->xpathCtx) + xmlXPathFreeContext(grddl_parser->xpathCtx); + + if(grddl_parser->internal_parser) + raptor_free_parser(grddl_parser->internal_parser); + + 
/*
 * Attach the GRDDL parser behind @rdf_parser to a parent GRDDL parser.
 *
 * The child drops its own visited-URI list and uses the parent's list
 * instead, its depth becomes the parent's depth plus one, and its
 * saved statement handler is set to raptor_grddl_filter_triples() with
 * the parent's raptor_parser as the handler user data.
 *
 * Calling this again with a parent whose list is already shared is a
 * no-op.
 */
static void
raptor_grddl_parser_add_parent(raptor_parser *rdf_parser,
                               raptor_grddl_parser_context* parent_grddl_parser)
{
  raptor_grddl_parser_context* grddl_parser;

  grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context;

  /* Do not set parent twice */
  if(grddl_parser->visited_uris == parent_grddl_parser->visited_uris)
    return;

  /* free any sequence here */
  /* NOTE(review): this frees whatever list is currently held; if the
   * child was previously attached to a different parent that would be
   * the other parent's list - confirm callers never re-parent.
   */
  if(grddl_parser->visited_uris)
    raptor_free_sequence(grddl_parser->visited_uris);

  /* share parent's list and do not free it here */
  grddl_parser->visited_uris = parent_grddl_parser->visited_uris;
  /* a non-zero depth marks the list as not owned by this parser */
  grddl_parser->grddl_depth = parent_grddl_parser->grddl_depth + 1;

  grddl_parser->saved_user_data = parent_grddl_parser->rdf_parser;
  grddl_parser->saved_statement_handler = raptor_grddl_filter_triples;
}
MATCH_IS_PROFILE 2 +#define MATCH_IS_HARDCODED 4 +/* stop looking for other hardcoded matches */ +#define MATCH_LAST 8 +static struct { + const xmlChar* xpath; + int flags; + const xmlChar* xslt_sheet_uri; +} match_table[]={ + /* XHTML document where the GRDDL profile is in + * <link ref='transform' href='url'> inside the html <head> + * Value of @rel is a space-separated list of link types. + */ + { + (const xmlChar*)"/html:html/html:head[contains(@profile,\"http://www.w3.org/2003/g/data-view\")]/html:link[contains(@rel,\"transformation\")]/@href", + 0, + NULL + } + , + /* XHTML document where the GRDDL profile is in + * <a rel='transform' href='url'> inside the html <body> + * Value of @rel is a space-separated list of link types. + */ + { + (const xmlChar*)"/html:html/html:head[contains(@profile,\"http://www.w3.org/2003/g/data-view\")]/../..//html:a[contains(@rel,\"transformation\")]/@href", + 0, + NULL + } + , + /* XML document linking to transform via attribute dataview:transformation + * on the root element. 
+ * Example: http://www.w3.org/2004/01/rdxh/grddl-p3p-example + **/ + { + (const xmlChar*)"/*/@dataview:transformation", + MATCH_IS_VALUE_LIST, + NULL + } + , + /* hCalendar microformat http://microformats.org/wiki/hcalendar */ + { + (const xmlChar*)"//*[contains(concat(' ', concat(normalize-space(@class),' ')),' vevent ')]", + MATCH_IS_HARDCODED, + (const xmlChar*)"http://www.w3.org/2002/12/cal/glean-hcal.xsl" + } + , + /* hReview microformat http://microformats.org/wiki/review */ + { + (const xmlChar*)"//*[contains(concat(' ', concat(normalize-space(@class),' ')),' hreview ')]", + MATCH_IS_HARDCODED | MATCH_LAST, /* stop here since hCard is inside hReview */ + (const xmlChar*)"http://www.w3.org/2001/sw/grddl-wg/doc29/hreview2rdfxml.xsl" + } + , + /* hCard microformat http://microformats.org/wiki/hcard */ + { + (const xmlChar*)"//*[contains(concat(' ', concat(normalize-space(@class),' ')),' vcard ')]", + MATCH_IS_HARDCODED, + (const xmlChar*)"http://www.w3.org/2006/vcard/hcard2rdf.xsl" + } + , + { + NULL, + 0, + NULL + } +}; + + +static const char* const grddl_namespace_uris_ignore_list[] = { + "http://www.w3.org/1999/xhtml", + "http://www.w3.org/1999/02/22-rdf-syntax-ns#", + "http://www.w3.org/2001/XMLSchema", + NULL +}; + + +/* add URI to XSLT transformation URI list */ +static void +raptor_grddl_add_transform_xml_context(raptor_grddl_parser_context* grddl_parser, + grddl_xml_context* xml_context) +{ + int i; + raptor_uri* uri = xml_context->uri; + int size; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Found document transformation URI '%s'\n", + raptor_uri_as_string(uri)); +#endif + + size = raptor_sequence_size(grddl_parser->doc_transform_uris); + for(i = 0; i < size; i++) { + grddl_xml_context* xc; + xc = (grddl_xml_context*)raptor_sequence_get_at(grddl_parser->doc_transform_uris, i); + if(raptor_uri_equals(uri, xc->uri)) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Already seen XSLT URI '%s'\n", 
raptor_uri_as_string(uri)); +#endif + grddl_free_xml_context(xml_context); + return; + } + } + + RAPTOR_DEBUG3("Adding new document transformation XSLT URI %s with base URI %s\n", + (uri ? (const char*)raptor_uri_as_string(uri): "(NONE)"), + (xml_context->base_uri ? (const char*)raptor_uri_as_string(xml_context->base_uri) : "(NONE)")); + + raptor_sequence_push(grddl_parser->doc_transform_uris, xml_context); +} + + +static void +raptor_grddl_filter_triples(void *user_data, raptor_statement *statement) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + raptor_grddl_parser_context* grddl_parser; + int i; + raptor_uri* predicate_uri; + int size; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + /* Look for a triple <uri> <uri> <uri> */ + if(statement->subject->type != RAPTOR_TERM_TYPE_URI || + statement->predicate->type != RAPTOR_TERM_TYPE_URI || + statement->object->type != RAPTOR_TERM_TYPE_URI) + return; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG2("Parser %p: Relaying statement: ", RAPTOR_VOIDP(rdf_parser)); + raptor_statement_print(statement, stderr); + fputc('\n', stderr); +#endif + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("Parser %p: Checking against %d profile URIs\n", + RAPTOR_VOIDP(rdf_parser), + raptor_sequence_size(grddl_parser->profile_uris)); +#endif + + /* Look for(i = 0, root namespace URI) + * <document-root-element-namespace-URI> data-view:namespaceTransformation ?tr + * or (i>0, profile URIs) + * <document-root-element-namespace-URI> data-view:profileTransformation ?tr + * and then ?tr becomes a new document transformation URI + */ + predicate_uri = grddl_parser->namespace_transformation_uri; + size = raptor_sequence_size(grddl_parser->profile_uris); + for(i = 0; i < size; i++) { + grddl_xml_context* xml_context; + raptor_uri* profile_uri; + grddl_xml_context* new_xml_context; + + xml_context = (grddl_xml_context*)raptor_sequence_get_at(grddl_parser->profile_uris, i); + 
profile_uri = xml_context->uri; + + if(i == 1) + predicate_uri = grddl_parser->profile_transformation_uri; + + if(!profile_uri) + continue; + + if(raptor_uri_equals(statement->subject->value.uri, profile_uri) && + raptor_uri_equals(statement->predicate->value.uri, predicate_uri)) { + raptor_uri* uri = statement->object->value.uri; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG4("Parser %p: Matches profile URI #%d '%s'\n", + RAPTOR_VOIDP(rdf_parser), + i, raptor_uri_as_string(profile_uri)); +#endif + + new_xml_context = raptor_new_xml_context(rdf_parser->world, uri, + rdf_parser->base_uri); + raptor_grddl_add_transform_xml_context(grddl_parser, new_xml_context); + } else { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG4("Parser %p: Failed to match profile URI #%d '%s'\n", + RAPTOR_VOIDP(rdf_parser), + i, raptor_uri_as_string(profile_uri)); +#endif + } + + } + +} + + +static int +raptor_grddl_ensure_internal_parser(raptor_parser* rdf_parser, + const char* parser_name, int filter) +{ + raptor_grddl_parser_context* grddl_parser; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + if(!grddl_parser->internal_parser_name || + !strcmp(parser_name, "guess") || + strcmp(grddl_parser->internal_parser_name, parser_name)) { + /* construct a new parser if none in use or not what is required */ + if(grddl_parser->internal_parser) { + unsigned int our_emit_flags = rdf_parser->emit_graph_marks; + + /* copy back bit flags from parser about to be destroyed */ + raptor_parser_copy_flags_state(rdf_parser, + grddl_parser->internal_parser); + + /* restore whatever graph makrs state we had here */ + rdf_parser->emit_graph_marks = our_emit_flags ? 
1 : 0; + + RAPTOR_DEBUG3("Parser %p: Freeing internal %s parser.\n", + RAPTOR_VOIDP(rdf_parser), + grddl_parser->internal_parser_name); + + raptor_free_parser(grddl_parser->internal_parser); + grddl_parser->internal_parser = NULL; + grddl_parser->internal_parser_name = NULL; + } + + RAPTOR_DEBUG3("Parser %p: Allocating new internal %s parser.\n", + RAPTOR_VOIDP(rdf_parser), parser_name); + grddl_parser->internal_parser = raptor_new_parser(rdf_parser->world, + parser_name); + if(!grddl_parser->internal_parser) { + raptor_parser_error(rdf_parser, "Failed to create %s parser", + parser_name); + return 1; + } + + /* initialise the new parser with the outer state */ + grddl_parser->internal_parser_name = parser_name; + if(raptor_parser_copy_user_state(grddl_parser->internal_parser, rdf_parser)) + return 1; + + /* Disable graph marks in newly constructed internal parser */ + grddl_parser->internal_parser->emit_graph_marks = 0; + + grddl_parser->saved_user_data = rdf_parser->user_data; + grddl_parser->saved_statement_handler = rdf_parser->statement_handler; + } + + /* Filter the triples for profile/namespace URIs */ + if(filter) { + grddl_parser->internal_parser->user_data = rdf_parser; + grddl_parser->internal_parser->statement_handler = raptor_grddl_filter_triples; + } else { + grddl_parser->internal_parser->user_data = grddl_parser->saved_user_data; + grddl_parser->internal_parser->statement_handler = grddl_parser->saved_statement_handler; + } + + return 0; +} + + +/* Run a GRDDL transform using a pre-parsed XSLT stylesheet already + * formed into a libxml document (with URI) + */ +static int +raptor_grddl_run_grddl_transform_doc(raptor_parser* rdf_parser, + grddl_xml_context* xml_context, + xmlDocPtr xslt_doc, + xmlDocPtr doc) +{ + raptor_world* world = rdf_parser->world; + raptor_grddl_parser_context* grddl_parser; + int ret = 0; + xsltStylesheetPtr sheet = NULL; + xmlDocPtr res = NULL; + xmlChar *doc_txt = NULL; + int doc_txt_len = 0; + const char* parser_name; + 
const char* params[7]; + const unsigned char* base_uri_string; + size_t base_uri_len; + raptor_uri* xslt_uri; + raptor_uri* base_uri; + char *quoted_base_uri = NULL; + xsltTransformContextPtr userCtxt = NULL; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + xslt_uri = xml_context->uri; + base_uri = xml_context->base_uri ? xml_context->base_uri : xml_context->uri; + + base_uri_string = raptor_uri_as_counted_string(base_uri, &base_uri_len); + + RAPTOR_DEBUG3("Running GRDDL transform with XSLT URI '%s' with doc base URI '%s'\n", + raptor_uri_as_string(xslt_uri), + base_uri_string); + + raptor_libxslt_set_global_state(rdf_parser); + + /* This calls xsltGetDefaultSecurityPrefs() */ + sheet = xsltParseStylesheetDoc(xslt_doc); + if(!sheet) { + raptor_parser_error(rdf_parser, "Failed to parse stylesheet in '%s'", + raptor_uri_as_string(xslt_uri)); + ret = 1; + goto cleanup_xslt; + } + + /* This calls xsltGetDefaultSecurityPrefs() */ + userCtxt = xsltNewTransformContext(sheet, doc); + + /* set per-transform security preferences */ + if(world->xslt_security_preferences) + xsltSetCtxtSecurityPrefs((xsltSecurityPrefs*)world->xslt_security_preferences, + userCtxt); + + /* set per-transform generic error handler */ + xsltSetTransformErrorFunc(userCtxt, rdf_parser, + raptor_grddl_xsltGenericError_handler); + + + /* + * Define 'base', 'Base' and 'url' params to allow some XSLT sheets to work: + * base: + * http://www.w3.org/2000/07/uri43/uri.xsl + * Base: + * http://www.w3.org/2000/08/w3c-synd/home2rss.xsl + * url: (optional) + * http://www.w3.org/2001/sw/grddl-wg/td/RDFa2RDFXML.xsl + */ + quoted_base_uri = RAPTOR_MALLOC(char*, base_uri_len + 3); + quoted_base_uri[0] = '\''; + memcpy(quoted_base_uri + 1, (const char*)base_uri_string, base_uri_len); + quoted_base_uri[base_uri_len + 1] = '\''; + quoted_base_uri[base_uri_len + 2] = '\0'; + + params[0] = "base"; + params[1] = (const char*)quoted_base_uri; + params[2] = "Base"; + params[3] = (const 
char*)quoted_base_uri; + params[4] = "url"; + params[5] = (const char*)quoted_base_uri; + params[6] = NULL; + + res = xsltApplyStylesheetUser(sheet, doc, params, NULL, NULL, userCtxt); + + if(!res) { + raptor_parser_error(rdf_parser, "Failed to apply stylesheet in '%s'", + raptor_uri_as_string(xslt_uri)); + ret = 1; + goto cleanup_xslt; + } + + if(res->type == XML_HTML_DOCUMENT_NODE) { + if(sheet->method != NULL) + xmlFree(sheet->method); + sheet->method = (xmlChar*)xmlMalloc(5); + memcpy(sheet->method, "html", 5); + } + + /* write the resulting XML to a string */ + xsltSaveResultToString(&doc_txt, &doc_txt_len, res, sheet); + + if(!doc_txt || !doc_txt_len) { + raptor_parser_warning(rdf_parser, "XSLT returned an empty document"); + goto cleanup_xslt; + } + + RAPTOR_DEBUG4("XSLT returned %d bytes document method %s media type %s\n", + doc_txt_len, + (sheet->method ? (const char*)sheet->method : "NULL"), + (sheet->mediaType ? (const char*)sheet->mediaType : "NULL")); + + /* Set mime types for XSLT <xsl:output method> content */ + if(sheet->mediaType == NULL && sheet->method) { + if(!(strcmp((const char*)sheet->method, "text"))) { + sheet->mediaType = (xmlChar*)xmlMalloc(11); + memcpy(sheet->mediaType, "text/plain",11); + } else if(!(strcmp((const char*)sheet->method, "xml"))) { + sheet->mediaType = (xmlChar*)xmlMalloc(16); + memcpy(sheet->mediaType, "application/xml",16); + } else if(!(strcmp((const char*)sheet->method, "html"))) { + sheet->mediaType = (xmlChar*)xmlMalloc(10); + memcpy(sheet->mediaType, "text/html",10); + } + } + + /* Assume all that all media XML is RDF/XML and also that + * with no information at all we have RDF/XML + */ + if(!sheet->mediaType || + (sheet->mediaType && + !strcmp((const char*)sheet->mediaType, "application/xml"))) { + if(sheet->mediaType) + xmlFree(sheet->mediaType); + sheet->mediaType = (xmlChar*)xmlMalloc(20); + memcpy(sheet->mediaType, "application/rdf+xml",20); + } + + parser_name = 
raptor_world_guess_parser_name(rdf_parser->world, NULL, + (const char*)sheet->mediaType, + doc_txt, doc_txt_len, NULL); + if(!parser_name) { + RAPTOR_DEBUG3("Parser %p: Guessed no parser from mime type '%s' and content - ending", + RAPTOR_VOIDP(rdf_parser), sheet->mediaType); + goto cleanup_xslt; + } + + RAPTOR_DEBUG4("Parser %p: Guessed parser %s from mime type '%s' and content\n", + RAPTOR_VOIDP(rdf_parser), parser_name, sheet->mediaType); + + if(!strcmp((const char*)parser_name, "grddl")) { + RAPTOR_DEBUG2("Parser %p: Ignoring guess to run grddl parser - ending", + RAPTOR_VOIDP(rdf_parser)); + goto cleanup_xslt; + } + + ret = raptor_grddl_ensure_internal_parser(rdf_parser, parser_name, 0); + if(ret) + goto cleanup_xslt; + + if(grddl_parser->internal_parser) { + /* generate the triples */ + ret = raptor_parser_parse_start(grddl_parser->internal_parser, base_uri); + if(!ret) + ret = raptor_parser_parse_chunk(grddl_parser->internal_parser, + doc_txt, doc_txt_len, 1); + } + + cleanup_xslt: + + if(userCtxt) + xsltFreeTransformContext(userCtxt); + + if(quoted_base_uri) + RAPTOR_FREE(char*, quoted_base_uri); + + if(doc_txt) + xmlFree(doc_txt); + + if(res) + xmlFreeDoc(res); + + if(sheet) + xsltFreeStylesheet(sheet); + + raptor_libxslt_reset_global_state(rdf_parser); + + return ret; +} + + +typedef struct +{ + raptor_parser* rdf_parser; + xmlParserCtxtPtr xc; + raptor_uri* base_uri; +} raptor_grddl_xml_parse_bytes_context; + + +static void +raptor_grddl_uri_xml_parse_bytes(raptor_www* www, + void *userdata, + const void *ptr, size_t size, size_t nmemb) +{ + raptor_grddl_xml_parse_bytes_context* xpbc; + size_t len = size * nmemb; + int rc = 0; + + xpbc = (raptor_grddl_xml_parse_bytes_context*)userdata; + + if(!xpbc->xc) { + xmlParserCtxtPtr xc; + + xc = xmlCreatePushParserCtxt(NULL, NULL, + (const char*)ptr, RAPTOR_BAD_CAST(int, len), + (const char*)raptor_uri_as_string(xpbc->base_uri)); + if(!xc) + rc = 1; + else { + int libxml_options = 0; + +#ifdef 
RAPTOR_LIBXML_XML_PARSE_NONET + if(RAPTOR_OPTIONS_GET_NUMERIC(xpbc->rdf_parser, RAPTOR_OPTION_NO_NET)) + libxml_options |= XML_PARSE_NONET; +#endif +#ifdef HAVE_XMLCTXTUSEOPTIONS + xmlCtxtUseOptions(xc, libxml_options); +#endif + + xc->replaceEntities = 1; + xc->loadsubset = 1; + } + xpbc->xc = xc; + } else + rc = xmlParseChunk(xpbc->xc, (const char*)ptr, RAPTOR_BAD_CAST(int, len), 0); + + if(rc) + raptor_parser_error(xpbc->rdf_parser, "XML Parsing failed"); +} + + +#define FETCH_IGNORE_ERRORS 1 +#define FETCH_ACCEPT_XSLT 2 + +static int +raptor_grddl_fetch_uri(raptor_parser* rdf_parser, + raptor_uri* uri, + raptor_www_write_bytes_handler write_bytes_handler, + void* write_bytes_user_data, + raptor_www_content_type_handler content_type_handler, + void* content_type_user_data, + int flags) +{ + raptor_www *www; + const char *accept_h; + int ret = 0; + int ignore_errors = (flags & FETCH_IGNORE_ERRORS); + + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET)) { + if(!raptor_uri_uri_string_is_file_uri(raptor_uri_as_string(uri))) + return 1; + } + + www = raptor_new_www(rdf_parser->world); + if(!www) + return 1; + + if(raptor_www_set_user_agent2(www, "grddl/0.1", 0)) + goto cleanup_www; + + if(flags & FETCH_ACCEPT_XSLT) { + if(raptor_www_set_http_accept2(www, "application/xml", 0)) + goto cleanup_www; + } else { + accept_h = raptor_parser_get_accept_header(rdf_parser); + if(accept_h) { + ret = raptor_www_set_http_accept2(www, accept_h, 0); + RAPTOR_FREE(char*, accept_h); + if(ret) + goto cleanup_www; + } + } + if(rdf_parser->uri_filter) + raptor_www_set_uri_filter(www, rdf_parser->uri_filter, + rdf_parser->uri_filter_user_data); + if(ignore_errors) + raptor_world_internal_set_ignore_errors(rdf_parser->world, 1); + + raptor_www_set_write_bytes_handler(www, write_bytes_handler, + write_bytes_user_data); + raptor_www_set_content_type_handler(www, content_type_handler, + content_type_user_data); + + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, 
RAPTOR_OPTION_WWW_TIMEOUT) > 0) + raptor_www_set_connection_timeout(www, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_WWW_TIMEOUT)); + + ret = raptor_www_fetch(www, uri); + + raptor_free_www(www); + + if(ignore_errors) + raptor_world_internal_set_ignore_errors(rdf_parser->world, 0); + + return ret; + +cleanup_www: + raptor_free_www(www); + + return 1; +} + + +/* Run a GRDDL transform using a XSLT stylesheet at a given URI */ +static int +raptor_grddl_run_grddl_transform_uri(raptor_parser* rdf_parser, + grddl_xml_context* xml_context, + xmlDocPtr doc) +{ + xmlParserCtxtPtr xslt_ctxt = NULL; + raptor_grddl_xml_parse_bytes_context xpbc; + int ret = 0; + raptor_uri* xslt_uri; + raptor_uri* base_uri; + raptor_uri* old_locator_uri; + raptor_locator *locator = &rdf_parser->locator; + + xslt_uri = xml_context->uri; + base_uri = xml_context->base_uri ? xml_context->base_uri : xml_context->uri; + + RAPTOR_DEBUG3("Running GRDDL transform with XSLT URI %s and base URI %s\n", + raptor_uri_as_string(xslt_uri), + raptor_uri_as_string(base_uri)); + + /* make an xsltStylesheetPtr via the raptor_grddl_uri_xml_parse_bytes + * callback as bytes are returned + */ + xpbc.xc = NULL; + xpbc.rdf_parser = rdf_parser; + xpbc.base_uri = base_uri; + + old_locator_uri = locator->uri; + locator->uri = xslt_uri; + ret = raptor_grddl_fetch_uri(rdf_parser, + xslt_uri, + raptor_grddl_uri_xml_parse_bytes, &xpbc, + NULL, NULL, + FETCH_ACCEPT_XSLT); + xslt_ctxt = xpbc.xc; + if(ret) { + locator->uri = old_locator_uri; + raptor_parser_warning(rdf_parser, "Fetching XSLT document URI '%s' failed", + raptor_uri_as_string(xslt_uri)); + ret = 0; + } else { + xmlParseChunk(xpbc.xc, NULL, 0, 1); + + ret = raptor_grddl_run_grddl_transform_doc(rdf_parser, + xml_context, + xslt_ctxt->myDoc, + doc); + locator->uri = old_locator_uri; + } + + if(xslt_ctxt) + xmlFreeParserCtxt(xslt_ctxt); + + return ret; +} + + +static int +raptor_grddl_seen_uri(raptor_grddl_parser_context* grddl_parser, + raptor_uri* uri) 
+{ + int i; + int seen = 0; + raptor_sequence* seq = grddl_parser->visited_uris; + int size; + + size = raptor_sequence_size(seq); + for(i = 0; i < size; i++) { + raptor_uri* vuri = (raptor_uri*)raptor_sequence_get_at(seq, i); + if(raptor_uri_equals(uri, vuri)) { + seen = 1; + break; + } + } + +#ifdef RAPTOR_DEBUG + if(seen) + RAPTOR_DEBUG2("Already seen URI '%s'\n", raptor_uri_as_string(uri)); +#endif + + return seen; +} + + +static void +raptor_grddl_done_uri(raptor_grddl_parser_context* grddl_parser, + raptor_uri* uri) +{ + if(!grddl_parser->visited_uris) + return; + + if(!raptor_grddl_seen_uri(grddl_parser, uri)) { + raptor_sequence* seq = grddl_parser->visited_uris; + raptor_sequence_push(seq, raptor_uri_copy(uri)); + } +} + + +static raptor_sequence* +raptor_grddl_run_xpath_match(raptor_parser* rdf_parser, + xmlDocPtr doc, + const xmlChar* xpathExpr, + int flags) +{ + raptor_grddl_parser_context* grddl_parser; + /* Evaluate xpath expression */ + xmlXPathObjectPtr xpathObj = NULL; + raptor_sequence* seq = NULL; + xmlNodeSetPtr nodes; + int i; + int size; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + seq = raptor_new_sequence((raptor_data_free_handler)grddl_free_xml_context, NULL); + + /* Evaluate xpath expression */ + xpathObj = xmlXPathEvalExpression(xpathExpr, + grddl_parser->xpathCtx); + if(!xpathObj) { + raptor_parser_error(rdf_parser, + "Unable to evaluate XPath expression \"%s\"", + xpathExpr); + raptor_free_sequence(seq); seq = NULL; + goto cleanup_xpath_match; + } + + nodes = xpathObj->nodesetval; + if(!nodes || xmlXPathNodeSetIsEmpty(nodes)) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("No match found with XPath expression \"%s\" over '%s'\n", + xpathExpr, raptor_uri_as_string(rdf_parser->base_uri)); +#endif + raptor_free_sequence(seq); seq = NULL; + goto cleanup_xpath_match; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("Found match with XPath expression \"%s\" over '%s'\n", + 
xpathExpr, raptor_uri_as_string(rdf_parser->base_uri)); +#endif + + size = xmlXPathNodeSetGetLength(nodes); + for(i = 0; i < size; i++) { + xmlNodePtr node = nodes->nodeTab[i]; + const unsigned char* uri_string = NULL; + xmlChar *base_uri_string; + raptor_uri* base_uri = NULL; + raptor_uri* uri = NULL; + + if(node->type != XML_ATTRIBUTE_NODE && + node->type != XML_ELEMENT_NODE) { + raptor_parser_error(rdf_parser, "Got unexpected node type %u", + node->type); + continue; + } + + + /* xmlNodeGetBase() returns base URI or NULL and must be freed + * with xmlFree() + */ + if(grddl_parser->html_base_processing) { + xmlElementType savedType = doc->type; + doc->type = XML_HTML_DOCUMENT_NODE; + base_uri_string = xmlNodeGetBase(doc, node); + doc->type = savedType; + } else + base_uri_string = xmlNodeGetBase(doc, node); + + + if(node->type == XML_ATTRIBUTE_NODE) + uri_string = (const unsigned char*)node->children->content; + else { /* XML_ELEMENT_NODE */ + if(node->ns) + uri_string = (const unsigned char*)node->ns->href; + } + + + if(base_uri_string) { + base_uri = raptor_new_uri(rdf_parser->world, base_uri_string); + xmlFree(base_uri_string); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("XML base URI of match is '%s'\n", + raptor_uri_as_string(base_uri)); +#endif + } else if(rdf_parser->base_uri) + base_uri = raptor_uri_copy(rdf_parser->base_uri); + else + base_uri = NULL; + + if(uri_string && (flags & MATCH_IS_VALUE_LIST)) { + char *start; + char *end; + char* buffer; + size_t list_len = strlen((const char*)uri_string); + + buffer = RAPTOR_MALLOC(char*, list_len + 1); + memcpy(buffer, uri_string, list_len + 1); + + for(start = end = buffer; end; start = end+1) { + grddl_xml_context* xml_context; + + end = strchr(start, ' '); + if(end) + *end = '\0'; + + if(start == end) + continue; + + RAPTOR_DEBUG2("Got list match URI '%s'\n", start); + + uri = raptor_new_uri_relative_to_base(rdf_parser->world, + base_uri, + (const unsigned char*)start); + if(flags & 
MATCH_IS_PROFILE && + !strcmp((const char*)raptor_uri_as_string(uri), + "http://www.w3.org/2003/g/data-view'")) { + raptor_free_uri(uri); + continue; + } + + xml_context = raptor_new_xml_context(rdf_parser->world, uri, base_uri); + raptor_sequence_push(seq, xml_context); + } + RAPTOR_FREE(char*, buffer); + } else if(flags & MATCH_IS_HARDCODED) { + RAPTOR_DEBUG2("Got hardcoded XSLT match for %s\n", xpathExpr); + /* return at first match, that's enough */ + if(base_uri) + raptor_free_uri(base_uri); + break; + } else if(uri_string) { + grddl_xml_context* xml_context; + RAPTOR_DEBUG2("Got single match URI '%s'\n", uri_string); + + uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, + uri_string); + xml_context = raptor_new_xml_context(rdf_parser->world, uri, base_uri); + raptor_sequence_push(seq, xml_context); + raptor_free_uri(uri); + } + + if(base_uri) + raptor_free_uri(base_uri); + } + + cleanup_xpath_match: + if(xpathObj) + xmlXPathFreeObject(xpathObj); + + return seq; +} + + +static void +raptor_grddl_check_recursive_content_type_handler(raptor_www* www, + void* userdata, + const char* content_type) +{ + raptor_parser* rdf_parser = (raptor_parser*)userdata; + raptor_grddl_parser_context* grddl_parser; + size_t len; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + if(!content_type) + return; + + len = strlen(content_type)+1; + if(grddl_parser->content_type) + RAPTOR_FREE(char*, grddl_parser->content_type); + grddl_parser->content_type = RAPTOR_MALLOC(char*, len + 1); + memcpy(grddl_parser->content_type, content_type, len + 1); + + if(!strncmp(content_type, "application/rdf+xml", 19)) { + grddl_parser->process_this_as_rdfxml = 1; + + RAPTOR_DEBUG2("Parser %p: Found RDF/XML content type\n", + RAPTOR_VOIDP(rdf_parser)); + raptor_parser_save_content(rdf_parser, 1); + } + + if(!strncmp(content_type, "text/html", 9) || + !strncmp(content_type, "application/html+xml", 20)) { + RAPTOR_DEBUG3("Parser %p: Found HTML content type 
'%s'\n", + RAPTOR_VOIDP(rdf_parser), content_type); + grddl_parser->html_base_processing = 1; + } + +} + +#define RECURSIVE_FLAGS_IGNORE_ERRORS 1 +#define RECURSIVE_FLAGS_FILTER 2 + +static int +raptor_grddl_run_recursive(raptor_parser* rdf_parser, raptor_uri* uri, + const char *parser_name, int flags) +{ + raptor_grddl_parser_context* grddl_parser; + raptor_www_content_type_handler content_type_handler = NULL; + int ret = 0; + const unsigned char* ibuffer = NULL; + size_t ibuffer_len = 0; + raptor_parse_bytes_context rpbc; + int ignore_errors = (flags & RECURSIVE_FLAGS_IGNORE_ERRORS) > 0; + int filter = (flags & RECURSIVE_FLAGS_FILTER) > 0; + int fetch_uri_flags = 0; + int is_grddl=!strcmp(parser_name, "grddl"); + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + if(raptor_grddl_seen_uri(grddl_parser, uri)) + return 0; + + if(is_grddl) + content_type_handler = raptor_grddl_check_recursive_content_type_handler; + + if(raptor_grddl_ensure_internal_parser(rdf_parser, parser_name, filter)) + return !ignore_errors; + + RAPTOR_DEBUG3("Running recursive %s operation on URI '%s'\n", + parser_name, raptor_uri_as_string(uri)); + + if(is_grddl) + raptor_grddl_parser_add_parent(grddl_parser->internal_parser, grddl_parser); + + rpbc.rdf_parser = grddl_parser->internal_parser; + rpbc.base_uri = NULL; + rpbc.final_uri = NULL; + rpbc.started = 0; + + if(ignore_errors) + fetch_uri_flags |=FETCH_IGNORE_ERRORS; + + if(raptor_grddl_fetch_uri(grddl_parser->internal_parser, + uri, + raptor_parser_parse_uri_write_bytes, &rpbc, + content_type_handler, grddl_parser->internal_parser, + fetch_uri_flags)) { + if(!ignore_errors) + raptor_parser_warning(rdf_parser, + "Fetching GRDDL document URI '%s' failed\n", + raptor_uri_as_string(uri)); + ret = 0; + goto tidy; + } + + if(ignore_errors) + raptor_world_internal_set_ignore_errors(rdf_parser->world, 1); + + raptor_parser_parse_chunk(grddl_parser->internal_parser, NULL, 0, 1); + + /* If content was saved, process it as 
RDF/XML */ + ibuffer = raptor_parser_get_content(grddl_parser->internal_parser, + &ibuffer_len); + if(ibuffer && strcmp(parser_name, "rdfxml")) { + RAPTOR_DEBUG2("Running additional RDF/XML parse on URI '%s' content\n", + raptor_uri_as_string(uri)); + + if(raptor_grddl_ensure_internal_parser(rdf_parser, "rdfxml", 1)) + ret = 1; + else { + if(raptor_parser_parse_start(grddl_parser->internal_parser, uri)) + ret = 1; + else { + ret = raptor_parser_parse_chunk(grddl_parser->internal_parser, ibuffer, + ibuffer_len, 1); + } + } + + raptor_parser_save_content(grddl_parser->internal_parser, 0); + } + + if(ibuffer) + RAPTOR_FREE(char*, ibuffer); + + if(rpbc.final_uri) + raptor_free_uri(rpbc.final_uri); + + if(ignore_errors) { + raptor_world_internal_set_ignore_errors(rdf_parser->world, 0); + ret = 0; + } + + tidy: + + return ret; +} + + +static void +raptor_grddl_libxml_discard_error(void* user_data, const char *msg, ...) +{ + return; +} + + +static int +raptor_grddl_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_grddl_parser_context* grddl_parser; + int i; + int ret = 0; + const unsigned char* uri_string; + raptor_uri* uri; + /* XML document DOM */ + xmlDocPtr doc; + int expri; + unsigned char* buffer = NULL; + size_t buffer_len = 0; + int buffer_is_libxml = 0; + int loop; + + if(!is_end && !rdf_parser->emitted_default_graph) { + /* Cannot tell if we have a statement yet but must ensure that + * the start default graph mark is done once and done before any + * statements. 
+ */ + raptor_parser_start_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph++; + } + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + if(grddl_parser->content_type && !grddl_parser->content_type_check) { + grddl_parser->content_type_check++; + if(!strncmp(grddl_parser->content_type, "application/rdf+xml", 19)) { + RAPTOR_DEBUG3("Parser %p: Found document with type '%s' is RDF/XML\n", + RAPTOR_VOIDP(rdf_parser), grddl_parser->content_type); + grddl_parser->process_this_as_rdfxml = 1; + } + if(!strncmp(grddl_parser->content_type, "text/html", 9) || + !strncmp(grddl_parser->content_type, "application/html+xml", 20)) { + RAPTOR_DEBUG3("Parser %p: Found document with type '%s' is HTML\n", + RAPTOR_VOIDP(rdf_parser), grddl_parser->content_type); + grddl_parser->html_base_processing = 1; + } + } + + if(!grddl_parser->sb) + grddl_parser->sb = raptor_new_stringbuffer(); + + raptor_stringbuffer_append_counted_string(grddl_parser->sb, s, len, 1); + + if(!is_end) + return 0; + + buffer_len = raptor_stringbuffer_length(grddl_parser->sb); + buffer = RAPTOR_MALLOC(unsigned char*, buffer_len + 1); + if(buffer) + raptor_stringbuffer_copy_to_string(grddl_parser->sb, + buffer, buffer_len); + + + uri_string = raptor_uri_as_string(rdf_parser->base_uri); + + /* Discard parsing errors */ + raptor_world_internal_set_ignore_errors(rdf_parser->world, 1); + + RAPTOR_DEBUG4("Parser %p: URI %s: processing %d bytes of content\n", + RAPTOR_VOIDP(rdf_parser), uri_string, (int)buffer_len); + + for(loop = 0; loop < 2; loop++) { + int rc; + + if(loop == 0) { + int libxml_options = 0; + + RAPTOR_DEBUG2("Parser %p: Creating an XML parser\n", + RAPTOR_VOIDP(rdf_parser)); + + /* try to create an XML parser context */ + grddl_parser->xml_ctxt = xmlCreatePushParserCtxt(NULL, NULL, + (const char*)buffer, + RAPTOR_BAD_CAST(int, buffer_len), + (const char*)uri_string); + if(!grddl_parser->xml_ctxt) { + RAPTOR_DEBUG2("Parser %p: Creating an XML parser failed\n", + 
RAPTOR_VOIDP(rdf_parser)); + continue; + } + +#ifdef RAPTOR_LIBXML_XML_PARSE_NONET + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET)) + libxml_options |= XML_PARSE_NONET; +#endif +#ifdef HAVE_XMLCTXTUSEOPTIONS + xmlCtxtUseOptions(grddl_parser->xml_ctxt, libxml_options); +#endif + + + grddl_parser->xml_ctxt->vctxt.warning = raptor_grddl_libxml_discard_error; + grddl_parser->xml_ctxt->vctxt.error = raptor_grddl_libxml_discard_error; + + grddl_parser->xml_ctxt->replaceEntities = 1; + grddl_parser->xml_ctxt->loadsubset = 1; + } else { /* loop is 1 */ + + /* try to create an HTML parser context */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_TAG_SOUP)) { + xmlCharEncoding enc; + int options; + + RAPTOR_DEBUG2("Parser %p: Creating an HTML parser\n", + RAPTOR_VOIDP(rdf_parser)); + + enc = xmlDetectCharEncoding((const unsigned char*)buffer, + RAPTOR_BAD_CAST(int, buffer_len)); + grddl_parser->html_ctxt = htmlCreatePushParserCtxt(/*sax*/ NULL, + /*user_data*/ NULL, + (const char *)buffer, + RAPTOR_BAD_CAST(int, buffer_len), + (const char *)uri_string, + enc); + if(!grddl_parser->html_ctxt) { + RAPTOR_DEBUG2("Parser %p: Creating an HTML parser failed\n", + RAPTOR_VOIDP(rdf_parser)); + continue; + } + + /* HTML parser */ + grddl_parser->html_ctxt->replaceEntities = 1; + grddl_parser->html_ctxt->loadsubset = 1; + + grddl_parser->html_ctxt->vctxt.error = raptor_grddl_libxml_discard_error; + + /* HTML_PARSE_NOWARNING disables sax->warning, vxtxt.warning */ + /* HTML_PARSE_NOERROR disables sax->error, vctxt.error */ + options = HTML_PARSE_NOERROR | HTML_PARSE_NOWARNING; +#ifdef HTML_PARSE_RECOVER + options |= HTML_PARSE_RECOVER; +#endif +#ifdef RAPTOR_LIBXML_HTML_PARSE_NONET + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET)) + options |= HTML_PARSE_NONET; +#endif + + htmlCtxtUseOptions(grddl_parser->html_ctxt, options); + + } else /* No HTML tag soup allowed so continue loop */ + continue; + } + + + if(grddl_parser->html_ctxt) { + 
RAPTOR_DEBUG2("Parser %p: Parsing as HTML\n", RAPTOR_VOIDP(rdf_parser)); + rc = htmlParseChunk(grddl_parser->html_ctxt, (const char*)s, 0, 1); + RAPTOR_DEBUG3("Parser %p: Parsing as HTML %s\n", + RAPTOR_VOIDP(rdf_parser), + (rc ? "failed" : "succeeded")); + if(rc) { + if(grddl_parser->html_ctxt->myDoc) { + xmlFreeDoc(grddl_parser->html_ctxt->myDoc); + grddl_parser->html_ctxt->myDoc = NULL; + } + htmlFreeParserCtxt(grddl_parser->html_ctxt); + grddl_parser->html_ctxt = NULL; + } + } else { + RAPTOR_DEBUG2("Parser %p: Parsing as XML\n", RAPTOR_VOIDP(rdf_parser)); + rc = xmlParseChunk(grddl_parser->xml_ctxt, (const char*)s, 0, 1); + RAPTOR_DEBUG3("Parser %p: Parsing as XML %s\n", RAPTOR_VOIDP(rdf_parser), + (rc ? "failed" : "succeeded")); + if(rc) { + if(grddl_parser->xml_ctxt->myDoc) { + xmlFreeDoc(grddl_parser->xml_ctxt->myDoc); + grddl_parser->xml_ctxt->myDoc = NULL; + } + xmlFreeParserCtxt(grddl_parser->xml_ctxt); + grddl_parser->xml_ctxt = NULL; + } + } + + if(!rc) + break; + + } + + /* Restore error handling */ + raptor_world_internal_set_ignore_errors(rdf_parser->world, 0); + + if(!grddl_parser->html_ctxt && !grddl_parser->xml_ctxt) { + raptor_parser_error(rdf_parser, "Failed to create HTML or XML parsers"); + ret = 1; + goto tidy; + } + + raptor_grddl_done_uri(grddl_parser, rdf_parser->base_uri); + + if(grddl_parser->html_ctxt) + doc = grddl_parser->html_ctxt->myDoc; + else + doc = grddl_parser->xml_ctxt->myDoc; + if(!doc) { + raptor_parser_error(rdf_parser, + "Failed to create XML DOM for GRDDL document"); + ret = 1; + goto tidy; + } + + if(!grddl_parser->grddl_processing) + goto transform; + + + if(grddl_parser->xinclude_processing) { + RAPTOR_DEBUG3("Parser %p: Running XInclude processing on URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), + raptor_uri_as_string(rdf_parser->base_uri)); + if(xmlXIncludeProcess(doc) < 0) { + raptor_parser_error(rdf_parser, + "XInclude processing failed for GRDDL document"); + ret = 1; + goto tidy; + } else { + int blen; + + /* write 
the result of XML Include to buffer */ + RAPTOR_FREE(char*, buffer); + xmlDocDumpFormatMemory(doc, (xmlChar**)&buffer, &blen, + 1 /* indent the result */); + buffer_len = blen; + buffer_is_libxml = 1; + + RAPTOR_DEBUG3("Parser %p: XML Include processing returned %d bytes document\n", + RAPTOR_VOIDP(rdf_parser), (int)buffer_len); + } + } + + + RAPTOR_DEBUG3("Parser %p: Running top-level GRDDL on URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), + raptor_uri_as_string(rdf_parser->base_uri)); + + /* Work out if there is a root namespace URI */ + if(1) { + xmlNodePtr xnp; + xmlNsPtr rootNs = NULL; + const unsigned char* ns_uri_string = NULL; + + xnp = xmlDocGetRootElement(doc); + if(xnp) { + rootNs = xnp->ns; + if(rootNs) + ns_uri_string = (const unsigned char*)(rootNs->href); + } + + if(ns_uri_string) { + int n; + + RAPTOR_DEBUG3("Parser %p: Root namespace URI is %s\n", + RAPTOR_VOIDP(rdf_parser), ns_uri_string); + + if(!strcmp((const char*)ns_uri_string, + (const char*)raptor_rdf_namespace_uri) && + !strcmp((const char*)xnp->name, "RDF")) { + RAPTOR_DEBUG3("Parser %p: Root element of %s is rdf:RDF - process this as RDF/XML later\n", + RAPTOR_VOIDP(rdf_parser), + raptor_uri_as_string(rdf_parser->base_uri)); + grddl_parser->process_this_as_rdfxml = 1; + } + + for(n = 0; grddl_namespace_uris_ignore_list[n]; n++) { + if(!strcmp(grddl_namespace_uris_ignore_list[n], + (const char*)ns_uri_string)) { + /* ignore this namespace */ + RAPTOR_DEBUG3("Parser %p: Ignoring GRDDL for namespace URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), ns_uri_string); + ns_uri_string = NULL; + break; + } + } + if(ns_uri_string) { + grddl_xml_context* xml_context; + + grddl_parser->root_ns_uri = raptor_new_uri_relative_to_base(rdf_parser->world, + rdf_parser->base_uri, + ns_uri_string); + xml_context = raptor_new_xml_context(rdf_parser->world, + grddl_parser->root_ns_uri, + rdf_parser->base_uri); + raptor_sequence_push(grddl_parser->profile_uris, xml_context); + + RAPTOR_DEBUG3("Parser %p: Processing GRDDL 
namespace URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), + raptor_uri_as_string(grddl_parser->root_ns_uri)); + raptor_grddl_run_recursive(rdf_parser, grddl_parser->root_ns_uri, + "grddl", + RECURSIVE_FLAGS_IGNORE_ERRORS | + RECURSIVE_FLAGS_FILTER); + } + + } + } + + /* Always put something at the start of the list even if NULL + * so later it can be searched for in output triples + */ + if(!grddl_parser->root_ns_uri) { + grddl_xml_context* xml_context; + xml_context = raptor_new_xml_context(rdf_parser->world, NULL, NULL); + raptor_sequence_push(grddl_parser->profile_uris, xml_context); + } + + + /* Create the XPath evaluation context */ + if(!grddl_parser->xpathCtx) { + grddl_parser->xpathCtx = xmlXPathNewContext(doc); + if(!grddl_parser->xpathCtx) { + raptor_parser_error(rdf_parser, + "Failed to create XPath context for GRDDL document"); + ret = 1; + goto tidy; + } + + xmlXPathRegisterNs(grddl_parser->xpathCtx, + (const xmlChar*)"html", + (const xmlChar*)"http://www.w3.org/1999/xhtml"); + xmlXPathRegisterNs(grddl_parser->xpathCtx, + (const xmlChar*)"dataview", + (const xmlChar*)"http://www.w3.org/2003/g/data-view#"); + } + + /* Try <head profile> URIs */ + if(1) { + raptor_sequence* result; + result = raptor_grddl_run_xpath_match(rdf_parser, doc, + (const xmlChar*)"/html:html/html:head/@profile", + MATCH_IS_VALUE_LIST | MATCH_IS_PROFILE); + if(result) { + int size; + + RAPTOR_DEBUG4("Parser %p: Found %d <head profile> URIs in URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), raptor_sequence_size(result), + raptor_uri_as_string(rdf_parser->base_uri)); + + + /* Store profile URIs, skipping NULLs or the GRDDL profile itself */ + while(raptor_sequence_size(result)) { + grddl_xml_context* xml_context; + + xml_context = (grddl_xml_context*)raptor_sequence_unshift(result); + if(!xml_context) + continue; + uri = xml_context->uri; + if(!strcmp("http://www.w3.org/2003/g/data-view", + (const char*)raptor_uri_as_string(uri))) { + RAPTOR_DEBUG3("Ignoring <head profile> of URI %s: URI %s\n", 
+ raptor_uri_as_string(rdf_parser->base_uri), + raptor_uri_as_string(uri)); + grddl_free_xml_context(xml_context); + continue; + } + raptor_sequence_push(grddl_parser->profile_uris, xml_context); + } + raptor_free_sequence(result); + + + /* Recursive GRDDL through all the <head profile> URIs */ + size = raptor_sequence_size(grddl_parser->profile_uris); + for(i = 1; i < size; i++) { + grddl_xml_context* xml_context; + + xml_context = (grddl_xml_context*)raptor_sequence_get_at(grddl_parser->profile_uris, i); + uri = xml_context->uri; + if(!uri) + continue; + + RAPTOR_DEBUG4("Processing <head profile> #%d of URI %s: URI %s\n", + i, raptor_uri_as_string(rdf_parser->base_uri), + raptor_uri_as_string(uri)); + ret = raptor_grddl_run_recursive(rdf_parser, uri, + "grddl", + RECURSIVE_FLAGS_IGNORE_ERRORS| + RECURSIVE_FLAGS_FILTER); + } + } + + } /* end head profile URIs */ + + + /* Try XHTML document with alternate forms + * <link type="application/rdf+xml" href="URI" /> + * Value of @href is a URI + */ + if(grddl_parser->html_link_processing && + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_LINK)) { + raptor_sequence* result; + result = raptor_grddl_run_xpath_match(rdf_parser, doc, + (const xmlChar*)"/html:html/html:head/html:link[@type=\"application/rdf+xml\"]/@href", + 0); + if(result) { + RAPTOR_DEBUG4("Parser %p: Found %d <link> URIs in URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), raptor_sequence_size(result), + raptor_uri_as_string(rdf_parser->base_uri)); + + /* Recursively parse all the <link> URIs, skipping NULLs */ + i = 0; + while(raptor_sequence_size(result)) { + grddl_xml_context* xml_context; + + xml_context = (grddl_xml_context*)raptor_sequence_unshift(result); + if(!xml_context) + continue; + + uri = xml_context->uri; + if(uri) { + RAPTOR_DEBUG4("Processing <link> #%d of URI %s: URI %s\n", + i, raptor_uri_as_string(rdf_parser->base_uri), + raptor_uri_as_string(uri)); + i++; + ret = raptor_grddl_run_recursive(rdf_parser, uri, "guess", + 
RECURSIVE_FLAGS_IGNORE_ERRORS); + } + grddl_free_xml_context(xml_context); + } + + raptor_free_sequence(result); + } + } + + + /* Try all XPaths */ + for(expri = 0; match_table[expri].xpath; expri++) { + raptor_sequence* result; + int flags = match_table[expri].flags; + + if((flags & MATCH_IS_HARDCODED) && + !RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_MICROFORMATS)) + continue; + + result = raptor_grddl_run_xpath_match(rdf_parser, doc, + match_table[expri].xpath, flags); + if(result) { + if(match_table[expri].xslt_sheet_uri) { + grddl_xml_context* xml_context; + + /* Ignore what matched, use a hardcoded XSLT URI */ + uri_string = match_table[expri].xslt_sheet_uri; + RAPTOR_DEBUG3("Parser %p: Using hard-coded XSLT URI '%s'\n", + RAPTOR_VOIDP(rdf_parser), uri_string); + + raptor_free_sequence(result); + result = raptor_new_sequence((raptor_data_free_handler)grddl_free_xml_context, NULL); + + uri = raptor_new_uri_relative_to_base(rdf_parser->world, + rdf_parser->base_uri, uri_string); + + xml_context = raptor_new_xml_context(rdf_parser->world, uri, + rdf_parser->base_uri); + raptor_sequence_push(result, xml_context); + + raptor_free_uri(uri); + } + + while(raptor_sequence_size(result)) { + grddl_xml_context* xml_context; + + xml_context = (grddl_xml_context*)raptor_sequence_unshift(result); + if(!xml_context) + break; + + raptor_grddl_add_transform_xml_context(grddl_parser, xml_context); + } + raptor_free_sequence(result); + + if(flags & MATCH_LAST) + break; + } + + + if(rdf_parser->failed) + break; + + } /* end XPath expression loop */ + + if(rdf_parser->failed) { + ret = 1; + goto tidy; + } + + + /* Process this document's content buffer as RDF/XML */ + if(grddl_parser->process_this_as_rdfxml && buffer) { + RAPTOR_DEBUG3("Parser %p: Running additional RDF/XML parse on root document URI '%s' content\n", + RAPTOR_VOIDP(rdf_parser), + raptor_uri_as_string(rdf_parser->base_uri)); + + if(raptor_grddl_ensure_internal_parser(rdf_parser, "rdfxml", 0)) + ret = 1; + 
else { + if(raptor_parser_parse_start(grddl_parser->internal_parser, + rdf_parser->base_uri)) + ret = 1; + else { + ret = raptor_parser_parse_chunk(grddl_parser->internal_parser, buffer, + buffer_len, 1); + } + } + + } + + + /* Apply all transformation URIs seen */ + transform: + while(raptor_sequence_size(grddl_parser->doc_transform_uris)) { + grddl_xml_context* xml_context; + + xml_context = (grddl_xml_context*)raptor_sequence_unshift(grddl_parser->doc_transform_uris); + ret = raptor_grddl_run_grddl_transform_uri(rdf_parser, xml_context, doc); + grddl_free_xml_context(xml_context); + if(ret) + break; + } + + if(rdf_parser->emitted_default_graph) { + /* May or may not have generated statements but we must close the + * start default graph mark above + */ + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + + + tidy: + if(buffer) { + if(buffer_is_libxml) + xmlFree((xmlChar*)buffer); + else + RAPTOR_FREE(char*, buffer); + } + + if(grddl_parser->sb) { + raptor_free_stringbuffer(grddl_parser->sb); + grddl_parser->sb = NULL; + } + + if(grddl_parser->xml_ctxt) { + if(grddl_parser->xml_ctxt->myDoc) { + xmlFreeDoc(grddl_parser->xml_ctxt->myDoc); + grddl_parser->xml_ctxt->myDoc = NULL; + } + xmlFreeParserCtxt(grddl_parser->xml_ctxt); + grddl_parser->xml_ctxt = NULL; + } + if(grddl_parser->html_ctxt) { + if(grddl_parser->html_ctxt->myDoc) { + xmlFreeDoc(grddl_parser->html_ctxt->myDoc); + grddl_parser->html_ctxt->myDoc = NULL; + } + xmlFreeParserCtxt(grddl_parser->html_ctxt); + grddl_parser->html_ctxt = NULL; + } + + if(grddl_parser->xpathCtx) { + xmlXPathFreeContext(grddl_parser->xpathCtx); + grddl_parser->xpathCtx = NULL; + } + + return (ret != 0); +} + + +static int +raptor_grddl_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score = 0; + + if(suffix) { + if(!strcmp((const 
char*)suffix, "xhtml")) + score = 4; + if(!strcmp((const char*)suffix, "html")) + score = 2; + } else if(identifier) { + if(strstr((const char*)identifier, "xhtml")) + score = 4; + } + + return score; +} + + +static void +raptor_grddl_parse_content_type_handler(raptor_parser* rdf_parser, + const char* content_type) +{ + raptor_grddl_parser_context* grddl_parser; + + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + if(content_type) { + size_t len = strlen(content_type) + 1; + if(grddl_parser->content_type) + RAPTOR_FREE(char*, grddl_parser->content_type); + + grddl_parser->content_type = RAPTOR_MALLOC(char*, len + 1); + memcpy(grddl_parser->content_type, content_type, len + 1); + } +} + + + +static const char* const grddl_names[2] = { "grddl", NULL }; + +#define GRDDL_TYPES_COUNT 2 +static const raptor_type_q grddl_types[GRDDL_TYPES_COUNT + 1] = { + { "text/html", 9, 2}, + { "application/xhtml+xml", 21, 4}, + { NULL, 0, 0} +}; + +static int +raptor_grddl_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = grddl_names; + + factory->desc.mime_types = grddl_types; + + factory->desc.label = "Gleaning Resource Descriptions from Dialects of Languages"; + factory->desc.uri_strings = NULL; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_grddl_parser_context); + + factory->init = raptor_grddl_parse_init; + factory->terminate = raptor_grddl_parse_terminate; + factory->start = raptor_grddl_parse_start; + factory->chunk = raptor_grddl_parse_chunk; + factory->recognise_syntax = raptor_grddl_parse_recognise_syntax; + factory->content_type_handler= raptor_grddl_parse_content_type_handler; + + return rc; +} + + +int +raptor_init_parser_grddl_common(raptor_world* world) +{ +#ifdef HAVE_XSLTINIT + xsltInit(); +#endif + + if(!world->xslt_security_preferences && + !world->xslt_security_preferences_policy) { + xsltSecurityPrefsPtr raptor_xslt_sec = NULL; + + 
raptor_xslt_sec = xsltNewSecurityPrefs(); + + /* no read from file (read from URI with scheme = file) */ + xsltSetSecurityPrefs(raptor_xslt_sec, XSLT_SECPREF_READ_FILE, + xsltSecurityForbid); + + /* no create/write to file */ + xsltSetSecurityPrefs(raptor_xslt_sec, XSLT_SECPREF_WRITE_FILE, + xsltSecurityForbid); + + /* no create directory */ + xsltSetSecurityPrefs(raptor_xslt_sec, XSLT_SECPREF_CREATE_DIRECTORY, + xsltSecurityForbid); + + /* yes read from URI with scheme != file (XSLT_SECPREF_READ_NETWORK) */ + + /* no write to network (you can 'write' with GET params anyway) */ + xsltSetSecurityPrefs(raptor_xslt_sec, XSLT_SECPREF_WRITE_NETWORK, + xsltSecurityForbid); + + world->xslt_security_preferences = (void*)raptor_xslt_sec; + } + + return 0; +} + + +int +raptor_init_parser_grddl(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_grddl_parser_register_factory); +} + + +void +raptor_terminate_parser_grddl_common(raptor_world *world) +{ + if(world->xslt_security_preferences && + !world->xslt_security_preferences_policy) { + + /* Free the security preferences object owned by raptor world */ + xsltFreeSecurityPrefs((xsltSecurityPrefsPtr)world->xslt_security_preferences); + world->xslt_security_preferences = NULL; + } + + xsltCleanupGlobals(); +} + + + +/* + * Save libxslt global state that needs overwriting. + * + * Initialise the global state with raptor GRDDL parser values. 
+ * + * Restored by raptor_libxslt_reset_global_state() + */ +static void +raptor_libxslt_set_global_state(raptor_parser *rdf_parser) +{ + raptor_grddl_parser_context* grddl_parser; + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + /* save global (libxslt-wide) generic error handler */ + grddl_parser->saved_xsltGenericError = xsltGenericError; + grddl_parser->saved_xsltGenericErrorContext = xsltGenericErrorContext; + + /* set global (libxslt-wide) generic error handler to raptor GRDDL parser */ + xsltSetGenericErrorFunc(rdf_parser, + raptor_grddl_xsltGenericError_handler); + + /* save global (libxslt-wide) default security prefs */ + grddl_parser->saved_xsltSecurityPrefs = xsltGetDefaultSecurityPrefs(); + + if(grddl_parser->world->xslt_security_preferences && + !grddl_parser->world->xslt_security_preferences_policy) { + /* set global (libxslt-wide) security preferences to raptor */ + xsltSetDefaultSecurityPrefs((xsltSecurityPrefs*)grddl_parser->world->xslt_security_preferences); + } +} + + +/* + * Restore libxslt global state that raptor_libxslt_set_global_state() + * overwrote back to the original values. 
+ * + */ +static void +raptor_libxslt_reset_global_state(raptor_parser* rdf_parser) +{ + raptor_grddl_parser_context* grddl_parser; + grddl_parser = (raptor_grddl_parser_context*)rdf_parser->context; + + /* restore global (libxslt-wide) default security prefs */ + xsltSetDefaultSecurityPrefs(grddl_parser->saved_xsltSecurityPrefs); + + /* restore global (libxslt-wide) generic error handler */ + xsltSetGenericErrorFunc(grddl_parser->saved_xsltGenericErrorContext, + grddl_parser->saved_xsltGenericError); +} + diff --git a/src/raptor_guess.c b/src/raptor_guess.c new file mode 100644 index 0000000..0417f0f --- /dev/null +++ b/src/raptor_guess.c @@ -0,0 +1,263 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_guess.c - Raptor guessing real parser implementation + * + * Copyright (C) 2005-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * guess parser object + */ +struct raptor_guess_parser_context_s { + /* content type got from URI request */ + char* content_type; + + /* URI from start_parse */ + raptor_uri* uri; + + /* Non-0 when we need to guess */ + int do_guess; + + /* Actual parser to use */ + raptor_parser* parser; +}; + + +typedef struct raptor_guess_parser_context_s raptor_guess_parser_context; + + +static int +raptor_guess_parse_init(raptor_parser* rdf_parser, const char *name) +{ + raptor_guess_parser_context *guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + guess_parser->content_type = NULL; + + guess_parser->do_guess = 1; + + return 0; +} + + +static void +raptor_guess_parse_terminate(raptor_parser *rdf_parser) +{ + raptor_guess_parser_context *guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + + if(guess_parser->content_type) + RAPTOR_FREE(char*, guess_parser->content_type); + + if(guess_parser->parser) + raptor_free_parser(guess_parser->parser); +} + + +static void +raptor_guess_parse_content_type_handler(raptor_parser* rdf_parser, + const char* content_type) +{ + raptor_guess_parser_context* guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + + if(content_type) { + const char *p; + size_t len; + + if((p = strchr(content_type,';'))) + len = p-content_type; + else + len = strlen(content_type); + + guess_parser->content_type = RAPTOR_MALLOC(char*, len + 1); + memcpy(guess_parser->content_type, content_type, len); + guess_parser->content_type[len]='\0'; + + RAPTOR_DEBUG2("Got content type '%s'\n", guess_parser->content_type); + } +} + + +static int +raptor_guess_parse_chunk(raptor_parser* rdf_parser, 
+ const unsigned char *buffer, size_t len, + int is_end) +{ + raptor_guess_parser_context* guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + + if(guess_parser->do_guess) { + const unsigned char *identifier = NULL; + const char *name; + + guess_parser->do_guess = 0; + + if(rdf_parser->base_uri) + identifier = raptor_uri_as_string(rdf_parser->base_uri); + + name = raptor_world_guess_parser_name(rdf_parser->world, + NULL, guess_parser->content_type, + buffer, len, identifier); + if(!name) { + raptor_parser_error(rdf_parser, + "Failed to guess parser from content type '%s'", + guess_parser->content_type ? + guess_parser->content_type : "(none)"); + raptor_parser_parse_abort(rdf_parser); + if(guess_parser->parser) { + raptor_free_parser(guess_parser->parser); + guess_parser->parser = NULL; + } + return 1; + } else { + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Guessed parser name '%s'\n", name); +#endif + + /* If there is an existing guessed parser factory present and + * it's different from the wanted parser, free it + */ + if(guess_parser->parser) { + raptor_parser_factory* factory = raptor_world_get_parser_factory(rdf_parser->world, name); + + if(guess_parser->parser->factory != factory) { + raptor_free_parser(guess_parser->parser); + guess_parser->parser = NULL; + } + } + + if(!guess_parser->parser) { + guess_parser->parser = raptor_new_parser(rdf_parser->world, name); + if(!guess_parser->parser) + return 1; + } + + /* copy any user data to the grddl parser */ + if(raptor_parser_copy_user_state(guess_parser->parser, rdf_parser)) + return 1; + + if(raptor_parser_parse_start(guess_parser->parser, rdf_parser->base_uri)) + return 1; + } + } + + + /* now we can pass on calls to internal guess_parser */ + return raptor_parser_parse_chunk(guess_parser->parser, buffer, len, is_end); +} + + +static const char* +raptor_guess_accept_header(raptor_parser* rdf_parser) +{ + return raptor_parser_get_accept_header_all(rdf_parser->world); +} + 
+ +static const char* +raptor_guess_guess_get_name(raptor_parser* rdf_parser) +{ + raptor_guess_parser_context *guess_parser; + guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + + if(guess_parser) + return raptor_parser_get_name(guess_parser->parser); + else + return rdf_parser->factory->desc.names[0]; +} + + +static const raptor_syntax_description* +raptor_guess_guess_get_description(raptor_parser* rdf_parser) +{ + raptor_guess_parser_context *guess_parser; + guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + + if(guess_parser && guess_parser->parser) + return raptor_parser_get_description(guess_parser->parser); + else + return &rdf_parser->factory->desc; +} + + +static raptor_locator * +raptor_guess_guess_get_locator(raptor_parser *rdf_parser) +{ + raptor_guess_parser_context *guess_parser; + guess_parser = (raptor_guess_parser_context*)rdf_parser->context; + + if(guess_parser && guess_parser->parser) + return raptor_parser_get_locator(guess_parser->parser); + else + return &rdf_parser->locator; +} + + +static const char* const guess_names[2] = { "guess", NULL }; + +static int +raptor_guess_parser_register_factory(raptor_parser_factory *factory) +{ + factory->desc.names = guess_names; + + factory->desc.mime_types = NULL; + + factory->desc.label = "Pick the parser to use using content type and URI"; + factory->desc.uri_strings = NULL; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_guess_parser_context); + + factory->init = raptor_guess_parse_init; + factory->terminate = raptor_guess_parse_terminate; + factory->chunk = raptor_guess_parse_chunk; + factory->content_type_handler = raptor_guess_parse_content_type_handler; + factory->accept_header = raptor_guess_accept_header; + factory->get_name = raptor_guess_guess_get_name; + factory->get_description = raptor_guess_guess_get_description; + factory->get_locator = raptor_guess_guess_get_locator; + + return 0; +} + + +int 
+raptor_init_parser_guess(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_guess_parser_register_factory); +} diff --git a/src/raptor_internal.h b/src/raptor_internal.h new file mode 100644 index 0000000..e6f98e9 --- /dev/null +++ b/src/raptor_internal.h @@ -0,0 +1,1528 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_internal.h - Redland Parser Toolkit for RDF (Raptor) internals + * + * Copyright (C) 2002-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2002-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + + +#ifndef RAPTOR_INTERNAL_H +#define RAPTOR_INTERNAL_H + +#ifdef __cplusplus +extern "C" { +#define RAPTOR_EXTERN_C extern "C" +#else +#define RAPTOR_EXTERN_C +#endif + +#ifdef RAPTOR_INTERNAL + +/* for the memory allocation functions */ +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#undef HAVE_STDLIB_H +#endif + +/* Some internal functions are needed by the test programs */ +#ifndef RAPTOR_INTERNAL_API +#define RAPTOR_INTERNAL_API RAPTOR_API +#endif + +/* Can be over-ridden or undefined in a config.h file or -Ddefine */ +#ifndef RAPTOR_INLINE +#define RAPTOR_INLINE inline +#endif + +#ifdef LIBRDF_DEBUG +#define RAPTOR_DEBUG 1 +#endif + +#if defined(RAPTOR_MEMORY_SIGN) +#define RAPTOR_SIGN_KEY 0x08A61080 +void* raptor_sign_malloc(size_t size); +void* raptor_sign_calloc(size_t nmemb, size_t size); +void* raptor_sign_realloc(void *ptr, size_t size); +void raptor_sign_free(void *ptr); + +#define RAPTOR_MALLOC(type, size) (type)raptor_sign_malloc(size) +#define RAPTOR_CALLOC(type, nmemb, size) (type)raptor_sign_calloc(nmemb, size) +#define RAPTOR_REALLOC(type, ptr, size) (type)raptor_sign_realloc(ptr, size) +#define RAPTOR_FREE(type, ptr) raptor_sign_free((void*)ptr) + +#else +#define RAPTOR_MALLOC(type, size) (type)malloc(size) +#define RAPTOR_CALLOC(type, nmemb, size) (type)calloc(nmemb, size) +#define RAPTOR_REALLOC(type, ptr, size) (type)realloc(ptr, size) +#define RAPTOR_FREE(type, ptr) free((void*)ptr) + +#endif + +#ifdef HAVE___FUNCTION__ +#else +#define __FUNCTION__ "???" 
+#endif + +#ifndef RAPTOR_DEBUG_FH +#define RAPTOR_DEBUG_FH stderr +#endif + +#ifdef RAPTOR_DEBUG +/* Debugging messages */ +#define RAPTOR_DEBUG1(msg) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__); } while(0) +#define RAPTOR_DEBUG2(msg, arg1) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1);} while(0) +#define RAPTOR_DEBUG3(msg, arg1, arg2) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2);} while(0) +#define RAPTOR_DEBUG4(msg, arg1, arg2, arg3) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2, arg3);} while(0) +#define RAPTOR_DEBUG5(msg, arg1, arg2, arg3, arg4) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2, arg3, arg4);} while(0) +#define RAPTOR_DEBUG6(msg, arg1, arg2, arg3, arg4, arg5) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: " msg, __FILE__, __LINE__, __FUNCTION__, arg1, arg2, arg3, arg4, arg5);} while(0) + +#ifndef RAPTOR_ASSERT_DIE +#define RAPTOR_ASSERT_DIE(x) abort(); +#endif + +#else +/* DEBUGGING TURNED OFF */ + +/* No debugging messages */ +#define RAPTOR_DEBUG1(msg) +#define RAPTOR_DEBUG2(msg, arg1) +#define RAPTOR_DEBUG3(msg, arg1, arg2) +#define RAPTOR_DEBUG4(msg, arg1, arg2, arg3) +#define RAPTOR_DEBUG5(msg, arg1, arg2, arg3, arg4) +#define RAPTOR_DEBUG6(msg, arg1, arg2, arg3, arg4, arg5) + +#define SYSTEM_MALLOC(size) malloc(size) +#define SYSTEM_FREE(ptr) free(ptr) + +#ifndef RAPTOR_ASSERT_DIE +#define RAPTOR_ASSERT_DIE(x) x; +#endif + +#endif + + +#ifdef RAPTOR_DISABLE_ASSERT_MESSAGES +#define RAPTOR_ASSERT_REPORT(line) +#else +#define RAPTOR_ASSERT_REPORT(msg) fprintf(RAPTOR_DEBUG_FH, "%s:%d: (%s) assertion failed: " msg "\n", __FILE__, __LINE__, __FUNCTION__); +#endif + + +#ifdef RAPTOR_DISABLE_ASSERT + +#define RAPTOR_ASSERT(condition, msg) +#define RAPTOR_ASSERT_RETURN(condition, msg, ret) +#define RAPTOR_ASSERT_OBJECT_POINTER_RETURN(pointer, 
type) do { \ + if(!pointer) \ + return; \ +} while(0) +#define RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(pointer, type, ret) + +#else + +#define RAPTOR_ASSERT(condition, msg) do { \ + if(condition) { \ + RAPTOR_ASSERT_REPORT(msg) \ + RAPTOR_ASSERT_DIE(return) \ + } \ +} while(0) + +#define RAPTOR_ASSERT_RETURN(condition, msg, ret) do { \ + if(condition) { \ + RAPTOR_ASSERT_REPORT(msg) \ + RAPTOR_ASSERT_DIE(return ret) \ + } \ +} while(0) + +#define RAPTOR_ASSERT_OBJECT_POINTER_RETURN(pointer, type) do { \ + if(!pointer) { \ + RAPTOR_ASSERT_REPORT("object pointer of type " #type " is NULL.") \ + RAPTOR_ASSERT_DIE(return) \ + } \ +} while(0) + +#define RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(pointer, type, ret) do { \ + if(!pointer) { \ + RAPTOR_ASSERT_REPORT("object pointer of type " #type " is NULL.") \ + RAPTOR_ASSERT_DIE(return ret) \ + } \ +} while(0) + +#endif + +/* _Pragma() is C99 and is the only way to include pragmas since you + * cannot use #pragma in a macro + * + * #if defined __STDC_VERSION__ && (__STDC_VERSION__ >= 199901L) + * + * Valid for clang or GCC >= 4.9.0 + */ +#if defined(__clang__) || (defined(__GNUC__) && ((__GNUC__ << 16) + __GNUC_MINOR__ >= ((4) << 16) + (9))) +#define PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wformat-nonliteral\"") +#define PRAGMA_IGNORE_WARNING_LONG_LONG_START \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wlong-long\"") +#define PRAGMA_IGNORE_WARNING_END \ + _Pragma ("GCC diagnostic pop") +#else +#define PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START +#define PRAGMA_IGNORE_WARNING_LONG_LONG_STAR +#define PRAGMA_IGNORE_WARNING_END +#endif + + +/* Fatal errors - always happen */ +#define RAPTOR_FATAL1(msg) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , __FUNCTION__); abort();} while(0) +#define RAPTOR_FATAL2(msg,arg) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, 
__FILE__, __LINE__ , __FUNCTION__, arg); abort();} while(0) +#define RAPTOR_FATAL3(msg,arg1,arg2) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , __FUNCTION__, arg1, arg2); abort();} while(0) +#define RAPTOR_FATAL4(msg,arg1,arg2,arg3) do {fprintf(RAPTOR_DEBUG_FH, "%s:%d:%s: fatal error: " msg, __FILE__, __LINE__ , __FUNCTION__, arg1, arg2, arg3); abort();} while(0) + +#define MAX_ASCII_INT_SIZE 13 + +/* XML parser includes */ + +#ifdef RAPTOR_XML_LIBXML + +/* newer ICU (via libxml/encoding.h) requires C++ context */ +#ifdef __cplusplus +extern "C++" { +#endif +#include <libxml/parser.h> +#ifdef __cplusplus +} +#endif + +/* libxml-only prototypes */ + + +/* raptor_libxml.c exports */ +extern void raptor_libxml_sax_init(raptor_sax2* sax2); +extern void raptor_libxml_generic_error(void* user_data, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +extern int raptor_libxml_init(raptor_world* world); +extern void raptor_libxml_finish(raptor_world* world); + +extern void raptor_libxml_validation_error(void *context, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); +extern void raptor_libxml_validation_warning(void *context, const char *msg, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); +void raptor_libxml_free(xmlParserCtxtPtr xc); + +/* raptor_parse.c - exported to libxml part */ +extern void raptor_libxml_update_document_locator(raptor_sax2* sax2, raptor_locator* locator); + +/* end of libxml-only */ +#endif + + +typedef struct raptor_parser_factory_s raptor_parser_factory; +typedef struct raptor_serializer_factory_s raptor_serializer_factory; +typedef struct raptor_id_set_s raptor_id_set; +typedef struct raptor_uri_detail_s raptor_uri_detail; + + +/* raptor_option.c */ + +/* These are bits and may be bit-ORed */ +/** + * raptor_option_area: + * @RAPTOR_OPTION_AREA_NONE: internal + * @RAPTOR_OPTION_AREA_PARSER: #raptor_parser (public) + * @RAPTOR_OPTION_AREA_SERIALIZER: #raptor_serializer (public) + * @RAPTOR_OPTION_AREA_XML_WRITER: #raptor_xml_writer (public) + * @RAPTOR_OPTION_AREA_TURTLE_WRITER: #raptor_turtle_writer (internal) + * @RAPTOR_OPTION_AREA_SAX2: #raptor_sax2 (public) + * + * Internal - raptor option areas +*/ +typedef enum { + RAPTOR_OPTION_AREA_NONE = 0, + RAPTOR_OPTION_AREA_PARSER = 1, + RAPTOR_OPTION_AREA_SERIALIZER = 2, + RAPTOR_OPTION_AREA_XML_WRITER = 4, + RAPTOR_OPTION_AREA_TURTLE_WRITER = 8, + RAPTOR_OPTION_AREA_SAX2 = 16 +} raptor_option_area; + +typedef union +{ + char* string; + int integer; +} raptor_str_int; + +typedef struct +{ + raptor_option_area area; + raptor_str_int options[RAPTOR_OPTION_LAST+1]; +} raptor_object_options; + + +#define RAPTOR_OPTIONS_GET_NUMERIC(object, option) \ + ((object)->options.options[(int)option].integer) +#define RAPTOR_OPTIONS_GET_STRING(object, option) \ + ((object)->options.options[(int)option].string) + +#define RAPTOR_OPTIONS_SET_NUMERIC(object, option, value) do { \ + (object)->options.options[(int)option].integer = value; \ +} while(0) +#define RAPTOR_OPTIONS_SET_STRING(object, option, value) do { \ + (object)->options.options[(int)option].string = value; \ +} while(0) + +int raptor_option_value_is_numeric(const raptor_option option); +int 
raptor_option_is_valid_for_area(const raptor_option option, raptor_option_area area); + +void raptor_object_options_init(raptor_object_options* options, raptor_option_area area); +void raptor_object_options_clear(raptor_object_options* options); +int raptor_object_options_copy_state(raptor_object_options* to, raptor_object_options* from); +int raptor_object_options_get_option(raptor_object_options *options, raptor_option option, char** string_p, int* integer_p); +int raptor_object_options_set_option(raptor_object_options *options, raptor_option option, const char* string, int integer); + + + + +/* raptor_concepts.c */ + +/* + * raptor_rdf_ns_term_id: + * + * RDF namespace syntax terms, properties and classes. + * + * The order must match names in the raptor_rdf_ns_terms_info table + * + */ +typedef enum { + /* These terms are used only in the RDF/XML syntax; never in RDF graph */ + RDF_NS_RDF = 0, + RDF_NS_Description = 1, + RDF_NS_li = 2, + RDF_NS_about = 3, + RDF_NS_aboutEach = 4, + RDF_NS_aboutEachPrefix = 5, + RDF_NS_ID = 6, + RDF_NS_bagID = 7, + RDF_NS_resource = 8, + RDF_NS_parseType = 9, + RDF_NS_nodeID = 10, + RDF_NS_datatype = 11, + /* These terms are all properties in RDF model (of type rdf:Property) */ + RDF_NS_type = 12, + RDF_NS_value = 13, + RDF_NS_subject = 14, + RDF_NS_predicate = 15, + RDF_NS_object = 16, + RDF_NS_first = 17, + RDF_NS_rest = 18, + /* These terms are all classes in the RDF model (of type rdfs:Class) */ + RDF_NS_Seq = 19, + RDF_NS_Bag = 20, + RDF_NS_Alt = 21, + RDF_NS_Statement = 22, + RDF_NS_Property = 23, + RDF_NS_List = 24, + /* These terms are all resources in the RDF model (of type rdfs:Resource) */ + RDF_NS_nil = 25, + + /* These terms are datatypes (used as a literal datatype URI) */ + RDF_NS_XMLLiteral = 26, + RDF_NS_PlainLiteral = 27, /* http://www.w3.org/TR/rdf-text/ */ + /* RDF 1.1 datatypes */ + RDF_NS_HTML = 28, + RDF_NS_langString = 29, + + /* These terms are internal */ + RDF_NS_LAST_SYNTAX_TERM = RDF_NS_datatype, + + 
RDF_NS_LAST = RDF_NS_langString +} raptor_rdf_ns_term_id; + + +typedef struct { + /* term name */ + const char *name; + + /* RDF/XML: the statement object type of this when used as an attribute */ + raptor_term_type type; + + /* RDF/XML: name restrictions */ + unsigned int allowed_as_nodeElement : 1; + unsigned int allowed_as_propertyElement : 1; + unsigned int allowed_as_propertyAttribute : 1; + unsigned int allowed_unprefixed_on_attribute : 1; +} raptor_rdf_ns_term_info; + + +extern const raptor_rdf_ns_term_info raptor_rdf_ns_terms_info[(RDF_NS_LAST + 1) + 1]; + +#define RAPTOR_RDF_RDF_URI(world) world->concepts[RDF_NS_RDF] +#define RAPTOR_RDF_Description_URI(world) world->concepts[RDF_NS_Description] +#define RAPTOR_RDF_li_URI(world) world->concepts[RDF_NS_li] +#define RAPTOR_RDF_about(world) world->concepts[RDF_NS_about] +#define RAPTOR_RDF_aboutEach(world) world->concepts[RDF_NS_aboutEach] +#define RAPTOR_RDF_aboutEachPrefix(world) world->concepts[RDF_NS_aboutEachPrefix] +#define RAPTOR_RDF_ID_URI(world) world->concepts[RDF_NS_ID] +#define RAPTOR_RDF_bagID_URI(world) world->concepts[RDF_NS_bagID] +#define RAPTOR_RDF_resource_URI(world) world->concepts[RDF_NS_resource] +#define RAPTOR_RDF_parseType_URI(world) world->concepts[RDF_NS_parseType] +#define RAPTOR_RDF_nodeID_URI(world) world->concepts[RDF_NS_nodeID] +#define RAPTOR_RDF_datatype_URI(world) world->concepts[RDF_NS_datatype] + +#define RAPTOR_RDF_type_URI(world) world->concepts[RDF_NS_type] +#define RAPTOR_RDF_value_URI(world) world->concepts[RDF_NS_value] +#define RAPTOR_RDF_subject_URI(world) world->concepts[RDF_NS_subject] +#define RAPTOR_RDF_predicate_URI(world) world->concepts[RDF_NS_predicate] +#define RAPTOR_RDF_object_URI(world) world->concepts[RDF_NS_object] +#define RAPTOR_RDF_first_URI(world) world->concepts[RDF_NS_first] +#define RAPTOR_RDF_rest_URI(world) world->concepts[RDF_NS_rest] + +#define RAPTOR_RDF_Seq_URI(world) world->concepts[RDF_NS_Seq] +#define RAPTOR_RDF_Bag_URI(world) 
world->concepts[RDF_NS_Bag] +#define RAPTOR_RDF_Alt_URI(world) world->concepts[RDF_NS_Alt] +#define RAPTOR_RDF_Statement_URI(world) world->concepts[RDF_NS_Statement] +#define RAPTOR_RDF_Property_URI(world) world->concepts[RDF_NS_Property] +#define RAPTOR_RDF_List_URI(world) world->concepts[RDF_NS_List] + +#define RAPTOR_RDF_nil_URI(world) world->concepts[RDF_NS_nil] +#define RAPTOR_RDF_XMLLiteral_URI(world) world->concepts[RDF_NS_XMLLiteral] +#define RAPTOR_RDF_PlainLiteral_URI(world) world->concepts[RDF_NS_PlainLiteral] + + +/* syntax only (RDF:RDF ... RDF:datatype) are not provided as terms */ + +#define RAPTOR_RDF_type_term(world) world->terms[RDF_NS_type] +#define RAPTOR_RDF_value_term(world) world->terms[RDF_NS_value] +#define RAPTOR_RDF_subject_term(world) world->terms[RDF_NS_subject] +#define RAPTOR_RDF_predicate_term(world) world->terms[RDF_NS_predicate] +#define RAPTOR_RDF_object_term(world) world->terms[RDF_NS_object] +#define RAPTOR_RDF_first_term(world) world->terms[RDF_NS_first] +#define RAPTOR_RDF_rest_term(world) world->terms[RDF_NS_rest] + +#define RAPTOR_RDF_Seq_term(world) world->terms[RDF_NS_Seq] +#define RAPTOR_RDF_Bag_term(world) world->terms[RDF_NS_Bag] +#define RAPTOR_RDF_Alt_term(world) world->terms[RDF_NS_Alt] +#define RAPTOR_RDF_Statement_term(world) world->terms[RDF_NS_Statement] +#define RAPTOR_RDF_Property_term(world) world->terms[RDF_NS_Property] +#define RAPTOR_RDF_List_term(world) world->terms[RDF_NS_List] + +#define RAPTOR_RDF_nil_term(world) world->terms[RDF_NS_nil] +#define RAPTOR_RDF_XMLLiteral_term(world) world->terms[RDF_NS_XMLLiteral] +#define RAPTOR_RDF_PlainLiteral_term(world) world->terms[RDF_NS_PlainLiteral] + + +int raptor_concepts_init(raptor_world* world); +void raptor_concepts_finish(raptor_world* world); + + + +/* raptor_iostream.c */ +raptor_world* raptor_iostream_get_world(raptor_iostream *iostr); + + +/* Raptor Namespace Stack node */ +struct raptor_namespace_stack_s { + raptor_world* world; + int size; + + int 
table_size; + raptor_namespace** table; + raptor_namespace* def_namespace; + + raptor_uri *rdf_ms_uri; + raptor_uri *rdf_schema_uri; +}; + + +/* Forms: + * 1) prefix=NULL uri=<URI> - default namespace defined + * 2) prefix=NULL, uri=NULL - no default namespace + * 3) prefix=<prefix>, uri=<URI> - regular pair defined <prefix>:<URI> + */ +struct raptor_namespace_s { + /* next down the stack, NULL at bottom */ + struct raptor_namespace_s* next; + + raptor_namespace_stack *nstack; + + /* NULL means this is the default namespace */ + const unsigned char *prefix; + /* needed to safely compare prefixed-names */ + unsigned int prefix_length; + /* URI of namespace or NULL for default */ + raptor_uri *uri; + /* parsing depth at which this ns was added. It will + * be deleted when the parser leaves this depth + */ + int depth; + /* Non 0 if is xml: prefixed name */ + int is_xml; + /* Non 0 if is RDF M&S Namespace */ + int is_rdf_ms; + /* Non 0 if is RDF Schema Namespace */ + int is_rdf_schema; +}; + +raptor_namespace** raptor_namespace_stack_to_array(raptor_namespace_stack *nstack, size_t *size_p); + +#ifdef RAPTOR_XML_LIBXML +#define RAPTOR_LIBXML_MAGIC 0x8AF108 +#endif + + +/* Size of buffer to use when reading from a file */ +#if defined(BUFSIZ) && BUFSIZ > 4096 +#define RAPTOR_READ_BUFFER_SIZE BUFSIZ +#else +#define RAPTOR_READ_BUFFER_SIZE 4096 +#endif + + +/* + * Raptor parser object + */ +struct raptor_parser_s { + raptor_world* world; + +#ifdef RAPTOR_XML_LIBXML + int magic; +#endif + + /* can be filled with error location information */ + raptor_locator locator; + + /* non-0 if parser had fatal error and cannot continue */ + unsigned int failed : 1; + + /* non-0 to enable emitting graph marks (default set). Intended + * for use by the GRDDL parser on its child parsers to prevent + * multiple start/end marks on the default graph. 
+ */ + unsigned int emit_graph_marks : 1; + + /* non-0 if have emitted start default graph mark */ + unsigned int emitted_default_graph : 1; + + /* generated ID counter */ + int genid; + + /* base URI of RDF/XML */ + raptor_uri *base_uri; + + /* static statement for use in passing to user code */ + raptor_statement statement; + + /* Options (per-object) */ + raptor_object_options options; + + /* stuff for our user */ + void *user_data; + + /* parser callbacks */ + raptor_statement_handler statement_handler; + + raptor_graph_mark_handler graph_mark_handler; + + void* uri_filter_user_data; + raptor_uri_filter_func uri_filter; + + /* parser specific stuff */ + void *context; + + struct raptor_parser_factory_s* factory; + + /* namespace callback */ + raptor_namespace_handler namespace_handler; + + void* namespace_handler_user_data; + + raptor_stringbuffer* sb; + + /* raptor_www pointer stored here to allow cleanup on error */ + raptor_www* www; + + /* internal data for lexers */ + void* lexer_user_data; + + /* internal read buffer */ + unsigned char buffer[RAPTOR_READ_BUFFER_SIZE + 1]; +}; + + +/** A Parser Factory */ +struct raptor_parser_factory_s { + raptor_world* world; + + struct raptor_parser_factory_s* next; + + /* the rest of this structure is populated by the + parser-specific register function */ + + size_t context_length; + + /* static desc that the parser registration initialises */ + raptor_syntax_description desc; + + /* create a new parser */ + int (*init)(raptor_parser* parser, const char *name); + + /* destroy a parser */ + void (*terminate)(raptor_parser* parser); + + /* start a parse */ + int (*start)(raptor_parser* parser); + + /* parse a chunk of memory */ + int (*chunk)(raptor_parser* parser, const unsigned char *buffer, size_t len, int is_end); + + /* finish the parser factory */ + void (*finish_factory)(raptor_parser_factory* factory); + + /* score recognition of the syntax by a block of characters, the + * content identifier or its suffix or a 
mime type + * (different from the factory-registered one) + */ + int (*recognise_syntax)(raptor_parser_factory* factory, const unsigned char *buffer, size_t len, const unsigned char *identifier, const unsigned char *suffix, const char *mime_type); + + /* get the Content-Type value of a URI request */ + void (*content_type_handler)(raptor_parser* rdf_parser, const char* content_type); + + /* get the Accept header of a URI request (OPTIONAL) */ + const char* (*accept_header)(raptor_parser* rdf_parser); + + /* get the name (OPTIONAL) */ + const char* (*get_name)(raptor_parser* rdf_parser); + + /* get the description (OPTIONAL) */ + const raptor_syntax_description* (*get_description)(raptor_parser* rdf_parser); + + /* get the current graph (OPTIONAL) - if not implemented, the current graph is always the default (NULL) and start/end graph marks are synthesised */ + raptor_uri* (*get_graph)(raptor_parser* rdf_parser); + + /* get the locator (OPTIONAL) */ + raptor_locator* (*get_locator)(raptor_parser* rdf_parser); +}; + + +/* + * Raptor serializer object + */ +struct raptor_serializer_s { + raptor_world* world; + + /* can be filled with error location information */ + raptor_locator locator; + + /* non 0 if serializer had fatal error and cannot continue */ + int failed; + + /* base URI of RDF/XML */ + raptor_uri *base_uri; + + /* serializer specific stuff */ + void *context; + + /* destination stream for the serialization */ + raptor_iostream *iostream; + + /* if true, iostream was made here so free it */ + int free_iostream_on_end; + + struct raptor_serializer_factory_s* factory; + + /* Options (per-object) */ + raptor_object_options options; +}; + + +/** A Serializer Factory for a syntax */ +struct raptor_serializer_factory_s { + raptor_world* world; + + struct raptor_serializer_factory_s* next; + + /* the rest of this structure is populated by the + serializer-specific register function */ + size_t context_length; + + /* static desc that the parser registration 
initialises */ + raptor_syntax_description desc; + + /* create a new serializer */ + int (*init)(raptor_serializer* serializer, const char *name); + + /* destroy a serializer */ + void (*terminate)(raptor_serializer* serializer); + + /* add a namespace */ + int (*declare_namespace)(raptor_serializer* serializer, raptor_uri *uri, const unsigned char *prefix); + + /* start a serialization */ + int (*serialize_start)(raptor_serializer* serializer); + + /* serialize a statement */ + int (*serialize_statement)(raptor_serializer* serializer, raptor_statement *statment); + + /* end a serialization */ + int (*serialize_end)(raptor_serializer* serializer); + + /* finish the serializer factory */ + void (*finish_factory)(raptor_serializer_factory* factory); + + /* add a namespace using an existing namespace */ + int (*declare_namespace_from_namespace)(raptor_serializer* serializer, raptor_namespace *nspace); + + /* flush current serialization state */ + int (*serialize_flush)(raptor_serializer* serializer); +}; + + +/* for raptor_parser_parse_uri_write_bytes() when used as a handler for + * raptor_www_set_write_bytes_handler() + */ +typedef struct +{ + raptor_parser* rdf_parser; + raptor_uri* base_uri; + raptor_uri* final_uri; + int started; +} raptor_parse_bytes_context; + + +/* raptor_serialize.c */ +raptor_serializer_factory* raptor_serializer_register_factory(raptor_world* world, int (*factory) (raptor_serializer_factory*)); + + +/* raptor_general.c */ + +raptor_parser_factory* raptor_world_register_parser_factory(raptor_world* world, int (*factory) (raptor_parser_factory*)); +int raptor_parser_factory_add_mime_type(raptor_parser_factory* factory, const char* mime_type, int q); + +unsigned char* raptor_world_internal_generate_id(raptor_world *world, unsigned char *user_bnodeid); + +#ifdef RAPTOR_DEBUG +void raptor_stats_print(raptor_parser *rdf_parser, FILE *stream); +#endif +RAPTOR_INTERNAL_API const char* raptor_basename(const char *name); +int 
raptor_term_print_as_ntriples(const raptor_term *term, FILE* stream); + +/* raptor_ntriples.c */ +size_t raptor_ntriples_parse_term(raptor_world* world, raptor_locator* locator, unsigned char *string, size_t *len_p, raptor_term** term_p, int allow_turtle); + +/* raptor_parse.c */ +raptor_parser_factory* raptor_world_get_parser_factory(raptor_world* world, const char *name); +void raptor_delete_parser_factories(void); +RAPTOR_INTERNAL_API const char* raptor_parser_get_accept_header_all(raptor_world* world); +int raptor_parser_set_uri_filter_no_net(void *user_data, raptor_uri* uri); +void raptor_parser_parse_uri_write_bytes(raptor_www* www, void *userdata, const void *ptr, size_t size, size_t nmemb); +void raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); +void raptor_parser_error(raptor_parser* parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); +RAPTOR_INTERNAL_API void raptor_parser_log_error(raptor_parser* parser, raptor_log_level level, const char *message, ...) RAPTOR_PRINTF_FORMAT(3, 4); +RAPTOR_INTERNAL_API void raptor_parser_log_error_varargs(raptor_parser* parser, raptor_log_level level, const char *message, va_list arguments) RAPTOR_PRINTF_FORMAT(3, 0); +void raptor_parser_warning(raptor_parser* parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +/* logging */ +void raptor_world_internal_set_ignore_errors(raptor_world* world, int flag); +void raptor_log_error_varargs(raptor_world* world, raptor_log_level level, raptor_locator* locator, const char* message, va_list arguments) RAPTOR_PRINTF_FORMAT(4, 0); +RAPTOR_INTERNAL_API void raptor_log_error_formatted(raptor_world* world, raptor_log_level level, raptor_locator* locator, const char* message, ...) 
RAPTOR_PRINTF_FORMAT(4, 5); +void raptor_log_error(raptor_world* world, raptor_log_level level, raptor_locator* locator, const char* message); + + +/* raptor_parse.c */ + +typedef struct raptor_rdfxml_parser_s raptor_rdfxml_parser; + +/* Prototypes for common libxml parsing event-handling functions */ +extern void raptor_xml_start_element_handler(void *user_data, const unsigned char *name, const unsigned char **atts); +extern void raptor_xml_end_element_handler(void *user_data, const unsigned char *name); +/* s is not 0 terminated. */ +extern void raptor_xml_characters_handler(void *user_data, const unsigned char *s, int len); +extern void raptor_xml_cdata_handler(void *user_data, const unsigned char *s, int len); +void raptor_xml_comment_handler(void *user_data, const unsigned char *s); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +void raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser, FILE *stream); +#endif + +void raptor_parser_copy_flags_state(raptor_parser *to_parser, raptor_parser *from_parser); +int raptor_parser_copy_user_state(raptor_parser *to_parser, raptor_parser *from_parser); + +/* raptor_general.c */ +extern int raptor_valid_xml_ID(raptor_parser *rdf_parser, const unsigned char *string); +int raptor_check_ordinal(const unsigned char *name); + +/* raptor_locator.c */ + + +#ifdef HAVE_STRCASECMP +#define raptor_strcasecmp strcasecmp +#define raptor_strncasecmp strncasecmp +#else +#ifdef HAVE_STRICMP +#define raptor_strcasecmp stricmp +#define raptor_strncasecmp strnicmp +#endif +#endif + +/* raptor_nfc_icu.c */ +int raptor_nfc_icu_check (const unsigned char* string, size_t len); + + +/* raptor_namespace.c */ + +#ifdef RAPTOR_DEBUG +void raptor_namespace_print(FILE *stream, raptor_namespace* ns); +#endif + +void raptor_parser_start_namespace(raptor_parser* rdf_parser, raptor_namespace* nspace); + + +/* + * Raptor XML-namespace qualified name (qname), for elements or attributes + * + * namespace is only defined when the XML name 
has a namespace and + * only then is uri also given. + */ +struct raptor_qname_s { + raptor_world* world; + /* Name - always present */ + const unsigned char *local_name; + int local_name_length; + /* Namespace or NULL if not in a namespace */ + const raptor_namespace *nspace; + /* URI of namespace+local_name or NULL if not defined */ + raptor_uri *uri; + /* optional value - used when name is an attribute */ + const unsigned char *value; + size_t value_length; +}; + + + +/* raptor_qname.c */ +#ifdef RAPTOR_DEBUG +void raptor_qname_print(FILE *stream, raptor_qname* name); +#endif + + +/* raptor_uri.c */ + +int raptor_uri_init(raptor_world* world); +void raptor_uri_finish(raptor_world* world); +raptor_uri* raptor_new_uri_from_rdf_ordinal(raptor_world* world, int ordinal); +size_t raptor_uri_normalize_path(unsigned char* path_buffer, size_t path_len); + +/* parsers */ +int raptor_init_parser_rdfxml(raptor_world* world); +int raptor_init_parser_ntriples(raptor_world* world); +int raptor_init_parser_turtle(raptor_world* world); +int raptor_init_parser_trig(raptor_world* world); +int raptor_init_parser_n3(raptor_world* world); +int raptor_init_parser_grddl_common(raptor_world* world); +int raptor_init_parser_grddl(raptor_world* world); +int raptor_init_parser_guess(raptor_world* world); +int raptor_init_parser_rss(raptor_world* world); +int raptor_init_parser_rdfa(raptor_world* world); +int raptor_init_parser_json(raptor_world* world); +int raptor_init_parser_nquads(raptor_world* world); + +void raptor_terminate_parser_grddl_common(raptor_world *world); + +#ifdef RAPTOR_PARSER_RDFA +#define rdfa_add_item raptor_librdfa_rdfa_add_item +#define rdfa_append_to_list_mapping raptor_librdfa_rdfa_append_to_list_mapping +#define rdfa_canonicalize_string raptor_librdfa_rdfa_canonicalize_string +#define rdfa_complete_current_property_value_triples raptor_librdfa_rdfa_complete_current_property_value_triples +#define rdfa_complete_incomplete_triples 
raptor_librdfa_rdfa_complete_incomplete_triples +#define rdfa_complete_list_triples raptor_librdfa_rdfa_complete_list_triples +#define rdfa_complete_object_literal_triples raptor_librdfa_rdfa_complete_object_literal_triples +#define rdfa_complete_relrev_triples raptor_librdfa_rdfa_complete_relrev_triples +#define rdfa_complete_type_triples raptor_librdfa_rdfa_complete_type_triples +#define rdfa_copy_list raptor_librdfa_rdfa_copy_list +#define rdfa_copy_mapping raptor_librdfa_rdfa_copy_mapping +#define rdfa_create_bnode raptor_librdfa_rdfa_create_bnode +#define rdfa_create_context raptor_librdfa_rdfa_create_context +#define rdfa_create_list raptor_librdfa_rdfa_create_list +#define rdfa_create_list_mapping raptor_librdfa_rdfa_create_list_mapping +#define rdfa_create_mapping raptor_librdfa_rdfa_create_mapping +#define rdfa_create_new_element_context raptor_librdfa_rdfa_create_new_element_context +#define rdfa_create_triple raptor_librdfa_rdfa_create_triple +#define rdfa_establish_new_1_0_subject raptor_librdfa_rdfa_establish_new_1_0_subject +#define rdfa_establish_new_1_0_subject_with_relrev raptor_librdfa_ablish_new_1_0_subject_with_relrev +#define rdfa_establish_new_1_1_subject raptor_librdfa_ablish_new_1_1_subject +#define rdfa_establish_new_1_1_subject_with_relrev raptor_librdfa_ablish_new_1_1_subject_with_relrev +#define rdfa_establish_new_inlist_triples raptor_librdfa_ablish_new_inlist_triples +#define rdfa_free_context raptor_librdfa_free_context +#define rdfa_free_context_stack raptor_librdfa_free_context_stack +#define rdfa_free_list raptor_librdfa_rdfa_free_list +#define rdfa_free_mapping raptor_librdfa_rdfa_free_mapping +#define rdfa_free_triple raptor_librdfa_rdfa_free_triple +#define rdfa_get_buffer raptor_librdfa_rdfa_get_buffer +#define rdfa_get_curie_type raptor_librdfa_rdfa_get_curie_type +#define rdfa_get_list_mapping raptor_librdfa_rdfa_get_list_mapping +#define rdfa_get_mapping raptor_librdfa_rdfa_get_mapping +#define rdfa_init_base 
raptor_librdfa_rdfa_init_base +#define rdfa_init_context raptor_librdfa_rdfa_init_context +#define rdfa_iri_get_base raptor_librdfa_rdfa_iri_get_base +#define rdfa_join_string raptor_librdfa_rdfa_join_string +#define rdfa_n_append_string raptor_librdfa_rdfa_n_append_string +#define rdfa_names raptor_librdfa_rdfa_names +#define rdfa_next_mapping raptor_librdfa_rdfa_next_mapping +#define rdfa_parse raptor_librdfa_rdfa_parse +#define rdfa_parse_buffer raptor_librdfa_rdfa_parse_buffer +#define rdfa_parse_chunk raptor_librdfa_rdfa_parse_chunk +#define rdfa_parse_end raptor_librdfa_rdfa_parse_end +#define rdfa_parse_start raptor_librdfa_rdfa_parse_start +#define rdfa_pop_item raptor_librdfa_rdfa_pop_item +#define rdfa_print_list raptor_librdfa_rdfa_print_list +#define rdfa_print_mapping raptor_librdfa_rdfa_print_mapping +#define rdfa_print_string raptor_librdfa_rdfa_print_string +#define rdfa_print_triple raptor_librdfa_rdfa_print_triple +#define rdfa_print_triple_list raptor_librdfa_rdfa_print_triple_list +#define rdfa_push_item raptor_librdfa_rdfa_push_item +#define rdfa_replace_list raptor_librdfa_rdfa_replace_list +#define rdfa_replace_string raptor_librdfa_rdfa_replace_string +#define rdfa_resolve_curie raptor_librdfa_rdfa_resolve_curie +#define rdfa_resolve_curie_list raptor_librdfa_rdfa_resolve_curie_list +#define rdfa_resolve_relrev_curie raptor_librdfa_rdfa_resolve_relrev_curie +#define rdfa_resolve_uri raptor_librdfa_rdfa_resolve_uri +#define rdfa_save_incomplete_list_triples raptor_librdfa_rdfa_save_incomplete_list_triples +#define rdfa_save_incomplete_triples raptor_librdfa_rdfa_save_incomplete_triples +#define rdfa_set_buffer_filler raptor_librdfa_rdfa_set_buffer_filler +#define rdfa_set_default_graph_triple_handler raptor_librdfa_rdfa_set_default_graph_triple_handler +#define rdfa_set_processor_graph_triple_handler raptor_librdfa_rdfa_set_processor_graph_triple_handler +#define rdfa_setup_initial_context raptor_librdfa_rdfa_setup_initial_context +#define 
rdfa_update_language raptor_librdfa_rdfa_update_language +#define rdfa_update_mapping raptor_librdfa_rdfa_update_mapping +#define rdfa_update_uri_mappings raptor_librdfa_rdfa_update_uri_mappings +#define rdfa_uri_strings raptor_librdfa_rdfa_uri_strings +#endif + +/* raptor_parse.c */ +int raptor_parsers_init(raptor_world* world); +void raptor_parsers_finish(raptor_world *world); + +void raptor_parser_save_content(raptor_parser* rdf_parser, int save); +const unsigned char* raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p); +void raptor_parser_start_graph(raptor_parser* parser, raptor_uri* uri, int is_declared); +void raptor_parser_end_graph(raptor_parser* parser, raptor_uri* uri, int is_declared); + +/* raptor_rss.c */ +int raptor_init_serializer_rss10(raptor_world* world); +int raptor_init_serializer_atom(raptor_world* world); + +extern const unsigned char * const raptor_atom_namespace_uri; + +/* raptor_rfc2396.c */ +RAPTOR_INTERNAL_API raptor_uri_detail* raptor_new_uri_detail(const unsigned char *uri_string); +RAPTOR_INTERNAL_API void raptor_free_uri_detail(raptor_uri_detail* uri_detail); +unsigned char* raptor_uri_detail_to_string(raptor_uri_detail *ud, size_t* len_p); + +/* serializers */ +/* raptor_serializer.c */ +int raptor_serializers_init(raptor_world* world); +void raptor_serializers_finish(raptor_world* world); + +/* raptor_serializer_dot.c */ +int raptor_init_serializer_dot(raptor_world* world); + +/* raptor_serializer_ntriples.c */ +int raptor_init_serializer_ntriples(raptor_world* world); +int raptor_init_serializer_nquads(raptor_world* world); + +/* raptor_serialize_rdfxml.c */ +int raptor_init_serializer_rdfxml(raptor_world* world); + +/* raptor_serialize_rdfxmla.c */ +int raptor_init_serializer_rdfxmla(raptor_world* world); + +/* raptor_serialize_turtle.c */ +int raptor_init_serializer_turtle(raptor_world* world); +int raptor_init_serializer_mkr(raptor_world* world); + +/* raptor_serialize_html.c */ +int 
raptor_init_serializer_html(raptor_world* world); + +/* raptor_serialize_json.c */ +int raptor_init_serializer_json(raptor_world* world); + +/* raptor_unicode.c */ +extern const raptor_unichar raptor_unicode_max_codepoint; + +int raptor_unicode_is_namestartchar(raptor_unichar c); +int raptor_unicode_is_namechar(raptor_unichar c); +int raptor_unicode_check_utf8_nfc_string(const unsigned char *input, size_t length); + +/* raptor_www*.c */ +#ifdef RAPTOR_WWW_LIBXML +#include <libxml/parser.h> +#include <libxml/xmlerror.h> +#include <libxml/nanohttp.h> +#endif + +#ifdef RAPTOR_WWW_LIBCURL +#include <curl/curl.h> +#include <curl/easy.h> +#endif + +/* Size of buffer used in various raptor_www places for I/O */ +#ifndef RAPTOR_WWW_BUFFER_SIZE +#define RAPTOR_WWW_BUFFER_SIZE 4096 +#endif + +/* WWW library state */ +struct raptor_www_s { + raptor_world* world; + char *type; + int free_type; + size_t total_bytes; + int failed; + int status_code; + + raptor_uri *uri; + +#ifdef RAPTOR_WWW_LIBCURL + CURL* curl_handle; + char error_buffer[CURL_ERROR_SIZE]; + int curl_init_here; + int checked_status; +#endif + +#ifdef RAPTOR_WWW_LIBXML + void *ctxt; + int is_end; + void *old_xmlGenericErrorContext; +#endif + + char buffer[RAPTOR_WWW_BUFFER_SIZE + 1]; + + char *user_agent; + + /* proxy URL string or NULL for none */ + char *proxy; + + void *write_bytes_userdata; + raptor_www_write_bytes_handler write_bytes; + void *content_type_userdata; + raptor_www_content_type_handler content_type; + + void* uri_filter_user_data; + raptor_uri_filter_func uri_filter; + + /* can be filled with error location information */ + raptor_locator locator; + + char *http_accept; + + FILE* handle; + + int connection_timeout; + + /* The URI returned after any redirections */ + raptor_uri* final_uri; + + void *final_uri_userdata; + raptor_www_final_uri_handler final_uri_handler; + + char* cache_control; +}; + + + +/* internal */ +void raptor_www_libxml_init(raptor_www *www); +void 
raptor_www_libxml_free(raptor_www *www); +int raptor_www_libxml_fetch(raptor_www *www); + +void raptor_www_error(raptor_www *www, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +int raptor_www_curl_init(raptor_www *www); +void raptor_www_curl_free(raptor_www *www); +int raptor_www_curl_fetch(raptor_www *www); +int raptor_www_curl_set_ssl_cert_options(raptor_www* www, const char* cert_filename, const char* cert_type, const char* cert_passphrase); +int raptor_www_curl_set_ssl_verify_options(raptor_www* www, int verify_peer, int verify_host); + +void raptor_www_libfetch_init(raptor_www *www); +void raptor_www_libfetch_free(raptor_www *www); +int raptor_www_libfetch_fetch(raptor_www *www); + +/* raptor_set.c */ +RAPTOR_INTERNAL_API raptor_id_set* raptor_new_id_set(raptor_world* world); +RAPTOR_INTERNAL_API void raptor_free_id_set(raptor_id_set* set); +RAPTOR_INTERNAL_API int raptor_id_set_add(raptor_id_set* set, raptor_uri* base_uri, const unsigned char *item, size_t item_len); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +void raptor_id_set_stats_print(raptor_id_set* set, FILE *stream); +#endif + +/* raptor_sax2.c */ +/* + * SAX2 elements/attributes on stack + */ +struct raptor_xml_element_s { + /* NULL at bottom of stack */ + struct raptor_xml_element_s *parent; + raptor_qname *name; + raptor_qname **attributes; + unsigned int attribute_count; + + /* value of xml:lang attribute on this element or NULL */ + const unsigned char *xml_language; + + /* URI of xml:base attribute value on this element or NULL */ + raptor_uri *base_uri; + + /* CDATA content of element and checks for mixed content */ + raptor_stringbuffer* content_cdata_sb; + unsigned int content_cdata_length; + /* how many cdata blocks seen */ + unsigned int content_cdata_seen; + /* how many contained elements seen */ + unsigned int content_element_seen; + + raptor_sequence *declared_nspaces; + + void* user_data; +}; + + +struct raptor_sax2_s { +#ifdef RAPTOR_XML_LIBXML + int magic; +#endif + 
raptor_world* world; + void* user_data; + +#ifdef RAPTOR_XML_LIBXML + /* structure holding sax event handlers */ + xmlSAXHandler sax; + /* parser context */ + xmlParserCtxtPtr xc; + /* pointer to SAX document locator */ + xmlSAXLocatorPtr loc; + +#if LIBXML_VERSION < 20425 + /* flag for some libxml versions */ + int first_read; +#endif + +#endif + + /* element depth */ + int depth; + + /* stack of elements - elements are added after current_element */ + raptor_xml_element *root_element; + raptor_xml_element *current_element; + + /* start of an element */ + raptor_sax2_start_element_handler start_element_handler; + /* end of an element */ + raptor_sax2_end_element_handler end_element_handler; + /* characters */ + raptor_sax2_characters_handler characters_handler; + /* like <![CDATA[...]]> */ + raptor_sax2_cdata_handler cdata_handler; + /* comment */ + raptor_sax2_comment_handler comment_handler; + /* unparsed (NDATA) entity */ + raptor_sax2_unparsed_entity_decl_handler unparsed_entity_decl_handler; + /* external entity reference */ + raptor_sax2_external_entity_ref_handler external_entity_ref_handler; + + raptor_locator *locator; + + /* New XML namespace callback */ + raptor_namespace_handler namespace_handler; + + raptor_object_options options; + + /* stack of namespaces, most recently added at top */ + raptor_namespace_stack namespaces; /* static */ + + /* base URI for resolving relative URIs or xml:base URIs */ + raptor_uri* base_uri; + + /* sax2 init failed - do not try to do anything with it */ + int failed; + + /* call SAX2 handlers if non-0 */ + int enabled; + + void* uri_filter_user_data; + raptor_uri_filter_func uri_filter; +}; + +int raptor_sax2_init(raptor_world* world); +void raptor_sax2_finish(raptor_world* world); + + +raptor_xml_element* raptor_xml_element_pop(raptor_sax2* sax2); +void raptor_xml_element_push(raptor_sax2* sax2, raptor_xml_element* element); +int raptor_sax2_get_depth(raptor_sax2* sax2); +void raptor_sax2_inc_depth(raptor_sax2* sax2); +void 
raptor_sax2_dec_depth(raptor_sax2* sax2); +void raptor_sax2_update_document_locator(raptor_sax2* sax2, raptor_locator* locator); +int raptor_sax2_set_option(raptor_sax2 *sax2, raptor_option option, char* string, int integer); + +#ifdef RAPTOR_DEBUG +void raptor_print_xml_element(raptor_xml_element *element, FILE* stream); +#endif + +void raptor_sax2_start_element(void* user_data, const unsigned char *name, const unsigned char **atts); +void raptor_sax2_end_element(void* user_data, const unsigned char *name); +void raptor_sax2_characters(void* user_data, const unsigned char *s, int len); +void raptor_sax2_cdata(void* user_data, const unsigned char *s, int len); +void raptor_sax2_comment(void* user_data, const unsigned char *s); +void raptor_sax2_unparsed_entity_decl(void* user_data, const unsigned char* entityName, const unsigned char* base, const unsigned char* systemId, const unsigned char* publicId, const unsigned char* notationName); +int raptor_sax2_external_entity_ref(void* user_data, const unsigned char* context, const unsigned char* base, const unsigned char* systemId, const unsigned char* publicId); +int raptor_sax2_check_load_uri_string(raptor_sax2* sax2, const unsigned char* uri_string); + +/* turtle_parser.y and turtle_lexer.l */ +typedef struct raptor_turtle_parser_s raptor_turtle_parser; + +/* n3_parser.y and n3_lexer.l */ +typedef struct raptor_n3_parser_s raptor_n3_parser; + +/* raptor_rfc2396.c */ +struct raptor_uri_detail_s +{ + size_t uri_len; + /* buffer is the same size as the original uri_len */ + unsigned char *buffer; + + /* URI Components. 
These all point into buffer */ + unsigned char *scheme; + unsigned char *authority; + unsigned char *path; + unsigned char *query; + unsigned char *fragment; + + /* Lengths of the URI Components */ + size_t scheme_len; + size_t authority_len; + size_t path_len; + size_t query_len; + size_t fragment_len; + + /* Flags */ + int is_hierarchical; +}; + + +/* for time_t */ +#ifdef HAVE_SYS_TIME_H +#include <sys/time.h> +#endif +#ifdef HAVE_TIME_H +#include <time.h> +#endif + +/* parsedate.c */ +#ifdef HAVE_INN_PARSEDATE +#include <libinn.h> +#define RAPTOR_PARSEDATE_FUNCTION parsedate +#else +#ifdef HAVE_RAPTOR_PARSE_DATE +time_t raptor_parse_date(const char *p, time_t *now); +#define RAPTOR_PARSEDATE_FUNCTION raptor_parse_date +#else +#ifdef HAVE_CURL_CURL_H +#include <curl/curl.h> +#define RAPTOR_PARSEDATE_FUNCTION curl_getdate +#endif +#endif +#endif + +/* only used internally now */ +typedef void (*raptor_simple_message_handler)(void *user_data, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); + + +/* turtle_common.c */ +RAPTOR_INTERNAL_API int raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer, const unsigned char *text, size_t len, int delim, raptor_simple_message_handler error_handler, void *error_data, int is_uri); + + +/* raptor_abbrev.c */ + +typedef struct { + raptor_world* world; + int ref_count; /* count of references to this node */ + int count_as_subject; /* count of this blank/resource node as subject */ + int count_as_object; /* count of this blank/resource node as object */ + + raptor_term* term; +} raptor_abbrev_node; + +#ifdef RAPTOR_DEBUG +#define RAPTOR_DEBUG_ABBREV_NODE(label, node) \ + do { \ + RAPTOR_DEBUG1(label " "); \ + raptor_term_print_as_ntriples(node->term, RAPTOR_DEBUG_FH); \ + fprintf(RAPTOR_DEBUG_FH, " (refcount %d subject %d object %d)\n", \ + node->ref_count, \ + node->count_as_subject, \ + node->count_as_object); \ + } while(0) +#else +#define RAPTOR_DEBUG_ABBREV_NODE(label, node) +#endif + +typedef 
struct { + raptor_abbrev_node* node; /* node representing the subject of + * this resource */ + raptor_abbrev_node* node_type; /* the rdf:type of this resource */ + raptor_avltree *properties; /* list of properties + * (predicate/object pair) of this + * subject */ + raptor_sequence *list_items; /* list of container elements if + * is rdf container */ + int valid; /* set 0 for blank nodes that do not + * need to be referred to again */ +} raptor_abbrev_subject; + + +raptor_abbrev_node* raptor_new_abbrev_node(raptor_world* world, raptor_term* term); +void raptor_free_abbrev_node(raptor_abbrev_node* node); +int raptor_abbrev_node_compare(raptor_abbrev_node* node1, raptor_abbrev_node* node2); +int raptor_abbrev_node_equals(raptor_abbrev_node* node1, raptor_abbrev_node* node2); +raptor_abbrev_node* raptor_abbrev_node_lookup(raptor_avltree* nodes, raptor_term* term); + +void raptor_free_abbrev_subject(raptor_abbrev_subject* subject); +int raptor_abbrev_subject_add_property(raptor_abbrev_subject* subject, raptor_abbrev_node* predicate, raptor_abbrev_node* object); +int raptor_abbrev_subject_compare(raptor_abbrev_subject* subject1, raptor_abbrev_subject* subject2); +raptor_abbrev_subject* raptor_abbrev_subject_find(raptor_avltree *subjects, raptor_term* node); +raptor_abbrev_subject* raptor_abbrev_subject_lookup(raptor_avltree* nodes, raptor_avltree* subjects, raptor_avltree* blanks, raptor_term* term); +int raptor_abbrev_subject_valid(raptor_abbrev_subject *subject); +int raptor_abbrev_subject_invalidate(raptor_abbrev_subject *subject); + + +/* avltree */ +#ifdef RAPTOR_DEBUG +int raptor_avltree_dump(raptor_avltree* tree, FILE* stream); +void raptor_avltree_check(raptor_avltree* tree); +#endif + + +raptor_qname* raptor_new_qname_from_resource(raptor_sequence* namespaces, raptor_namespace_stack* nstack, int* namespace_count, raptor_abbrev_node* node); + + +/** + * raptor_turtle_writer: + * + * Raptor Turtle Writer class + */ +typedef struct raptor_turtle_writer_s 
raptor_turtle_writer; + +/** + * raptor_turtle_writer_flags: + * @TURTLE_WRITER_FLAG_AUTO_INDENT: auto indent + * @TURTLE_WRITER_FLAG_MKR: write mkr not turtle + * + * Bit flags for raptor_new_turtle_writer() + */ +typedef enum { + TURTLE_WRITER_FLAG_AUTO_INDENT = 1, + TURTLE_WRITER_FLAG_MKR = 2 +} raptor_turtle_writer_flags; + + +/* Turtle Writer Class (raptor_turtle_writer) */ +RAPTOR_INTERNAL_API raptor_turtle_writer* raptor_new_turtle_writer(raptor_world* world, raptor_uri* base_uri, int write_base_uri, raptor_namespace_stack *nstack, raptor_iostream* iostr, int flags); +RAPTOR_INTERNAL_API void raptor_free_turtle_writer(raptor_turtle_writer* turtle_writer); +RAPTOR_INTERNAL_API void raptor_turtle_writer_raw(raptor_turtle_writer* turtle_writer, const unsigned char *s); +RAPTOR_INTERNAL_API void raptor_turtle_writer_raw_counted(raptor_turtle_writer* turtle_writer, const unsigned char *s, unsigned int len); +RAPTOR_INTERNAL_API void raptor_turtle_writer_namespace_prefix(raptor_turtle_writer* turtle_writer, raptor_namespace* ns); +void raptor_turtle_writer_base(raptor_turtle_writer* turtle_writer, raptor_uri* base_uri); +RAPTOR_INTERNAL_API void raptor_turtle_writer_increase_indent(raptor_turtle_writer *turtle_writer); +RAPTOR_INTERNAL_API void raptor_turtle_writer_decrease_indent(raptor_turtle_writer *turtle_writer); +RAPTOR_INTERNAL_API void raptor_turtle_writer_newline(raptor_turtle_writer *turtle_writer); +RAPTOR_INTERNAL_API int raptor_turtle_writer_reference(raptor_turtle_writer* turtle_writer, raptor_uri* uri); +RAPTOR_INTERNAL_API int raptor_turtle_writer_literal(raptor_turtle_writer* turtle_writer, raptor_namespace_stack *nstack, const unsigned char *s, const unsigned char* lang, raptor_uri* datatype); +RAPTOR_INTERNAL_API void raptor_turtle_writer_csv_string(raptor_turtle_writer* turtle_writer, const unsigned char *s); +RAPTOR_INTERNAL_API void raptor_turtle_writer_qname(raptor_turtle_writer* turtle_writer, raptor_qname* qname); +RAPTOR_INTERNAL_API int 
raptor_turtle_writer_quoted_counted_string(raptor_turtle_writer* turtle_writer, const unsigned char *s, size_t length); +void raptor_turtle_writer_comment(raptor_turtle_writer* turtle_writer, const unsigned char *s); +RAPTOR_INTERNAL_API int raptor_turtle_writer_set_option(raptor_turtle_writer *turtle_writer, raptor_option option, int value); +int raptor_turtle_writer_set_option_string(raptor_turtle_writer *turtle_writer, raptor_option option, const unsigned char *value); +int raptor_turtle_writer_get_option(raptor_turtle_writer *turtle_writer, raptor_option option); +const unsigned char *raptor_turtle_writer_get_option_string(raptor_turtle_writer *turtle_writer, raptor_option option); +void raptor_turtle_writer_bnodeid(raptor_turtle_writer* turtle_writer, const unsigned char *bnodeid, size_t len); +int raptor_turtle_writer_uri(raptor_turtle_writer* turtle_writer, raptor_uri* uri); +int raptor_turtle_writer_term(raptor_turtle_writer* turtle_writer, raptor_term* term); +int raptor_turtle_is_legal_turtle_qname(raptor_qname* qname); + + +/** + * raptor_json_writer: + * + * Raptor JSON Writer class + */ +typedef struct raptor_json_writer_s raptor_json_writer; + +/* raptor_json_writer.c */ +raptor_json_writer* raptor_new_json_writer(raptor_world* world, raptor_uri* base_uri, raptor_iostream* iostr); +void raptor_free_json_writer(raptor_json_writer* json_writer); + +int raptor_json_writer_newline(raptor_json_writer* json_writer); +int raptor_json_writer_key_value(raptor_json_writer* json_writer, const char* key, size_t key_len, const char* value, size_t value_len); +int raptor_json_writer_start_block(raptor_json_writer* json_writer, char c); +int raptor_json_writer_end_block(raptor_json_writer* json_writer, char c); +int raptor_json_writer_literal_object(raptor_json_writer* json_writer, unsigned char* s, size_t s_len, unsigned char* lang, raptor_uri* datatype); +int raptor_json_writer_blank_object(raptor_json_writer* json_writer, const unsigned char* blank, size_t 
blank_len); +int raptor_json_writer_uri_object(raptor_json_writer* json_writer, raptor_uri* uri); +int raptor_json_writer_term(raptor_json_writer* json_writer, raptor_term *term); +int raptor_json_writer_key_uri_value(raptor_json_writer* json_writer, const char* key, size_t key_len, raptor_uri* uri); + +/* raptor_memstr.c */ +const char* raptor_memstr(const char *haystack, size_t haystack_len, const char *needle); + +/* raptor_serialize_rdfxmla.c special functions for embedding rdf/xml */ +int raptor_rdfxmla_serialize_set_write_rdf_RDF(raptor_serializer* serializer, int value); +int raptor_rdfxmla_serialize_set_xml_writer(raptor_serializer* serializer, raptor_xml_writer* xml_writer, raptor_namespace_stack *nstack); +int raptor_rdfxmla_serialize_set_single_node(raptor_serializer* serializer, raptor_uri* uri); +int raptor_rdfxmla_serialize_set_write_typed_nodes(raptor_serializer* serializer, int value); + +/* snprintf.c */ +size_t raptor_format_integer(char* buffer, size_t bufsize, int integer, unsigned int base, int width, char padding); + +/* raptor_world structure */ +#define RAPTOR1_WORLD_MAGIC_1 0 +#define RAPTOR1_WORLD_MAGIC_2 1 +#define RAPTOR2_WORLD_MAGIC 0xC4129CEF + +#define RAPTOR_CHECK_CONSTRUCTOR_WORLD(world) \ + do { \ + if(raptor_check_world_internal(world, __FUNCTION__)) \ + return NULL; \ + } while(0) + + +RAPTOR_INTERNAL_API int raptor_check_world_internal(raptor_world* world, const char* name); + + + +struct raptor_world_s { + /* signature to check this is a world object */ + unsigned int magic; + + /* world has been initialized with raptor_world_open() */ + int opened; + + /* internal flag used to ignore errors for e.g. 
child GRDDL parsers */ + int internal_ignore_errors; + + void* message_handler_user_data; + raptor_log_handler message_handler; + + /* sequence of parser factories */ + raptor_sequence *parsers; + + /* sequence of serializer factories */ + raptor_sequence *serializers; + + /* raptor_rss_common initialisation counter */ + int rss_common_initialised; + + /* raptor_rss_{namespaces,types,fields}_info const data initialized to raptor_uri,raptor_qname objects */ + raptor_uri **rss_namespaces_info_uris; + raptor_uri **rss_types_info_uris; + raptor_qname **rss_types_info_qnames; + raptor_uri **rss_fields_info_uris; + raptor_qname **rss_fields_info_qnames; + + /* raptor_www v2 flags */ + int www_skip_www_init_finish; + int www_initialized; + + /* This is used to store a #xsltSecurityPrefsPtr typed object + * pointer when libxslt is compiled in. + */ + void* xslt_security_preferences; + /* 0 raptor owns the above object and should free it with + * xsltFreeSecurityPrefs() on exit + * 1 user set the above object and raptor does not own it + */ + int xslt_security_preferences_policy; + + /* Flags for libxml set by raptor_world_set_libxml_flags(). 
/*
 * Cast helper macros.
 *
 * Every expansion is wrapped in an outer pair of parentheses, and every
 * argument is parenthesized, so each macro behaves as a single primary
 * expression.  Without this, a use such as RAPTOR_GOOD_CAST(t, v)[i] or
 * sizeof RAPTOR_BAD_CAST(t, v) applies the neighbouring operator to part
 * of the expansion only, and RAPTOR_VOIDP(a + 1) casts just `a'.
 */

/* Conversions between int and size_t language tag lengths */
#define RAPTOR_LANG_LEN_FROM_INT(len) ((int)(len))
#define RAPTOR_LANG_LEN_TO_SIZE_T(len) ((size_t)(len))

/* Safe casts: widening a value */
#define RAPTOR_GOOD_CAST(t, v) ((t)(v))

/* Unsafe casts: narrowing a value */
#define RAPTOR_BAD_CAST(t, v) ((t)(v))

/* Cast to void* for debugging prints with %p */
#define RAPTOR_VOIDP(p) ((void*)(p))
+ * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +#define RAPTOR_IOSTREAM_MODE_READ 1 +#define RAPTOR_IOSTREAM_MODE_WRITE 2 + +#define RAPTOR_IOSTREAM_FLAGS_EOF 1 +#define RAPTOR_IOSTREAM_FLAGS_FREE_HANDLER 2 + +struct raptor_iostream_s +{ + raptor_world *world; + + void *user_data; + const raptor_iostream_handler* handler; + size_t offset; + unsigned int mode; + int flags; +}; + + + +/* prototypes for local functions */ + + +static int +raptor_iostream_calculate_modes(const raptor_iostream_handler * const handler) +{ + int mode = 0; + + /* API V1 checks */ + if((handler->version >= 1) && + handler->read_bytes) + mode |= RAPTOR_IOSTREAM_MODE_READ; + + /* API V2 checks */ + if((handler->version >= 2) && + (handler->write_byte || handler->write_bytes)) + mode |= RAPTOR_IOSTREAM_MODE_WRITE; + + return mode; +} + + +/* Return non-0 if handler is legal and OK for given mode (if not 0 = ANY) */ +static int +raptor_iostream_check_handler(const raptor_iostream_handler * const handler, + unsigned int user_mode) +{ + int mode; + + if(handler->version < 1 || handler->version > 2) + return 0; + + mode = 
raptor_iostream_calculate_modes(handler); + if(user_mode && !(user_mode & mode)) + return 0; + + return (mode != 0); +} + + +/** + * raptor_new_iostream_from_handler: + * @world: raptor_world object + * @user_data: pointer to context information to pass in to calls + * @handler: pointer to handler methods + * + * Create a new iostream over a user-defined handler + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_from_handler(raptor_world *world, + void *user_data, + const raptor_iostream_handler* const handler) +{ + raptor_iostream* iostr; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(handler, raptor_iostream_handler, NULL); + + raptor_world_open(world); + + if(!raptor_iostream_check_handler(handler, 0)) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) + return NULL; + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)user_data; + iostr->mode = raptor_iostream_calculate_modes(handler); + + if(iostr->handler->init && + iostr->handler->init(iostr->user_data)) { + RAPTOR_FREE(raptor_iostream, iostr); + return NULL; + } + return iostr; +} + + + +/* Local handlers for reading/writing to/from a sink */ + +static int +raptor_sink_iostream_write_byte(void *user_data, const int byte) +{ + return 0; +} + +static int +raptor_sink_iostream_write_bytes(void *user_data, const void *ptr, + size_t size, size_t nmemb) +{ + return RAPTOR_BAD_CAST(int, size * nmemb); /* success */ +} + +static int +raptor_sink_iostream_read_bytes(void *user_data, void *ptr, + size_t size, size_t nmemb) +{ + return 0; +} + +static int +raptor_sink_iostream_read_eof(void *user_data) +{ + return 1; /* EOF always */ +} + +static const raptor_iostream_handler raptor_iostream_sink_handler = { + /* .version = */ 2, + /* .init = */ NULL, + /* .finish = */ NULL, + /* .write_byte = */ raptor_sink_iostream_write_byte, + /* .write_bytes = 
*/ raptor_sink_iostream_write_bytes, + /* .write_end = */ NULL, + /* .read_bytes = */ raptor_sink_iostream_read_bytes, + /* .read_eof = */ raptor_sink_iostream_read_eof +}; + + +/** + * raptor_new_iostream_to_sink: + * @world: raptor_world object + * + * Create a new write iostream to a sink, throwing away all data. + * + * Provides an that throw away all writes and returns end of input + * immediately on reads. Same as raptor_new_iostream_from_sink() + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_to_sink(raptor_world *world) +{ + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + return raptor_new_iostream_from_handler(world, + NULL, &raptor_iostream_sink_handler); +} + + +/* Local handlers for reading/writing from a filename */ + +static void +raptor_filename_iostream_finish(void *user_data) +{ + FILE* handle = (FILE*)user_data; + fclose(handle); +} + +static int +raptor_filename_iostream_write_byte(void *user_data, const int byte) +{ + FILE* handle = (FILE*)user_data; + return (fputc(byte, handle) == byte); +} + +static int +raptor_filename_iostream_write_bytes(void *user_data, + const void *ptr, size_t size, size_t nmemb) +{ + FILE* handle = (FILE*)user_data; + return RAPTOR_BAD_CAST(int, fwrite(ptr, size, nmemb, handle)); +} + +static int +raptor_filename_iostream_write_end(void *user_data) +{ + FILE* handle = (FILE*)user_data; + return fclose(handle); +} + +static int +raptor_filename_iostream_read_bytes(void *user_data, + void *ptr, size_t size, size_t nmemb) +{ + FILE* handle = (FILE*)user_data; + return RAPTOR_BAD_CAST(int, fread(ptr, size, nmemb, handle)); +} + +static int +raptor_filename_iostream_read_eof(void *user_data) +{ + FILE* handle = (FILE*)user_data; + return feof(handle); +} + +static const raptor_iostream_handler raptor_iostream_write_filename_handler = { + /* .version = */ 2, + /* .init = */ NULL, + /* .finish = */ raptor_filename_iostream_finish, + /* 
.write_byte = */ raptor_filename_iostream_write_byte, + /* .write_bytes = */ raptor_filename_iostream_write_bytes, + /* .write_end = */ raptor_filename_iostream_write_end, + /* .read_bytes = */ NULL, + /* .read_eof = */ NULL +}; + + +/** + * raptor_new_iostream_to_filename: + * @world: raptor world + * @filename: Output filename to open and write to + * + * Constructor - create a new iostream writing to a filename. + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_to_filename(raptor_world *world, const char *filename) +{ + FILE *handle; + raptor_iostream* iostr; + const raptor_iostream_handler* handler; + const unsigned int mode = RAPTOR_IOSTREAM_MODE_WRITE; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + if(!filename) + return NULL; + + handler = &raptor_iostream_write_filename_handler; + if(!raptor_iostream_check_handler(handler, mode)) + return NULL; + + handle = fopen(filename, "wb"); + if(!handle) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) { + fclose(handle); + return NULL; + } + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)handle; + iostr->mode = mode; + + if(iostr->handler->init && + iostr->handler->init(iostr->user_data)) { + raptor_free_iostream(iostr); + return NULL; + } + return iostr; +} + + +static const raptor_iostream_handler raptor_iostream_write_file_handler = { + /* .version = */ 2, + /* .init = */ NULL, + /* .finish = */ NULL, + /* .write_byte = */ raptor_filename_iostream_write_byte, + /* .write_bytes = */ raptor_filename_iostream_write_bytes, + /* .write_end = */ NULL, + /* .read_bytes = */ NULL, + /* .read_eof = */ NULL +}; + + +/** + * raptor_new_iostream_to_file_handle: + * @world: raptor world + * @handle: FILE* handle to write to + * + * Constructor - create a new iostream writing to a FILE*. + * + * The @handle must already be open for writing. 
+ * NOTE: This does not fclose the @handle when it is finished. + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_to_file_handle(raptor_world *world, FILE *handle) +{ + raptor_iostream* iostr; + const raptor_iostream_handler* handler; + const unsigned int mode = RAPTOR_IOSTREAM_MODE_WRITE; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + if(!handle) + return NULL; + + handler = &raptor_iostream_write_file_handler; + if(!raptor_iostream_check_handler(handler, mode)) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) + return NULL; + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)handle; + iostr->mode = mode; + + if(iostr->handler->init && iostr->handler->init(iostr->user_data)) { + RAPTOR_FREE(raptor_iostream, iostr); + return NULL; + } + return iostr; +} + + + +struct raptor_write_string_iostream_context { + raptor_stringbuffer *sb; + void *(*malloc_handler)(size_t size); + void **string_p; + size_t *length_p; +}; + + +/* Local handlers for writing to a string */ + +static void +raptor_write_string_iostream_finish(void *user_data) +{ + struct raptor_write_string_iostream_context* con; + size_t len; + void *str = NULL; + + con = (struct raptor_write_string_iostream_context*)user_data; + len = raptor_stringbuffer_length(con->sb); + + *con->string_p = NULL; + if(con->length_p) + *con->length_p = len; + + str = (void*)con->malloc_handler(len+1); + if(str) { + if(len) + raptor_stringbuffer_copy_to_string(con->sb, (unsigned char*)str, len+1); + else + *(char*)str='\0'; + *con->string_p = str; + } + + if(!str && con->length_p) + *con->length_p = 0; + + raptor_free_stringbuffer(con->sb); + RAPTOR_FREE(raptor_write_string_iostream_context, con); + return; +} + +static int +raptor_write_string_iostream_write_byte(void *user_data, const int byte) +{ + struct raptor_write_string_iostream_context* con; + unsigned 
char buf = (unsigned char)byte; + + con = (struct raptor_write_string_iostream_context*)user_data; + return raptor_stringbuffer_append_counted_string(con->sb, &buf, 1, 1); +} + + +static int +raptor_write_string_iostream_write_bytes(void *user_data, const void *ptr, + size_t size, size_t nmemb) +{ + struct raptor_write_string_iostream_context* con; + + con = (struct raptor_write_string_iostream_context*)user_data; + if(raptor_stringbuffer_append_counted_string(con->sb, + (const unsigned char*)ptr, size * nmemb, 1)) + return 0; /* failure */ + return RAPTOR_BAD_CAST(int, size * nmemb); /* success */ +} + +static const raptor_iostream_handler raptor_iostream_write_string_handler = { + /* .version = */ 2, + /* .init = */ NULL, + /* .finish = */ raptor_write_string_iostream_finish, + /* .write_byte = */ raptor_write_string_iostream_write_byte, + /* .write_bytes = */ raptor_write_string_iostream_write_bytes, + /* .write_end = */ NULL, + /* .read_bytes = */ NULL, + /* .read_eof = */ NULL +}; + + +/** + * raptor_new_iostream_to_string: + * @world: raptor world + * @string_p: pointer to location to hold string + * @length_p: pointer to location to hold length of string (or NULL) + * @malloc_handler: pointer to malloc() to use to make string (or NULL) + * + * Constructor - create a new iostream writing to a string. + * + * If @malloc_handler is null, raptor will allocate it using it's + * own memory allocator. *@string_p is set to NULL on failure (and + * *@length_p to 0 if @length_p is not NULL). 
+ * + * Return value: new #raptor_iostream object or NULL on failure + **/ +RAPTOR_EXTERN_C +raptor_iostream* +raptor_new_iostream_to_string(raptor_world *world, + void **string_p, size_t *length_p, + raptor_data_malloc_handler const malloc_handler) +{ + raptor_iostream* iostr; + struct raptor_write_string_iostream_context* con; + const raptor_iostream_handler* handler; + const unsigned int mode = RAPTOR_IOSTREAM_MODE_WRITE; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!string_p) + return NULL; + + raptor_world_open(world); + + handler = &raptor_iostream_write_string_handler; + if(!raptor_iostream_check_handler(handler, mode)) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) + return NULL; + + con = RAPTOR_CALLOC(struct raptor_write_string_iostream_context*, 1, + sizeof(*con)); + if(!con) { + RAPTOR_FREE(raptor_iostream, iostr); + return NULL; + } + + con->sb = raptor_new_stringbuffer(); + if(!con->sb) { + RAPTOR_FREE(raptor_iostream, iostr); + RAPTOR_FREE(raptor_write_string_iostream_context, con); + return NULL; + } + + con->string_p = string_p; + *string_p = NULL; + + con->length_p = length_p; + if(length_p) + *length_p = 0; + + if(malloc_handler) + con->malloc_handler = malloc_handler; + else + con->malloc_handler = raptor_alloc_memory; + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)con; + iostr->mode = mode; + + if(iostr->handler->init && iostr->handler->init(iostr->user_data)) { + raptor_free_iostream(iostr); + return NULL; + } + return iostr; +} + + +/** + * raptor_new_iostream_from_sink: + * @world: raptor world + * + * Create a new read iostream from a sink, returning no data. + * + * Provides an I/O source that returns end of input immediately on + * reads, and throw away all writes. 
Same as + * raptor_new_iostream_to_sink() + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_from_sink(raptor_world *world) +{ + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + return raptor_new_iostream_from_handler(world, NULL, + &raptor_iostream_sink_handler); +} + + +static const raptor_iostream_handler raptor_iostream_read_filename_handler = { + /* .version = */ 2, + /* .init = */ NULL, + /* .finish = */ raptor_filename_iostream_finish, + /* .write_byte = */ NULL, + /* .write_bytes = */ NULL, + /* .write_end = */ NULL, + /* .read_bytes = */ raptor_filename_iostream_read_bytes, + /* .read_eof = */ raptor_filename_iostream_read_eof +}; + + +/** + * raptor_new_iostream_from_filename: + * @world: raptor world + * @filename: Input filename to open and read from + * + * Constructor - create a new iostream reading from a filename. + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_from_filename(raptor_world *world, const char *filename) +{ + FILE *handle; + raptor_iostream* iostr; + const raptor_iostream_handler* handler; + const unsigned int mode = RAPTOR_IOSTREAM_MODE_READ; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!filename) + return NULL; + + raptor_world_open(world); + + handler = &raptor_iostream_read_filename_handler; + if(!raptor_iostream_check_handler(handler, mode)) + return NULL; + + handle = fopen(filename, "rb"); + if(!handle) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) { + fclose(handle); + return NULL; + } + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)handle; + iostr->mode = mode; + + if(iostr->handler->init && + iostr->handler->init(iostr->user_data)) { + raptor_free_iostream(iostr); + return NULL; + } + return iostr; +} + + +static const raptor_iostream_handler raptor_iostream_read_file_handle_handler = { + /* 
.version = */ 2, + /* .init = */ NULL, + /* .finish = */ NULL, + /* .write_byte = */ NULL, + /* .write_bytes = */ NULL, + /* .write_end = */ NULL, + /* .read_bytes = */ raptor_filename_iostream_read_bytes, + /* .read_eof = */ raptor_filename_iostream_read_eof +}; + + +/** + * raptor_new_iostream_from_file_handle: + * @world: raptor world + * @handle: Input file_handle to open and read from + * + * Constructor - create a new iostream reading from a file_handle. + * + * The @handle must already be open for reading. + * NOTE: This does not fclose the @handle when it is finished. + * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_from_file_handle(raptor_world *world, FILE *handle) +{ + raptor_iostream* iostr; + const raptor_iostream_handler* handler; + const unsigned int mode = RAPTOR_IOSTREAM_MODE_READ; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!handle) + return NULL; + + raptor_world_open(world); + + handler = &raptor_iostream_read_file_handle_handler; + if(!raptor_iostream_check_handler(handler, mode)) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) + return NULL; + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)handle; + iostr->mode = mode; + + if(iostr->handler->init && + iostr->handler->init(iostr->user_data)) { + RAPTOR_FREE(raptor_iostream, iostr); + return NULL; + } + return iostr; +} + + +/** + * raptor_free_iostream: + * @iostr: iostream object + * + * Destructor - destroy an iostream. 
+ **/ +void +raptor_free_iostream(raptor_iostream *iostr) +{ + if(!iostr) + return; + + if(iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) + raptor_iostream_write_end(iostr); + + if(iostr->handler->finish) + iostr->handler->finish(iostr->user_data); + + if((iostr->flags & RAPTOR_IOSTREAM_FLAGS_FREE_HANDLER)) + RAPTOR_FREE(raptor_iostream_handler, iostr->handler); + + RAPTOR_FREE(raptor_iostream, iostr); +} + + + +/** + * raptor_iostream_write_byte: + * @byte: byte to write + * @iostr: raptor iostream + * + * Write a byte to the iostream. + * + * Return value: non-0 on failure + **/ +int +raptor_iostream_write_byte(const int byte, raptor_iostream *iostr) +{ + iostr->offset++; + + if(iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) + return 1; + if(!iostr->handler->write_byte) + return 1; + if(!(iostr->mode & RAPTOR_IOSTREAM_MODE_WRITE)) + return 1; + return iostr->handler->write_byte(iostr->user_data, byte); +} + + +/** + * raptor_iostream_write_bytes: + * @ptr: start of objects to write + * @size: size of object + * @nmemb: number of objects + * @iostr: raptor iostream + * + * Write bytes to the iostream. + * + * Return value: number of objects actually written, which may be less than nmemb. <0 on failure + **/ +int +raptor_iostream_write_bytes(const void *ptr, size_t size, size_t nmemb, + raptor_iostream *iostr) +{ + int nobj; + + if(iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) + return -1; + if(!iostr->handler->write_bytes) + return -1; + if(!(iostr->mode & RAPTOR_IOSTREAM_MODE_WRITE)) + return -1; + + nobj = iostr->handler->write_bytes(iostr->user_data, ptr, size, nmemb); + if(nobj > 0) + iostr->offset += (size * nobj); + + return nobj; +} + + +/** + * raptor_iostream_string_write: + * @string: string + * @iostr: raptor iostream + * + * Write a NULL-terminated string to the iostream. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_iostream_string_write(const void *string, raptor_iostream *iostr) +{ + size_t len = strlen((const char*)string); + int nobj = raptor_iostream_write_bytes(string, 1, len, iostr); + return (RAPTOR_BAD_CAST(size_t, nobj) != len); +} + + +/** + * raptor_iostream_counted_string_write: + * @string: string + * @len: string length + * @iostr: raptor iostream + * + * Write a counted string to the iostream. + * + * Return value: non-0 on failure + **/ +int +raptor_iostream_counted_string_write(const void *string, size_t len, + raptor_iostream *iostr) +{ + int nobj = raptor_iostream_write_bytes(string, 1, len, iostr); + return (RAPTOR_BAD_CAST(size_t, nobj) != len); +} + + +/** + * raptor_uri_write: + * @uri: URI + * @iostr: raptor iostream + * + * Write a raptor URI to the iostream. + * + * Return value: non-0 on failure + **/ +int +raptor_uri_write(raptor_uri* uri, raptor_iostream* iostr) +{ + size_t len; + const void *string = raptor_uri_as_counted_string(uri, &len); + int nobj = raptor_iostream_write_bytes(string, 1, len, iostr); + return (RAPTOR_BAD_CAST(size_t, nobj) != len); +} + + +/** + * raptor_iostream_write_end: + * @iostr: raptor iostream + * + * End writing to the iostream. + * + * Return value: non-0 on failure + **/ +int +raptor_iostream_write_end(raptor_iostream *iostr) +{ + int rc = 0; + + if(iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) + return 1; + if(iostr->handler->write_end) + rc = iostr->handler->write_end(iostr->user_data); + iostr->flags |= RAPTOR_IOSTREAM_FLAGS_EOF; + + return rc; +} + + +/** + * raptor_stringbuffer_write: + * @sb: #raptor_stringbuffer to write + * @iostr: raptor iostream + * + * Write a stringbuffer to an iostream. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_write(raptor_stringbuffer *sb, raptor_iostream* iostr) +{ + size_t length; + if(!sb) + return 1; + + length = raptor_stringbuffer_length(sb); + if(length) { + int nobj = raptor_iostream_write_bytes(raptor_stringbuffer_as_string(sb), + 1, length, iostr); + return (RAPTOR_BAD_CAST(size_t, nobj) != length); + } else + return 0; +} + + +/** + * raptor_iostream_decimal_write: + * @integer: integer to format as decimal + * @iostr: raptor iostream + * + * Write an integer in decimal to the iostream. + * + * Return value: non-0 on failure + **/ +int +raptor_iostream_decimal_write(int integer, raptor_iostream* iostr) +{ + /* enough for 64 bit signed integer + * INT64_MAX is 9223372036854775807 (19 digits) + 1 for sign + */ + unsigned char buf[20]; + unsigned char *p; + int i = integer; + size_t length = 1; + int nobj; + + if(integer < 0) { + length++; + i= -integer; + } + while(i /= 10) + length++; + + p = buf+length-1; + i = integer; + if(i < 0) + i= -i; + do { + *p-- = RAPTOR_GOOD_CAST(unsigned char, '0' + (i %10)); + i /= 10; + } while(i); + if(integer < 0) + *p= '-'; + + nobj = raptor_iostream_write_bytes(buf, 1, length, iostr); + return (RAPTOR_BAD_CAST(size_t, nobj) != length); +} + + +/** + * raptor_iostream_hexadecimal_write: + * @integer: unsigned integer to format as hexadecimal + * @width: field width + * @iostr: raptor iostream + * + * Write an integer in hexadecimal to the iostream. 
+ * + * Always 0-fills the entire field and writes in uppercase A-F + * + * Return value: non-0 on failure + **/ +int +raptor_iostream_hexadecimal_write(unsigned int integer, int width, + raptor_iostream* iostr) +{ + char *buf; + int nobj; + + if(width < 1) + return 1; + + buf = RAPTOR_MALLOC(char*, width + 1); + if(!buf) + return 1; + + (void)raptor_format_integer(buf, width + 1, integer, /* base */ 16, + width, '0'); + + nobj = raptor_iostream_write_bytes(buf, 1, width, iostr); + RAPTOR_FREE(char*, buf); + return (nobj != width); +} + + + +/** + * raptor_iostream_read_bytes: + * @ptr: start of buffer to read objects into + * @size: size of object + * @nmemb: number of objects to read + * @iostr: raptor iostream + * + * Read bytes to the iostream. + * + * Return value: number of objects read, 0 or less than nmemb on EOF, <0 on failure + **/ +int +raptor_iostream_read_bytes(void *ptr, size_t size, size_t nmemb, + raptor_iostream *iostr) +{ + int count; + + if(!(iostr->mode & RAPTOR_IOSTREAM_MODE_READ)) + return -1; + + if(iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) + return 0; + + if(!iostr->handler->read_bytes) + count= -1; + else + count = iostr->handler->read_bytes(iostr->user_data, ptr, size, nmemb); + + if(count > 0) + iostr->offset += (size*count); + + if(RAPTOR_BAD_CAST(size_t, count) < nmemb) + iostr->flags |= RAPTOR_IOSTREAM_FLAGS_EOF; + + return count; +} + + +/** + * raptor_iostream_read_eof: + * @iostr: raptor read iostream + * + * Check if an read iostream has ended + * + * Return value: non-0 if EOF (or not a read iostream) + **/ +int +raptor_iostream_read_eof(raptor_iostream *iostr) +{ + /* Streams without read are always EOF */ + if(!(iostr->mode & RAPTOR_IOSTREAM_MODE_READ)) + return 1; + + if(!(iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) && + iostr->handler->read_eof && + iostr->handler->read_eof(iostr->user_data)) + iostr->flags |= RAPTOR_IOSTREAM_FLAGS_EOF; + + return ((iostr->flags & RAPTOR_IOSTREAM_FLAGS_EOF) != 0); +} + + +struct 
raptor_read_string_iostream_context { + /* input buffer */ + void* string; + size_t length; + /* pointer into buffer */ + size_t offset; +}; + + +/* Local handlers for reading from a string */ + +static void +raptor_read_string_iostream_finish(void *user_data) +{ + struct raptor_read_string_iostream_context* con; + + con = (struct raptor_read_string_iostream_context*)user_data; + RAPTOR_FREE(raptor_read_string_iostream_context, con); + return; +} + +static int +raptor_read_string_iostream_read_bytes(void *user_data, void *ptr, + size_t size, size_t nmemb) +{ + struct raptor_read_string_iostream_context* con; + size_t avail; + size_t blen; + + if(!ptr || size <= 0 || !nmemb) + return -1; + + con = (struct raptor_read_string_iostream_context*)user_data; + if(con->offset >= con->length) + return 0; + + avail = (con->length - con->offset) / size; + if(avail > nmemb) + avail = nmemb; + blen = (avail * size); + memcpy(ptr, (char*)con->string + con->offset, blen); + con->offset += blen; + + return RAPTOR_BAD_CAST(int, avail); +} + +static int +raptor_read_string_iostream_read_eof(void *user_data) +{ + struct raptor_read_string_iostream_context* con; + + con = (struct raptor_read_string_iostream_context*)user_data; + return (con->offset >= con->length); +} + + +static const raptor_iostream_handler raptor_iostream_read_string_handler = { + /* .version = */ 2, + /* .init = */ NULL, + /* .finish = */ raptor_read_string_iostream_finish, + /* .write_byte = */ NULL, + /* .write_bytes = */ NULL, + /* .write_end = */ NULL, + /* .read_bytes = */ raptor_read_string_iostream_read_bytes, + /* .read_eof = */ raptor_read_string_iostream_read_eof +}; + + +/** + * raptor_new_iostream_from_string: + * @world: raptor world + * @string: pointer to string + * @length: length of string + * + * Constructor - create a new iostream reading from a string. 
+ * + * Return value: new #raptor_iostream object or NULL on failure + **/ +raptor_iostream* +raptor_new_iostream_from_string(raptor_world *world, + void *string, size_t length) +{ + raptor_iostream* iostr; + struct raptor_read_string_iostream_context* con; + const raptor_iostream_handler* handler; + const unsigned int mode = RAPTOR_IOSTREAM_MODE_READ; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!string) + return NULL; + + raptor_world_open(world); + + handler = &raptor_iostream_read_string_handler; + if(!raptor_iostream_check_handler(handler, mode)) + return NULL; + + iostr = RAPTOR_CALLOC(raptor_iostream*, 1, sizeof(*iostr)); + if(!iostr) + return NULL; + + con = RAPTOR_CALLOC(struct raptor_read_string_iostream_context*, 1, + sizeof(*con)); + if(!con) { + RAPTOR_FREE(raptor_iostream, iostr); + return NULL; + } + + con->string = string; + con->length = length; + + iostr->world = world; + iostr->handler = handler; + iostr->user_data = (void*)con; + iostr->mode = mode; + + if(iostr->handler->init && iostr->handler->init(iostr->user_data)) { + raptor_free_iostream(iostr); + return NULL; + } + return iostr; +} + + +/** + * raptor_iostream_tell: + * @iostr: raptor iostream + * + * Get the offset in the iostream. 
+ * + * Return value: offset in iostream + **/ +unsigned long +raptor_iostream_tell(raptor_iostream *iostr) +{ + return RAPTOR_BAD_CAST(unsigned long, iostr->offset); +} + + +/* internal */ +raptor_world* +raptor_iostream_get_world(raptor_iostream *iostr) +{ + return iostr->world; +} + + +#endif + + + +#ifdef STANDALONE + +/* one more prototype */ +int main(int argc, char *argv[]); + + +static const char *program; + +#define READ_BUFFER_SIZE 256 + + +static int +test_write_to_filename(raptor_world *world, const char* filename, + const char* test_string, size_t test_string_len, + const unsigned int expected_bytes_count) +{ + raptor_iostream *iostr = NULL; + unsigned long count; + int rc = 0; + const char* const label="write iostream to filename"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s '%s'\n", program, label, filename); +#endif + + iostr = raptor_new_iostream_to_filename(world, filename); + if(!iostr) { + fprintf(stderr, "%s: Failed to create %s '%s'\n", program, label, filename); + rc = 1; + goto tidy; + } + + raptor_iostream_write_bytes(test_string, 1, test_string_len, iostr); + raptor_iostream_write_byte('\n', iostr); + + count = raptor_iostream_tell(iostr); + if(count != expected_bytes_count) { + fprintf(stderr, "%s: %s wrote %d bytes, expected %d\n", program, label, + (int)count, expected_bytes_count); + rc = 1; + goto tidy; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + remove(filename); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_write_to_file_handle(raptor_world *world, FILE* handle, + const char* test_string, size_t test_string_len, + const unsigned int expected_bytes_count) +{ + raptor_iostream *iostr = NULL; + unsigned long count; + int rc = 0; + const char* const label="write iostream to file handle"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s\n", program, label); +#endif + + iostr = 
raptor_new_iostream_to_file_handle(world, handle); + if(!iostr) { + fprintf(stderr, "%s: Failed to create %s\n", program, label); + rc = 1; + goto tidy; + } + + raptor_iostream_write_bytes(test_string, 1, test_string_len, iostr); + raptor_iostream_write_byte('\n', iostr); + + count = raptor_iostream_tell(iostr); + if(count != expected_bytes_count) { + fprintf(stderr, "%s: %s wrote %d bytes, expected %d\n", program, label, + (int)count, expected_bytes_count); + rc = 1; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_write_to_string(raptor_world *world, + const char* test_string, size_t test_string_len, + const unsigned int expected_bytes_count) +{ + raptor_iostream *iostr = NULL; + unsigned long count; + int rc = 0; + void *string = NULL; + size_t string_len; + const char* const label="write iostream to a string"; + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s\n", program, label); +#endif + + iostr = raptor_new_iostream_to_string(world, &string, &string_len, NULL); + if(!iostr) { + fprintf(stderr, "%s: Failed to create write iostream to string\n", + program); + rc = 1; + goto tidy; + } + + raptor_iostream_write_bytes(test_string, 1, test_string_len, iostr); + raptor_iostream_write_byte('\n', iostr); + + count = raptor_iostream_tell(iostr); + if(count != expected_bytes_count) { + fprintf(stderr, "%s: %s wrote %d bytes, expected %d\n", program, label, + (int)count, expected_bytes_count); + rc = 1; + } + + raptor_free_iostream(iostr); iostr = NULL; + + if(!string) { + fprintf(stderr, "%s: %s failed to create a string\n", program, label); + return 1; + } + if(string_len != count) { + fprintf(stderr, "%s: %s created a string length %d, expected %d\n", + program, label, (int)string_len, (int)count); + return 1; + } + + tidy: + if(string) + raptor_free_memory(string); + if(iostr) + raptor_free_iostream(iostr); + + 
if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_write_to_sink(raptor_world *world, + const char* test_string, size_t test_string_len, + const unsigned int expected_bytes_count) +{ + raptor_iostream *iostr = NULL; + unsigned long count; + int rc = 0; + const char* const label="write iostream to sink"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s\n", program, label); +#endif + + iostr = raptor_new_iostream_to_sink(world); + if(!iostr) { + fprintf(stderr, "%s: Failed to create %s\n", program, label); + rc = 1; + goto tidy; + } + + raptor_iostream_write_bytes(test_string, 1, test_string_len, iostr); + raptor_iostream_write_byte('\n', iostr); + + count = raptor_iostream_tell(iostr); + if(count != expected_bytes_count) { + fprintf(stderr, "%s: %s wrote %d bytes, expected %d\n", program, label, + (int)count, expected_bytes_count); + rc = 1; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_read_from_filename(raptor_world *world, + const char* filename, + const char* test_string, size_t test_string_len, + const int expected_len, + const int expected_len2) +{ + raptor_iostream *iostr = NULL; + char buffer[READ_BUFFER_SIZE]; + int count; + int rc = 0; + const char* const label="read iostream from filename"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s '%s'\n", program, label, filename); +#endif + + iostr = raptor_new_iostream_from_filename(world, filename); + if(!iostr) { + fprintf(stderr, "%s: Failed to create %s '%s'\n", program, label, filename); + rc = 1; + goto tidy; + } + + count = raptor_iostream_read_bytes(buffer, 1, test_string_len, iostr); + if(count != expected_len) { + fprintf(stderr, "%s: %s read %d bytes, expected %d\n", program, label, + (int)count, (int)expected_len); + rc = 1; + goto tidy; + } + + count = 
raptor_iostream_read_bytes(buffer, 1, test_string_len, iostr); + if(count != expected_len2) { + fprintf(stderr, "%s: %s read %d bytes, expected %d\n", program, label, + (int)count, (int)expected_len2); + rc = 1; + goto tidy; + } + + if(!raptor_iostream_read_eof(iostr)) { + fprintf(stderr, "%s: %s not EOF as expected\n", program, label); + rc = 1; + goto tidy; + } + + if(strncmp(buffer, test_string, test_string_len)) { + fprintf(stderr, "%s: %s returned '%s' expected '%s'\n", program, label, + buffer, test_string); + rc = 1; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_read_from_file_handle(raptor_world *world, FILE* handle, + const char* test_string, size_t test_string_len, + const unsigned int expected_len, + const unsigned int expected_len2) +{ + raptor_iostream *iostr = NULL; + char buffer[READ_BUFFER_SIZE]; + unsigned long count; + int rc = 0; + const char* const label="read iostream from file handle"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s\n", program, label); +#endif + + iostr = raptor_new_iostream_from_file_handle(world, handle); + if(!iostr) { + fprintf(stderr, "%s: Failed to create %s\n", program, label); + rc = 1; + goto tidy; + } + + count = raptor_iostream_read_bytes(buffer, 1, test_string_len, iostr); + if(count != expected_len) { + fprintf(stderr, "%s: %s read %d bytes, expected %d\n", program, label, + (int)count, (int)expected_len); + rc = 1; + } + + count = raptor_iostream_read_bytes(buffer, 1, test_string_len, iostr); + if(count != expected_len2) { + fprintf(stderr, "%s: %s read %d bytes, expected %d\n", program, label, + (int)count, (int)expected_len2); + rc = 1; + goto tidy; + } + + if(!raptor_iostream_read_eof(iostr)) { + fprintf(stderr, "%s: %s not EOF as expected\n", program, label); + rc = 1; + } + + if(strncmp(buffer, test_string, test_string_len)) { + fprintf(stderr, "%s: 
%s returned '%s' expected '%s'\n", program, label, + buffer, test_string); + rc = 1; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_read_from_string(raptor_world *world, + const char* test_string, size_t test_string_len, + const unsigned int expected_len) +{ + raptor_iostream *iostr = NULL; + char buffer[READ_BUFFER_SIZE]; + unsigned long count; + int rc = 0; + const char* const label="read iostream from a string"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s\n", program, label); +#endif + + iostr = raptor_new_iostream_from_string(world, + (void*)test_string, test_string_len); + if(!iostr) { + fprintf(stderr, "%s: Failed to create %s\n", program, label); + rc = 1; + goto tidy; + } + + count = raptor_iostream_read_bytes(buffer, 1, test_string_len, iostr); + if(count != expected_len) { + fprintf(stderr, "%s: %s read %d bytes, expected %d\n", program, label, + (int)count, (int)expected_len); + rc = 1; + } + + if(!raptor_iostream_read_eof(iostr)) { + fprintf(stderr, "%s: %s not EOF as expected\n", program, label); + rc = 1; + } + + if(strncmp(buffer, test_string, test_string_len)) { + fprintf(stderr, "%s: %s returned '%s' expected '%s'\n", program, label, + buffer, test_string); + rc = 1; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +static int +test_read_from_sink(raptor_world *world, size_t read_len, size_t expected_len) +{ + raptor_iostream *iostr = NULL; + char buffer[READ_BUFFER_SIZE]; + unsigned long count; + int rc = 0; + const char* const label="read iostream from sink"; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Testing %s\n", program, label); +#endif + expected_len = 0; + iostr = raptor_new_iostream_from_sink(world); + if(!iostr) { + fprintf(stderr, "%s: Failed to 
create %s\n", program, label); + rc = 1; + goto tidy; + } + + count = raptor_iostream_read_bytes(buffer, 1, read_len, iostr); + if(count != expected_len) { + fprintf(stderr, "%s: %s read %d bytes, expected %d\n", program, label, + (int)count, (int)expected_len); + rc = 1; + } + + if(!raptor_iostream_read_eof(iostr)) { + fprintf(stderr, "%s: %s not EOF as expected\n", program, label); + rc = 1; + } + + tidy: + if(iostr) + raptor_free_iostream(iostr); + + if(rc) + fprintf(stderr, "%s: FAILED Testing %s\n", program, label); + + return rc; +} + + +#define OUT_FILENAME "out.bin" +#define OUT_BYTES_COUNT 14 +#define TEST_STRING "Hello, world!" +#define TEST_STRING_LEN 13 +#define IN_FILENAME "in.bin" + + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + FILE *handle = NULL; + int failures = 0; + + program = raptor_basename(argv[0]); + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + /* Write tests */ + failures+= test_write_to_filename(world, (const char*)OUT_FILENAME, + TEST_STRING, TEST_STRING_LEN, (int)OUT_BYTES_COUNT); + handle = fopen((const char*)OUT_FILENAME, "wb"); + if(!handle) { + fprintf(stderr, "%s: Failed to create write file handle to file %s\n", + program, OUT_FILENAME); + failures++; + } else { + failures+= test_write_to_file_handle(world, + handle, TEST_STRING, TEST_STRING_LEN, + (int)OUT_BYTES_COUNT); + fclose(handle); + remove(OUT_FILENAME); + } + + failures+= test_write_to_string(world, + TEST_STRING, + TEST_STRING_LEN, (int)OUT_BYTES_COUNT); + failures+= test_write_to_sink(world, + TEST_STRING, + TEST_STRING_LEN, (int)OUT_BYTES_COUNT); + + remove(OUT_FILENAME); + + + /* Read tests */ + handle = fopen((const char*)IN_FILENAME, "wb"); + if(!handle) { + fprintf(stderr, "%s: Failed to create write handle to file %s\n", + program, IN_FILENAME); + failures++; + } else { + fwrite(TEST_STRING, 1, TEST_STRING_LEN, handle); + fclose(handle); + + failures+= test_read_from_filename(world, + (const 
char*)IN_FILENAME, + TEST_STRING, TEST_STRING_LEN, + TEST_STRING_LEN, 0); + handle = fopen((const char*)IN_FILENAME, "rb"); + if(!handle) { + fprintf(stderr, "%s: Failed to create read file handle to file %s\n", + program, IN_FILENAME); + failures++; + } else { + failures+= test_read_from_file_handle(world, + handle, + TEST_STRING, TEST_STRING_LEN, + TEST_STRING_LEN, 0); + fclose(handle); handle = NULL; + } + } + + failures+= test_read_from_string(world, + TEST_STRING, TEST_STRING_LEN, + TEST_STRING_LEN); + failures+= test_read_from_sink(world, TEST_STRING_LEN, 0); + + remove(IN_FILENAME); + + raptor_free_world(world); + + return failures; +} + +#endif diff --git a/src/raptor_json.c b/src/raptor_json.c new file mode 100644 index 0000000..6032c8a --- /dev/null +++ b/src/raptor_json.c @@ -0,0 +1,782 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_json.c - Raptor JSON Parser + * + * RDF/JSON + * http://n2.talis.com/wiki/RDF_JSON_Specification + * + * Copyright (C) 2001-2010, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <yajl/yajl_parse.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +typedef enum { + RAPTOR_JSON_STATE_ROOT, + RAPTOR_JSON_STATE_MAP_ROOT, + RAPTOR_JSON_STATE_TRIPLES_KEY, + RAPTOR_JSON_STATE_TRIPLES_ARRAY, + RAPTOR_JSON_STATE_TRIPLES_TRIPLE, + RAPTOR_JSON_STATE_TRIPLES_TERM, + RAPTOR_JSON_STATE_RESOURCES_SUBJECT_KEY, + RAPTOR_JSON_STATE_RESOURCES_PREDICATE, + RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY, + RAPTOR_JSON_STATE_RESOURCES_OBJECT +} raptor_json_parse_state; + +typedef enum { + RAPTOR_JSON_TERM_UNKNOWN, + RAPTOR_JSON_TERM_SUBJECT, + RAPTOR_JSON_TERM_PREDICATE, + RAPTOR_JSON_TERM_OBJECT +} raptor_json_term; + +typedef enum { + RAPTOR_JSON_ATTRIB_UNKNOWN, + RAPTOR_JSON_ATTRIB_VALUE, + RAPTOR_JSON_ATTRIB_LANG, + RAPTOR_JSON_ATTRIB_TYPE, + RAPTOR_JSON_ATTRIB_DATATYPE +} raptor_json_term_attrib; + + +/* When YAJL V1 support is dropped, this can be removed */ +#ifdef HAVE_YAJL2 +#define RAPTOR_YAJL_LEN_TYPE size_t +#else +#define RAPTOR_YAJL_LEN_TYPE unsigned int +#endif + +/* + * JSON parser object + */ +struct raptor_json_parser_context_s { +#ifdef HAVE_YAJL2 +#else + yajl_parser_config config; +#endif + yajl_handle handle; + + /* Parser state */ + raptor_json_parse_state state; + raptor_json_term term; + raptor_json_term_attrib attrib; + + /* Temporary storage, while creating terms */ + raptor_term_type term_type; + unsigned char* term_value; + unsigned char* term_datatype; + unsigned char* term_lang; + + /* Temporary storage, while creating statements */ + raptor_statement statement; +}; + +typedef struct raptor_json_parser_context_s raptor_json_parser_context; + + +static void +raptor_json_reset_term(raptor_json_parser_context *context) +{ + 
if(context->term_value) + RAPTOR_FREE(char*, context->term_value); + if(context->term_lang) + RAPTOR_FREE(char*, context->term_lang); + if(context->term_datatype) + RAPTOR_FREE(char*, context->term_datatype); + + context->term_value = NULL; + context->term_lang = NULL; + context->term_datatype = NULL; + context->term_type = RAPTOR_TERM_TYPE_UNKNOWN; + context->attrib = RAPTOR_JSON_ATTRIB_UNKNOWN; +} + +static unsigned char* +raptor_json_cstring_from_counted_string(raptor_parser *rdf_parser, const unsigned char* str, RAPTOR_YAJL_LEN_TYPE len) +{ + unsigned char *cstr = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!cstr) { + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return NULL; + } + + memcpy(cstr, str, len); + cstr[len] = '\0'; + + return cstr; +} + +static raptor_term* +raptor_json_new_term_from_counted_string(raptor_parser *rdf_parser, const unsigned char* str, size_t len) +{ + raptor_term *term = NULL; + + if(len > 2 && str[0] == '_' && str[1] == ':') { + const unsigned char *node_id = &str[2]; + term = raptor_new_term_from_counted_blank(rdf_parser->world, node_id, len - 2); + + } else { + raptor_uri *uri = raptor_new_uri_from_counted_string(rdf_parser->world, str, len); + if(!uri) { + unsigned char* cstr = raptor_json_cstring_from_counted_string(rdf_parser, str, RAPTOR_BAD_CAST(int, len)); + raptor_parser_error(rdf_parser, "Could not create uri from '%s'", cstr); + RAPTOR_FREE(char*, cstr); + return NULL; + } + + term = raptor_new_term_from_uri(rdf_parser->world, uri); + raptor_free_uri(uri); + } + + return term; +} + + +static raptor_term* +raptor_json_generate_term(raptor_parser *rdf_parser) +{ + raptor_json_parser_context *context = (raptor_json_parser_context*)rdf_parser->context; + raptor_term *term = NULL; + + if(!context->term_value) { + raptor_parser_error(rdf_parser, "No value for term defined"); + return NULL; + } + + switch(context->term_type) { + case RAPTOR_TERM_TYPE_URI: { + raptor_uri *uri = raptor_new_uri(rdf_parser->world, 
context->term_value); + if(!uri) { + raptor_parser_error(rdf_parser, "Could not create uri from '%s'", context->term_value); + return NULL; + } + term = raptor_new_term_from_uri(rdf_parser->world, uri); + raptor_free_uri(uri); + break; + } + case RAPTOR_TERM_TYPE_LITERAL: { + raptor_uri *datatype_uri = NULL; + if(context->term_datatype) { + datatype_uri = raptor_new_uri(rdf_parser->world, context->term_datatype); + } + term = raptor_new_term_from_literal(rdf_parser->world, context->term_value, datatype_uri, context->term_lang); + if(datatype_uri) + raptor_free_uri(datatype_uri); + break; + } + case RAPTOR_TERM_TYPE_BLANK: { + unsigned char *node_id = context->term_value; + if(strlen((const char*)node_id) > 2 && node_id[0] == '_' && node_id[1] == ':') { + node_id = &node_id[2]; + } + term = raptor_new_term_from_blank(rdf_parser->world, node_id); + break; + } + case RAPTOR_TERM_TYPE_UNKNOWN: + raptor_parser_error(rdf_parser, "No type for term defined"); + break; + } + + return term; +} + + +static int raptor_json_yajl_null(void * ctx) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_parser_error(rdf_parser, "Nulls are not valid in RDF/JSON"); + return 0; +} + +static int raptor_json_yajl_boolean(void * ctx, int b) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_parser_error(rdf_parser, "Booleans are not valid in RDF/JSON"); + return 0; +} + +#ifdef HAVE_YAJL2 +#define YAJL_INTEGER_CALLBACK_ARG_TYPE long long +#else +#define YAJL_INTEGER_CALLBACK_ARG_TYPE long +#endif +PRAGMA_IGNORE_WARNING_LONG_LONG_START +static int raptor_json_yajl_integer(void * ctx, + YAJL_INTEGER_CALLBACK_ARG_TYPE integerVal) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_parser_error(rdf_parser, "Integers are not valid in RDF/JSON"); + return 0; +} +PRAGMA_IGNORE_WARNING_END + +static int raptor_json_yajl_double(void * ctx, double d) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_parser_error(rdf_parser, "Floats are not valid in 
RDF/JSON"); + return 0; +} + +static int raptor_json_yajl_string(void * ctx, const unsigned char * str, + RAPTOR_YAJL_LEN_TYPE len) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->state == RAPTOR_JSON_STATE_TRIPLES_TERM || + context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT) { + switch(context->attrib) { + case RAPTOR_JSON_ATTRIB_VALUE: + context->term_value = raptor_json_cstring_from_counted_string(rdf_parser, str, len); + break; + case RAPTOR_JSON_ATTRIB_LANG: + context->term_lang = raptor_json_cstring_from_counted_string(rdf_parser, str, len); + break; + case RAPTOR_JSON_ATTRIB_TYPE: + if(!strncmp((const char*)str, "uri", len)) { + context->term_type = RAPTOR_TERM_TYPE_URI; + } else if(!strncmp((const char*)str, "literal", len)) { + context->term_type = RAPTOR_TERM_TYPE_LITERAL; + } else if(!strncmp((const char*)str, "bnode", len)) { + context->term_type = RAPTOR_TERM_TYPE_BLANK; + } else { + unsigned char * cstr = raptor_json_cstring_from_counted_string(rdf_parser, str, len); + context->term_type = RAPTOR_TERM_TYPE_UNKNOWN; + raptor_parser_error(rdf_parser, "Unknown term type: %s", cstr); + RAPTOR_FREE(char*, cstr); + } + break; + case RAPTOR_JSON_ATTRIB_DATATYPE: + context->term_datatype = raptor_json_cstring_from_counted_string(rdf_parser, str, len); + break; + case RAPTOR_JSON_ATTRIB_UNKNOWN: + default: + raptor_parser_error(rdf_parser, "Unsupported term attribute in raptor_json_string"); + break; + } + } else { + raptor_parser_error(rdf_parser, "Unexpected JSON string"); + return 0; + } + return 1; +} + +static int raptor_json_yajl_map_key(void * ctx, const unsigned char * str, + RAPTOR_YAJL_LEN_TYPE len) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->state == RAPTOR_JSON_STATE_MAP_ROOT) { + if(!strncmp((const 
char*)str, "triples", len)) { + context->state = RAPTOR_JSON_STATE_TRIPLES_KEY; + return 1; + } else { + if(context->statement.subject) + raptor_free_term(context->statement.subject); + context->statement.subject = raptor_json_new_term_from_counted_string(rdf_parser, str, len); + if(!context->statement.subject) + return 0; + context->state = RAPTOR_JSON_STATE_RESOURCES_SUBJECT_KEY; + return 1; + } + } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_PREDICATE) { + if(context->statement.predicate) + raptor_free_term(context->statement.predicate); + context->statement.predicate = raptor_json_new_term_from_counted_string(rdf_parser, str, len); + if(!context->statement.predicate) + return 0; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TRIPLE) { + if(!strncmp((const char*)str, "subject", len)) { + context->term = RAPTOR_JSON_TERM_SUBJECT; + return 1; + } else if(!strncmp((const char*)str, "predicate", len)) { + context->term = RAPTOR_JSON_TERM_PREDICATE; + return 1; + } else if(!strncmp((const char*)str, "object", len)) { + context->term = RAPTOR_JSON_TERM_OBJECT; + return 1; + } else { + raptor_parser_error(rdf_parser, "Unexpected JSON key name in triple definition"); + return 0; + } + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TERM || + context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT) { + if(!strncmp((const char*)str, "value", len)) { + context->attrib = RAPTOR_JSON_ATTRIB_VALUE; + return 1; + } else if(!strncmp((const char*)str, "type", len)) { + context->attrib = RAPTOR_JSON_ATTRIB_TYPE; + return 1; + } else if(!strncmp((const char*)str, "datatype", len)) { + context->attrib = RAPTOR_JSON_ATTRIB_DATATYPE; + return 1; + } else if(!strncmp((const char*)str, "lang", len)) { + context->attrib = RAPTOR_JSON_ATTRIB_LANG; + return 1; + } else { + context->attrib = RAPTOR_JSON_ATTRIB_UNKNOWN; + raptor_parser_error(rdf_parser, "Unexpected key name in triple definition"); + return 0; + } + } else { + raptor_parser_error(rdf_parser, 
"Unexpected JSON map key"); + return 0; + } +} + +static int raptor_json_yajl_start_map(void * ctx) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->state == RAPTOR_JSON_STATE_ROOT) { + context->state = RAPTOR_JSON_STATE_MAP_ROOT; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_SUBJECT_KEY) { + context->state = RAPTOR_JSON_STATE_RESOURCES_PREDICATE; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY) { + context->state = RAPTOR_JSON_STATE_RESOURCES_OBJECT; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_ARRAY) { + raptor_statement_clear(&context->statement); + context->term = RAPTOR_JSON_TERM_UNKNOWN; + context->state = RAPTOR_JSON_STATE_TRIPLES_TRIPLE; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TRIPLE) { + context->state = RAPTOR_JSON_STATE_TRIPLES_TERM; + raptor_json_reset_term(context); + return 1; + } else { + raptor_parser_error(rdf_parser, "Unexpected start of JSON map"); + return 0; + } +} + + +static int raptor_json_yajl_end_map(void * ctx) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT) { + context->statement.object = raptor_json_generate_term(rdf_parser); + if(!context->statement.object) + return 0; + + /* Generate the statement */ + (*rdf_parser->statement_handler)(rdf_parser->user_data, &context->statement); + + raptor_free_term(context->statement.object); + context->statement.object = NULL; + raptor_json_reset_term(context); + + context->state = RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_RESOURCES_PREDICATE) { + context->state = RAPTOR_JSON_STATE_MAP_ROOT; + return 1; + } else if(context->state == 
RAPTOR_JSON_STATE_TRIPLES_TERM) { + raptor_term *term = raptor_json_generate_term(rdf_parser); + if(!term) + return 0; + + /* Store the term in the statement */ + switch(context->term) { + case RAPTOR_JSON_TERM_SUBJECT: + if(context->statement.subject) + raptor_free_term(context->statement.subject); + context->statement.subject = term; + break; + case RAPTOR_JSON_TERM_PREDICATE: + if(context->statement.predicate) + raptor_free_term(context->statement.predicate); + context->statement.predicate = term; + break; + case RAPTOR_JSON_TERM_OBJECT: + if(context->statement.object) + raptor_free_term(context->statement.object); + context->statement.object = term; + break; + case RAPTOR_JSON_TERM_UNKNOWN: + default: + raptor_parser_error(rdf_parser, "Unknown term in raptor_json_end_map"); + break; + } + + context->state = RAPTOR_JSON_STATE_TRIPLES_TRIPLE; + raptor_json_reset_term(context); + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_TRIPLE) { + if(!context->statement.subject) { + raptor_parser_error(rdf_parser, "Triple is missing a subject term"); + return 0; + } else if(!context->statement.predicate) { + raptor_parser_error(rdf_parser, "Triple is missing a predicate term"); + return 0; + } else if(!context->statement.object) { + raptor_parser_error(rdf_parser, "Triple is missing a object term"); + return 0; + } else { + /* Generate the statement */ + (*rdf_parser->statement_handler)(rdf_parser->user_data, &context->statement); + } + raptor_statement_clear(&context->statement); + context->state = RAPTOR_JSON_STATE_TRIPLES_ARRAY; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_KEY) { + context->state = RAPTOR_JSON_STATE_MAP_ROOT; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_MAP_ROOT) { + context->state = RAPTOR_JSON_STATE_ROOT; + return 1; + } else { + raptor_parser_error(rdf_parser, "Unexpected end of JSON map"); + return 0; + } +} + +static int raptor_json_yajl_start_array(void * ctx) +{ + raptor_parser* rdf_parser = 
(raptor_parser*)ctx; + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->state == RAPTOR_JSON_STATE_RESOURCES_PREDICATE) { + context->state = RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_KEY) { + context->state = RAPTOR_JSON_STATE_TRIPLES_ARRAY; + return 1; + } else { + raptor_parser_error(rdf_parser, "Unexpected start of array"); + return 0; + } +} + +static int raptor_json_yajl_end_array(void * ctx) +{ + raptor_parser* rdf_parser = (raptor_parser*)ctx; + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->state == RAPTOR_JSON_STATE_RESOURCES_OBJECT_ARRAY) { + context->state = RAPTOR_JSON_STATE_RESOURCES_PREDICATE; + return 1; + } else if(context->state == RAPTOR_JSON_STATE_TRIPLES_ARRAY) { + context->state = RAPTOR_JSON_STATE_MAP_ROOT; + return 1; + } else { + raptor_parser_error(rdf_parser, "Unexpected end of array"); + return 0; + } +} + + +static void* +raptor_json_yajl_malloc(void *ctx, RAPTOR_YAJL_LEN_TYPE sz) +{ + return RAPTOR_MALLOC(void*, sz); +} + +static void* +raptor_json_yajl_realloc(void *ctx, void * ptr, RAPTOR_YAJL_LEN_TYPE sz) +{ + return RAPTOR_REALLOC(void*, ptr, sz); +} + +static void +raptor_json_yajl_free(void *ctx, void * ptr) +{ + RAPTOR_FREE(char*, ptr); +} + +static yajl_alloc_funcs raptor_json_yajl_alloc_funcs = { + raptor_json_yajl_malloc, + raptor_json_yajl_realloc, + raptor_json_yajl_free, + NULL +}; + +static yajl_callbacks raptor_json_yajl_callbacks = { + raptor_json_yajl_null, + raptor_json_yajl_boolean, + raptor_json_yajl_integer, + raptor_json_yajl_double, + NULL, + raptor_json_yajl_string, + raptor_json_yajl_start_map, + raptor_json_yajl_map_key, + raptor_json_yajl_end_map, + raptor_json_yajl_start_array, + raptor_json_yajl_end_array +}; + + + +/** + * raptor_json_parse_init: + * + * Initialise the Raptor JSON parser. 
+ * + * Return value: non 0 on failure + **/ + +static int +raptor_json_parse_init(raptor_parser* rdf_parser, const char *name) +{ + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + /* Initialse the static statement */ + raptor_statement_init(&context->statement, rdf_parser->world); + + /* Configure the parser */ +#ifdef HAVE_YAJL2 +#else + context->config.allowComments = 1; + context->config.checkUTF8 = 0; +#endif + + return 0; +} + + +/* + * raptor_json_parse_terminate - Free the Raptor JSON parser + * @rdf_parser: parser object + * + **/ +static void +raptor_json_parse_terminate(raptor_parser* rdf_parser) +{ + raptor_json_parser_context *context; + context = (raptor_json_parser_context*)rdf_parser->context; + + if(context->handle) + yajl_free(context->handle); + + raptor_json_reset_term(context); + raptor_statement_clear(&context->statement); +} + + + +static int +raptor_json_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_json_parser_context *context = (raptor_json_parser_context*)rdf_parser->context; + yajl_status status; + int result = 0; + + if(len) { + /* Parse the chunk passed to us */ + status = yajl_parse(context->handle, s, RAPTOR_BAD_CAST(int, len)); + + if(status != yajl_status_ok +#ifdef HAVE_YAJL2 +#else + && status != yajl_status_insufficient_data +#endif + ) + { + unsigned char * str = yajl_get_error(context->handle, 1, s, RAPTOR_BAD_CAST(int, len)); + raptor_parser_error(rdf_parser, "YAJL error: %s", (const char *) str); + result = 1; + yajl_free_error(context->handle, str); + } + } + + if(is_end) { + /* parse any remaining buffered data */ +#ifdef HAVE_YAJL2 +#else +#define yajl_complete_parse(h) yajl_parse_complete(h) +#endif + status = yajl_complete_parse(context->handle); + + if(status != yajl_status_ok) + { + unsigned char * str = yajl_get_error(context->handle, 0, NULL, 0); + raptor_parser_error(rdf_parser, "YAJL error: %s", (const 
char *) str); + result = 1; + yajl_free_error(context->handle, str); + } + + raptor_json_reset_term(context); + raptor_statement_clear(&context->statement); + } + + return result; +} + + +static int +raptor_json_parse_start(raptor_parser* rdf_parser) +{ + raptor_json_parser_context *context = (raptor_json_parser_context*)rdf_parser->context; + + /* Destroy the old parser */ + if(context->handle) + yajl_free(context->handle); + + /* Initialise a new parser */ + context->handle = yajl_alloc( + &raptor_json_yajl_callbacks, +#ifdef HAVE_YAJL2 +#else + &context->config, +#endif + &raptor_json_yajl_alloc_funcs, + (void *)rdf_parser + ); + + if(!context->handle) { + raptor_parser_fatal_error(rdf_parser, "Failed to initialise YAJL parser"); + return 1; + } + + /* Initialise the parse state */ +#ifdef HAVE_YAJL2 + yajl_config(context->handle, yajl_allow_comments, 1); + yajl_config(context->handle, yajl_dont_validate_strings, 1); +#else +#endif + + context->state = RAPTOR_JSON_STATE_ROOT; + raptor_json_reset_term(context); + raptor_statement_clear(&context->statement); + + return 0; +} + + +static int +raptor_json_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + unsigned int pos = 0; + int score = 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "json")) + score = 8; + if(!strcmp((const char*)suffix, "js")) + score = 3; + } else if(identifier) { + if(strstr((const char*)identifier, "json")) + score = 4; + } + + if(mime_type && strstr((const char*)mime_type, "json")) + score += 6; + + /* Is the first non-whitespace character a curly brace? 
*/ + while(pos<len) { + if(isspace(buffer[pos])) { + pos++; + } else { + if(buffer[pos] == '{') + score += 2; + break; + } + } + + return score; +} + + +static const char* const json_names[2] = { "json", NULL }; + +static const char* const json_uri_strings[2] = { + "http://docs.api.talis.com/platform-api/output-types/rdf-json", + NULL +}; + +#define JSON_TYPES_COUNT 2 +static const raptor_type_q json_types[JSON_TYPES_COUNT + 1] = { + { "application/json", 16, 1}, + { "text/json", 9, 1}, + { NULL, 0, 0} +}; + +static int +raptor_json_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = json_names; + + factory->desc.mime_types = json_types; + + factory->desc.label = "RDF/JSON (either Triples or Resource-Centric)"; + factory->desc.uri_strings = json_uri_strings; + + factory->desc.flags = 0; + + factory->context_length = sizeof(raptor_json_parser_context); + + factory->init = raptor_json_parse_init; + factory->terminate = raptor_json_parse_terminate; + factory->start = raptor_json_parse_start; + factory->chunk = raptor_json_parse_chunk; + factory->recognise_syntax = raptor_json_parse_recognise_syntax; + + return rc; +} + + +int +raptor_init_parser_json(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_json_parser_register_factory); +} diff --git a/src/raptor_json_writer.c b/src/raptor_json_writer.c new file mode 100644 index 0000000..dd57be0 --- /dev/null +++ b/src/raptor_json_writer.c @@ -0,0 +1,345 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_json_writer.c - Raptor JSON Writer + * + * Copyright (C) 2008, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif + + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#ifndef STANDALONE + +struct raptor_json_writer_s { + raptor_world* world; + + raptor_uri* base_uri; + + /* outputting to this iostream */ + raptor_iostream *iostr; + + /* current indent */ + int indent; + + /* indent step */ + int indent_step; +}; + + + +/** + * raptor_new_json_writer: + * @world: raptor_world object + * @base_uri: Base URI for the writer + * @iostr: I/O stream to write to + * + * INTERNAL - Constructor - Create a new JSON writer writing to a raptor_iostream + * + * Return value: a new #raptor_json_writer object or NULL on failure + **/ +raptor_json_writer* +raptor_new_json_writer(raptor_world* world, + raptor_uri* base_uri, + raptor_iostream* iostr) +{ + raptor_json_writer* json_writer; + + json_writer = RAPTOR_CALLOC(raptor_json_writer*, 1, sizeof(*json_writer)); + + if(!json_writer) + return NULL; + + json_writer->world = world; + json_writer->iostr = iostr; + json_writer->base_uri = base_uri; + + json_writer->indent_step = 2; + + return json_writer; +} + + +/** + * raptor_free_json_writer: + * @json_writer: JSON writer object + * + * INTERNAL - Destructor - Free JSON Writer + * + **/ +void +raptor_free_json_writer(raptor_json_writer* json_writer) +{ + 
RAPTOR_ASSERT_OBJECT_POINTER_RETURN(json_writer, raptor_json_writer); + + RAPTOR_FREE(raptor_json_writer, json_writer); +} + + +static int +raptor_json_writer_quoted(raptor_json_writer* json_writer, + const char *value, size_t value_len) +{ + int rc = 0; + + if(!value) { + raptor_iostream_counted_string_write("\"\"", 2, json_writer->iostr); + return 0; + } + + raptor_iostream_write_byte('\"', json_writer->iostr); + rc = raptor_string_escaped_write((const unsigned char*)value, value_len, + '"', RAPTOR_ESCAPED_WRITE_JSON_LITERAL, + json_writer->iostr); + raptor_iostream_write_byte('\"', json_writer->iostr); + + return rc; +} + + +static int +raptor_json_writer_spaces(raptor_json_writer* json_writer, int depth) +{ + int i; + for(i = 0; i < depth; i++) + raptor_iostream_write_byte(' ', json_writer->iostr); + return 0; +} + + +int +raptor_json_writer_newline(raptor_json_writer* json_writer) +{ + raptor_iostream_write_byte('\n', json_writer->iostr); + if(json_writer->indent) + raptor_json_writer_spaces(json_writer, json_writer->indent); + return 0; +} + + +int +raptor_json_writer_key_value(raptor_json_writer* json_writer, + const char* key, size_t key_len, + const char* value, size_t value_len) +{ + if(!key_len && key) + key_len = strlen(key); + if(!value_len && value) + value_len = strlen(value); + + raptor_json_writer_quoted(json_writer, key, key_len); + raptor_iostream_counted_string_write(" : ", 3, json_writer->iostr); + raptor_json_writer_quoted(json_writer, value, value_len); + + return 0; +} + + +int +raptor_json_writer_key_uri_value(raptor_json_writer* json_writer, + const char* key, size_t key_len, + raptor_uri* uri) +{ + const char* value; + size_t value_len; + int rc = 0; + + value = (const char*)raptor_uri_to_relative_counted_uri_string(json_writer->base_uri, uri, &value_len); + if(!value) + return 1; + + if(key) + rc = raptor_json_writer_key_value(json_writer, key, key_len, + value, value_len); + else + rc = raptor_json_writer_quoted(json_writer, value, 
value_len); + + RAPTOR_FREE(char*, value); + + return rc; +} + + +int +raptor_json_writer_start_block(raptor_json_writer* json_writer, char c) +{ + json_writer->indent += json_writer->indent_step; + raptor_iostream_write_byte(c, json_writer->iostr); + return 0; +} + + +int +raptor_json_writer_end_block(raptor_json_writer* json_writer, char c) +{ + raptor_iostream_write_byte(c, json_writer->iostr); + json_writer->indent -= json_writer->indent_step; + return 0; +} + + +int +raptor_json_writer_literal_object(raptor_json_writer* json_writer, + unsigned char* s, size_t s_len, + unsigned char* lang, + raptor_uri* datatype) +{ + raptor_json_writer_start_block(json_writer, '{'); + raptor_json_writer_newline(json_writer); + + raptor_iostream_counted_string_write("\"value\" : ", 10, json_writer->iostr); + + raptor_json_writer_quoted(json_writer, (const char*)s, s_len); + + if(datatype || lang) { + raptor_iostream_write_byte(',', json_writer->iostr); + raptor_json_writer_newline(json_writer); + + if(datatype) + raptor_json_writer_key_uri_value(json_writer, "datatype", 8, datatype); + + if(lang) { + if(datatype) { + raptor_iostream_write_byte(',', json_writer->iostr); + raptor_json_writer_newline(json_writer); + } + + raptor_json_writer_key_value(json_writer, "lang", 4, + (const char*)lang, 0); + } + } + + raptor_iostream_write_byte(',', json_writer->iostr); + raptor_json_writer_newline(json_writer); + + raptor_json_writer_key_value(json_writer, "type", 4, "literal", 7); + + raptor_json_writer_newline(json_writer); + + raptor_json_writer_end_block(json_writer, '}'); + raptor_json_writer_newline(json_writer); + + return 0; +} + + +int +raptor_json_writer_blank_object(raptor_json_writer* json_writer, + const unsigned char* blank, + size_t blank_len) +{ + raptor_json_writer_start_block(json_writer, '{'); + raptor_json_writer_newline(json_writer); + + raptor_iostream_counted_string_write("\"value\" : \"_:", 13, + json_writer->iostr); + raptor_iostream_counted_string_write((const 
char*)blank, blank_len, + json_writer->iostr); + raptor_iostream_counted_string_write("\",", 2, json_writer->iostr); + raptor_json_writer_newline(json_writer); + + raptor_iostream_counted_string_write("\"type\" : \"bnode\"", 16, + json_writer->iostr); + raptor_json_writer_newline(json_writer); + + raptor_json_writer_end_block(json_writer, '}'); + return 0; +} + + +int +raptor_json_writer_uri_object(raptor_json_writer* json_writer, + raptor_uri* uri) +{ + raptor_json_writer_start_block(json_writer, '{'); + raptor_json_writer_newline(json_writer); + + raptor_json_writer_key_uri_value(json_writer, "value", 5, uri); + raptor_iostream_write_byte(',', json_writer->iostr); + raptor_json_writer_newline(json_writer); + + raptor_iostream_counted_string_write("\"type\" : \"uri\"", 14, + json_writer->iostr); + raptor_json_writer_newline(json_writer); + + raptor_json_writer_end_block(json_writer, '}'); + + return 0; +} + + +int +raptor_json_writer_term(raptor_json_writer* json_writer, + raptor_term *term) +{ + int rc = 0; + + switch(term->type) { + case RAPTOR_TERM_TYPE_URI: + rc = raptor_json_writer_uri_object(json_writer, term->value.uri); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + rc = raptor_json_writer_literal_object(json_writer, + term->value.literal.string, + term->value.literal.string_len, + term->value.literal.language, + term->value.literal.datatype); + break; + + case RAPTOR_TERM_TYPE_BLANK: + rc = raptor_json_writer_blank_object(json_writer, + term->value.blank.string, + term->value.blank.string_len); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(json_writer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, + "Triple has unsupported term type %u", + term->type); + rc = 1; + break; + } + + return rc; +} + +#endif diff --git a/src/raptor_librdfa.c b/src/raptor_librdfa.c new file mode 100644 index 0000000..60eded0 --- /dev/null +++ b/src/raptor_librdfa.c @@ -0,0 +1,398 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_librdfa.c - 
/*
 * RDFA parser object
 *
 * Per-parser state for the librdfa based RDFa parser.
 */
struct raptor_librdfa_parser_context_s {
  /* librdfa object: created by rdfa_create_context() when a parse starts
   * and released with rdfa_free_context() when the parser terminates
   */
  rdfacontext* context;

  /* static statement for use in passing to user code
   * NOTE(review): the functions in this file build statements in the
   * raptor_parser's own statement field; confirm whether this one is used
   */
  raptor_statement statement;

  /* 10 for 1.0 11 for 1.1 or otherwise is default (== 1.1).
   * Chosen from the parser name ("rdfa10" / "rdfa11") at init time and
   * copied into the librdfa context when a parse starts.
   */
  int rdfa_version;
};
"rdfa10")) + rdfa_version = 10; + + librdfa_parser->rdfa_version = rdfa_version; + + return 0; +} + + +static void +raptor_librdfa_parse_terminate(raptor_parser* rdf_parser) +{ + raptor_librdfa_parser_context *librdfa_parser; + + librdfa_parser = (raptor_librdfa_parser_context*)rdf_parser->context; + + if(librdfa_parser->context) { + rdfa_parse_end(librdfa_parser->context); + rdfa_free_context(librdfa_parser->context); + librdfa_parser->context = NULL; + } +} + + +static void +raptor_librdfa_generate_statement(rdftriple* triple, void* callback_data) +{ + raptor_parser* parser = (raptor_parser*)callback_data; + raptor_statement *s = &parser->statement; + raptor_term *subject_term = NULL; + raptor_term *predicate_term = NULL; + raptor_uri *predicate_uri = NULL; + raptor_term *object_term = NULL; + + if(!parser->emitted_default_graph) { + raptor_parser_start_graph(parser, NULL, 0); + parser->emitted_default_graph++; + } + + if(!parser->statement_handler) + goto cleanup; + + if(!triple->subject || !triple->predicate || !triple->object) { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL1("Triple has NULL parts\n"); +#else + rdfa_free_triple(triple); + return; +#endif + } + + if(triple->predicate[0] == '_') { + raptor_parser_warning(parser, + "Ignoring RDFa triple with blank node predicate %s.", + triple->predicate); + rdfa_free_triple(triple); + return; + } + + if(triple->object_type == RDF_TYPE_NAMESPACE_PREFIX) { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL1("Triple has namespace object type\n"); +#else + rdfa_free_triple(triple); + return; +#endif + } + + if((triple->subject[0] == '_') && (triple->subject[1] == ':')) { + subject_term = raptor_new_term_from_blank(parser->world, + (const unsigned char*)triple->subject + 2); + } else { + raptor_uri* subject_uri; + + subject_uri = raptor_new_uri(parser->world, + (const unsigned char*)triple->subject); + subject_term = raptor_new_term_from_uri(parser->world, subject_uri); + raptor_free_uri(subject_uri); + subject_uri = NULL; + } + s->subject = 
subject_term; + + + predicate_uri = raptor_new_uri(parser->world, + (const unsigned char*)triple->predicate); + if(!predicate_uri) + goto cleanup; + + predicate_term = raptor_new_term_from_uri(parser->world, predicate_uri); + raptor_free_uri(predicate_uri); + predicate_uri = NULL; + s->predicate = predicate_term; + + + if(triple->object_type == RDF_TYPE_IRI) { + if((triple->object[0] == '_') && (triple->object[1] == ':')) { + object_term = raptor_new_term_from_blank(parser->world, + (const unsigned char*)triple->object + 2); + } else { + raptor_uri* object_uri; + object_uri = raptor_new_uri(parser->world, + (const unsigned char*)triple->object); + if(!object_uri) + goto cleanup; + + object_term = raptor_new_term_from_uri(parser->world, object_uri); + raptor_free_uri(object_uri); + } + } else if(triple->object_type == RDF_TYPE_PLAIN_LITERAL) { + object_term = raptor_new_term_from_literal(parser->world, + (const unsigned char*)triple->object, + NULL, + (const unsigned char*)triple->language); + + } else if(triple->object_type == RDF_TYPE_XML_LITERAL) { + raptor_uri* datatype_uri; + datatype_uri = raptor_new_uri_from_counted_string(parser->world, + (const unsigned char*)raptor_xml_literal_datatype_uri_string, + raptor_xml_literal_datatype_uri_string_len); + object_term = raptor_new_term_from_literal(parser->world, + (const unsigned char*)triple->object, + datatype_uri, + NULL); + raptor_free_uri(datatype_uri); + } else if(triple->object_type == RDF_TYPE_TYPED_LITERAL) { + raptor_uri *datatype_uri = NULL; + const unsigned char* language = (const unsigned char*)triple->language; + + if(triple->datatype) { + /* If datatype, no language allowed */ + language = NULL; + datatype_uri = raptor_new_uri(parser->world, + (const unsigned char*)triple->datatype); + if(!datatype_uri) + goto cleanup; + } + + object_term = raptor_new_term_from_literal(parser->world, + (const unsigned char*)triple->object, + datatype_uri, + language); + raptor_free_uri(datatype_uri); + } else { + 
raptor_log_error_formatted(parser->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Triple has unknown object term type %u", + s->object->type); + goto cleanup; + } + s->object = object_term; + + /* Generate statement */ + (*parser->statement_handler)(parser->user_data, s); + + cleanup: + rdfa_free_triple(triple); + + if(subject_term) + raptor_free_term(subject_term); + if(predicate_term) + raptor_free_term(predicate_term); + if(object_term) + raptor_free_term(object_term); +} + + +static void +raptor_librdfa_sax2_new_namespace_handler(void *user_data, + raptor_namespace* nspace) +{ + raptor_parser* rdf_parser; + rdf_parser = (raptor_parser*)user_data; + raptor_parser_start_namespace(rdf_parser, nspace); +} + + + +static int +raptor_librdfa_parse_start(raptor_parser* rdf_parser) +{ + raptor_locator *locator = &rdf_parser->locator; + raptor_librdfa_parser_context *librdfa_parser; + int rc; + char* base_uri_string = NULL; + + librdfa_parser = (raptor_librdfa_parser_context*)rdf_parser->context; + + locator->line = -1; + locator->column = -1; + locator->byte = 0; + + if(rdf_parser->base_uri) + base_uri_string = (char*)raptor_uri_as_string(rdf_parser->base_uri); + else + /* base URI is required for rdfa - checked in rdfa_create_context() */ + return 1; + + if(librdfa_parser->context) + rdfa_free_context(librdfa_parser->context); + librdfa_parser->context = rdfa_create_context(base_uri_string); + if(!librdfa_parser->context) + return 1; + + librdfa_parser->context->namespace_handler = raptor_librdfa_sax2_new_namespace_handler; + librdfa_parser->context->namespace_handler_user_data = rdf_parser; + librdfa_parser->context->world = rdf_parser->world; + librdfa_parser->context->locator = &rdf_parser->locator; + + librdfa_parser->context->callback_data = rdf_parser; + /* returns triples */ + rdfa_set_default_graph_triple_handler(librdfa_parser->context, + raptor_librdfa_generate_statement); + + /* returns RDFa Processing Graph error triples - not used by raptor */ + 
rdfa_set_processor_graph_triple_handler(librdfa_parser->context, NULL); + + librdfa_parser->context->raptor_rdfa_version = librdfa_parser->rdfa_version; + + rc = rdfa_parse_start(librdfa_parser->context); + if(rc != RDFA_PARSE_SUCCESS) + return 1; + + return 0; +} + + +static int +raptor_librdfa_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_librdfa_parser_context *librdfa_parser; + int rval; + + librdfa_parser = (raptor_librdfa_parser_context*)rdf_parser->context; + rval = rdfa_parse_chunk(librdfa_parser->context, (char*)s, len, is_end); + + if(is_end) { + if(rdf_parser->emitted_default_graph) { + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + } + + return rval != RDFA_PARSE_SUCCESS; +} + +static int +raptor_librdfa_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score = 0; + + if(identifier) { + if(strstr((const char*)identifier, "RDFa")) + score = 10; + } + + if(buffer && len) { +#define HAS_RDFA_1 (raptor_memstr((const char*)buffer, len, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL) +#define HAS_RDFA_2 (raptor_memstr((const char*)buffer, len, "http://www.w3.org/MarkUp/DTD/xhtml-rdfa-1.dtd") != NULL) + + if(HAS_RDFA_1 || HAS_RDFA_2) + score = 10; + } + + return score; +} + + +static const char* const rdfa_names[4] = { "rdfa", "rdfa11", "rdfa10", NULL }; + +static const char* const rdfa_uri_strings[3] = { + "http://www.w3.org/ns/formats/RDFa", + "http://www.w3.org/TR/rdfa/", + NULL +}; + +#define RDFA_TYPES_COUNT 2 +static const raptor_type_q html_types[RDFA_TYPES_COUNT + 1] = { + { "text/html", 9, 6}, + { "application/xhtml+xml", 21, 8}, + { NULL, 0, 0} +}; + +static int +raptor_librdfa_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = rdfa_names; + + 
factory->desc.mime_types = html_types; + + factory->desc.label = "RDF/A via librdfa"; + factory->desc.uri_strings = rdfa_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_librdfa_parser_context); + + factory->init = raptor_librdfa_parse_init; + factory->terminate = raptor_librdfa_parse_terminate; + factory->start = raptor_librdfa_parse_start; + factory->chunk = raptor_librdfa_parse_chunk; + factory->recognise_syntax = raptor_librdfa_parse_recognise_syntax; + + return rc; +} + + +int +raptor_init_parser_rdfa(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_librdfa_parser_register_factory); +} diff --git a/src/raptor_libxml.c b/src/raptor_libxml.c new file mode 100644 index 0000000..538c2c8 --- /dev/null +++ b/src/raptor_libxml.c @@ -0,0 +1,889 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_libxml.c - Raptor libxml functions + * + * Copyright (C) 2000-2009, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifdef RAPTOR_XML_LIBXML + + +/* prototypes */ +static void raptor_libxml_warning(void* user_data, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); +static void raptor_libxml_error_common(void* user_data, const char *msg, va_list args, const char *prefix, int is_fatal) RAPTOR_PRINTF_FORMAT(2, 0); +static void raptor_libxml_error(void *context, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); +static void raptor_libxml_fatal_error(void *context, const char *msg, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +static void raptor_libxml_xmlStructuredError_handler_global(void *user_data, xmlErrorPtr err); +static void raptor_libxml_xmlStructuredError_handler_parsing(void *user_data, xmlErrorPtr err); + + + +static const char* const xml_warning_prefix="XML parser warning - "; +static const char* const xml_error_prefix="XML parser error - "; +static const char* const xml_generic_error_prefix="XML error - "; +static const char* const xml_fatal_error_prefix="XML parser fatal error - "; +static const char* const xml_validation_error_prefix="XML parser validation error - "; +static const char* const xml_validation_warning_prefix="XML parser validation warning - "; + + +#ifdef HAVE_XMLSAX2INTERNALSUBSET +/* SAX2 - 2.6.0 or later */ +#define libxml2_internalSubset xmlSAX2InternalSubset +#define libxml2_externalSubset xmlSAX2ExternalSubset +#define libxml2_isStandalone xmlSAX2IsStandalone +#define libxml2_hasInternalSubset xmlSAX2HasInternalSubset +#define libxml2_hasExternalSubset xmlSAX2HasExternalSubset +#define libxml2_resolveEntity xmlSAX2ResolveEntity +#define libxml2_getEntity xmlSAX2GetEntity +#define libxml2_getParameterEntity 
xmlSAX2GetParameterEntity +#define libxml2_entityDecl xmlSAX2EntityDecl +#define libxml2_unparsedEntityDecl xmlSAX2UnparsedEntityDecl +#define libxml2_startDocument xmlSAX2StartDocument +#define libxml2_endDocument xmlSAX2EndDocument +#else +/* SAX1 - before libxml2 2.6.0 */ +#define libxml2_internalSubset internalSubset +#define libxml2_externalSubset externalSubset +#define libxml2_isStandalone isStandalone +#define libxml2_hasInternalSubset hasInternalSubset +#define libxml2_hasExternalSubset hasExternalSubset +#define libxml2_resolveEntity resolveEntity +#define libxml2_getEntity getEntity +#define libxml2_getParameterEntity getParameterEntity +#define libxml2_entityDecl entityDecl +#define libxml2_unparsedEntityDecl unparsedEntityDecl +#define libxml2_startDocument startDocument +#define libxml2_endDocument endDocument +#endif + + +static void +raptor_libxml_internalSubset(void* user_data, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) { + raptor_sax2* sax2 = (raptor_sax2*)user_data; + libxml2_internalSubset(sax2->xc, name, ExternalID, SystemID); +} + + +#ifdef RAPTOR_LIBXML_XMLSAXHANDLER_EXTERNALSUBSET +static void +raptor_libxml_externalSubset(void* user_data, const xmlChar *name, + const xmlChar *ExternalID, const xmlChar *SystemID) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + libxml2_externalSubset(sax2->xc, name, ExternalID, SystemID); +} +#endif + + +static int +raptor_libxml_isStandalone (void* user_data) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + return libxml2_isStandalone(sax2->xc); +} + + +static int +raptor_libxml_hasInternalSubset (void* user_data) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + return libxml2_hasInternalSubset(sax2->xc); +} + + +static int +raptor_libxml_hasExternalSubset (void* user_data) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + return libxml2_hasExternalSubset(sax2->xc); +} + + +static xmlParserInputPtr +raptor_libxml_resolveEntity(void* user_data, + const xmlChar 
/*
 * raptor_libxml_getEntity - SAX getEntity callback
 * @user_data: the raptor_sax2 object
 * @name: entity name to look up
 *
 * Looks the entity up in the predefined set (only when not inside a
 * subset) and then in the document held by the libxml parser context.
 * If the result is an external general parsed entity with no children
 * yet, its content is loaded only when the
 * RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES option is set and
 * raptor_sax2_check_load_uri_string() accepts the entity URI;
 * otherwise an empty text node is attached in its place.
 *
 * Return value: the entity, or NULL if there is no parser context or
 * parsing the external entity failed
 */
static xmlEntityPtr
raptor_libxml_getEntity(void* user_data, const xmlChar *name)
{
  raptor_sax2* sax2 = (raptor_sax2*)user_data;
  xmlParserCtxtPtr xc = sax2->xc;
  xmlEntityPtr ret = NULL;

  if(!xc)
    return NULL;

  if(!xc->inSubset) {
    /* looks for hardcoded set of entity names - lt, gt etc. */
    ret = xmlGetPredefinedEntity(name);
    if(ret) {
      RAPTOR_DEBUG2("Entity '%s' found in predefined set\n", name);
      return ret;
    }
  }

  /* This section uses xmlGetDocEntity which looks for entities in
   * memory only, never from a file or URI
   */
  if(xc->myDoc && (xc->myDoc->standalone == 1)) {
    RAPTOR_DEBUG2("Entity '%s' document is standalone\n", name);
    /* Document is standalone: no entities are required to interpret doc */
    if(xc->inSubset == 2) {
      /* the standalone flag is cleared only for the duration of the
       * lookup and then restored
       * NOTE(review): presumably so the lookup is not restricted by the
       * standalone declaration - confirm against libxml2
       */
      xc->myDoc->standalone = 0;
      ret = xmlGetDocEntity(xc->myDoc, name);
      xc->myDoc->standalone = 1;
    } else {
      /* try with the standalone flag set first, then retry with it
       * temporarily cleared if nothing was found
       */
      ret = xmlGetDocEntity(xc->myDoc, name);
      if(!ret) {
        xc->myDoc->standalone = 0;
        ret = xmlGetDocEntity(xc->myDoc, name);
        xc->myDoc->standalone = 1;
      }
    }
  } else {
    ret = xmlGetDocEntity(xc->myDoc, name);
  }

  if(ret && !ret->children &&
     (ret->etype == XML_EXTERNAL_GENERAL_PARSED_ENTITY)) {
    /* Entity is an external general parsed entity. It may be in a
     * catalog file, user file or user URI
     */
    int val = 0;
    xmlNodePtr children;
    int load_entity = 0;

    /* loading needs both the option and an accepted entity URI */
    load_entity = RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES);
    if(load_entity)
      load_entity = raptor_sax2_check_load_uri_string(sax2, ret->URI);

    if(!load_entity) {
      RAPTOR_DEBUG2("Not getting entity URI %s by policy\n", ret->URI);
      /* substitute empty content; val stays 0 so it is attached below */
      children = xmlNewText((const xmlChar*)"");
    } else {
      /* Disable SAX2 handlers so that the SAX2 events do not all get
       * sent to callbacks during dealing with the entity parsing.
       */
      sax2->enabled = 0;
      val = xmlParseCtxtExternalEntity(xc, ret->URI, ret->ExternalID, &children);
      sax2->enabled = 1;
    }

    if(!val) {
      xmlAddChildList((xmlNodePtr)ret, children);
    } else {
      /* parsing the external entity failed: turn off validation and
       * report the entity as not found
       */
      xc->validate = 0;
      return NULL;
    }

    ret->owner = 1;

#if LIBXML_VERSION >= 20627
    /* Checked field was released in 2.6.27 on 2006-10-25
     * http://git.gnome.org/browse/libxml2/commit/?id=a37a6ad91a61d168ecc4b29263def3363fff4da6
     *
     */

    /* Mark this entity as having been checked - never do this again */
    if(!ret->checked)
      ret->checked = 1;
#endif
  }

  return ret;
}
+raptor_libxml_update_document_locator(raptor_sax2* sax2, + raptor_locator* locator) +{ + /* for storing error info */ + xmlSAXLocatorPtr loc = sax2 ? sax2->loc : NULL; + xmlParserCtxtPtr xc= sax2 ? sax2->xc : NULL; + + if(xc && xc->inSubset) + return; + + if(!locator) + return; + + locator->line= -1; + locator->column= -1; + + if(!xc) + return; + + if(loc) { + locator->line = loc->getLineNumber(xc); + /* Seems to be broken */ + /* locator->column = loc->getColumnNumber(xc); */ + } + +} + + +static void +raptor_libxml_warning(void* user_data, const char *msg, ...) +{ + raptor_sax2* sax2 = NULL; + va_list args; + int prefix_length = RAPTOR_BAD_CAST(int, strlen(xml_warning_prefix)); + int length; + char *nmsg; + int msg_len; + + /* Work around libxml2 bug - sometimes the sax2->error + * returns a ctx, sometimes the userdata + */ + if(((raptor_sax2*)user_data)->magic == RAPTOR_LIBXML_MAGIC) + sax2 = (raptor_sax2*)user_data; + else + /* user_data is not userData */ + sax2 = (raptor_sax2*)((xmlParserCtxtPtr)user_data)->userData; + + va_start(args, msg); + + raptor_libxml_update_document_locator(sax2, sax2->locator); + + msg_len = RAPTOR_BAD_CAST(int, strlen(msg)); + length = prefix_length + msg_len + 1; + nmsg = RAPTOR_MALLOC(char*, length); + if(nmsg) { + memcpy(nmsg, xml_warning_prefix, prefix_length); /* Do not copy NUL */ + memcpy(nmsg + prefix_length, msg, msg_len + 1); /* Copy NUL */ + if(nmsg[length-2] == '\n') + nmsg[length-2]='\0'; + } + + PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START + raptor_log_error_varargs(sax2->world, + RAPTOR_LOG_LEVEL_WARN, + sax2->locator, + nmsg ? 
nmsg : msg, + args); + PRAGMA_IGNORE_WARNING_END + + if(nmsg) + RAPTOR_FREE(char*, nmsg); + va_end(args); +} + + +static void +raptor_libxml_error_common(void* user_data, const char *msg, va_list args, + const char *prefix, int is_fatal) +{ + raptor_sax2* sax2 = NULL; + int prefix_length = RAPTOR_BAD_CAST(int, strlen(prefix)); + int length; + char *nmsg; + int msg_len; + raptor_world* world = NULL; + raptor_locator* locator = NULL; + raptor_log_level l; + + if(user_data) { + /* Work around libxml2 bug - sometimes the sax2->error + * returns a user_data, sometimes the userdata + */ + if(((raptor_sax2*)user_data)->magic == RAPTOR_LIBXML_MAGIC) + sax2 = (raptor_sax2*)user_data; + else + /* user_data is not userData */ + sax2 = (raptor_sax2*)((xmlParserCtxtPtr)user_data)->userData; + } + + if(sax2) { + world = sax2->world; + locator = sax2->locator; + + if(locator) + raptor_libxml_update_document_locator(sax2, sax2->locator); + } + + msg_len = RAPTOR_BAD_CAST(int, strlen(msg)); + length = prefix_length + msg_len + 1; + nmsg = RAPTOR_MALLOC(char*, length); + if(nmsg) { + memcpy(nmsg, prefix, prefix_length); /* Do not copy NUL */ + memcpy(nmsg + prefix_length, msg, msg_len + 1); /* Copy NUL */ + if(nmsg[length-1] == '\n') + nmsg[length-1]='\0'; + } + + l = (is_fatal) ? RAPTOR_LOG_LEVEL_FATAL: RAPTOR_LOG_LEVEL_ERROR; + + PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START + raptor_log_error_varargs(world, + l, + locator, + nmsg ? nmsg : msg, + args); + PRAGMA_IGNORE_WARNING_END + + if(nmsg) + RAPTOR_FREE(char*, nmsg); +} + + +static void +raptor_libxml_error(void* user_data, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + raptor_libxml_error_common(user_data, msg, args, xml_error_prefix, 0); + va_end(args); +} + + + +void +raptor_libxml_generic_error(void* user_data, const char *msg, ...) 
+{ + raptor_world* world = (raptor_world*)user_data; + va_list args; + const char* prefix = xml_generic_error_prefix; + int prefix_length = RAPTOR_BAD_CAST(int, strlen(prefix)); + int length; + char *nmsg; + int msg_len; + + va_start(args, msg); + + msg_len = RAPTOR_BAD_CAST(int, strlen(msg)); + length = prefix_length + msg_len + 1; + nmsg = RAPTOR_MALLOC(char*, length); + if(nmsg) { + memcpy(nmsg, prefix, prefix_length); /* Do not copy NUL */ + memcpy(nmsg + prefix_length, msg, msg_len + 1); /* Copy NUL */ + if(nmsg[length-1] == '\n') + nmsg[length-1]='\0'; + } + + PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START + raptor_log_error_varargs(world, RAPTOR_LOG_LEVEL_ERROR, + /* locator */ NULL, + nmsg ? nmsg : msg, + args); + PRAGMA_IGNORE_WARNING_END + + if(nmsg) + RAPTOR_FREE(char*, nmsg); + + va_end(args); +} + + +static void +raptor_libxml_fatal_error(void* user_data, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + raptor_libxml_error_common(user_data, msg, args, xml_fatal_error_prefix, 1); + va_end(args); +} + + +void +raptor_libxml_validation_error(void* user_data, const char *msg, ...) +{ + va_list args; + + va_start(args, msg); + raptor_libxml_error_common(user_data, msg, args, + xml_validation_error_prefix, 1); + va_end(args); +} + + +void +raptor_libxml_validation_warning(void* user_data, const char *msg, ...) 
+{ + va_list args; + raptor_sax2* sax2 = (raptor_sax2*)user_data; + int prefix_length = RAPTOR_GOOD_CAST(int, strlen(xml_validation_warning_prefix)); + int length; + char *nmsg; + int msg_len; + + va_start(args, msg); + + raptor_libxml_update_document_locator(sax2, sax2->locator); + + msg_len = RAPTOR_BAD_CAST(int, strlen(msg)); + length = prefix_length + msg_len + 1; + nmsg = RAPTOR_MALLOC(char*, length); + if(nmsg) { + memcpy(nmsg, xml_validation_warning_prefix, prefix_length); /* Do not copy NUL */ + memcpy(nmsg + prefix_length, msg, msg_len + 1); /* Copy NUL */ + if(nmsg[length-2] == '\n') + nmsg[length-2]='\0'; + } + + PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START + raptor_log_error_varargs(sax2->world, + RAPTOR_LOG_LEVEL_WARN, + sax2->locator, + nmsg ? nmsg : msg, + args); + PRAGMA_IGNORE_WARNING_END + + if(nmsg) + RAPTOR_FREE(char*, nmsg); + va_end(args); +} + + +/* + * Initialise libxml for a particular SAX2 setup +*/ +void +raptor_libxml_sax_init(raptor_sax2* sax2) +{ + xmlSAXHandler *sax = &sax2->sax; + + sax->internalSubset = raptor_libxml_internalSubset; + sax->isStandalone = raptor_libxml_isStandalone; + sax->hasInternalSubset = raptor_libxml_hasInternalSubset; + sax->hasExternalSubset = raptor_libxml_hasExternalSubset; + sax->resolveEntity = raptor_libxml_resolveEntity; + sax->getEntity = raptor_libxml_getEntity; + sax->getParameterEntity = raptor_libxml_getParameterEntity; + sax->entityDecl = raptor_libxml_entityDecl; + sax->attributeDecl = NULL; /* attributeDecl */ + sax->elementDecl = NULL; /* elementDecl */ + sax->notationDecl = NULL; /* notationDecl */ + sax->unparsedEntityDecl = raptor_libxml_unparsedEntityDecl; + sax->setDocumentLocator = raptor_libxml_set_document_locator; + sax->startDocument = raptor_libxml_startDocument; + sax->endDocument = raptor_libxml_endDocument; + sax->startElement= raptor_sax2_start_element; + sax->endElement= raptor_sax2_end_element; + sax->reference = NULL; /* reference */ + sax->characters= raptor_sax2_characters; 
+ sax->cdataBlock= raptor_sax2_cdata; /* like <![CDATA[...]> */ + sax->ignorableWhitespace= raptor_sax2_cdata; + sax->processingInstruction = NULL; /* processingInstruction */ + sax->comment = raptor_sax2_comment; /* comment */ + sax->warning = (warningSAXFunc)raptor_libxml_warning; + sax->error = (errorSAXFunc)raptor_libxml_error; + sax->fatalError = (fatalErrorSAXFunc)raptor_libxml_fatal_error; + sax->serror = (xmlStructuredErrorFunc)raptor_libxml_xmlStructuredError_handler_parsing; + +#ifdef RAPTOR_LIBXML_XMLSAXHANDLER_EXTERNALSUBSET + sax->externalSubset = raptor_libxml_externalSubset; +#endif + +#ifdef RAPTOR_LIBXML_XMLSAXHANDLER_INITIALIZED + sax->initialized = 1; +#endif +} + + +void +raptor_libxml_free(xmlParserCtxtPtr xc) { + libxml2_endDocument(xc); + + if(xc->myDoc) { + xmlFreeDoc(xc->myDoc); + xc->myDoc = NULL; + } + + xmlFreeParserCtxt(xc); +} + + +int +raptor_libxml_init(raptor_world* world) +{ + xmlInitParser(); + + if(world->libxml_flags & RAPTOR_WORLD_FLAG_LIBXML_STRUCTURED_ERROR_SAVE) { + world->libxml_saved_structured_error_context = xmlGenericErrorContext; + world->libxml_saved_structured_error_handler = xmlStructuredError; + /* sets xmlGenericErrorContext and xmlStructuredError */ + xmlSetStructuredErrorFunc(world, + (xmlStructuredErrorFunc)raptor_libxml_xmlStructuredError_handler_global); + } + + if(world->libxml_flags & RAPTOR_WORLD_FLAG_LIBXML_GENERIC_ERROR_SAVE) { + world->libxml_saved_generic_error_context = xmlGenericErrorContext; + world->libxml_saved_generic_error_handler = xmlGenericError; + /* sets xmlGenericErrorContext and xmlGenericError */ + xmlSetGenericErrorFunc(world, + (xmlGenericErrorFunc)raptor_libxml_generic_error); + } + + return 0; +} + + +void +raptor_libxml_finish(raptor_world* world) +{ + if(world->libxml_flags & RAPTOR_WORLD_FLAG_LIBXML_STRUCTURED_ERROR_SAVE) + xmlSetStructuredErrorFunc(world->libxml_saved_structured_error_context, + world->libxml_saved_structured_error_handler); + + if(world->libxml_flags & 
RAPTOR_WORLD_FLAG_LIBXML_GENERIC_ERROR_SAVE) + xmlSetGenericErrorFunc(world->libxml_saved_generic_error_context, + world->libxml_saved_generic_error_handler); + + xmlCleanupParser(); +} + + +#if LIBXML_VERSION >= 20632 +#define XML_LAST_DL XML_FROM_SCHEMATRONV +#else +#if LIBXML_VERSION >= 20621 +#define XML_LAST_DL XML_FROM_I18N +#else +#if LIBXML_VERSION >= 20617 +#define XML_LAST_DL XML_FROM_WRITER +#else +#if LIBXML_VERSION >= 20616 +#define XML_LAST_DL XML_FROM_CHECK +#else +#if LIBXML_VERSION >= 20615 +#define XML_LAST_DL XML_FROM_VALID +#else +#define XML_LAST_DL XML_FROM_XSLT +#endif +#endif +#endif +#endif +#endif + +/* All other symbols not specifically below noted were added during + * the period 2-10 October 2003 which is before the minimum libxml2 + * version 2.6.8 release date of Mar 23 2004. + * + * When the minimum libxml2 version goes up, the #ifdefs for + * older versions can be removed. + */ +static const char* const raptor_libxml_domain_labels[XML_LAST_DL+2]= { + NULL, /* XML_FROM_NONE */ + "parser", /* XML_FROM_PARSER */ + "tree", /* XML_FROM_TREE */ + "namespace", /* XML_FROM_NAMESPACE */ + "validity", /* XML_FROM_DTD */ + "HTML parser", /* XML_FROM_HTML */ + "memory", /* XML_FROM_MEMORY */ + "output", /* XML_FROM_OUTPUT */ + "I/O" , /* XML_FROM_IO */ + "FTP", /* XML_FROM_FTP */ +#if LIBXML_VERSION >= 20618 + /* 2005-02-13 - v2.6.18 */ + "HTTP", /* XML_FROM_HTTP */ +#endif + "XInclude", /* XML_FROM_XINCLUDE */ + "XPath", /* XML_FROM_XPATH */ + "parser", /* XML_FROM_XPOINTER */ + "regexp", /* XML_FROM_REGEXP */ + "Schemas datatype", /* XML_FROM_DATATYPE */ + "Schemas parser", /* XML_FROM_SCHEMASP */ + "Schemas validity", /* XML_FROM_SCHEMASV */ + "Relax-NG parser", /* XML_FROM_RELAXNGP */ + "Relax-NG validity", /* XML_FROM_RELAXNGV */ + "Catalog", /* XML_FROM_CATALOG */ + "C14", /* XML_FROM_C14N */ + "XSLT", /* XML_FROM_XSLT */ +#if LIBXML_VERSION >= 20615 + /* 2004-10-07 - v2.6.15 */ + "validity", /* XML_FROM_VALID */ +#endif +#if 
LIBXML_VERSION >= 20616 + /* 2004-11-04 - v2.6.16 */ + "checking", /* XML_FROM_CHECK */ +#endif +#if LIBXML_VERSION >= 20617 + /* 2005-01-04 - v2.6.17 */ + "writer", /* XML_FROM_WRITER */ +#endif +#if LIBXML_VERSION >= 20621 + /* 2005-08-24 - v2.6.21 */ + "module", /* XML_FROM_MODULE */ + "encoding", /* XML_FROM_I18N */ +#endif +#if LIBXML_VERSION >= 20632 + /* 2008-04-08 - v2.6.32 */ + "schematronv", /* XML_FROM_SCHEMATRONV */ +#endif + NULL +}; + + +static void +raptor_libxml_xmlStructuredError_handler_common(raptor_world *world, + raptor_locator *locator, + xmlErrorPtr err) +{ + raptor_stringbuffer* sb; + char *nmsg; + raptor_log_level level = RAPTOR_LOG_LEVEL_ERROR; + + if(err == NULL || err->code == XML_ERR_OK || err->level == XML_ERR_NONE) + return; + + /* Do not warn about things with no location */ + if(err->level == XML_ERR_WARNING && !err->file) + return; + + /* XML fatal errors never cause an abort */ + if(err->level == XML_ERR_FATAL) + err->level = XML_ERR_ERROR; + + + sb = raptor_new_stringbuffer(); + if(err->domain != XML_FROM_HTML) + raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)"XML ", + 4, 1); + + if(err->domain != XML_FROM_NONE && err->domain < XML_LAST_DL) { + const unsigned char* label; + label = (const unsigned char*)raptor_libxml_domain_labels[(int)err->domain]; + raptor_stringbuffer_append_string(sb, label, 1); + raptor_stringbuffer_append_counted_string(sb, + (const unsigned char*)" ", 1, 1); + } + + if(err->level == XML_ERR_WARNING) + raptor_stringbuffer_append_counted_string(sb, + (const unsigned char*)"warning: ", + 9, 1); + else /* XML_ERR_ERROR or XML_ERR_FATAL */ + raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)"error: ", + 7, 1); + + if(err->message) { + unsigned char* msg; + size_t len; + msg = (unsigned char*)err->message; + len= strlen((const char*)msg); + if(len && msg[len-1] == '\n') + msg[--len]='\0'; + + raptor_stringbuffer_append_counted_string(sb, msg, len, 1); + } + +#if 
LIBXML_VERSION >= 20618 + /* 2005-02-13 - v2.6.18 */ + + /* str1 has the detailed HTTP error */ + if(err->domain == XML_FROM_HTTP && err->str1) { + unsigned char* msg; + size_t len; + msg = (unsigned char*)err->str1; + len= strlen((const char*)msg); + if(len && msg[len-1] == '\n') + msg[--len]='\0'; + + raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)" - ", + 3, 1); + raptor_stringbuffer_append_counted_string(sb, msg, len, 1); + } +#endif + + /* When err->domain == XML_FROM_XPATH then err->int1 is + * the offset into err->str1, the line with the error + */ + if(err->domain == XML_FROM_XPATH && err->str1) { + raptor_stringbuffer_append_counted_string(sb, (const unsigned char*)" in ", + 4, 1); + raptor_stringbuffer_append_string(sb, (const unsigned char*)err->str1, 1); + } + + nmsg = (char*)raptor_stringbuffer_as_string(sb); + if(err->level == XML_ERR_FATAL) + level = RAPTOR_LOG_LEVEL_FATAL; + else if(err->level == XML_ERR_ERROR) + level = RAPTOR_LOG_LEVEL_ERROR; + else + level = RAPTOR_LOG_LEVEL_WARN; + + raptor_log_error(world, level, locator, nmsg); + + raptor_free_stringbuffer(sb); +} + + +/* user_data is a raptor_world* */ +static void +raptor_libxml_xmlStructuredError_handler_global(void *user_data, + xmlErrorPtr err) +{ + raptor_world *world = NULL; + + /* user_data may point to a raptor_world* */ + if(user_data) { + world = (raptor_world*)user_data; + if(world->magic != RAPTOR2_WORLD_MAGIC) + world = NULL; + } + + raptor_libxml_xmlStructuredError_handler_common(world, NULL, err); +} + + +/* user_data may be a raptor_sax2; err->ctxt->userData may point to a + * raptor_sax2* */ +static void +raptor_libxml_xmlStructuredError_handler_parsing(void *user_data, + xmlErrorPtr err) +{ + raptor_sax2* sax2 = NULL; + + /* user_data may point to a raptor_sax2* */ + if(user_data) { + sax2 = (raptor_sax2*)user_data; + if(sax2->magic != RAPTOR_LIBXML_MAGIC) + sax2 = NULL; + } + + /* err->ctxt->userData may point to a raptor_sax2* */ + if(err && err->ctxt) 
{ + xmlParserCtxtPtr ctxt = (xmlParserCtxtPtr)err->ctxt; + if(ctxt->userData) { + sax2 = (raptor_sax2*)ctxt->userData; + if(sax2->magic != RAPTOR_LIBXML_MAGIC) + sax2 = NULL; + } + } + + if(sax2) + raptor_libxml_xmlStructuredError_handler_common(sax2->world, sax2->locator, + err); + else + raptor_libxml_xmlStructuredError_handler_common(NULL, NULL, err); +} + + +/* end if RAPTOR_XML_LIBXML */ +#endif diff --git a/src/raptor_locator.c b/src/raptor_locator.c new file mode 100644 index 0000000..11e08da --- /dev/null +++ b/src/raptor_locator.c @@ -0,0 +1,248 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_locator.c - Raptor parsing locator functions + * + * Copyright (C) 2002-2006, David Beckett http://www.dajobe.org/ + * Copyright (C) 2002-2006, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/** + * raptor_locator_print: + * @locator: #raptor_locator to print + * @stream: stream to print to + * + * Print a raptor locator to a stream. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_locator_print(raptor_locator* locator, FILE *stream) +{ + if(!locator) + return 1; + + if(locator->uri) + fprintf(stream, "URI %s", raptor_uri_as_string(locator->uri)); + else if(locator->file) + fprintf(stream, "file %s", locator->file); + else + return 0; + if(locator->line >= 0) { + fprintf(stream, ":%d", locator->line); + if(locator->column >= 0) + fprintf(stream, " column %d", locator->column); + } + + return 0; +} + + +/** + * raptor_locator_format: + * @buffer: buffer to store format + * @length: size of buffer (excluding NUL) + * @locator: #raptor_locator to format + * + * Format a raptor locator as a string. + * + * If buffer is NULL or @length is insufficient for the size of + * the locator, returns the number of additional bytes required + * in the buffer to write the locator. Writes a terminating '\0'. + * + * Return value: 0 on success, >0 if additional bytes required in buffer, <0 on failure + **/ +int +raptor_locator_format(char *buffer, size_t length, raptor_locator* locator) +{ + size_t bufsize = 0; + const char* label_str; + size_t label_len = 0; + const char* value_str = NULL; + size_t value_len; + + if(!locator) + return -1; + + #define URI_STR "URI " + #define URI_STR_LEN 4 /* strlen(URI_STR) */ + #define FILE_STR "file " + #define FILE_STR_LEN 5 /* strlen(FILE_STR) */ + #define COLUMN_STR " column " + #define COLUMN_STR_LEN 8 /* strlen(COLUMN_STR) */ + + if(locator->uri) { + label_str = URI_STR; + label_len = URI_STR_LEN; + value_str = (const char*)raptor_uri_as_counted_string(locator->uri, + &value_len); + } else if(locator->file) { + label_str = FILE_STR; + label_len = FILE_STR_LEN; + value_str = locator->file; + value_len = strlen(value_str); + } else + return -1; + + bufsize = label_len + value_len; + + if(locator->line > 0) { + bufsize += 1 + raptor_format_integer(NULL, 0, locator->line, /* base */ 10, + -1, '\0'); + if(locator->column >= 0) + bufsize += COLUMN_STR_LEN + + 
raptor_format_integer(NULL, 0, locator->column, /* base */ 10, + -1, '\0'); + } + + if(!buffer || !length || length < (bufsize + 1)) /* +1 for NUL */ + return RAPTOR_BAD_CAST(int, bufsize); + + + memcpy(buffer, label_str, label_len); + buffer += label_len; + memcpy(buffer, value_str, value_len); + buffer += value_len; + + if(locator->line > 0) { + *buffer ++= ':'; + buffer += raptor_format_integer(buffer, length, + locator->line, /* base */ 10, + -1, '\0'); + if(locator->column >= 0) { + memcpy(buffer, COLUMN_STR, COLUMN_STR_LEN); + buffer += COLUMN_STR_LEN; + buffer += raptor_format_integer(buffer, length, + locator->column, /* base */ 10, + -1, '\0'); + } + } + *buffer = '\0'; + + return 0; +} + + +/** + * raptor_locator_line: + * @locator: locator + * + * Get line number from locator. + * + * Return value: integer line number, or -1 if there is no line number available + **/ +int +raptor_locator_line(raptor_locator *locator) +{ + if(!locator) + return -1; + return locator->line; +} + + +/** + * raptor_locator_column: + * @locator: locator + * + * Get column number from locator. + * + * Return value: integer column number, or -1 if there is no column number available + **/ +int +raptor_locator_column(raptor_locator *locator) +{ + if(!locator) + return -1; + return locator->column; +} + + +/** + * raptor_locator_byte: + * @locator: locator + * + * Get the locator byte offset from locator. + * + * Return value: integer byte number, or -1 if there is no byte offset available + **/ +int +raptor_locator_byte(raptor_locator *locator) +{ + if(!locator) + return -1; + return locator->byte; +} + + +/** + * raptor_locator_file: + * @locator: locator + * + * Get file name from locator. + * + * Return value: string file name, or NULL if there is no filename available + **/ +const char * +raptor_locator_file(raptor_locator *locator) +{ + if(!locator) + return NULL; + return locator->file; +} + + +/** + * raptor_locator_uri: + * @locator: locator + * + * Get URI from locator. 
+ * + * Returns a pointer to a shared string version of the URI in + * the locator. This must be copied if it is needed. + * + * Return value: string URI, or NULL if there is no URI available + **/ +const char * +raptor_locator_uri(raptor_locator *locator) +{ + if(!locator) + return NULL; + + return (const char*)raptor_uri_as_string(locator->uri); +} diff --git a/src/raptor_log.c b/src/raptor_log.c new file mode 100644 index 0000000..a4495e9 --- /dev/null +++ b/src/raptor_log.c @@ -0,0 +1,163 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_log.c - Raptor log handling + * + * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static const char* const raptor_log_level_labels[RAPTOR_LOG_LEVEL_LAST + 1] = { + "none", + "trace", + "debug", + "info", + "warning", + "error", + "fatal error" +}; + + +/** + * raptor_log_level_get_label: + * @level: log message level + * + * Get label for a log message level + * + * Return value: label string or NULL if level is not valid + */ +const char* +raptor_log_level_get_label(raptor_log_level level) +{ + return (level <= RAPTOR_LOG_LEVEL_LAST) ? raptor_log_level_labels[level] : NULL; +} + + +void +raptor_log_error_varargs(raptor_world* world, raptor_log_level level, + raptor_locator* locator, + const char* message, va_list arguments) +{ + char *buffer = NULL; + size_t length; + + if(level == RAPTOR_LOG_LEVEL_NONE) + return; + + if(world && world->internal_ignore_errors) + return; + + length = raptor_vasprintf(&buffer, message, arguments); + if(!buffer) { + if(locator) { + raptor_locator_print(locator, stderr); + fputc(' ', stderr); + } + fputs("raptor ", stderr); + fputs(raptor_log_level_labels[level], stderr); + fputs(" - ", stderr); + vfprintf(stderr, message, arguments); + fputc('\n', stderr); + return; + } + + if(length >= 1 && buffer[length-1] == '\n') + buffer[length-1]='\0'; + + raptor_log_error(world, level, locator, buffer); + + RAPTOR_FREE(char*, buffer); +} + + +void +raptor_log_error_formatted(raptor_world* world, raptor_log_level level, + raptor_locator* locator, + const char* message, ...) 
+{ + va_list arguments; + + va_start(arguments, message); + raptor_log_error_varargs(world, level, locator, message, arguments); + va_end(arguments); +} + + +/* internal */ +void +raptor_log_error(raptor_world* world, raptor_log_level level, + raptor_locator* locator, const char* text) +{ + raptor_log_handler handler; + + if(level == RAPTOR_LOG_LEVEL_NONE) + return; + + if(world) { + if(world->internal_ignore_errors) + return; + + memset(&world->message, '\0', sizeof(world->message)); + world->message.code = -1; + world->message.domain = RAPTOR_DOMAIN_NONE; + world->message.level = level; + world->message.locator = locator; + world->message.text = text; + + handler = world->message_handler; + if(handler) { + /* This is the place in raptor that ALL of the user error handler + * functions are called. + */ + handler(world->message_handler_user_data, &world->message); + return; + } + } + + /* default - print it to stderr */ + if(locator) { + raptor_locator_print(locator, stderr); + fputc(' ', stderr); + } + fputs("raptor ", stderr); + fputs(raptor_log_level_labels[level], stderr); + fputs(" - ", stderr); + fputs(text, stderr); + fputc('\n', stderr); +} diff --git a/src/raptor_memstr.c b/src/raptor_memstr.c new file mode 100644 index 0000000..2168e91 --- /dev/null +++ b/src/raptor_memstr.c @@ -0,0 +1,76 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_memstr.c - search for a string in a block of memory + * + * Copyright (C) 2008, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. 
/*
 * raptor_memstr:
 * @haystack: memory block to search in
 * @haystack_len: size of memory block
 * @needle: string to search for
 *
 * INTERNAL: Search for a string in a block of memory
 *
 * @haystack need not be NUL terminated, but the scan stops early at
 * the first NUL byte reached in it.  An empty @needle matches at the
 * start of @haystack.
 *
 * Return value: pointer to the first match, or NULL if either pointer is NULL or no match is found
 */
const char*
raptor_memstr(const char *haystack, size_t haystack_len, const char *needle)
{
  size_t needle_len;
  size_t remaining;
  const char *cursor;

  if(!haystack || !needle)
    return NULL;

  if(*needle == '\0')
    return haystack;

  needle_len = strlen(needle);
  cursor = haystack;
  remaining = haystack_len;

  /* remaining is always the number of bytes left starting at cursor,
   * so the comparison never reads past the block
   */
  while(remaining >= needle_len && *cursor != '\0') {
    if(memcmp(cursor, needle, needle_len) == 0)
      return cursor;

    cursor++;
    remaining--;
  }

  return NULL;
}
GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* Define these for far too much output */ +#undef RAPTOR_DEBUG_VERBOSE + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#nsc-NSDeclared + * (section 4) says: + * + * -------------------------------------------------------------------- + * The prefix xml is by definition bound to the namespace name + * http://www.w3.org/XML/1998/namespace + * -------------------------------------------------------------------- + * + * Errata NE05 + * http://www.w3.org/XML/xml-names-19990114-errata#NE05 + * changes that to read: + * + * -------------------------------------------------------------------- + * The prefix xml is by definition bound to the namespace name + * http://www.w3.org/XML/1998/namespace. It may, but need not, be + * declared, and must not be bound to any other namespace name. No + * other prefix may be bound to this namespace name. + * + * The prefix xmlns is used only to declare namespace bindings and is + * by definition bound to the namespace name + * http://www.w3.org/2000/xmlns/. It must not be declared. No other + * prefix may be bound to this namespace name. 
+ * + * All other prefixes beginning with the three-letter sequence x, m, l, + * in any case combination, are reserved. This means that + * * users should not use them except as defined by later specifications + * * processors must not treat them as fatal errors. + * -------------------------------------------------------------------- + * + * Thus should define it in the table of namespaces before we start. + * + * We *can* also define others, but let's not. + * + */ + +#ifndef STANDALONE +const unsigned char * const raptor_xml_namespace_uri = (const unsigned char *)"http://www.w3.org/XML/1998/namespace"; +const unsigned char * const raptor_rdf_namespace_uri = (const unsigned char *)"http://www.w3.org/1999/02/22-rdf-syntax-ns#"; +const unsigned int raptor_rdf_namespace_uri_len = 43; +const unsigned char * const raptor_rdf_schema_namespace_uri = (const unsigned char *)"http://www.w3.org/2000/01/rdf-schema#"; +const unsigned int raptor_rdf_schema_namespace_uri_len = 37; +const unsigned char * const raptor_xmlschema_datatypes_namespace_uri = (const unsigned char *)"http://www.w3.org/2001/XMLSchema#"; +const unsigned char * const raptor_owl_namespace_uri = (const unsigned char *)"http://www.w3.org/2002/07/owl#"; + + +/* hash function to hash namespace prefix strings (usually short strings) + * + * Uses DJ Bernstein original hash function - good on short text keys. + */ +static unsigned int +raptor_hash_ns_string(const unsigned char *str, int length) +{ + unsigned int hash = 5381; + int c; + + for(; length && (c = *str++); length--) { + hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ + } + + return hash; +} + + +#define RAPTOR_NAMESPACES_HASHTABLE_SIZE 1024 +/** + * raptor_namespaces_init: + * @world: raptor_world object + * @nstack: #raptor_namespace_stack to initialise + * @defaults: namespaces to initialise. + * + * Initialise an existing namespaces stack object + * + * This sets up the stack optionally with some common RDF namespaces. 
+ * + * @defaults can be 0 for none, 1 for just XML, 2 for RDF, RDFS, OWL + * and XSD (RDQL uses this) or 3+ undefined. + * + * Return value: non-0 on error + */ +int +raptor_namespaces_init(raptor_world* world, + raptor_namespace_stack *nstack, + int defaults) +{ + int failures = 0; + + nstack->world = world; + + nstack->size = 0; + + nstack->table_size = RAPTOR_NAMESPACES_HASHTABLE_SIZE; + nstack->table = RAPTOR_CALLOC(raptor_namespace**, + RAPTOR_NAMESPACES_HASHTABLE_SIZE, + sizeof(raptor_namespace*)); + if(!nstack->table) + return -1; + + nstack->def_namespace = NULL; + + nstack->rdf_ms_uri = raptor_new_uri_from_counted_string(nstack->world, + (const unsigned char*)raptor_rdf_namespace_uri, + raptor_rdf_namespace_uri_len); + failures += !nstack->rdf_ms_uri; + + nstack->rdf_schema_uri = raptor_new_uri_from_counted_string(nstack->world, + (const unsigned char*)raptor_rdf_schema_namespace_uri, + raptor_rdf_schema_namespace_uri_len); + failures += !nstack->rdf_schema_uri; + + /* raptor_new_namespace_from_uri() that eventually gets called by + * raptor_new_namespace() in raptor_namespaces_start_namespace_full() + * needs rdf_ms_uri and rdf_schema_uri + * - do not call if we had failures initializing those uris */ + if(defaults && !failures) { + /* defined at level -1 since always 'present' when inside the XML world */ + failures += raptor_namespaces_start_namespace_full(nstack, + (const unsigned char*)"xml", + raptor_xml_namespace_uri, -1); + if(defaults >= 2) { + failures += raptor_namespaces_start_namespace_full(nstack, + (const unsigned char*)"rdf", + raptor_rdf_namespace_uri, 0); + failures += raptor_namespaces_start_namespace_full(nstack, + (const unsigned char*)"rdfs", + raptor_rdf_schema_namespace_uri, 0); + failures += raptor_namespaces_start_namespace_full(nstack, + (const unsigned char*)"xsd", + raptor_xmlschema_datatypes_namespace_uri, 0); + failures += raptor_namespaces_start_namespace_full(nstack, + (const unsigned char*)"owl", + 
raptor_owl_namespace_uri, 0); + } + } + return failures; +} + + +/** + * raptor_new_namespaces: + * @world: raptor_world object + * @defaults: namespaces to initialise + * + * Constructor - create a new #raptor_namespace_stack. + * + * See raptor_namespaces_init() for the values of @defaults. + * + * Return value: a new namespace stack or NULL on failure + **/ +raptor_namespace_stack * +raptor_new_namespaces(raptor_world* world, int defaults) +{ + raptor_namespace_stack *nstack; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + nstack = RAPTOR_CALLOC(raptor_namespace_stack*, 1, sizeof(*nstack)); + if(!nstack) + return NULL; + + if(raptor_namespaces_init(world, nstack, defaults)) { + raptor_free_namespaces(nstack); + nstack = NULL; + } + + return nstack; +} + + +/** + * raptor_namespaces_start_namespace: + * @nstack: namespace stack + * @nspace: namespace to start + * + * Start a namespace on a stack of namespaces. + **/ +void +raptor_namespaces_start_namespace(raptor_namespace_stack *nstack, + raptor_namespace *nspace) +{ + unsigned int hash = raptor_hash_ns_string(nspace->prefix, + nspace->prefix_length); + const int bucket = hash % nstack->table_size; + + nstack->size++; + + if(nstack->table[bucket]) + nspace->next = nstack->table[bucket]; + nstack->table[bucket] = nspace; + + if(!nstack->def_namespace) + nstack->def_namespace = nspace; + +#ifndef STANDALONE +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("start namespace prefix %s depth %d\n", nspace->prefix ? (char*)nspace->prefix : "(default)", nspace->depth); +#endif +#endif + +} + + +/** + * raptor_namespaces_start_namespace_full: + * @nstack: namespace stack + * @prefix: new namespace prefix (or NULL) + * @ns_uri_string: new namespace URI (or NULL) + * @depth: new namespace depth + * + * Create a new namespace and start it on a stack of namespaces. + * + * See raptor_new_namespace() for the meanings of @prefix, + * @ns_uri_string and @depth for namespaces. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_namespaces_start_namespace_full(raptor_namespace_stack *nstack, + const unsigned char *prefix, + const unsigned char *ns_uri_string, + int depth) +{ + raptor_namespace *ns; + + ns = raptor_new_namespace(nstack, prefix, ns_uri_string, depth); + if(!ns) + return 1; + + raptor_namespaces_start_namespace(nstack, ns); + return 0; +} + + +/** + * raptor_namespaces_clear: + * @nstack: namespace stack + * + * Empty a namespace stack of namespaces and any other resources. + **/ +void +raptor_namespaces_clear(raptor_namespace_stack *nstack) +{ + if(nstack->table) { + int bucket; + + for(bucket = 0; bucket < nstack->table_size; bucket++) { + raptor_namespace *ns = nstack->table[bucket]; + while(ns) { + raptor_namespace* next_ns = ns->next; + + raptor_free_namespace(ns); + nstack->size--; + ns = next_ns; + } + nstack->table[bucket] = NULL; + } + + RAPTOR_FREE(raptor_namespaces, nstack->table); + nstack->table = NULL; + nstack->table_size = 0; + } + + if(nstack->world) { + if(nstack->rdf_ms_uri) { + raptor_free_uri(nstack->rdf_ms_uri); + nstack->rdf_ms_uri = NULL; + } + if(nstack->rdf_schema_uri) { + raptor_free_uri(nstack->rdf_schema_uri); + nstack->rdf_schema_uri = NULL; + } + } + + nstack->size = 0; + + nstack->world = NULL; +} + + +/** + * raptor_free_namespaces: + * @nstack: namespace stack + * + * Destructor - destroy a namespace stack + **/ +void +raptor_free_namespaces(raptor_namespace_stack *nstack) +{ + if(!nstack) + return; + + raptor_namespaces_clear(nstack); + + RAPTOR_FREE(raptor_namespace_stack, nstack); +} + + +/** + * raptor_namespaces_end_for_depth: + * @nstack: namespace stack + * @depth: depth + * + * End all namespaces at the given depth in the namespace stack. 
+ **/ +void +raptor_namespaces_end_for_depth(raptor_namespace_stack *nstack, int depth) +{ + int bucket; + for(bucket = 0; bucket < nstack->table_size; bucket++) { + while(nstack->table[bucket] && + nstack->table[bucket]->depth == depth) { + raptor_namespace* ns = nstack->table[bucket]; + raptor_namespace* next_ns = ns->next; + +#ifndef STANDALONE +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("namespace prefix %s depth %d\n", + ns->prefix ? (char*)ns->prefix : "(default)", depth); +#endif +#endif + raptor_free_namespace(ns); + nstack->size--; + + nstack->table[bucket] = next_ns; + } + } +} + + +/** + * raptor_namespaces_get_default_namespace: + * @nstack: namespace stack + * + * Get the current default namespace in-scope in a stack. + * + * Return value: #raptor_namespace or NULL if no default namespace is in scope + **/ +raptor_namespace* +raptor_namespaces_get_default_namespace(raptor_namespace_stack *nstack) +{ + unsigned int hash = raptor_hash_ns_string((const unsigned char *)"", 0); + const int bucket = hash % nstack->table_size; + raptor_namespace* ns; + + for(ns = nstack->table[bucket]; ns && ns->prefix; ns = ns->next) + ; + return ns; +} + + +/** + * raptor_namespaces_find_namespace: + * @nstack: namespace stack + * @prefix: namespace prefix to find + * @prefix_length: length of prefix. + * + * Find a namespace in a namespace stack by prefix. + * + * Note that this uses the @length so that the prefix may be a prefix (sic) + * of a longer string. If @prefix is NULL, the default namespace will + * be returned if present, @prefix_length length is ignored in this case. 
+ * + * Return value: #raptor_namespace for the prefix or NULL on failure + **/ +raptor_namespace* +raptor_namespaces_find_namespace(raptor_namespace_stack *nstack, + const unsigned char *prefix, int prefix_length) +{ + raptor_namespace* ns; + unsigned int hash = raptor_hash_ns_string(prefix, prefix_length); + int bucket; + + if(!nstack || !nstack->table_size) + return NULL; + + bucket = hash % (nstack->table_size); + for(ns = nstack->table[bucket]; ns ; ns = ns->next) { + if(!prefix) { + if(!ns->prefix) + break; + } else { + if((unsigned int)prefix_length == ns->prefix_length && + !strncmp((char*)prefix, (char*)ns->prefix, prefix_length)) + break; + } + } + + return ns; +} + + +/** + * raptor_namespaces_find_namespace_by_uri: + * @nstack: namespace stack + * @ns_uri: namespace URI to find + * + * Find a namespace in a namespace stack by namespace URI. + * + * Return value: #raptor_namespace for the URI or NULL on failure + **/ +raptor_namespace* +raptor_namespaces_find_namespace_by_uri(raptor_namespace_stack *nstack, + raptor_uri *ns_uri) +{ + int bucket; + + if(!ns_uri) + return NULL; + + for(bucket = 0; bucket < nstack->table_size; bucket++) { + raptor_namespace* ns; + for(ns = nstack->table[bucket]; ns ; ns = ns->next) + if(raptor_uri_equals(ns->uri, ns_uri)) + return ns; + } + + return NULL; +} + + +/** + * raptor_namespaces_namespace_in_scope: + * @nstack: namespace stack + * @nspace: namespace + * + * Test if a given namespace is in-scope in the namespace stack. + * + * Return value: non-0 if the namespace is in scope. 
+ **/ +int +raptor_namespaces_namespace_in_scope(raptor_namespace_stack *nstack, + const raptor_namespace *nspace) +{ + raptor_namespace* ns; + int bucket; + + for(bucket = 0; bucket < nstack->table_size; bucket++) { + for(ns = nstack->table[bucket]; ns ; ns = ns->next) + if(raptor_uri_equals(ns->uri, nspace->uri)) + return 1; + } + return 0; +} + + +/** + * raptor_new_namespace_from_uri: + * @nstack: namespace stack + * @prefix: namespace prefix string + * @ns_uri: namespace URI + * @depth: depth of namespace in the stack + * + * Constructor - create a new namespace from a prefix and URI object. + * + * This declares but does not enable the namespace declaration (or 'start' it) + * Use raptor_namespaces_start_namespace() to make the namespace + * enabled and in scope for binding prefixes. + * + * Alternatively use raptor_namespaces_start_namespace_full() can construct + * and enable a namespace in one call. + * + * Return value: a new #raptor_namespace or NULL on failure + **/ +raptor_namespace* +raptor_new_namespace_from_uri(raptor_namespace_stack *nstack, + const unsigned char *prefix, + raptor_uri* ns_uri, int depth) +{ + unsigned int prefix_length = 0; + unsigned int len; + raptor_namespace *ns; + unsigned char *p; + +#ifndef STANDALONE +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG4("namespace prefix %s uri %s depth %d\n", + prefix ? (char*)prefix : "(default)", + ns_uri ? (char*)raptor_uri_as_string(ns_uri) : "(none)", + depth); +#endif +#endif + + if(prefix && !ns_uri) { + /* failed to find namespace - now what? 
*/ + raptor_log_error_formatted(nstack->world, RAPTOR_LOG_LEVEL_ERROR, + /* locator */ NULL, + "The namespace URI for prefix \"%s\" is empty.", + prefix); + return NULL; + } + + + len = sizeof(raptor_namespace); + if(prefix) { + prefix_length = (unsigned int)strlen((char*)prefix); + len += prefix_length + 1; + } + + /* Just one malloc for structure + namespace (maybe) + prefix (maybe)*/ + ns = RAPTOR_CALLOC(raptor_namespace*, 1, len); + if(!ns) + return NULL; + + p = (unsigned char*)ns + sizeof(raptor_namespace); + if(ns_uri) { + ns->uri = raptor_uri_copy(ns_uri); + if(!ns->uri) { + RAPTOR_FREE(raptor_namespace, ns); + return NULL; + } + } + if(prefix) { + ns->prefix = (const unsigned char*)memcpy(p, prefix, prefix_length + 1); + ns->prefix_length = prefix_length; + + if(!strcmp((char*)ns->prefix, "xml")) + ns->is_xml = 1; + } + ns->depth = depth; + + /* set convienience flags when there is a defined namespace URI */ + if(ns->uri) { + if(raptor_uri_equals(ns->uri, nstack->rdf_ms_uri)) + ns->is_rdf_ms = 1; + else if(raptor_uri_equals(ns->uri, nstack->rdf_schema_uri)) + ns->is_rdf_schema = 1; + } + + ns->nstack = nstack; + + return ns; +} + + +/** + * raptor_new_namespace: + * @nstack: namespace stack + * @prefix: namespace prefix string + * @ns_uri_string: namespace URI string + * @depth: depth of namespace in the stack + * + * Constructor - create a new namespace from a prefix and URI string with a depth scope. + * + * This declares but does not enable the namespace declaration (or 'start' it) + * Use raptor_namespaces_start_namespace() to make the namespace + * enabled and in scope for binding prefixes. + * + * Alternatively use raptor_namespaces_start_namespace_full() can construct + * and enable a namespace in one call. + * + * The @depth is a way to use the stack of namespaces for providing scoped + * namespaces where inner scope namespaces override outer scope namespaces. + * This is primarily for RDF/XML and XML syntaxes that have hierarchical + * elements. 
The main use of this is raptor_namespaces_end_for_depth() + * to disable ('end') all namespaces at a given depth. Otherwise set this + * to 0. + * + * Return value: a new #raptor_namespace or NULL on failure + **/ +raptor_namespace* +raptor_new_namespace(raptor_namespace_stack *nstack, + const unsigned char *prefix, + const unsigned char *ns_uri_string, int depth) +{ + raptor_uri* ns_uri = NULL; + raptor_namespace* ns; + + /* Convert an empty namespace string "" to a NULL pointer */ + if(ns_uri_string && !*ns_uri_string) + ns_uri_string = NULL; + + if(ns_uri_string) { + ns_uri = raptor_new_uri(nstack->world, ns_uri_string); + if(!ns_uri) + return NULL; + } + ns = raptor_new_namespace_from_uri(nstack, prefix, ns_uri, depth); + if(ns_uri) + raptor_free_uri(ns_uri); + + return ns; +} + + +/** + * raptor_namespace_stack_start_namespace: + * @nstack: namespace stack + * @ns: namespace + * @new_depth: new depth + * + * Copy an existing namespace to a namespace stack with a new depth + * and start it. + * + * The @depth is a way to use the stack of namespaces for providing scoped + * namespaces where inner scope namespaces override outer scope namespaces. + * This is primarily for RDF/XML and XML syntaxes that have hierarchical + * elements. The main use of this is raptor_namespaces_end_for_depth() + * to disable ('end') all namespaces at a given depth. If depths are + * not being needed it is unlikely this call is ever needed to copy an + * existing namespace at a new depth. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_namespace_stack_start_namespace(raptor_namespace_stack *nstack, + raptor_namespace *ns, + int new_depth) +{ + raptor_namespace *new_ns; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(nstack, raptor_namespace_stack, 1); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(ns, raptor_namespace, 1); + + new_ns = raptor_new_namespace_from_uri(nstack, ns->prefix, ns->uri, new_depth); + if(!new_ns) + return 1; + + raptor_namespaces_start_namespace(nstack, new_ns); + return 0; +} + + +/** + * raptor_free_namespace: + * @ns: namespace object + * + * Destructor - destroy a namespace. + **/ +void +raptor_free_namespace(raptor_namespace *ns) +{ + if(!ns) + return; + + if(ns->uri) + raptor_free_uri(ns->uri); + + RAPTOR_FREE(raptor_namespace, ns); +} + + +/** + * raptor_namespace_get_uri: + * @ns: namespace object + * + * Get the namespace URI. + * + * Return value: namespace URI or NULL + **/ +raptor_uri* +raptor_namespace_get_uri(const raptor_namespace *ns) +{ + return ns->uri; +} + + +/** + * raptor_namespace_get_prefix: + * @ns: namespace object + * + * Get the namespace prefix. + * + * Return value: prefix string or NULL + **/ +const unsigned char* +raptor_namespace_get_prefix(const raptor_namespace *ns) +{ + return (const unsigned char*)ns->prefix; +} + + +/** + * raptor_namespace_get_counted_prefix: + * @ns: namespace object + * @length_p: pointer to store length or NULL + * + * Get the namespace prefix and length. + * + * Return value: prefix string or NULL + **/ +const unsigned char* +raptor_namespace_get_counted_prefix(const raptor_namespace *ns, size_t *length_p) +{ + if(length_p) + *length_p=ns->prefix_length; + return (const unsigned char*)ns->prefix; +} + + +/** + * raptor_namespace_format_as_xml: + * @ns: namespace object + * @length_p: pointer to length (or NULL) + * + * Format a namespace in an XML style into a newly allocated string. 
+ * + * Generates a string of the form xmlns:prefix="uri", + * xmlns="uri", xmlns:prefix="" or xmlns="" depending on the + * namespace's prefix or URI. Double quotes are always used. + * + * If @length_p is not NULL, the length of the string is + * stored in the address it points to. + * + * See also raptor_xml_namespace_string_parse() + * + * Return value: namespace formatted as newly allocated string or NULL on failure + **/ +unsigned char * +raptor_namespace_format_as_xml(const raptor_namespace *ns, size_t *length_p) +{ + size_t uri_length = 0L; + const unsigned char *uri_string = NULL; + size_t xml_uri_length = 0L; + size_t length; + unsigned char *buffer; + const char quote='"'; + unsigned char *p; + + if(ns->uri) { + int xlength; + + uri_string = raptor_uri_as_counted_string(ns->uri, &uri_length); + xlength = raptor_xml_escape_string(ns->nstack->world, + uri_string, uri_length, + NULL, 0, quote); + if(xlength < 0) + return NULL; + xml_uri_length = RAPTOR_GOOD_CAST(size_t, xlength); + } + + /* 8 = length of [[xmlns=""] */ + length = 8 + xml_uri_length + ns->prefix_length; + + if(ns->prefix) + length++; /* for : */ + + if(length_p) + *length_p = length; + + buffer = RAPTOR_MALLOC(unsigned char*, length + 1); + if(!buffer) + return NULL; + + p = buffer; + + memcpy(p, "xmlns", 5); + p += 5; + + if(ns->prefix) { + *p++ = ':'; + memcpy(p, ns->prefix, ns->prefix_length); + p += ns->prefix_length; + } + *p++ = '='; + *p++ = quote; + if(uri_length) { + int xlength; + + xlength = raptor_xml_escape_string(ns->nstack->world, + uri_string, uri_length, + p, xml_uri_length, quote); + if(xlength < 0) + return NULL; + p += RAPTOR_GOOD_CAST(size_t, xlength); + } + *p++ = quote; + /* *p used here since we never need to use value of p again [CLANG] */ + *p = '\0'; + + return buffer; +} + + +/** + * raptor_namespace_write: + * @ns: namespace to write + * @iostr: raptor iosteram + * + * Write a formatted namespace to an iostream + * + * Return value: non-0 on failure + **/ +int 
+raptor_namespace_write(raptor_namespace *ns, raptor_iostream* iostr) +{ + size_t uri_length = 0L; + const unsigned char *uri_string = NULL; + + if(!ns || !iostr) + return 1; + + if(ns->uri) + uri_string = raptor_uri_as_counted_string(ns->uri, &uri_length); + + raptor_iostream_counted_string_write("xmlns", 5, iostr); + if(ns->prefix) { + raptor_iostream_write_byte(':', iostr); + raptor_iostream_string_write(ns->prefix, iostr); + } + raptor_iostream_counted_string_write("=\"", 2, iostr); + if(uri_length) + raptor_iostream_counted_string_write(uri_string, uri_length, iostr); + raptor_iostream_write_byte('"', iostr); + + return 0; +} + + +/** + * raptor_xml_namespace_string_parse: + * @string: string to parse + * @prefix: pointer to location to store namespace prefix + * @uri_string: pointer to location to store namespace URI + * + * Parse a string containing an XML style namespace declaration + * into a namespace prefix and URI pair. + * + * The string is of the form xmlns:prefix="uri", + * xmlns="uri", xmlns:prefix="" or xmlns="". + * The quotes can be single or double quotes. + * + * Two values are returned from this function into *@prefix and + * *@uri_string. Either but not both may be NULL. + * + * See also raptor_namespace_format_as_xml() + * + * Return value: non-0 on failure. 
+ **/ +int +raptor_xml_namespace_string_parse(const unsigned char *string, + unsigned char **prefix, + unsigned char **uri_string) +{ + const unsigned char *t; + unsigned char quote; + + if((!prefix || !uri_string)) + return 1; + + if(!string || (string && !*string)) + return 1; + + if(strncmp((const char*)string, "xmlns", 5)) + return 1; + + *prefix = NULL; + *uri_string = NULL; + + /* + * Four cases are expected and handled: + * xmlns="" + * xmlns="uri" + * xmlns:foo="" + * xmlns:foo="uri" + * + * (with " or ' quotes) + */ + + /* skip "xmlns" */ + string += 5; + + if(*string == ':') { + /* non-empty prefix */ + t = ++string; + while(*string && *string != '=') + string++; + if(!*string || string == t) + return 1; + + *prefix = RAPTOR_MALLOC(unsigned char*, string - t + 1); + if(!*prefix) + return 1; + memcpy(*prefix, t, string - t); + (*prefix)[string-t] = '\0'; + } + + if(*string++ != '=') + return 1; + + if(*string != '"' && *string != '\'') + return 1; + quote = *string++; + + t = string; + while(*string && *string != quote) + string++; + + if(*string != quote) + return 1; + + if(!(string - t)) + /* xmlns...="" */ + *uri_string = NULL; + else { + *uri_string = RAPTOR_MALLOC(unsigned char*, string - t + 1); + if(!*uri_string) + return 1; + memcpy(*uri_string, t, string - t); + (*uri_string)[string - t] = '\0'; + } + + return 0; +} + + +/** + * raptor_new_qname_from_namespace_uri: + * @nstack: namespace stack + * @uri: URI to use to make qname + * @xml_version: XML Version + * + * Make an appropriate XML Qname from the namespaces on a namespace stack + * + * Makes a qname from the in-scope namespaces in a stack if the URI matches + * the prefix and the rest is a legal XML name. 
+ * + * Return value: #raptor_qname for the URI or NULL on failure + **/ +raptor_qname* +raptor_new_qname_from_namespace_uri(raptor_namespace_stack *nstack, + raptor_uri *uri, int xml_version) +{ + unsigned char *uri_string; + size_t uri_len; + raptor_namespace* ns = NULL; + unsigned char *ns_uri_string; + size_t ns_uri_len; + unsigned char *name = NULL; + int bucket; + + if(!uri) + return NULL; + + uri_string = raptor_uri_as_counted_string(uri, &uri_len); + + for(bucket = 0; bucket < nstack->table_size; bucket++) { + for(ns = nstack->table[bucket]; ns ; ns = ns->next) { + if(!ns->uri) + continue; + + ns_uri_string = raptor_uri_as_counted_string(ns->uri, + &ns_uri_len); + if(ns_uri_len >= uri_len) + continue; + if(strncmp((const char*)uri_string, (const char*)ns_uri_string, + ns_uri_len)) + continue; + + /* uri_string is a prefix of ns_uri_string */ + name = uri_string + ns_uri_len; + if(!raptor_xml_name_check(name, uri_len-ns_uri_len, xml_version)) + name = NULL; + + /* If name is set, we've found a prefix with a legal XML name value */ + if(name) + break; + } + if(name) + break; + } + + if(!ns) + return NULL; + + return raptor_new_qname_from_namespace_local_name(nstack->world, ns, + name, NULL); +} + + +#ifdef RAPTOR_DEBUG +void +raptor_namespace_print(FILE *stream, raptor_namespace* ns) +{ + const unsigned char *uri_string; + + uri_string = raptor_uri_as_string(ns->uri); + if(ns->prefix) + fprintf(stream, "%s:%s", ns->prefix, uri_string); + else + fprintf(stream, "(default):%s", uri_string); +} +#endif + + +raptor_namespace** +raptor_namespace_stack_to_array(raptor_namespace_stack *nstack, + size_t *size_p) +{ + raptor_namespace** ns_list; + size_t size = 0; + int bucket; + + ns_list = RAPTOR_CALLOC(raptor_namespace**, nstack->size, + sizeof(raptor_namespace*)); + if(!ns_list) + return NULL; + + for(bucket = 0; bucket < nstack->table_size; bucket++) { + raptor_namespace* ns; + + for(ns = nstack->table[bucket]; ns; ns = ns->next) { + int skip = 0; + unsigned int 
i; + if(ns->depth < 1) + continue; + + for(i = 0; i < size; i++) { + raptor_namespace* ns2 = ns_list[i]; + if((!ns->prefix && !ns2->prefix) || + (ns->prefix && ns2->prefix && + !strcmp((const char*)ns->prefix, (const char*)ns2->prefix))) { + /* this prefix was seen (overridden) earlier so skip */ + skip = 1; + break; + } + } + if(!skip) + ns_list[size++] = ns; + } + } + + if(size_p) + *size_p = size; + + return ns_list; +} + +#endif /* !STANDALONE */ + + +#ifdef STANDALONE + + +/* one more prototype */ +int main(int argc, char *argv[]); + + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = raptor_basename(argv[0]); + raptor_namespace_stack namespaces; /* static */ + raptor_namespace* ns; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + raptor_namespaces_init(world, &namespaces, 1); + + raptor_namespaces_start_namespace_full(&namespaces, + (const unsigned char*)"ex1", + (const unsigned char*)"http://example.org/ns1", + 0); + + raptor_namespaces_start_namespace_full(&namespaces, + (const unsigned char*)"ex2", + (const unsigned char*)"http://example.org/ns2", + 1); + + if(raptor_namespaces_find_namespace(&namespaces, NULL, 0)) { + fprintf(stderr, "%s: Default namespace found when should not be found, returning error\n", + program); + return(1); + } + + raptor_namespaces_start_namespace_full(&namespaces, + NULL, + (const unsigned char*)"http://example.org/ns3", + 2); + + ns = raptor_namespaces_find_namespace(&namespaces, NULL, 0); + if(!ns) { + fprintf(stderr, "%s: Default namespace not found when should not be found, returning error\n", + program); + return(1); + } + + ns = raptor_namespaces_find_namespace(&namespaces, (const unsigned char*)"ex2", 3); + if(!ns) { + fprintf(stderr, "%s: namespace ex2 not found when should not be found, returning error\n", + program); + return(1); + } + + raptor_namespaces_end_for_depth(&namespaces, 2); + + raptor_namespaces_end_for_depth(&namespaces, 1); + + 
raptor_namespaces_end_for_depth(&namespaces, 0); + + raptor_namespaces_clear(&namespaces); + + raptor_free_world(world); + + /* keep gcc -Wall happy */ + return(0); +} + +#endif + +/* + * Local Variables: + * mode:c + * c-basic-offset: 2 + * End: + */ diff --git a/src/raptor_nfc_icu.c b/src/raptor_nfc_icu.c new file mode 100644 index 0000000..03d2459 --- /dev/null +++ b/src/raptor_nfc_icu.c @@ -0,0 +1,112 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_nfc_icu.c - Raptor Unicode NFC checking via ICU library + * + * Copyright (C) 2012, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include <stdio.h> +#include <stdarg.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#include <unicode/ustring.h> +#if ICU_UC_MAJOR_VERSION >= 56 +#include <unicode/unorm2.h> +#else +#include <unicode/unorm.h> +#endif + + +/* + * raptor_nfc_icu_check: + * @input: UTF-8 string + * @length: length of string + * @error: pointer to error flag (or NULL) + * + * INTERNAL - Unicode Normal Form C (NFC) check function via ICU + * + * If errorp is not NULL, it is set to non-0 on error + * + * Return value: <0 on error, 0 if is not NFC, >0 if is NFC + **/ +int +raptor_nfc_icu_check(const unsigned char* string, size_t len) +{ +#if ICU_UC_MAJOR_VERSION >= 56 + /* norm2 is be a singleton - do not attempt to free it */ + const UNormalizer2 *norm2; +#endif + UErrorCode error_code = U_ZERO_ERROR; + UNormalizationCheckResult res; + UChar *dest; /* UTF-16 */ + int32_t dest_capacity = len << 1; + int32_t dest_length; + int rc = 0; + + /* ICU functions take a UTF-16 string so convert */ + dest = RAPTOR_MALLOC(UChar*, dest_capacity + 1); + if(!dest) + goto error; + + (void)u_strFromUTF8(dest, dest_capacity, &dest_length, + (const char *)string, (int32_t)len, &error_code); + if(!U_SUCCESS(error_code)) + goto error; + + /* unorm_quickCheck was deprecated in ICU UC V56 */ +#if ICU_UC_MAJOR_VERSION >= 56 + norm2 = unorm2_getNFCInstance(&error_code); + if(!U_SUCCESS(error_code)) + goto error; + + res = unorm2_quickCheck(norm2, dest, dest_length, &error_code); +#else + res = unorm_quickCheck(dest, dest_length, UNORM_NFC, &error_code); +#endif + if(!U_SUCCESS(error_code)) + goto error; + + /* success */ + rc = (res == UNORM_YES); + goto cleanup; + +error: + rc = -1; + +cleanup: + if(dest) + RAPTOR_FREE(UChar*, dest); + + return rc; +} diff --git a/src/raptor_nfc_test.c 
#undef RAPTOR_NFC_DECODE_DEBUG


/*
 * decode_to_utf8:
 * @utf8_string: destination UTF-8 buffer
 * @utf8_string_length: size of @utf8_string in bytes
 * @unicode_string: first char of a string of space-separated hex code points
 * @end: last char of unicode_string
 *
 * Decode one column of NormalizationTest.txt (hex code points separated
 * by spaces) into UTF-8.
 *
 * Decoding stops early, with a message on stderr, at the first token
 * that is not a hex number or that cannot be encoded.  The program is
 * aborted if the encoded form does not fit in @utf8_string.
 *
 * Return value: number of bytes written to @utf8_string
 */
static size_t
decode_to_utf8(unsigned char *utf8_string, size_t utf8_string_length,
               const char *unicode_string, const char *end)
{
  unsigned char *u = utf8_string;
  const char *p = unicode_string;

#ifdef RAPTOR_NFC_DECODE_DEBUG
  fputs("decode_to_utf8: string '", stderr);
  (void)fwrite(unicode_string, sizeof(char), (end-unicode_string) + 1, stderr);
  fputs("' converts to:\n ", stderr);
#endif

  while(p < end) {
    unsigned long c = 0;
    char *endptr;
    int unichar_width;
    size_t remaining;

    if(*p == ' ') {
      p++;
      continue;
    }

    c = (unsigned long)strtol(p, &endptr, 16);
    if(endptr == p) {
      /* Nothing was consumed: without this check p would never advance */
      fprintf(stderr, "decode_to_utf8 found non-hex character '%c'\n", *p);
      break;
    }

#ifdef RAPTOR_NFC_DECODE_DEBUG
    fprintf(stderr, "U+%04lX ", c);
#endif

    p = (const char*)endptr;

    /* The size passed is the room left in the OUTPUT buffer */
    remaining = utf8_string_length - (size_t)(u - utf8_string);
    unichar_width = raptor_unicode_utf8_string_put_char(c, u, remaining);
    if(unichar_width < 0) {
      /* A UTF-8 sequence is at most 4 bytes, so with 4 or more bytes
       * free the failure must be the code point itself.
       */
      if(remaining < 4) {
        fprintf(stderr,
                "decode_to_utf8 would overflow utf8_string buffer at byte %ld\n",
                (long)(u-utf8_string));
        abort();
      }
      fprintf(stderr,
              "decode_to_utf8 Illegal Unicode character with code point #x%lX.\n",
              c);
      break;
    }

    u += (size_t)unichar_width;

    /* Defensive: should be unreachable when the encoder honours the size */
    if((size_t)(u-utf8_string) > utf8_string_length) {
      fprintf(stderr,
              "decode_to_utf8 overwrote utf8_string buffer at byte %ld\n",
              (long)(u-utf8_string));
      abort();
    }
  }

#ifdef RAPTOR_NFC_DECODE_DEBUG
  fputs("\n", stderr);
#endif

  return (size_t)(u-utf8_string);
}


/*
 * utf8_print:
 * @input: UTF-8 string
 * @length: number of bytes in @input
 * @stream: output stream
 *
 * Print a UTF-8 string as space-separated U+XXXX code points.
 *
 * Printing stops silently at a NUL byte or at the first sequence that
 * cannot be decoded.
 */
static void
utf8_print(const unsigned char *input, size_t length, FILE *stream)
{
  size_t i = 0;

  while(i < length && *input) {
    unsigned long c;
    int size = raptor_unicode_utf8_string_get_char(input, length - i, &c);
    if(size <= 0)
      return;

    if(i)
      fputc(' ', stream);
    fprintf(stream, "U+%04lX", c);
    input += size;
    i += (size_t)size;
  }
}


/*
 * main:
 *
 * Run the UTF-8 validity and NFC checks over columns 2 and 4 of every
 * data line of the NormalizationTest.txt file named in argv[1].
 *
 * Lines starting with '@' or '#' and blank lines are skipped.  Lines
 * that do not contain the four ';' ending columns 1-4 are reported
 * and skipped.
 *
 * Return value: 1 on usage, open or read error, otherwise 0.  Check
 * failures are reported on stderr and counted but do not change the
 * exit status.
 */
int
main (int argc, char *argv[])
{
  const char *program = raptor_basename(argv[0]);
  const char *filename;
  FILE *fh;
  int rc = 0;
  unsigned int line = 1;
  size_t max_c2_len = 0;
  size_t max_c4_len = 0;
  int passes = 0;
  int fails = 0;

  if(argc != 2) {
    fprintf(stderr,
            "USAGE %s [path to NormalizationTest.txt]\n"
            "Get it at http://unicode.org/Public/UNIDATA/NormalizationTest.txt\n",
            program);
    return 1;
  }

  filename = argv[1];
  fh = fopen(filename, "r");
  if(!fh) {
    fprintf(stderr, "%s: file '%s' open failed - %s\n",
            program, filename, strerror(errno));
    return 1;
  }

#define LINE_BUFFER_SIZE 1024

/* FIXME big enough for Unicode 4 (c2 max 16; c4 max 33) */
#define UNISTR_SIZE 40

  for(;!feof(fh); line++) {
    char buffer[LINE_BUFFER_SIZE];
    char *p;
    char *semi[4]; /* the ';' ending columns 1, 2, 3 and 4 */
    int col;
    unsigned char column2[UNISTR_SIZE];
    unsigned char column4[UNISTR_SIZE];
    size_t column2_len, column4_len;
    int nfc_rc;
    int error;

    p = fgets(buffer, LINE_BUFFER_SIZE, fh);
    if(!p) {
      if(ferror(fh)) {
        fprintf(stderr, "%s: file '%s' read failed - %s\n",
                program, filename, strerror(errno));
        rc = 1;
      }
      /* otherwise assume feof */
      break;
    }

    /* skip part headers, comments and blank lines */
    if(*p == '@' || *p == '#' || *p == '\n' || *p == '\r' || !*p)
      continue;

    /* Find the column separators without ever scanning past the NUL */
    for(col = 0; col < 4; col++) {
      semi[col] = strchr(col ? semi[col - 1] + 1 : p, ';');
      if(!semi[col])
        break;
    }
    if(col < 4) {
      fprintf(stderr, "%s:%u: skipping line with fewer than 4 columns\n",
              filename, line);
      continue;
    }

    /* column 2 lies between the 1st and 2nd ';' */
    column2_len = decode_to_utf8(column2, UNISTR_SIZE,
                                 semi[0] + 1, semi[1] - 1);
    if(column2_len > max_c2_len)
      max_c2_len = column2_len;

    /* column 4 lies between the 3rd and 4th ';' */
    column4_len = decode_to_utf8(column4, UNISTR_SIZE,
                                 semi[2] + 1, semi[3] - 1);
    if(column4_len > max_c4_len)
      max_c4_len = column4_len;

    if(!raptor_unicode_check_utf8_string(column2, column2_len)) {
      fprintf(stderr, "%s:%u: UTF8 column 2 failed on: '", filename, line);
      utf8_print(column2, column2_len, stderr);
      fputs("'\n", stderr);
      fails++;
    } else
      passes++;

    /* Column 2 must be NFC */
    nfc_rc = raptor_nfc_check(column2, column2_len, &error);
    if(!nfc_rc) {
      fprintf(stderr, "%s:%u: NFC column 2 failed on: '", filename, line);
      utf8_print(column2, column2_len, stderr);
      fprintf(stderr, "' at byte %d of %d\n", error, (int)column2_len);
      fails++;
    } else
      passes++;

    /* No need to check column 4 again when it is identical to column 2 */
    if(column2_len == column4_len && !memcmp(column2, column4, column2_len))
      continue;

    if(!raptor_unicode_check_utf8_string(column4, column4_len)) {
      fprintf(stderr, "%s:%u: UTF8 column 4 failed on: '", filename, line);
      utf8_print(column4, column4_len, stderr);
      fputs("'\n", stderr);
      fails++;
    } else
      passes++;

    /* Column 4 must be in NFC */
    nfc_rc = raptor_nfc_check(column4, column4_len, &error);
    if(!nfc_rc) {
      fprintf(stderr, "%s:%u: NFC column 4 failed on: '", filename, line);
      utf8_print(column4, column4_len, stderr);
      fprintf(stderr, "' at byte %d of %d\n", error, (int)column4_len);
      fails++;
    } else
      passes++;
  }

  fclose(fh);

  fprintf(stderr, "%s: max column 2 len: %d, max column 4 len: %d\n", program,
          (int)max_c2_len, (int)max_c4_len);
  fprintf(stderr, "%s: passes: %d fails: %d\n", program,
          passes, fails);

  return rc;
}
/* These are for 7-bit ASCII and not locale-specific */
#define IS_ASCII_ALPHA(c) (((c) > 0x40 && (c) < 0x5B) || ((c) > 0x60 && (c) < 0x7B))
#define IS_ASCII_UPPER(c) ((c) > 0x40 && (c) < 0x5B)
#define IS_ASCII_DIGIT(c) ((c) > 0x2F && (c) < 0x3A)
#define IS_ASCII_PRINT(c) ((c) > 0x1F && (c) < 0x7F)
#define TO_ASCII_LOWER(c) ((c)+0x20)

/* Kinds of term scanned character by character; see
 * raptor_ntriples_term_valid() for the characters each one accepts.
 */
typedef enum {
  RAPTOR_TERM_CLASS_URI,      /* ends on > */
  RAPTOR_TERM_CLASS_BNODEID,  /* ends on first char not allowed in a blank node ID */
  RAPTOR_TERM_CLASS_STRING,   /* ends on non-escaped " */
  RAPTOR_TERM_CLASS_LANGUAGE  /* ends on first char not allowed in a language tag */
} raptor_ntriples_term_class;


/*
 * raptor_ntriples_term_valid:
 * @c: character to test
 * @position: 0 for the first character of the term, non-0 otherwise
 * @term_class: class of term being scanned
 *
 * INTERNAL - Check whether a character may appear in a term of the
 * given class at the given position.
 *
 * Return value: non-0 if @c is allowed, 0 if it ends the term (or the
 * term class is unknown)
 */
static int
raptor_ntriples_term_valid(unsigned char c, int position,
                           raptor_ntriples_term_class term_class)
{
  switch(term_class) {
    case RAPTOR_TERM_CLASS_URI:
      /* everything up to the closing > */
      return (c != '>');

    case RAPTOR_TERM_CLASS_STRING:
      /* everything up to the closing " */
      return (c != '"');

    case RAPTOR_TERM_CLASS_BNODEID:
      /* [A-Za-z0-9_:] anywhere */
      if(IS_ASCII_ALPHA(c) || IS_ASCII_DIGIT(c) || c == '_' || c == ':')
        return 1;
      /* FIXME
       * '-' and '.' are accepted after the first character.  This is
       * not quite correct: '.' is allowed in positions 1..N-1 only,
       * but a character-by-character check cannot know which
       * character is the last.
       */
      return (position && (c == '-' || c == '.'));

    case RAPTOR_TERM_CLASS_LANGUAGE:
      /* [a-zA-Z] anywhere */
      if(IS_ASCII_ALPHA(c))
        return 1;
      /* digits and '-' after the first character; '_' is accepted as
       * a synonym / typo for '-'
       */
      return (position && (IS_ASCII_DIGIT(c) || c == '-' || c == '_'));

    default:
      RAPTOR_DEBUG2("Unknown N-Triples term class %u", term_class);
  }

  return 0;
}
+ * + * URIs may not have \t \b \n \r \f or raw ' ' or \u0020 or \u003C or \u003E + * + * Return value: Non 0 on failure + **/ +static int +raptor_ntriples_parse_term_internal(raptor_world* world, + raptor_locator* locator, + const unsigned char **start, + unsigned char *dest, + size_t *lenp, size_t *dest_lenp, + char end_char, + raptor_ntriples_term_class term_class) +{ + const unsigned char *p = *start; + unsigned char c = '\0'; + size_t ulen = 0; + unsigned long unichar = 0; + unsigned int position = 0; + int end_char_seen = 0; + + /* find end of string, fixing backslashed characters on the way */ + while(*lenp > 0) { + int unichar_width; + + c = *p; + + p++; + (*lenp)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(term_class == RAPTOR_TERM_CLASS_URI && c == ' ') { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, + "URI error - illegal character %d (0x%02X) found.", + c, RAPTOR_GOOD_CAST(unsigned int, c)); + return 1; + } + + if(c > 0x7f) { + /* just copy the UTF-8 bytes through */ + int unichar_len; + unichar_len = raptor_unicode_utf8_string_get_char(p - 1, 1 + *lenp, NULL); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > *lenp) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, + "UTF-8 encoding error at character %d (0x%02X) found.", + c, RAPTOR_GOOD_CAST(unsigned int, c)); + /* UTF-8 encoding had an error or ended in the middle of a string */ + return 1; + } + memmove(dest, p-1, unichar_len); + dest += unichar_len; + + unichar_len--; /* p, *lenp were moved on by 1 earlier */ + + p += unichar_len; + (*lenp) -= unichar_len; + if(locator) { + locator->column += unichar_len; + locator->byte += unichar_len; + } + continue; + } + + if(c != '\\') { + /* finish at non-backslashed end_char */ + if(end_char && c == end_char) { + end_char_seen = 1; + break; + } + + if(!raptor_ntriples_term_valid(c, position, term_class)) { + if(end_char) { + /* end char was expected, so finding an invalid 
thing is an error */ + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Missing terminating '%c' (found '%c')", end_char, c); + return 0; + } else { + /* it's the end - so rewind 1 to save next char */ + p--; + (*lenp)++; + if(locator) { + locator->column--; + locator->byte--; + } + if(term_class == RAPTOR_TERM_CLASS_BNODEID && dest[-1] == '.') { + /* If bnode id ended on '.' move back one */ + dest--; + + p--; + (*lenp)++; + if(locator) { + locator->column--; + locator->byte--; + } + } + break; + } + } + + /* otherwise store and move on */ + *dest++ = c; + position++; + continue; + } + + if(!*lenp) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "\\ at end of input."); + return 0; + } + + c = *p; + + p++; + (*lenp)--; + if(locator) { + locator->column++; + locator->byte++; + } + + switch(c) { + case '"': + case '\\': + *dest++ = c; + break; + case 'b': + case 'f': + case 'n': + case 'r': + case 't': + if(term_class == RAPTOR_TERM_CLASS_URI) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "URI error - illegal URI escape '\\%c'.", c); + return 1; + } + + if(c == 'b') + *dest++ = '\b'; + else if(c == 'f') + *dest++ = '\f'; + else if(c == 'n') + *dest++ = '\n'; + else if(c == 'r') + *dest++ = '\r'; + else /* 't' */ + *dest++ = '\t'; + break; + case '<': + case '>': + case '{': + case '}': + case '|': + case '^': + case '`': + /* Turtle 2013 allows these in URIs (as well as \" and \\) */ + *dest++ = c; + break; + + case 'u': + case 'U': + ulen = (c == 'u') ? 
4 : 8; + + if(*lenp < ulen) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "%c over end of input.", c); + return 0; + } + + if(1) { + unsigned int ii; + int n = 0; + + for(ii = 0; ii < ulen; ii++) { + char cc = p[ii]; + if(!isxdigit(RAPTOR_GOOD_CAST(char, cc))) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "N-Triples string error - illegal hex digit %c in Unicode escape '%c%s...'", + cc, c, p); + n = 1; + break; + } + } + + if(n) + break; + + n = sscanf((const char*)p, ((ulen == 4) ? "%04lx" : "%08lx"), &unichar); + if(n != 1) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Uncode escape '%c%s...'", c, p); + break; + } + } + + p += ulen; + (*lenp) -= ulen; + if(locator) { + locator->column += RAPTOR_GOOD_CAST(int, ulen); + locator->byte += RAPTOR_GOOD_CAST(int, ulen); + } + + if(term_class == RAPTOR_TERM_CLASS_URI && + (unichar == 0x0020 || unichar == 0x003C || unichar == 0x003E)) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "URI error - illegal Unicode escape \\u%04lX in URI.", unichar); + break; + } + + if(unichar > raptor_unicode_max_codepoint) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Unicode character with code point #x%lX (max #x%lX).", unichar, raptor_unicode_max_codepoint); + break; + } + + unichar_width = raptor_unicode_utf8_string_put_char(unichar, dest, 4); + if(unichar_width < 0) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal Unicode character with code point #x%lX.", unichar); + break; + } + + /* The destination length is set here to 4 since we know that in + * all cases, the UTF-8 encoded output sequence is always shorter + * than the input sequence, and the buffer is edited in place. 
+ * \uXXXX: 6 bytes input - UTF-8 max 3 bytes output + * \uXXXXXXXX: 10 bytes input - UTF-8 max 4 bytes output + */ + dest += (int)unichar_width; + break; + + default: + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal string escape \\%c in \"%s\"", c, (char*)start); + return 0; + } + + position++; + } /* end while */ + + + if(end_char && !end_char_seen) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Missing terminating '%c' before end of input.", end_char); + return 1; + } + + /* terminate dest, can be shorter than source */ + *dest = '\0'; + + if(dest_lenp) + *dest_lenp = p - *start; + + *start = p; + + return 0; +} + + +static int +raptor_parse_turtle_term_internal(raptor_world* world, + raptor_locator* locator, + const unsigned char **start, + unsigned char *dest, + size_t *len_p, size_t *dest_lenp, + raptor_uri** datatype_uri_p) +{ + const unsigned char *p = *start; + unsigned int position = 0; + /* 0 = xsd:integer; 1= xsd:decimal; 2= xsd:double */ + short dtype = 0; + int after_e = 0; + + while(*len_p > 0) { + unsigned char c = *p; + + if(after_e) { + if(!((c >= '0' && c <'9') || c == '+' || c == '-')) + break; + after_e = 0; + } else if((position > 0 && (c == '+' || c == '-')) || + !((c >= '0' && c <'9') || c == '.' 
|| c == 'e' || c == 'E')) + break; + + if(c == '.') + dtype = 1; + else if(c == 'e' || c == 'E') { + dtype = 2; + after_e = 1; + } + + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + *dest++ = c; + + position++; + } + + *dest = '\0'; + + if(dest_lenp) + *dest_lenp = p - *start; + + *start = p; + + if(dtype == 0) + *datatype_uri_p = raptor_uri_copy(world->xsd_integer_uri); + else if (dtype == 1) + *datatype_uri_p = raptor_uri_copy(world->xsd_decimal_uri); + else + *datatype_uri_p = raptor_uri_copy(world->xsd_double_uri); + + return 0; +} + + +/* + * raptor_ntriples_parse_term: + * @world: raptor world + * @locator: raptor locator (in/out) (or NULL) + * @string: string input (in) + * @len_p: pointer to length of @string (in/out) + * @term_p: pointer to store term (out) + * @allow_turtle: non-0 to allow Turtle forms such as integers, boolean + * + * INTERNAL - Parse an N-Triples string into a #raptor_term + * + * The @len_p destination and @locator fields are modified as parsing + * proceeds to be used in error messages. 
The final value is written + * into the #raptor_term pointed at by @term_p + * + * Return value: number of bytes processed or 0 on failure + */ +size_t +raptor_ntriples_parse_term(raptor_world* world, raptor_locator* locator, + unsigned char *string, size_t *len_p, + raptor_term** term_p, int allow_turtle) +{ + unsigned char *p = string; + unsigned char *dest; + size_t term_length = 0; + + switch(*p) { + case '<': + dest = p; + + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(raptor_ntriples_parse_term_internal(world, locator, + (const unsigned char**)&p, + dest, len_p, &term_length, + '>', RAPTOR_TERM_CLASS_URI)) { + goto fail; + } + + if(1) { + raptor_uri *uri; + + /* Check for bad ordinal predicate */ + if(!strncmp((const char*)dest, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) { + int ordinal = raptor_check_ordinal(dest + 44); + if(ordinal <= 0) + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal ordinal value %d in property '%s'.", ordinal, dest); + } + if(raptor_uri_uri_string_is_absolute(dest) <= 0) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "URI '%s' is not absolute.", dest); + goto fail; + } + + uri = raptor_new_uri(world, dest); + if(!uri) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Could not create URI for '%s'", (const char *)dest); + goto fail; + } + + *term_p = raptor_new_term_from_uri(world, uri); + raptor_free_uri(uri); + } + break; + + case '-': + case '+': + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + if(allow_turtle) { + raptor_uri* datatype_uri = NULL; + + dest = p; + + if(raptor_parse_turtle_term_internal(world, locator, + (const unsigned char**)&p, + dest, len_p, &term_length, + &datatype_uri)) { + goto fail; + } + + *term_p = raptor_new_term_from_literal(world, + dest, + datatype_uri, + NULL /* language */); + } else + goto fail; + break; 
+ + case '"': + dest = p; + + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(raptor_ntriples_parse_term_internal(world, locator, + (const unsigned char**)&p, + dest, len_p, &term_length, + '"', RAPTOR_TERM_CLASS_STRING)) { + goto fail; + } + + if(1) { + unsigned char *object_literal_language = NULL; + unsigned char *object_literal_datatype = NULL; + raptor_uri* datatype_uri = NULL; + + if(*len_p && *p == '@') { + unsigned char *q; + size_t lang_len; + + object_literal_language = p; + + /* Skip - */ + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(!*len_p) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Missing language after \"string\"-"); + goto fail; + } + + if(raptor_ntriples_parse_term_internal(world, locator, + (const unsigned char**)&p, + object_literal_language, len_p, &lang_len, + '\0', RAPTOR_TERM_CLASS_LANGUAGE)) { + goto fail; + } + + if(!lang_len) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Invalid language tag at @%s", p); + goto fail; + } + + /* Normalize language to lowercase + * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier + * Also convert _ to - as synonym / typo. 
+ */ + for(q = object_literal_language; *q; q++) { + if(IS_ASCII_UPPER(*q)) + *q = RAPTOR_GOOD_CAST(unsigned char, TO_ASCII_LOWER(*q)); + if(*q == '_') + *q = '-'; + } + + } + + if(*len_p > 1 && *p == '^' && p[1] == '^') { + + object_literal_datatype = p; + + /* Skip ^^ */ + p += 2; + *len_p -= 2; + if(locator) { + locator->column += 2; + locator->byte += 2; + } + + if(!*len_p || (*len_p && *p != '<')) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Missing datatype URI-ref in\"string\"^^<URI-ref> after ^^"); + goto fail; + } + + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(raptor_ntriples_parse_term_internal(world, locator, + (const unsigned char**)&p, + object_literal_datatype, len_p, NULL, + '>', RAPTOR_TERM_CLASS_URI)) { + goto fail; + } + + if(raptor_uri_uri_string_is_absolute(object_literal_datatype) <= 0) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Datatype URI '%s' is not absolute.", object_literal_datatype); + goto fail; + } + + } + + if(object_literal_datatype && object_literal_language) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Typed literal used with a language - ignoring the language"); + object_literal_language = NULL; + } + + if(object_literal_datatype) { + datatype_uri = raptor_new_uri(world, + object_literal_datatype); + if(!datatype_uri) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Could not create literal datatype uri '%s'", object_literal_datatype); + goto fail; + } + object_literal_language = NULL; + } + + *term_p = raptor_new_term_from_literal(world, + dest, + datatype_uri, + object_literal_language); + if(datatype_uri) + raptor_free_uri(datatype_uri); + } + + break; + + + case '_': + /* store where _ was */ + dest = p; + + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(!*len_p || (*len_p > 0 && *p != ':')) { + raptor_log_error_formatted(world, 
RAPTOR_LOG_LEVEL_ERROR, locator, "Illegal bNodeID - _ not followed by :"); + goto fail; + } + + /* Found ':' - move on */ + + p++; + (*len_p)--; + if(locator) { + locator->column++; + locator->byte++; + } + + if(raptor_ntriples_parse_term_internal(world, locator, + (const unsigned char**)&p, + dest, len_p, &term_length, + '\0', + RAPTOR_TERM_CLASS_BNODEID)) { + goto fail; + } + + if(!term_length) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, locator, "Bad or missing bNodeID after _:"); + goto fail; + } + + *term_p = raptor_new_term_from_blank(world, dest); + + break; + + default: + RAPTOR_DEBUG2("Unknown term type '%c'", *p); + goto fail; + } + + fail: + + return p - string; +} diff --git a/src/raptor_option.c b/src/raptor_option.c new file mode 100644 index 0000000..119dfb8 --- /dev/null +++ b/src/raptor_option.c @@ -0,0 +1,718 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_option.c - Class options + * + * Copyright (C) 2004-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static const struct +{ + raptor_option option; + raptor_option_area area; + raptor_option_value_type value_type; + const char *name; + const char *label; +} raptor_options_list[RAPTOR_OPTION_LAST + 1] = { + { RAPTOR_OPTION_SCANNING, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "scanForRDF", + "RDF/XML parser scans for rdf:RDF in XML content" + }, + { RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "allowNonNsAttributes", + "RDF/XML parser allows bare 'name' rather than namespaced 'rdf:name'" + }, + { RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "allowOtherParsetypes", + "RDF/XML parser allows user-defined rdf:parseType values" + }, + { RAPTOR_OPTION_ALLOW_BAGID, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "allowBagID", + "RDF/XML parser allows rdf:bagID" + }, + { RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "allowRDFtypeRDFlist", + "RDF/XML parser generates the collection rdf:type rdf:List triple" + }, + { RAPTOR_OPTION_NORMALIZE_LANGUAGE, + (raptor_option_area)(RAPTOR_OPTION_AREA_PARSER | RAPTOR_OPTION_AREA_SAX2), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "normalizeLanguage", + "RDF/XML parser normalizes xml:lang values to lowercase" + }, + { RAPTOR_OPTION_NON_NFC_FATAL, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "nonNFCfatal", + "RDF/XML parser makes non-NFC literals a fatal error" + }, + { RAPTOR_OPTION_WARN_OTHER_PARSETYPES, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "warnOtherParseTypes", + "RDF/XML parser warns about unknown rdf:parseType values" + }, + { RAPTOR_OPTION_CHECK_RDF_ID, 
+ RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "checkRdfID", + "RDF/XML parser checks rdf:ID values for duplicates" + }, + { RAPTOR_OPTION_RELATIVE_URIS, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "relativeURIs", + "Serializers write relative URIs wherever possible." + }, + { RAPTOR_OPTION_WRITER_AUTO_INDENT, + (raptor_option_area)(RAPTOR_OPTION_AREA_XML_WRITER | RAPTOR_OPTION_AREA_TURTLE_WRITER), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "autoIndent", + "Turtle and XML Writer automatically indent elements." + }, + { RAPTOR_OPTION_WRITER_AUTO_EMPTY, + (raptor_option_area)(RAPTOR_OPTION_AREA_XML_WRITER | RAPTOR_OPTION_AREA_TURTLE_WRITER), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "autoEmpty", + "Turtle and XML Writer automatically detect and abbreviate empty elements." + }, + { RAPTOR_OPTION_WRITER_INDENT_WIDTH, + (raptor_option_area)(RAPTOR_OPTION_AREA_XML_WRITER | RAPTOR_OPTION_AREA_TURTLE_WRITER), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "indentWidth", + "Turtle and XML Writer use as number of spaces to indent." + }, + { RAPTOR_OPTION_WRITER_XML_VERSION, + (raptor_option_area)(RAPTOR_OPTION_AREA_SERIALIZER | RAPTOR_OPTION_AREA_XML_WRITER), + RAPTOR_OPTION_VALUE_TYPE_INT, + "xmlVersion", + "Serializers and XML Writer use as XML version to write." + }, + { RAPTOR_OPTION_WRITER_XML_DECLARATION, + (raptor_option_area)(RAPTOR_OPTION_AREA_SERIALIZER | RAPTOR_OPTION_AREA_XML_WRITER), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "xmlDeclaration", + "Serializers and XML Writer write XML declaration." + }, + { RAPTOR_OPTION_NO_NET, + (raptor_option_area)(RAPTOR_OPTION_AREA_PARSER | RAPTOR_OPTION_AREA_SAX2), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "noNet", + "Parsers and SAX2 XML Parser deny internal network requests." 
+ }, + { RAPTOR_OPTION_RESOURCE_BORDER, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "resourceBorder", + "DOT serializer resource border color" + }, + { RAPTOR_OPTION_LITERAL_BORDER, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "literalBorder", + "DOT serializer literal border color" + }, + { RAPTOR_OPTION_BNODE_BORDER, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "bnodeBorder", + "DOT serializer blank node border color" + }, + { RAPTOR_OPTION_RESOURCE_FILL, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "resourceFill", + "DOT serializer resource fill color" + }, + { RAPTOR_OPTION_LITERAL_FILL, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "literalFill", + "DOT serializer literal fill color" + }, + { RAPTOR_OPTION_BNODE_FILL, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "bnodeFill", + "DOT serializer blank node fill color" + }, + { RAPTOR_OPTION_HTML_TAG_SOUP, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "htmlTagSoup", + "GRDDL parser uses a lax HTML parser" + }, + { RAPTOR_OPTION_MICROFORMATS, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "microformats", + "GRDDL parser looks for microformats" + }, + { RAPTOR_OPTION_HTML_LINK, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "htmlLink", + "GRDDL parser looks for <link type=\"application/rdf+xml\">" + }, + { RAPTOR_OPTION_WWW_TIMEOUT, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_INT, + "wwwTimeout", + "Parser WWW request retrieval timeout" + }, + { RAPTOR_OPTION_WRITE_BASE_URI, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "writeBaseURI", + "Serializers write a base URI directive @base / xml:base" + }, + { RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "wwwHttpCacheControl", + "Parser WWW request HTTP Cache-Control: header value" + 
}, + { RAPTOR_OPTION_WWW_HTTP_USER_AGENT, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "wwwHttpUserAgent", + "Parser WWW request HTTP User-Agent: header value" + }, + { RAPTOR_OPTION_JSON_CALLBACK, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "jsonCallback", + "JSON serializer callback function name" + }, + { RAPTOR_OPTION_JSON_EXTRA_DATA, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "jsonExtraData", + "JSON serializer callback data parameter" + }, + { RAPTOR_OPTION_RSS_TRIPLES, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "rssTriples", + "Atom and RSS serializers write extra RDF triples" + }, + { RAPTOR_OPTION_ATOM_ENTRY_URI, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_URI, + "atomEntryUri", + "Atom serializer writes an atom:entry with this URI (otherwise atom:feed)" + }, + { RAPTOR_OPTION_PREFIX_ELEMENTS, + RAPTOR_OPTION_AREA_SERIALIZER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "prefixElements", + "Atom and RSS serializers write namespace-prefixed elements" + }, + { RAPTOR_OPTION_STRICT, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "strict", + "Operate in strict conformance mode (otherwise lax)" + }, + { RAPTOR_OPTION_WWW_CERT_FILENAME, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "wwwCertFilename", + "SSL client certificate filename" + }, + { RAPTOR_OPTION_WWW_CERT_TYPE, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "wwwCertType", + "SSL client certificate type" + }, + { RAPTOR_OPTION_WWW_CERT_PASSPHRASE, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_STRING, + "wwwCertPassphrase", + "SSL client certificate passphrase" + }, + { RAPTOR_OPTION_NO_FILE, + (raptor_option_area)(RAPTOR_OPTION_AREA_PARSER | RAPTOR_OPTION_AREA_SAX2), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "noFile", + "Parsers and SAX2 deny internal file requests." 
+ }, + { RAPTOR_OPTION_WWW_SSL_VERIFY_PEER, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_INT, + "wwwSslVerifyPeer", + "SSL verify peer certficate" + }, + { RAPTOR_OPTION_WWW_SSL_VERIFY_HOST, + RAPTOR_OPTION_AREA_PARSER, + RAPTOR_OPTION_VALUE_TYPE_INT, + "wwwSslVerifyHost", + "SSL verify host matching" + }, + { RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, + (raptor_option_area)(RAPTOR_OPTION_AREA_PARSER | RAPTOR_OPTION_AREA_SAX2), + RAPTOR_OPTION_VALUE_TYPE_BOOL, + "loadExternalEntities", + "Parsers and SAX2 should load external entities." + } +}; + + +static const char * const raptor_option_uri_prefix = "http://feature.librdf.org/raptor-"; +/* NOTE: this is strlen(raptor_option_uri_prefix) */ +static const int raptor_option_uri_prefix_len = 33; + + +static raptor_option_area +raptor_option_get_option_area_for_domain(raptor_domain domain) +{ + raptor_option_area area = RAPTOR_OPTION_AREA_NONE; + + if(domain == RAPTOR_DOMAIN_PARSER) + area = RAPTOR_OPTION_AREA_PARSER; + else if(domain == RAPTOR_DOMAIN_SERIALIZER) + area = RAPTOR_OPTION_AREA_SERIALIZER; + else if(domain == RAPTOR_DOMAIN_SAX2) + area = RAPTOR_OPTION_AREA_SAX2; + else if(domain == RAPTOR_DOMAIN_XML_WRITER) + area = RAPTOR_OPTION_AREA_XML_WRITER; + else if(domain == RAPTOR_DOMAIN_TURTLE_WRITER) + area = RAPTOR_OPTION_AREA_TURTLE_WRITER; + + return area; +} + + +/** + * raptor_free_option_description: + * @option_description: option description + * + * Destructor - free an option description object. 
+ */ +void +raptor_free_option_description(raptor_option_description* option_description) +{ + if(!option_description) + return; + + /* these are shared strings pointing to static data in raptor_options_list[] */ + /* RAPTOR_FREE(char*, option_description->name); */ + /* RAPTOR_FREE(char*, option_description->label); */ + + if(option_description->uri) + raptor_free_uri(option_description->uri); + + RAPTOR_FREE(raptor_option_description, option_description); +} + + +/** + * raptor_world_get_option_description: + * @world: raptor world object + * @domain: domain + * @option: option enumeration (0+) + * + * Get a description of an option for a domain. + * + * The returned description must be freed with + * raptor_free_option_description(). + * + * Return value: option description or NULL on failure or if option is unknown + **/ +raptor_option_description* +raptor_world_get_option_description(raptor_world* world, + const raptor_domain domain, + const raptor_option option) +{ + raptor_option_area area; + raptor_option_description *option_description = NULL; + raptor_uri *base_uri = NULL; + int i; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL); + + raptor_world_open(world); + + area = raptor_option_get_option_area_for_domain(domain); + if(area == RAPTOR_OPTION_AREA_NONE) + return NULL; + + for(i = 0; i <= RAPTOR_OPTION_LAST; i++) { + if(raptor_options_list[i].option == option && + (raptor_options_list[i].area & area)) + break; + } + + if(i > RAPTOR_OPTION_LAST) + return NULL; + + option_description = RAPTOR_CALLOC(raptor_option_description*, 1, + sizeof(*option_description)); + if(!option_description) + return NULL; + + option_description->domain = domain; + option_description->option = option; + option_description->value_type = raptor_options_list[i].value_type; + option_description->name = raptor_options_list[i].name; + option_description->name_len = strlen(option_description->name); + option_description->label = raptor_options_list[i].label; 
+ + base_uri = raptor_new_uri_from_counted_string(world, + (const unsigned char*)raptor_option_uri_prefix, + raptor_option_uri_prefix_len); + if(!base_uri) { + raptor_free_option_description(option_description); + return NULL; + } + + option_description->uri = raptor_new_uri_from_uri_local_name(world, + base_uri, + (const unsigned char*)raptor_options_list[i].name); + raptor_free_uri(base_uri); + if(!option_description->uri) { + raptor_free_option_description(option_description); + return NULL; + } + + return option_description; +} + + + +int +raptor_option_is_valid_for_area(const raptor_option option, + raptor_option_area area) +{ + if(option > RAPTOR_OPTION_LAST) + return 0; + return (raptor_options_list[option].area & area) != 0; +} + + +int +raptor_option_value_is_numeric(const raptor_option option) +{ + raptor_option_value_type t = raptor_options_list[option].value_type; + + return t == RAPTOR_OPTION_VALUE_TYPE_BOOL || + t == RAPTOR_OPTION_VALUE_TYPE_INT; +} + + +/** + * raptor_world_get_option_from_uri: + * @world: raptor_world instance + * @uri: option URI + * + * Get an option ID from a URI + * + * Option URIs are the concatenation of the string + * "http://feature.librdf.org/raptor-" plus the short name. + * + * They are automatically returned for any option described with + * raptor_world_get_option_description(). 
+ * + * Return value: < 0 if the option is unknown or on error + **/ +raptor_option +raptor_world_get_option_from_uri(raptor_world* world, raptor_uri *uri) +{ + unsigned char *uri_string; + int i; + raptor_option option = (raptor_option)-1; + + if(!uri) + return option; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, (raptor_option)-1); + + raptor_world_open(world); + + uri_string = raptor_uri_as_string(uri); + if(strncmp((const char*)uri_string, raptor_option_uri_prefix, + raptor_option_uri_prefix_len)) + return option; + + uri_string += raptor_option_uri_prefix_len; + + for(i = 0; i <= RAPTOR_OPTION_LAST; i++) + if(!strcmp(raptor_options_list[i].name, (const char*)uri_string)) { + option = (raptor_option)i; + break; + } + + return option; +} + + +/** + * raptor_option_get_count: + * + * Get the count of options defined. + * + * This is prefered to the compile time-only symbol #RAPTOR_OPTION_LAST + * and returns a count of the number of options which is + * #RAPTOR_OPTION_LAST + 1. 
+ * + * Return value: count of options in the #raptor_option enumeration + **/ +unsigned int +raptor_option_get_count(void) +{ + return RAPTOR_OPTION_LAST + 1; +} + + +const char* const +raptor_option_value_type_labels[RAPTOR_OPTION_VALUE_TYPE_URI + 1] = { + "boolean", + "integer", + "string", + "uri" +}; + + +/** + * raptor_option_get_value_type_label: + * @type: value type + * + * Get a label for a value type + * + * Return value: label for type or NULL for invalid type + */ +const char* +raptor_option_get_value_type_label(const raptor_option_value_type type) +{ + if(type > RAPTOR_OPTION_VALUE_TYPE_LAST) + return NULL; + return raptor_option_value_type_labels[type]; +} + + +int +raptor_object_options_copy_state(raptor_object_options* to, + raptor_object_options* from) +{ + int rc = 0; + int i; + + to->area = from->area; + for(i = 0; !rc && i <= RAPTOR_OPTION_LAST; i++) { + if(raptor_option_value_is_numeric((raptor_option)i)) + to->options[i].integer = from->options[i].integer; + else { + /* non-numeric values may need allocations */ + char* string = from->options[i].string; + if(string) { + size_t len = strlen(string); + to->options[i].string = RAPTOR_MALLOC(char*, len + 1); + if(to->options[i].string) + memcpy(to->options[i].string, string, len + 1); + else + rc = 1; + } + } + } + + return rc; +} + + +void +raptor_object_options_init(raptor_object_options* options, + raptor_option_area area) +{ + int i; + + options->area = area; + + for(i = 0; i <= RAPTOR_OPTION_LAST; i++) { + if(raptor_option_value_is_numeric((raptor_option)i)) + options->options[i].integer = 0; + else + options->options[i].string = NULL; + } + + /* Initialise default options that are not 0 or NULL */ + + /* Emit @base directive or equivalent */ + options->options[RAPTOR_OPTION_WRITE_BASE_URI].integer = 1; + + /* Emit relative URIs where possible */ + options->options[RAPTOR_OPTION_RELATIVE_URIS].integer = 1; + + /* XML 1.0 output */ + options->options[RAPTOR_OPTION_WRITER_XML_VERSION].integer 
= 10; + + /* Write XML declaration */ + options->options[RAPTOR_OPTION_WRITER_XML_DECLARATION].integer = 1; + + /* Indent 2 spaces */ + options->options[RAPTOR_OPTION_WRITER_INDENT_WIDTH].integer = 2; + + /* lax (no strict) parsing */ + options->options[RAPTOR_OPTION_STRICT].integer = 0; + + /* SSL verify peers */ + options->options[RAPTOR_OPTION_WWW_SSL_VERIFY_PEER].integer = 1; + + /* SSL fully verify hosts */ + options->options[RAPTOR_OPTION_WWW_SSL_VERIFY_HOST].integer = 2; + +} + + +void +raptor_object_options_clear(raptor_object_options* options) +{ + int i; + + for(i = 0; i <= RAPTOR_OPTION_LAST; i++) { + if(raptor_option_value_is_numeric((raptor_option)i)) + continue; + + if(options->options[i].string) + RAPTOR_FREE(char*, options->options[i].string); + } +} + + +/* + * raptor_object_options_get_option: + * @options: options object + * @option: option to get value + * @string_p: pointer to where to store string value + * @integer_p: pointer to where to store integer value + * + * INTERNAL - get option value + * + * Any string value returned in *@string_p is shared and must be + * copied by the caller. + * + * The allowed options vary by the area field of @options. 
+ * + * Return value: option value or < 0 for an illegal option + **/ +int +raptor_object_options_get_option(raptor_object_options* options, + raptor_option option, + char** string_p, int* integer_p) +{ + if(!raptor_option_is_valid_for_area(option, options->area)) + return 1; + + if(raptor_option_value_is_numeric(option)) { + /* numeric options */ + int value = options->options[(int)option].integer; + if(integer_p) + *integer_p = value; + } else { + /* non-numeric options */ + char* string = options->options[(int)option].string; + if(string_p) + *string_p = string; + } + + return 0; +} + + +/* + * raptor_object_options_set_option: + * @options: options object + * @option: option to set + * @string: string option value (or NULL) + * @integer: integer option value + * + * INTERNAL - set option + * + * If @string is not NULL and the option type is numeric, the string + * value is converted to an integer and used in preference to @integer. + * + * If @string is NULL and the option type is not numeric, an error is + * returned. + * + * The @string values used are copied. + * + * The allowed options vary by the area field of @options. 
+ * + * Return value: non 0 on failure or if the option is unknown + **/ +int +raptor_object_options_set_option(raptor_object_options *options, + raptor_option option, + const char* string, int integer) +{ + if(!raptor_option_is_valid_for_area(option, options->area)) + return 1; + + if(raptor_option_value_is_numeric(option)) { + /* numeric options */ + if(string) + integer = atoi((const char*)string); + + options->options[(int)option].integer = integer; + return 0; + } else { + /* non-numeric options */ + char *string_copy; + size_t len = 0; + + if(string) + len = strlen((const char*)string); + string_copy = RAPTOR_MALLOC(char*, len + 1); + if(!string_copy) + return 1; + + if(len) + memcpy(string_copy, string, len); + string_copy[len] = '\0'; + + options->options[(int)option].string = string_copy; + } + + return 0; +} diff --git a/src/raptor_parse.c b/src/raptor_parse.c new file mode 100644 index 0000000..9bc0a39 --- /dev/null +++ b/src/raptor_parse.c @@ -0,0 +1,1832 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_parse.c - Raptor Parser API + * + * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif +#ifdef HAVE_FCNTL_H +#include <fcntl.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +/* prototypes for helper functions */ +static void raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict); + +/* helper methods */ + +static void +raptor_free_parser_factory(raptor_parser_factory* factory) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_parser_factory); + + if(factory->finish_factory) + factory->finish_factory(factory); + + RAPTOR_FREE(raptor_parser_factory, factory); +} + + +/* class methods */ + +int +raptor_parsers_init(raptor_world *world) +{ + int rc = 0; + + world->parsers = raptor_new_sequence((raptor_data_free_handler)raptor_free_parser_factory, NULL); + if(!world->parsers) + return 1; + +#ifdef RAPTOR_PARSER_RDFXML + rc+= raptor_init_parser_rdfxml(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_NTRIPLES + rc+= raptor_init_parser_ntriples(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_N3 + rc+= raptor_init_parser_n3(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_TURTLE + rc+= raptor_init_parser_turtle(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_TRIG + rc+= raptor_init_parser_trig(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_RSS + rc+= raptor_init_parser_rss(world) != 0; +#endif + +#if defined(RAPTOR_PARSER_GRDDL) + rc+= raptor_init_parser_grddl_common(world) != 0; + +#ifdef RAPTOR_PARSER_GRDDL + rc+= raptor_init_parser_grddl(world) != 0; +#endif + +#endif + +#ifdef RAPTOR_PARSER_GUESS + rc+= raptor_init_parser_guess(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_RDFA + rc+= raptor_init_parser_rdfa(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_JSON + rc+= 
raptor_init_parser_json(world) != 0; +#endif + +#ifdef RAPTOR_PARSER_NQUADS + rc+= raptor_init_parser_nquads(world) != 0; +#endif + + return rc; +} + + +/* + * raptor_finish_parsers - delete all the registered parsers + */ +void +raptor_parsers_finish(raptor_world *world) +{ + if(world->parsers) { + raptor_free_sequence(world->parsers); + world->parsers = NULL; + } +#if defined(RAPTOR_PARSER_GRDDL) + raptor_terminate_parser_grddl_common(world); +#endif +} + + +/* + * raptor_world_register_parser_factory: + * @world: raptor world + * @factory: pointer to function to call to register the factory + * + * Internal - Register a parser via parser factory. + * + * All strings set in the @factory method are shared with the + * #raptor_parser_factory + * + * Return value: new factory object or NULL on failure + **/ +RAPTOR_EXTERN_C +raptor_parser_factory* +raptor_world_register_parser_factory(raptor_world* world, + int (*factory) (raptor_parser_factory*)) +{ + raptor_parser_factory *parser = NULL; + + parser = RAPTOR_CALLOC(raptor_parser_factory*, 1, sizeof(*parser)); + if(!parser) + return NULL; + + parser->world = world; + + parser->desc.mime_types = NULL; + + if(raptor_sequence_push(world->parsers, parser)) + return NULL; /* on error, parser is already freed by the sequence */ + + /* Call the parser registration function on the new object */ + if(factory(parser)) + return NULL; /* parser is owned and freed by the parsers sequence */ + + if(raptor_syntax_description_validate(&parser->desc)) { + raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Parser description failed to validate\n"); + goto tidy; + } + + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Registered parser %s\n", parser->desc.names[0]); +#endif + + return parser; + + /* Clean up on failure */ + tidy: + raptor_free_parser_factory(parser); + return NULL; +} + + +/* + * raptor_world_get_parser_factory: + * @world: world object + * @name: the factory name or NULL for the default factory 
+ * + * INTERNAL - Get a parser factory by name. + * + * Return value: the factory object or NULL if there is no such factory + **/ +raptor_parser_factory* +raptor_world_get_parser_factory(raptor_world *world, const char *name) +{ + raptor_parser_factory *factory = NULL; + + /* return 1st parser if no particular one wanted - why? */ + if(!name) { + factory = (raptor_parser_factory *)raptor_sequence_get_at(world->parsers, 0); + if(!factory) { + RAPTOR_DEBUG1("No (default) parsers registered\n"); + return NULL; + } + } else { + int i; + + for(i = 0; + (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i)); + i++) { + int namei; + const char* fname; + + for(namei = 0; (fname = factory->desc.names[namei]); namei++) { + if(!strcmp(fname, name)) + break; + } + if(fname) + break; + } + } + + return factory; +} + + +/** + * raptor_world_get_parsers_count: + * @world: world object + * + * Get number of parsers + * + * Return value: number of parsers or <0 on failure + **/ +int +raptor_world_get_parsers_count(raptor_world* world) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); + + raptor_world_open(world); + + return raptor_sequence_size(world->parsers); +} + + +/** + * raptor_world_get_parser_description: + * @world: world object + * @counter: index into the list of parsers + * + * Get parser descriptive syntax information + * + * Return value: description or NULL if counter is out of range + **/ +const raptor_syntax_description* +raptor_world_get_parser_description(raptor_world* world, + unsigned int counter) +{ + raptor_parser_factory *factory; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL); + + raptor_world_open(world); + + factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, + counter); + + if(!factory) + return NULL; + + return &factory->desc; +} + + +/** + * raptor_world_is_parser_name: + * @world: world object + * @name: the syntax name + * + * Check the name of a parser is 
known. + * + * Return value: non 0 if name is a known syntax name + */ +int +raptor_world_is_parser_name(raptor_world* world, const char *name) +{ + if(!name) + return 0; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0); + + raptor_world_open(world); + + return (raptor_world_get_parser_factory(world, name) != NULL); +} + + +/** + * raptor_new_parser: + * @world: world object + * @name: the parser name or NULL for default parser + * + * Constructor - create a new raptor_parser object. + * + * Return value: a new #raptor_parser object or NULL on failure + */ +raptor_parser* +raptor_new_parser(raptor_world* world, const char *name) +{ + raptor_parser_factory* factory; + raptor_parser* rdf_parser; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + factory = raptor_world_get_parser_factory(world, name); + if(!factory) + return NULL; + + rdf_parser = RAPTOR_CALLOC(raptor_parser*, 1, sizeof(*rdf_parser)); + if(!rdf_parser) + return NULL; + + rdf_parser->world = world; + raptor_statement_init(&rdf_parser->statement, world); + + rdf_parser->context = RAPTOR_CALLOC(void*, 1, factory->context_length); + if(!rdf_parser->context) { + raptor_free_parser(rdf_parser); + return NULL; + } + +#ifdef RAPTOR_XML_LIBXML + rdf_parser->magic = RAPTOR_LIBXML_MAGIC; +#endif + rdf_parser->factory = factory; + + /* Bit flags */ + rdf_parser->failed = 0; + rdf_parser->emit_graph_marks = 1; + rdf_parser->emitted_default_graph = 0; + + raptor_object_options_init(&rdf_parser->options, RAPTOR_OPTION_AREA_PARSER); + + /* set parsing strictness from default value */ + raptor_parser_set_strict(rdf_parser, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_STRICT)); + + if(factory->init(rdf_parser, name)) { + raptor_free_parser(rdf_parser); + return NULL; + } + + return rdf_parser; +} + + +/** + * raptor_new_parser_for_content: + * @world: world object + * @uri: URI identifying the syntax (or NULL) + * @mime_type: mime type identifying the content (or 
NULL) + * @buffer: buffer of content to guess (or NULL) + * @len: length of buffer + * @identifier: identifier of content (or NULL) + * + * Constructor - create a new raptor_parser. + * + * Uses raptor_world_guess_parser_name() to find a parser by scoring + * recognition of the syntax by a block of characters, the content + * identifier or a mime type. The content identifier is typically a + * filename or URI or some other identifier. + * + * Return value: a new #raptor_parser object or NULL on failure + **/ +raptor_parser* +raptor_new_parser_for_content(raptor_world* world, + raptor_uri *uri, const char *mime_type, + const unsigned char *buffer, size_t len, + const unsigned char *identifier) +{ + const char* name; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + name = raptor_world_guess_parser_name(world, uri, mime_type, + buffer, len, identifier); + return name ? raptor_new_parser(world, name) : NULL; +} + + +/** + * raptor_parser_parse_start: + * @rdf_parser: RDF parser + * @uri: base URI or may be NULL if no base URI is required + * + * Start a parse of content with base URI. + * + * Parsers that need a base URI can be identified using a syntax + * description returned by raptor_world_get_parser_description() + * statically or raptor_parser_get_description() on a constructed + * parser. 
+ * + * Return value: non-0 on failure, <0 if a required base URI was missing + **/ +int +raptor_parser_parse_start(raptor_parser *rdf_parser, raptor_uri *uri) +{ + if((rdf_parser->factory->desc.flags & RAPTOR_SYNTAX_NEED_BASE_URI) && !uri) { + raptor_parser_error(rdf_parser, "Missing base URI for %s parser.", + rdf_parser->factory->desc.names[0]); + return -1; + } + + if(uri) + uri = raptor_uri_copy(uri); + + if(rdf_parser->base_uri) + raptor_free_uri(rdf_parser->base_uri); + rdf_parser->base_uri = uri; + + rdf_parser->locator.uri = uri; + rdf_parser->locator.line = -1; + rdf_parser->locator.column = -1; + rdf_parser->locator.byte = -1; + + if(rdf_parser->factory->start) + return rdf_parser->factory->start(rdf_parser); + else + return 0; +} + + + + +/** + * raptor_parser_parse_chunk: + * @rdf_parser: RDF parser + * @buffer: content to parse + * @len: length of buffer + * @is_end: non-0 if this is the end of the content (such as EOF) + * + * Parse a block of content into triples. + * + * This method can only be called after raptor_parser_parse_start() has + * initialised the parser. + * + * Return value: non-0 on failure. + **/ +int +raptor_parser_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *buffer, size_t len, int is_end) +{ + if(rdf_parser->sb) + raptor_stringbuffer_append_counted_string(rdf_parser->sb, buffer, len, 1); + + return rdf_parser->factory->chunk(rdf_parser, buffer, len, is_end); +} + + +/** + * raptor_free_parser: + * @parser: #raptor_parser object + * + * Destructor - destroy a raptor_parser object. 
+ * + **/ +void +raptor_free_parser(raptor_parser* rdf_parser) +{ + if(!rdf_parser) + return; + + if(rdf_parser->factory) + rdf_parser->factory->terminate(rdf_parser); + + if(rdf_parser->www) + raptor_free_www(rdf_parser->www); + + if(rdf_parser->context) + RAPTOR_FREE(raptor_parser_context, rdf_parser->context); + + if(rdf_parser->base_uri) + raptor_free_uri(rdf_parser->base_uri); + + if(rdf_parser->sb) + raptor_free_stringbuffer(rdf_parser->sb); + + raptor_object_options_clear(&rdf_parser->options); + + RAPTOR_FREE(raptor_parser, rdf_parser); +} + + +/** + * raptor_parser_parse_file_stream: + * @rdf_parser: parser + * @stream: FILE* of RDF content + * @filename: filename of content or NULL if it has no name + * @base_uri: the base URI to use + * + * Parse RDF content from a FILE*. + * + * After draining the FILE* stream (EOF), fclose is not called on it. + * + * Return value: non 0 on failure + **/ +int +raptor_parser_parse_file_stream(raptor_parser* rdf_parser, + FILE *stream, const char* filename, + raptor_uri *base_uri) +{ + int rc = 0; + raptor_locator *locator = &rdf_parser->locator; + + if(!stream || !base_uri) + return 1; + + locator->line= locator->column = -1; + locator->file= filename; + + if(raptor_parser_parse_start(rdf_parser, base_uri)) + return 1; + + while(!feof(stream)) { + size_t len = fread(rdf_parser->buffer, 1, RAPTOR_READ_BUFFER_SIZE, stream); + int is_end = (len < RAPTOR_READ_BUFFER_SIZE); + rdf_parser->buffer[len] = '\0'; + rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end); + if(rc || is_end) + break; + } + + return (rc != 0); +} + + +/** + * raptor_parser_parse_file: + * @rdf_parser: parser + * @uri: URI of RDF content or NULL to read from standard input + * @base_uri: the base URI to use (or NULL if the same) + * + * Parse RDF content at a file URI. + * + * If @uri is NULL (source is stdin), then the @base_uri is required. 
+ * + * Return value: non 0 on failure + **/ +int +raptor_parser_parse_file(raptor_parser* rdf_parser, raptor_uri *uri, + raptor_uri *base_uri) +{ + int rc = 0; + int free_base_uri = 0; + const char *filename = NULL; + FILE *fh = NULL; +#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) + struct stat buf; +#endif + + if(uri) { + filename = raptor_uri_uri_string_to_filename(raptor_uri_as_string(uri)); + if(!filename) + return 1; + +#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) + if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) { + raptor_parser_error(rdf_parser, "Cannot read from a directory '%s'", + filename); + goto cleanup; + } +#endif + + fh = fopen(filename, "r"); + if(!fh) { + raptor_parser_error(rdf_parser, "file '%s' open failed - %s", + filename, strerror(errno)); + goto cleanup; + } + if(!base_uri) { + base_uri = raptor_uri_copy(uri); + free_base_uri = 1; + } + } else { + if(!base_uri) + return 1; + fh = stdin; + } + + rc = raptor_parser_parse_file_stream(rdf_parser, fh, filename, base_uri); + + cleanup: + if(uri) { + if(fh) + fclose(fh); + RAPTOR_FREE(char*, filename); + } + if(free_base_uri) + raptor_free_uri(base_uri); + + return rc; +} + + +void +raptor_parser_parse_uri_write_bytes(raptor_www* www, + void *userdata, const void *ptr, + size_t size, size_t nmemb) +{ + raptor_parse_bytes_context* rpbc = (raptor_parse_bytes_context*)userdata; + size_t len = size * nmemb; + + if(!rpbc->started) { + raptor_uri* base_uri = rpbc->base_uri; + + if(!base_uri) { + rpbc->final_uri = raptor_www_get_final_uri(www); + /* base URI after URI resolution is finally chosen */ + base_uri = rpbc->final_uri ? 
rpbc->final_uri : www->uri; + } + + if(raptor_parser_parse_start(rpbc->rdf_parser, base_uri)) + raptor_www_abort(www, "Parsing failed"); + rpbc->started = 1; + } + + if(raptor_parser_parse_chunk(rpbc->rdf_parser, (unsigned char*)ptr, len, 0)) + raptor_www_abort(www, "Parsing failed"); +} + + +static void +raptor_parser_parse_uri_content_type_handler(raptor_www* www, void* userdata, + const char* content_type) +{ + raptor_parser* rdf_parser = (raptor_parser*)userdata; + if(rdf_parser->factory->content_type_handler) + rdf_parser->factory->content_type_handler(rdf_parser, content_type); +} + + +int +raptor_parser_set_uri_filter_no_net(void *user_data, raptor_uri* uri) +{ + unsigned char* uri_string = raptor_uri_as_string(uri); + + if(raptor_uri_uri_string_is_file_uri(uri_string)) + return 0; + + raptor_parser_error((raptor_parser*)user_data, + "Network fetch of URI '%s' denied", uri_string); + return 1; +} + + +/** + * raptor_parser_parse_uri: + * @rdf_parser: parser + * @uri: URI of RDF content + * @base_uri: the base URI to use (or NULL if the same) + * + * Parse the RDF content at URI. + * + * Sends an HTTP Accept: header whent the URI is of the HTTP protocol, + * see raptor_parser_parse_uri_with_connection() for details including + * how the @base_uri is used. + * + * Return value: non 0 on failure + **/ +int +raptor_parser_parse_uri(raptor_parser* rdf_parser, raptor_uri *uri, + raptor_uri *base_uri) +{ + return raptor_parser_parse_uri_with_connection(rdf_parser, uri, base_uri, + NULL); +} + + +/** + * raptor_parser_parse_uri_with_connection: + * @rdf_parser: parser + * @uri: URI of RDF content + * @base_uri: the base URI to use (or NULL if the same) + * @connection: connection object pointer or NULL to create a new one + * + * Parse RDF content at URI using existing WWW connection. + * + * If @base_uri is not given and during resolution of the URI, a + * protocol redirection occurs, the final resolved URI will be + * used as the base URI. 
If redirection does not occur, the + * base URI will be @uri. + * + * If @base_uri is given, it overrides the process above. + * + * When @connection is NULL and a MIME Type exists for the parser + * type, this type is sent in an HTTP Accept: header in the form + * Accept: MIME-TYPE along with a wildcard of 0.1 quality, so MIME-TYPE is + * prefered rather than the sole answer. The latter part may not be + * necessary but should ensure an HTTP 200 response. + * + * Return value: non 0 on failure + **/ +int +raptor_parser_parse_uri_with_connection(raptor_parser* rdf_parser, + raptor_uri *uri, + raptor_uri *base_uri, void *connection) +{ + int ret = 0; + raptor_parse_bytes_context rpbc; + char* ua = NULL; + char* cert_filename = NULL; + char* cert_type = NULL; + char* cert_passphrase = NULL; + int ssl_verify_peer; + int ssl_verify_host; + + if(connection) { + if(rdf_parser->www) + raptor_free_www(rdf_parser->www); + rdf_parser->www = raptor_new_www_with_connection(rdf_parser->world, + connection); + if(!rdf_parser->www) + return 1; + } else { + const char *accept_h; + + if(rdf_parser->www) + raptor_free_www(rdf_parser->www); + rdf_parser->www = raptor_new_www(rdf_parser->world); + if(!rdf_parser->www) + return 1; + + accept_h = raptor_parser_get_accept_header(rdf_parser); + if(accept_h) { + ret = raptor_www_set_http_accept2(rdf_parser->www, accept_h, 0); + RAPTOR_FREE(char*, accept_h); + if(ret) + return 1; + } + } + + rpbc.rdf_parser = rdf_parser; + rpbc.base_uri = base_uri; + rpbc.final_uri = NULL; + rpbc.started = 0; + + if(rdf_parser->uri_filter) + raptor_www_set_uri_filter(rdf_parser->www, rdf_parser->uri_filter, + rdf_parser->uri_filter_user_data); + else if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET)) + raptor_www_set_uri_filter(rdf_parser->www, + raptor_parser_set_uri_filter_no_net, rdf_parser); + + raptor_www_set_write_bytes_handler(rdf_parser->www, + raptor_parser_parse_uri_write_bytes, + &rpbc); + + 
raptor_www_set_content_type_handler(rdf_parser->www, + raptor_parser_parse_uri_content_type_handler, + rdf_parser); + + raptor_www_set_http_cache_control(rdf_parser->www, + RAPTOR_OPTIONS_GET_STRING(rdf_parser, + RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL)); + + ua = RAPTOR_OPTIONS_GET_STRING(rdf_parser, RAPTOR_OPTION_WWW_HTTP_USER_AGENT); + if(ua) { + if(raptor_www_set_user_agent2(rdf_parser->www, ua, 0)) + return 1; + } + + cert_filename = RAPTOR_OPTIONS_GET_STRING(rdf_parser, + RAPTOR_OPTION_WWW_CERT_FILENAME); + cert_type = RAPTOR_OPTIONS_GET_STRING(rdf_parser, + RAPTOR_OPTION_WWW_CERT_TYPE); + cert_passphrase = RAPTOR_OPTIONS_GET_STRING(rdf_parser, + RAPTOR_OPTION_WWW_CERT_PASSPHRASE); + if(cert_filename || cert_type || cert_passphrase) + raptor_www_set_ssl_cert_options(rdf_parser->www, cert_filename, + cert_type, cert_passphrase); + + ssl_verify_peer = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, + RAPTOR_OPTION_WWW_SSL_VERIFY_PEER); + ssl_verify_host = RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, + RAPTOR_OPTION_WWW_SSL_VERIFY_HOST); + raptor_www_set_ssl_verify_options(rdf_parser->www, ssl_verify_peer, + ssl_verify_host); + + ret = raptor_www_fetch(rdf_parser->www, uri); + + if(!rpbc.started && !ret) + ret = raptor_parser_parse_start(rdf_parser, base_uri); + + if(rpbc.final_uri) + raptor_free_uri(rpbc.final_uri); + + if(ret) { + raptor_free_www(rdf_parser->www); + rdf_parser->www = NULL; + return 1; + } + + if(raptor_parser_parse_chunk(rdf_parser, NULL, 0, 1)) + rdf_parser->failed = 1; + + raptor_free_www(rdf_parser->www); + rdf_parser->www = NULL; + + return rdf_parser->failed; +} + + +/* + * raptor_parser_fatal_error - Fatal Error from a parser - Internal + */ +void +raptor_parser_fatal_error(raptor_parser* parser, const char *message, ...) 
+{ + va_list arguments; + + va_start(arguments, message); + if(parser) { + parser->failed = 1; + raptor_log_error_varargs(parser->world, + RAPTOR_LOG_LEVEL_FATAL, + &parser->locator, + message, arguments); + } else + raptor_log_error_varargs(NULL, + RAPTOR_LOG_LEVEL_FATAL, NULL, + message, arguments); + va_end(arguments); +} + + +/* + * raptor_parser_error - Error from a parser - Internal + */ +void +raptor_parser_error(raptor_parser* parser, const char *message, ...) +{ + va_list arguments; + + va_start(arguments, message); + + raptor_parser_log_error_varargs(parser, RAPTOR_LOG_LEVEL_ERROR, + message, arguments); + + va_end(arguments); +} + + +/** + * raptor_parser_log_error_varargs: + * @parser: parser (or NULL) + * @level: log level + * @message: error format message + * @arguments: varargs for message + * + * Error from a parser - Internal. + */ +void +raptor_parser_log_error_varargs(raptor_parser* parser, + raptor_log_level level, + const char *message, va_list arguments) +{ + if(parser) + raptor_log_error_varargs(parser->world, + level, + &parser->locator, + message, arguments); + else + raptor_log_error_varargs(NULL, + level, + NULL, + message, arguments); +} + + +/** + * raptor_parser_log_error: + * @parser: parser (or NULL) + * @level: log level + * @message: error format message + * + * Error from a parser - Internal. + */ +void +raptor_parser_log_error(raptor_parser* parser, + raptor_log_level level, + const char *message, ...) +{ + va_list arguments; + + va_start(arguments, message); + + if(parser) + raptor_log_error_varargs(parser->world, + level, + &parser->locator, + message, arguments); + else + raptor_log_error_varargs(NULL, + level, + NULL, + message, arguments); + + va_end(arguments); +} + + +/* + * raptor_parser_warning - Warning from a parser - Internal + */ +void +raptor_parser_warning(raptor_parser* parser, const char *message, ...) 
+{ + va_list arguments; + + va_start(arguments, message); + + if(parser) + raptor_log_error_varargs(parser->world, + RAPTOR_LOG_LEVEL_WARN, + &parser->locator, + message, arguments); + else + raptor_log_error_varargs(NULL, + RAPTOR_LOG_LEVEL_WARN, + NULL, + message, arguments); + + va_end(arguments); +} + + + +/* PUBLIC FUNCTIONS */ + +/** + * raptor_parser_set_statement_handler: + * @parser: #raptor_parser parser object + * @user_data: user data pointer for callback + * @handler: new statement callback function + * + * Set the statement handler function for the parser. + * + * Use this to set the function to receive statements as the parsing + * proceeds. The statement argument to @handler is shared and must be + * copied by the caller with raptor_statement_copy(). + **/ +void +raptor_parser_set_statement_handler(raptor_parser* parser, + void *user_data, + raptor_statement_handler handler) +{ + parser->user_data = user_data; + parser->statement_handler = handler; +} + + +/** + * raptor_parser_set_graph_mark_handler: + * @parser: #raptor_parser parser object + * @user_data: user data pointer for callback + * @handler: new graph callback function + * + * Set the graph mark handler function for the parser. + * + * See #raptor_graph_mark_handler and #raptor_graph_mark_flags for + * the marks that may be returned by the handler. + * + **/ +void +raptor_parser_set_graph_mark_handler(raptor_parser* parser, + void *user_data, + raptor_graph_mark_handler handler) +{ + parser->user_data = user_data; + parser->graph_mark_handler = handler; +} + + +/** + * raptor_parser_set_namespace_handler: + * @parser: #raptor_parser parser object + * @user_data: user data pointer for callback + * @handler: new namespace callback function + * + * Set the namespace handler function for the parser. + * + * When a prefix/namespace is seen in a parser, call the given + * @handler with the prefix string and the #raptor_uri namespace URI. 
+ * Either can be NULL for the default prefix or default namespace. + * + * The handler function does not deal with duplicates so any + * namespace may be declared multiple times. + * + **/ +void +raptor_parser_set_namespace_handler(raptor_parser* parser, + void *user_data, + raptor_namespace_handler handler) +{ + parser->namespace_handler = handler; + parser->namespace_handler_user_data = user_data; +} + + +/** + * raptor_parser_set_uri_filter: + * @parser: parser object + * @filter: URI filter function + * @user_data: User data to pass to filter function + * + * Set URI filter function for WWW retrieval. + **/ +void +raptor_parser_set_uri_filter(raptor_parser* parser, + raptor_uri_filter_func filter, + void *user_data) +{ + parser->uri_filter = filter; + parser->uri_filter_user_data = user_data; +} + + +/** + * raptor_parser_set_option: + * @parser: #raptor_parser parser object + * @option: option to set from enumerated #raptor_option values + * @string: string option value (or NULL) + * @integer: integer option value + * + * Set parser option. + * + * If @string is not NULL and the option type is numeric, the string + * value is converted to an integer and used in preference to @integer. + * + * If @string is NULL and the option type is not numeric, an error is + * returned. + * + * The @string values used are copied. + * + * The allowed options are available via + * raptor_world_get_option_description(). 
+ * + * Return value: non 0 on failure or if the option is unknown + **/ +int +raptor_parser_set_option(raptor_parser *parser, raptor_option option, + const char* string, int integer) +{ + int rc; + + rc = raptor_object_options_set_option(&parser->options, option, + string, integer); + if(option == RAPTOR_OPTION_STRICT && !rc) { + int is_strict = RAPTOR_OPTIONS_GET_NUMERIC(parser, RAPTOR_OPTION_STRICT); + raptor_parser_set_strict(parser, is_strict); + } + + return rc; +} + + +/** + * raptor_parser_get_option: + * @parser: #raptor_parser parser object + * @option: option to get value + * @string_p: pointer to where to store string value + * @integer_p: pointer to where to store integer value + * + * Get parser option. + * + * Any string value returned in *@string_p is shared and must + * be copied by the caller. + * + * The allowed options are available via + * raptor_world_get_option_description(). + * + * Return value: option value or < 0 for an illegal option + **/ +int +raptor_parser_get_option(raptor_parser *parser, raptor_option option, + char** string_p, int* integer_p) +{ + return raptor_object_options_get_option(&parser->options, option, + string_p, integer_p); +} + + +/** + * raptor_parser_set_strict: + * @rdf_parser: #raptor_parser object + * @is_strict: Non 0 for strict parsing + * + * INTERNAL - Set parser to strict / lax mode. + * + **/ +static void +raptor_parser_set_strict(raptor_parser* rdf_parser, int is_strict) +{ + is_strict = (is_strict) ? 
1 : 0; + + /* Initialise default parser mode */ + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING, 0); + + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES, !is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES, !is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID, !is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST, 0); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE, 1); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL, is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES, !is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID, 1); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_TAG_SOUP, !is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_MICROFORMATS, !is_strict); + RAPTOR_OPTIONS_SET_NUMERIC(rdf_parser, RAPTOR_OPTION_HTML_LINK, !is_strict); +} + + +/** + * raptor_parser_get_name: + * @rdf_parser: #raptor_parser parser object + * + * Get the name of a parser. + * + * Use raptor_parser_get_description() to get the alternate names and + * aliases as well as other descriptive values. + * + * Return value: the short name for the parser. + **/ +const char* +raptor_parser_get_name(raptor_parser *rdf_parser) +{ + if(rdf_parser->factory->get_name) + return rdf_parser->factory->get_name(rdf_parser); + else + return rdf_parser->factory->desc.names[0]; +} + + +/** + * raptor_parser_get_description: + * @rdf_parser: #raptor_parser parser object + * + * Get description of the syntaxes of the parser. + * + * The returned description is static and lives as long as the raptor + * library (raptor world). 
+ * + * Return value: description of syntax + **/ +const raptor_syntax_description* +raptor_parser_get_description(raptor_parser *rdf_parser) +{ + if(rdf_parser->factory->get_description) + return rdf_parser->factory->get_description(rdf_parser); + else + return &rdf_parser->factory->desc; +} + + + +/** + * raptor_parser_parse_abort: + * @rdf_parser: #raptor_parser parser object + * + * Abort an ongoing parsing. + * + * Causes any ongoing generation of statements by a parser to be + * terminated and the parser to return control to the application + * as soon as any existing buffers have been drained. + * + * Most useful inside raptor_parser_parse_file() or + * raptor_parser_parse_uri() when the Raptor library is directing the + * parsing and when one of the callback handlers, such as the one set by + * raptor_parser_set_statement_handler(), needs to return to the main + * application code. This function sets the parser's failed flag. + **/ +void +raptor_parser_parse_abort(raptor_parser *rdf_parser) +{ + rdf_parser->failed = 1; +} + + +/** + * raptor_parser_get_locator: + * @rdf_parser: raptor parser + * + * Get the current raptor locator object. 
+ * + * Return value: raptor locator + **/ +raptor_locator* +raptor_parser_get_locator(raptor_parser *rdf_parser) +{ + if(rdf_parser->factory->get_locator) + return rdf_parser->factory->get_locator(rdf_parser); + else + return &rdf_parser->locator; +} + + +#ifdef RAPTOR_DEBUG +void +raptor_stats_print(raptor_parser *rdf_parser, FILE *stream) +{ +#ifdef RAPTOR_PARSER_RDFXML +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + if(!strcmp(rdf_parser->factory->desc.names[0], "rdfxml")) { + raptor_rdfxml_parser *rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + fputs("raptor parser stats\n ", stream); + raptor_rdfxml_parser_stats_print(rdf_xml_parser, stream); + } +#endif +#endif +} +#endif + + +struct syntax_score +{ + int score; + raptor_parser_factory* factory; +}; + + +static int +compare_syntax_score(const void *a, const void *b) { + return ((struct syntax_score*)b)->score - ((struct syntax_score*)a)->score; +} + +#define RAPTOR_MIN_GUESS_SCORE 2 + +/** + * raptor_world_guess_parser_name: + * @world: world object + * @uri: URI identifying the syntax (or NULL) + * @mime_type: mime type identifying the content (or NULL) + * @buffer: buffer of content to guess (or NULL) + * @len: length of buffer + * @identifier: identifier of content (or NULL) + * + * Guess a parser name for content. + * + * Find a parser by scoring recognition of the syntax by a block of + * characters, the content identifier or a mime type. The content + * identifier is typically a filename or URI or some other identifier. + * + * If the guessing finds only low scores, NULL will be returned. 
+ * + * Return value: a parser name or NULL if no guess could be made + **/ +const char* +raptor_world_guess_parser_name(raptor_world* world, + raptor_uri *uri, const char *mime_type, + const unsigned char *buffer, size_t len, + const unsigned char *identifier) +{ + unsigned int i; + raptor_parser_factory *factory; + unsigned char *suffix = NULL; + struct syntax_score* scores; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL); + + raptor_world_open(world); + + scores = RAPTOR_CALLOC(struct syntax_score*, + raptor_sequence_size(world->parsers), + sizeof(struct syntax_score)); + if(!scores) + return NULL; + + if(identifier) { + unsigned char *p = (unsigned char*)strrchr((const char*)identifier, '.'); + if(p) { + unsigned char *from, *to; + + p++; + suffix = RAPTOR_MALLOC(unsigned char*, strlen((const char*)p) + 1); + if(!suffix) { + RAPTOR_FREE(syntax_scores, scores); + return NULL; + } + + for(from = p, to = suffix; *from; ) { + unsigned char c = *from++; + /* discard the suffix if it wasn't '\.[a-zA-Z0-9]+$' */ + if(!isalpha(c) && !isdigit(c)) { + RAPTOR_FREE(char*, suffix); + suffix = NULL; + to = NULL; + break; + } + *to++ = isupper(c) ? 
(unsigned char)tolower(c): c; + } + if(to) + *to = '\0'; + } + } + + for(i = 0; + (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i)); + i++) { + int score = -1; + const raptor_type_q* type_q = NULL; + + if(mime_type && factory->desc.mime_types) { + int j; + type_q = NULL; + for(j = 0; + (type_q = &factory->desc.mime_types[j]) && type_q->mime_type; + j++) { + if(!strcmp(mime_type, type_q->mime_type)) + break; + } + /* got an exact match mime type - score it via the Q */ + if(type_q) + score = type_q->q; + } + /* mime type match has high Q - return factory as result */ + if(score >= 10) + break; + + if(uri && factory->desc.uri_strings) { + int j; + const char* uri_string = (const char*)raptor_uri_as_string(uri); + const char* factory_uri_string = NULL; + + for(j = 0; + (factory_uri_string = factory->desc.uri_strings[j]); + j++) { + if(!strcmp(uri_string, factory_uri_string)) + break; + } + if(factory_uri_string) + /* got an exact match syntax for URI - return factory as result */ + break; + } + + if(factory->recognise_syntax) { + int c = -1; + + /* Only use first N bytes to avoid HTML documents that contain + * RDF/XML examples + */ +#define FIRSTN 1024 +#if FIRSTN > RAPTOR_READ_BUFFER_SIZE +#error "RAPTOR_READ_BUFFER_SIZE is not large enough" +#endif + if(buffer && len && len > FIRSTN) { + c = buffer[FIRSTN]; + ((char*)buffer)[FIRSTN] = '\0'; + } + + score += factory->recognise_syntax(factory, buffer, len, + identifier, suffix, + mime_type); + + if(c >= 0) + ((char*)buffer)[FIRSTN] = RAPTOR_GOOD_CAST(char, c); + } + + scores[i].score = score < 10 ? 
score : 10; + scores[i].factory = factory; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG3("Score %15s : %d\n", factory->desc.names[0], score); +#endif + } + + if(!factory) { + /* sort the scores and pick a factory if score is good enough */ + qsort(scores, i, sizeof(struct syntax_score), compare_syntax_score); + + if(scores[0].score >= RAPTOR_MIN_GUESS_SCORE) + factory = scores[0].factory; + } + + if(suffix) + RAPTOR_FREE(char*, suffix); + + RAPTOR_FREE(syntax_scores, scores); + + return factory ? factory->desc.names[0] : NULL; +} + + +/* + * raptor_parser_copy_flags_state: + * @to_parser: destination parser + * @from_parser: source parser + * + * Copy status flags between parsers - INTERNAL. + **/ +void +raptor_parser_copy_flags_state(raptor_parser *to_parser, + raptor_parser *from_parser) +{ + to_parser->failed = from_parser->failed; + to_parser->emit_graph_marks = from_parser->emit_graph_marks; + to_parser->emitted_default_graph = from_parser->emitted_default_graph; +} + + + +/* + * raptor_parser_copy_user_state: + * @to_parser: destination parser + * @from_parser: source parser + * + * Copy user state between parsers - INTERNAL. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_parser_copy_user_state(raptor_parser *to_parser, + raptor_parser *from_parser) +{ + int rc = 0; + + to_parser->user_data = from_parser->user_data; + to_parser->statement_handler = from_parser->statement_handler; + to_parser->namespace_handler = from_parser->namespace_handler; + to_parser->namespace_handler_user_data = from_parser->namespace_handler_user_data; + to_parser->uri_filter = from_parser->uri_filter; + to_parser->uri_filter_user_data = from_parser->uri_filter_user_data; + + /* copy bit flags */ + raptor_parser_copy_flags_state(to_parser, from_parser); + + /* copy options */ + if(!rc) + rc = raptor_object_options_copy_state(&to_parser->options, + &from_parser->options); + + return rc; +} + + +/* + * raptor_parser_start_namespace: + * @rdf_parser: parser + * @nspace: namespace starting + * + * Internal - Invoke start namespace handler + **/ +void +raptor_parser_start_namespace(raptor_parser* rdf_parser, + raptor_namespace* nspace) +{ + if(!rdf_parser->namespace_handler) + return; + + (*rdf_parser->namespace_handler)(rdf_parser->namespace_handler_user_data, + nspace); +} + + +/** + * raptor_parser_get_accept_header: + * @rdf_parser: parser + * + * Get an HTTP Accept value for the parser. + * + * The returned string must be freed by the caller such as with + * raptor_free_memory(). 
+ * + * Return value: a new Accept: header string or NULL on failure + **/ +const char* +raptor_parser_get_accept_header(raptor_parser* rdf_parser) +{ + raptor_parser_factory *factory = rdf_parser->factory; + char *accept_header = NULL; + size_t len; + char *p; + int i; + const raptor_type_q* type_q; + + if(factory->accept_header) + return factory->accept_header(rdf_parser); + + if(!factory->desc.mime_types) + return NULL; + + len = 0; + for(i = 0; + (type_q = &factory->desc.mime_types[i]) && type_q->mime_type; + i++) { + len += type_q->mime_type_len + 2; /* ", " */ + if(type_q->q < 10) + len += 6; /* ";q=X.Y" */ + } + + /* 9 = strlen("\*\/\*;q=0.1") */ +#define ACCEPT_HEADER_LEN 9 + accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1); + if(!accept_header) + return NULL; + + p = accept_header; + for(i = 0; + (type_q = &factory->desc.mime_types[i]) && type_q->mime_type; + i++) { + memcpy(p, type_q->mime_type, type_q->mime_type_len); + p += type_q->mime_type_len; + if(type_q->q < 10) { + *p++ = ';'; + *p++ = 'q'; + *p++ = '='; + *p++ = '0'; + *p++ = '.'; + *p++ = RAPTOR_GOOD_CAST(char, '0' + (type_q->q)); + } + + *p++ = ','; + *p++ = ' '; + } + + memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1); + + return accept_header; +} + + +const char* +raptor_parser_get_accept_header_all(raptor_world* world) +{ + raptor_parser_factory *factory; + char *accept_header = NULL; + size_t len; + char *p; + int i; + + len = 0; + for(i = 0; + (factory = (raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i)); + i++) { + const raptor_type_q* type_q; + int j; + + for(j = 0; + (type_q = &factory->desc.mime_types[j]) && type_q->mime_type; + j++) { + len += type_q->mime_type_len + 2; /* ", " */ + if(type_q->q < 10) + len += 6; /* ";q=X.Y" */ + } + } + + /* 9 = strlen("\*\/\*;q=0.1") */ +#define ACCEPT_HEADER_LEN 9 + accept_header = RAPTOR_MALLOC(char*, len + ACCEPT_HEADER_LEN + 1); + if(!accept_header) + return NULL; + + p = accept_header; + for(i = 0; + (factory = 
(raptor_parser_factory*)raptor_sequence_get_at(world->parsers, i)); + i++) { + const raptor_type_q* type_q; + int j; + + for(j = 0; + (type_q = &factory->desc.mime_types[j]) && type_q->mime_type; + j++) { + memcpy(p, type_q->mime_type, type_q->mime_type_len); + p+= type_q->mime_type_len; + if(type_q->q < 10) { + *p++ = ';'; + *p++ = 'q'; + *p++ = '='; + *p++ = '0'; + *p++ = '.'; + *p++ = RAPTOR_GOOD_CAST(char, '0' + (type_q->q)); + } + + *p++ = ','; + *p++ = ' '; + } + + } + + memcpy(p, "*/*;q=0.1", ACCEPT_HEADER_LEN + 1); + + return accept_header; +} + + +void +raptor_parser_save_content(raptor_parser* rdf_parser, int save) +{ + if(rdf_parser->sb) + raptor_free_stringbuffer(rdf_parser->sb); + + rdf_parser->sb= save ? raptor_new_stringbuffer() : NULL; +} + + +const unsigned char* +raptor_parser_get_content(raptor_parser* rdf_parser, size_t* length_p) +{ + unsigned char* buffer; + size_t len; + + if(!rdf_parser->sb) + return NULL; + + len = raptor_stringbuffer_length(rdf_parser->sb); + buffer = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!buffer) + return NULL; + + raptor_stringbuffer_copy_to_string(rdf_parser->sb, buffer, len); + + if(length_p) + *length_p=len; + + return buffer; +} + + +void +raptor_parser_start_graph(raptor_parser* parser, raptor_uri* uri, + int is_declared) +{ + int flags = RAPTOR_GRAPH_MARK_START; + if(is_declared) + flags |= RAPTOR_GRAPH_MARK_DECLARED; + + if(!parser->emit_graph_marks) + return; + + if(parser->graph_mark_handler) + (*parser->graph_mark_handler)(parser->user_data, uri, flags); +} + + +void +raptor_parser_end_graph(raptor_parser* parser, raptor_uri* uri, int is_declared) +{ + int flags = 0; + if(is_declared) + flags |= RAPTOR_GRAPH_MARK_DECLARED; + + if(!parser->emit_graph_marks) + return; + + if(parser->graph_mark_handler) + (*parser->graph_mark_handler)(parser->user_data, uri, flags); +} + + +/** + * raptor_parser_get_world: + * @rdf_parser: parser + * + * Get the #raptor_world object associated with a parser. 
+ * + * Return value: raptor_world* pointer + **/ +raptor_world * +raptor_parser_get_world(raptor_parser* rdf_parser) +{ + return rdf_parser->world; +} + + +/** + * raptor_parser_get_graph: + * @rdf_parser: parser + * + * Get the current graph for the parser + * + * The returned URI is owned by the caller and must be freed with + * raptor_free_uri() + * + * Return value: raptor_uri* graph name or NULL for the default graph + **/ +raptor_uri* +raptor_parser_get_graph(raptor_parser* rdf_parser) +{ + if(rdf_parser->factory->get_graph) + return rdf_parser->factory->get_graph(rdf_parser); + return NULL; +} + + +/** + * raptor_parser_parse_iostream: + * @rdf_parser: parser + * @iostr: iostream to read from + * @base_uri: the base URI to use (or NULL) + * + * Parse content from an iostream + * + * If the parser requires a base URI and @base_uri is NULL, an error + * will be generated and the function will fail. + * + * Return value: non 0 on failure, <0 if a required base URI was missing + **/ +int +raptor_parser_parse_iostream(raptor_parser* rdf_parser, raptor_iostream *iostr, + raptor_uri *base_uri) +{ + int rc = 0; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(rdf_parser, raptor_parser, 1); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(iostr, raptor_iostr, 1); + + rc = raptor_parser_parse_start(rdf_parser, base_uri); + if(rc) + return rc; + + while(!raptor_iostream_read_eof(iostr)) { + int ilen; + size_t len; + int is_end; + + ilen = raptor_iostream_read_bytes(rdf_parser->buffer, 1, + RAPTOR_READ_BUFFER_SIZE, iostr); + if(ilen < 0) + break; + len = RAPTOR_GOOD_CAST(size_t, ilen); + is_end = (len < RAPTOR_READ_BUFFER_SIZE); + + rc = raptor_parser_parse_chunk(rdf_parser, rdf_parser->buffer, len, is_end); + if(rc || is_end) + break; + } + + return rc; +} + + +/* end not STANDALONE */ +#endif + + +#ifdef STANDALONE +#include <stdio.h> + +int main(int argc, char *argv[]); + + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = 
raptor_basename(argv[0]); + int i; + const char *s; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Known options:\n", program); +#endif + + for(i = 0; i <= (int)raptor_option_get_count(); i++) { + raptor_option_description *od; + int fn; + + od = raptor_world_get_option_description(world, + RAPTOR_DOMAIN_PARSER, + (raptor_option)i); + if(!od) + continue; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, " %2d %-20s %s <%s>\n", i, od->name, od->label, + (od->uri ? (const char*)raptor_uri_as_string(od->uri) : "")); +#endif + fn = raptor_world_get_option_from_uri(world, od->uri); + if(fn != i) { + fprintf(stderr, + "%s: raptor_option_from_uri() returned %d expected %d\n", + program, fn, i); + return 1; + } + raptor_free_option_description(od); + } + + s = raptor_parser_get_accept_header_all(world); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "Default HTTP accept header: '%s'\n", s); +#endif + if(!s) { + fprintf(stderr, "%s: raptor_parser_get_accept_header_all() failed\n", + program); + return 1; + } + RAPTOR_FREE(char*, s); + + raptor_free_world(world); + + return 0; +} + +#endif diff --git a/src/raptor_permute_test.c b/src/raptor_permute_test.c new file mode 100644 index 0000000..ad504fb --- /dev/null +++ b/src/raptor_permute_test.c @@ -0,0 +1,342 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_permute.c - Test of permutations of ints in a sequence + * + * Copyright (C) 2011, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#define PERMUTE_DEBUG 0 + + +typedef struct +{ + raptor_sequence* seq; + int* contents; +} intseq; + + +static int +print_handler(void *object, FILE *fh) +{ + int* pi = (int*)object; + fprintf(fh, "%d", *pi); + return 0; +} + + +static intseq* +new_intseq(int size) +{ + intseq* iseq; + int i; + + iseq = RAPTOR_CALLOC(intseq*, 1, sizeof(*iseq)); + iseq->contents = RAPTOR_CALLOC(int*, size, sizeof(int)); + /* will be a sequence of int* pointing into iseq->contents */ + iseq->seq = raptor_new_sequence(NULL, print_handler); + + for(i = 0; i < (int)size; i++) { + iseq->contents[i] = i + 1; + raptor_sequence_set_at(iseq->seq, i, &iseq->contents[i]); + } + + return iseq; +} + +static void +free_intseq(intseq* iseq) +{ + if(iseq->contents) + RAPTOR_FREE(int*, iseq->contents); + + if(iseq->seq) + raptor_free_sequence(iseq->seq); + + RAPTOR_FREE(intseq*, iseq); +} + +static void +intseq_print(intseq *iseq, FILE* stream) +{ + raptor_sequence_print(iseq->seq, stream); +} + +static int +intseq_reverse(intseq *iseq, int start_index, int length) +{ + return raptor_sequence_reverse(iseq->seq, start_index, length); +} + +static int +intseq_compare_at(const void* a, const void* b) +{ + int* ia = (int*)a; + int* ib = (int*)b; + return *ia - *ib; +} + +static int +intseq_next_permutation(intseq *iseq) +{ + return raptor_sequence_next_permutation(iseq->seq, intseq_compare_at); +} + +static int +intseq_get_at(intseq *iseq, int 
idx) +{ + return *(int*)(raptor_sequence_get_at(iseq->seq, idx)); +} + +#define MAX_SIZE 5 + +int expected_results_size5[120][5] = { + {1, 2, 3, 4, 5}, + {1, 2, 3, 5, 4}, + {1, 2, 4, 3, 5}, + {1, 2, 4, 5, 3}, + {1, 2, 5, 3, 4}, + {1, 2, 5, 4, 3}, + {1, 3, 2, 4, 5}, + {1, 3, 2, 5, 4}, + {1, 3, 4, 2, 5}, + {1, 3, 4, 5, 2}, + {1, 3, 5, 2, 4}, + {1, 3, 5, 4, 2}, + {1, 4, 2, 3, 5}, + {1, 4, 2, 5, 3}, + {1, 4, 3, 2, 5}, + {1, 4, 3, 5, 2}, + {1, 4, 5, 2, 3}, + {1, 4, 5, 3, 2}, + {1, 5, 2, 3, 4}, + {1, 5, 2, 4, 3}, + {1, 5, 3, 2, 4}, + {1, 5, 3, 4, 2}, + {1, 5, 4, 2, 3}, + {1, 5, 4, 3, 2}, + {2, 1, 3, 4, 5}, + {2, 1, 3, 5, 4}, + {2, 1, 4, 3, 5}, + {2, 1, 4, 5, 3}, + {2, 1, 5, 3, 4}, + {2, 1, 5, 4, 3}, + {2, 3, 1, 4, 5}, + {2, 3, 1, 5, 4}, + {2, 3, 4, 1, 5}, + {2, 3, 4, 5, 1}, + {2, 3, 5, 1, 4}, + {2, 3, 5, 4, 1}, + {2, 4, 1, 3, 5}, + {2, 4, 1, 5, 3}, + {2, 4, 3, 1, 5}, + {2, 4, 3, 5, 1}, + {2, 4, 5, 1, 3}, + {2, 4, 5, 3, 1}, + {2, 5, 1, 3, 4}, + {2, 5, 1, 4, 3}, + {2, 5, 3, 1, 4}, + {2, 5, 3, 4, 1}, + {2, 5, 4, 1, 3}, + {2, 5, 4, 3, 1}, + {3, 1, 2, 4, 5}, + {3, 1, 2, 5, 4}, + {3, 1, 4, 2, 5}, + {3, 1, 4, 5, 2}, + {3, 1, 5, 2, 4}, + {3, 1, 5, 4, 2}, + {3, 2, 1, 4, 5}, + {3, 2, 1, 5, 4}, + {3, 2, 4, 1, 5}, + {3, 2, 4, 5, 1}, + {3, 2, 5, 1, 4}, + {3, 2, 5, 4, 1}, + {3, 4, 1, 2, 5}, + {3, 4, 1, 5, 2}, + {3, 4, 2, 1, 5}, + {3, 4, 2, 5, 1}, + {3, 4, 5, 1, 2}, + {3, 4, 5, 2, 1}, + {3, 5, 1, 2, 4}, + {3, 5, 1, 4, 2}, + {3, 5, 2, 1, 4}, + {3, 5, 2, 4, 1}, + {3, 5, 4, 1, 2}, + {3, 5, 4, 2, 1}, + {4, 1, 2, 3, 5}, + {4, 1, 2, 5, 3}, + {4, 1, 3, 2, 5}, + {4, 1, 3, 5, 2}, + {4, 1, 5, 2, 3}, + {4, 1, 5, 3, 2}, + {4, 2, 1, 3, 5}, + {4, 2, 1, 5, 3}, + {4, 2, 3, 1, 5}, + {4, 2, 3, 5, 1}, + {4, 2, 5, 1, 3}, + {4, 2, 5, 3, 1}, + {4, 3, 1, 2, 5}, + {4, 3, 1, 5, 2}, + {4, 3, 2, 1, 5}, + {4, 3, 2, 5, 1}, + {4, 3, 5, 1, 2}, + {4, 3, 5, 2, 1}, + {4, 5, 1, 2, 3}, + {4, 5, 1, 3, 2}, + {4, 5, 2, 1, 3}, + {4, 5, 2, 3, 1}, + {4, 5, 3, 1, 2}, + {4, 5, 3, 2, 1}, + {5, 1, 2, 3, 4}, + {5, 1, 2, 4, 3}, + 
{5, 1, 3, 2, 4}, + {5, 1, 3, 4, 2}, + {5, 1, 4, 2, 3}, + {5, 1, 4, 3, 2}, + {5, 2, 1, 3, 4}, + {5, 2, 1, 4, 3}, + {5, 2, 3, 1, 4}, + {5, 2, 3, 4, 1}, + {5, 2, 4, 1, 3}, + {5, 2, 4, 3, 1}, + {5, 3, 1, 2, 4}, + {5, 3, 1, 4, 2}, + {5, 3, 2, 1, 4}, + {5, 3, 2, 4, 1}, + {5, 3, 4, 1, 2}, + {5, 3, 4, 2, 1}, + {5, 4, 1, 2, 3}, + {5, 4, 1, 3, 2}, + {5, 4, 2, 1, 3}, + {5, 4, 2, 3, 1}, + {5, 4, 3, 1, 2}, + {5, 4, 3, 2, 1} +}; + +int main (int argc, char *argv[]) +{ + const char *program = raptor_basename(argv[0]); + int failures = 0; + raptor_world *world; + int size; + int expected_counts[MAX_SIZE + 1] = { 1, 1, 2, 6, 24, 120 }; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + for(size = 0; size <= MAX_SIZE; size++) { + intseq* iseq; + int count; + + iseq = new_intseq(size); + +#if PERMUTE_DEBUG > 0 + fprintf(stderr, "%s: Permutation test %d. Initial state: ", program, size); + intseq_print(iseq, stderr); + fputc('\n', stderr); +#endif + + for(count = 1; 1; count++) { +#if PERMUTE_DEBUG > 1 + fprintf(stderr, "Permutation %3d: ", count); + intseq_print(iseq, stderr); + fputc('\n', stderr); +#endif + if(size == 5) { + int* expected_result = expected_results_size5[count - 1]; + int j; + int ok = 1; + + for(j = 0; j < size; j++) { + int actual = intseq_get_at(iseq, j); + int expected = expected_result[j]; + if(actual != expected) { + ok = 0; + break; + } + } + if(!ok) { + fprintf(stderr, "%s: FAILED test %d result %d - returned ", + program, size, count); + intseq_print(iseq, stderr); + fputs(" expected [", stderr); + for(j = 0; j < size; j++) { + fprintf(stderr, "%d, ", expected_result[j]); + } + fputs("]\n", stderr); + + failures++; + } + } + + if(intseq_next_permutation(iseq)) + break; + } + +#if PERMUTE_DEBUG > 0 + fprintf(stderr, "%s: Returned %d results. 
Final state: ", program, count); + intseq_print(iseq, stderr); + fputc('\n', stderr); +#endif + + free_intseq(iseq); + + if(count != expected_counts[size]) { + fprintf(stderr, "%s: FAILED test %d - returned %d items expected %d\n", + program, size, count, expected_counts[size]); + failures++; + break; + } + } + + + /* This is mainly a test for crashes */ + for(size = 0; size <= MAX_SIZE; size++) { + intseq* iseq; + int st; + + iseq = new_intseq(size); + + for(st = 0; st < size + 1; st++) { + intseq_reverse(iseq, 0, st) ; + intseq_reverse(iseq, st, st) ; + intseq_reverse(iseq, st, 0) ; + } + + free_intseq(iseq); + + } + + raptor_free_world(world); + + return failures; +} diff --git a/src/raptor_qname.c b/src/raptor_qname.c new file mode 100644 index 0000000..3995f8f --- /dev/null +++ b/src/raptor_qname.c @@ -0,0 +1,695 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_qname.c - Raptor XML qname class + * + * Copyright (C) 2002-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2002-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * Namespaces in XML + * http://www.w3.org/TR/1999/REC-xml-names-19990114/#defaulting + * says: + * + * -------------------------------------------------------------------- + * 5.2 Namespace Defaulting + * + * A default namespace is considered to apply to the element where it + * is declared (if that element has no namespace prefix), and to all + * elements with no prefix within the content of that element. + * + * If the URI reference in a default namespace declaration is empty, + * then unprefixed elements in the scope of the declaration are not + * considered to be in any namespace. + * + * Note that default namespaces do not apply directly to attributes. + * + * [...] + * + * 5.3 Uniqueness of Attributes + * + * In XML documents conforming to this specification, no tag may + * contain two attributes which: + * + * 1. have identical names, or + * + * 2. have qualified names with the same local part and with + * prefixes which have been bound to namespace names that are + * identical. + * -------------------------------------------------------------------- + */ + +/** + * raptor_new_qname: + * @nstack: namespace stack to look up for namespaces + * @name: element or attribute name + * @value: attribute value (else is an element) + * + * Constructor - create a new XML qname. + * + * Create a new qname from the local element/attribute name, + * with optional (attribute) value. The namespace stack is used + * to look up the name and find the namespace and generate the + * URI of the qname. 
+ * + * Return value: a new #raptor_qname object or NULL on failure + **/ +raptor_qname* +raptor_new_qname(raptor_namespace_stack *nstack, + const unsigned char *name, + const unsigned char *value) +{ + raptor_qname* qname; + const unsigned char *p; + raptor_namespace* ns; + unsigned char* new_name; + unsigned int prefix_length; + unsigned int local_name_length = 0; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("name %s\n", name); +#endif + + qname = RAPTOR_CALLOC(raptor_qname*, 1, sizeof(*qname)); + if(!qname) + return NULL; + qname->world = nstack->world; + + if(value) { + size_t value_length = strlen((char*)value); + unsigned char* new_value; + + new_value = RAPTOR_MALLOC(unsigned char*, value_length + 1); + if(!new_value) { + RAPTOR_FREE(raptor_qname, qname); + return NULL; + } + + memcpy(new_value, value, value_length + 1); /* copy NUL */ + qname->value = new_value; + qname->value_length = value_length; + } + + + /* Find : */ + for(p = name; *p && *p != ':'; p++) + ; + + + if(!*p) { + local_name_length = (unsigned int)(p - name); + + /* No : in the name */ + new_name = RAPTOR_MALLOC(unsigned char*, local_name_length + 1); + if(!new_name) { + raptor_free_qname(qname); + return NULL; + } + memcpy(new_name, name, local_name_length); /* no NUL to copy */ + new_name[local_name_length] = '\0'; + qname->local_name = new_name; + qname->local_name_length = local_name_length; + + /* For elements only, pick up the default namespace if there is one */ + if(!value) { + ns = raptor_namespaces_get_default_namespace(nstack); + + if(ns) { + qname->nspace = ns; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Found default namespace with URI %s\n", ns->uri ? 
raptor_uri_as_string(ns->uri) : (unsigned char*)"None"); +#endif + } else { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG1("No default namespace defined\n"); +#endif + } + } /* if is_element */ + + } else { + /* There is a namespace prefix */ + + prefix_length = (unsigned int)(p - name); + p++; + + /* p now is at start of local_name */ + local_name_length = (unsigned int)strlen((char*)p); + new_name = RAPTOR_MALLOC(unsigned char*, local_name_length + 1); + if(!new_name) { + raptor_free_qname(qname); + return NULL; + } + memcpy(new_name, p, local_name_length); /* No NUL to copy */ + new_name[local_name_length] = '\0'; + qname->local_name = new_name; + qname->local_name_length = local_name_length; + + /* Find the namespace */ + ns = raptor_namespaces_find_namespace(nstack, name, prefix_length); + + if(!ns) { + /* failed to find namespace - now what? */ + raptor_log_error_formatted(qname->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "The namespace prefix in \"%s\" was not declared.", name); + } else { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("Found namespace prefix %s URI %s\n", ns->prefix, ns->uri ? raptor_uri_as_string(ns->uri) : (unsigned char*)"None"); +#endif + qname->nspace = ns; + } + } + + + + /* If namespace has a URI and a local_name is defined, create the URI + * for this element + */ + if(qname->nspace && local_name_length) { + raptor_uri *uri = raptor_namespace_get_uri(qname->nspace); + if(uri) + uri = raptor_new_uri_from_uri_local_name(qname->world, uri, new_name); + + qname->uri = uri; + } + + + return qname; +} + + +/** + * raptor_new_qname_from_namespace_local_name: + * @world: raptor_world object + * @ns: namespace of qname (or NULL) + * @local_name: element or attribute name + * @value: attribute value (else is an element) + * + * Constructor - create a new XML qname. + * + * Create a new qname from the namespace and local element/attribute name, + * with optional (attribute) value. 
+ * + * Return value: a new #raptor_qname object or NULL on failure + **/ +raptor_qname* +raptor_new_qname_from_namespace_local_name(raptor_world* world, + raptor_namespace *ns, + const unsigned char *local_name, + const unsigned char *value) +{ + raptor_qname* qname; + unsigned char* new_name; + unsigned int local_name_length; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!local_name) + return NULL; + + local_name_length = (unsigned int)strlen((char*)local_name); + + raptor_world_open(world); + + qname = RAPTOR_CALLOC(raptor_qname*, 1, sizeof(*qname)); + if(!qname) + return NULL; + qname->world = world; + + if(value) { + unsigned int value_length = (unsigned int)strlen((char*)value); + unsigned char* new_value; + + new_value = RAPTOR_MALLOC(unsigned char*, value_length + 1); + if(!new_value) { + RAPTOR_FREE(raptor_qname, qname); + return NULL; + } + + memcpy(new_value, value, value_length + 1); /* Copy NUL */ + qname->value = new_value; + qname->value_length = value_length; + } + + new_name = RAPTOR_MALLOC(unsigned char*, local_name_length + 1); + if(!new_name) { + raptor_free_qname(qname); + return NULL; + } + + memcpy(new_name, local_name, local_name_length); /* No NUL to copy */ + new_name[local_name_length] = '\0'; + qname->local_name = new_name; + qname->local_name_length = local_name_length; + + qname->nspace = ns; + + if(qname->nspace) { + qname->uri = raptor_namespace_get_uri(qname->nspace); + if(qname->uri) + qname->uri = raptor_new_uri_from_uri_local_name(qname->world, qname->uri, new_name); + } + + return qname; +} + + +/** + * raptor_qname_copy: + * @qname: existing qname + * + * Copy constructor - copy an existing XML qname. 
+ * + * Return value: a new #raptor_qname object or NULL on failure + **/ +raptor_qname* +raptor_qname_copy(raptor_qname *qname) +{ + raptor_qname* new_qname; + unsigned char* new_name; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(qname, raptor_qname, NULL); + + new_qname = RAPTOR_CALLOC(raptor_qname*, 1, sizeof(*qname)); + if(!new_qname) + return NULL; + new_qname->world = qname->world; + + if(qname->value) { + size_t value_length = qname->value_length; + unsigned char* new_value; + + new_value = RAPTOR_MALLOC(unsigned char*, value_length + 1); + if(!new_value) { + RAPTOR_FREE(raptor_qname, new_qname); + return NULL; + } + + memcpy(new_value, qname->value, value_length + 1); /* Copy NUL */ + new_qname->value = new_value; + new_qname->value_length = value_length; + } + + new_name = RAPTOR_MALLOC(unsigned char*, qname->local_name_length + 1); + if(!new_name) { + raptor_free_qname(new_qname); + return NULL; + } + + memcpy(new_name, qname->local_name, qname->local_name_length + 1); /* Copy NUL */ + new_qname->local_name = new_name; + new_qname->local_name_length = qname->local_name_length; + + new_qname->nspace = qname->nspace; + + new_qname->uri = raptor_namespace_get_uri(new_qname->nspace); + if(new_qname->uri) + new_qname->uri = raptor_new_uri_from_uri_local_name(qname->world, new_qname->uri, new_name); + + return new_qname; +} + + +#ifdef RAPTOR_DEBUG +void +raptor_qname_print(FILE *stream, raptor_qname* name) +{ + if(name->nspace) { + const unsigned char *prefix = raptor_namespace_get_prefix(name->nspace); + if(prefix) + fprintf(stream, "%s:%s", prefix, name->local_name); + else + fprintf(stream, "(default):%s", name->local_name); + } else + fputs((char*)name->local_name, stream); +} +#endif + + +/** + * raptor_free_qname: + * @name: #raptor_qname object + * + * Destructor - destroy a raptor_qname object. 
+ **/ +void +raptor_free_qname(raptor_qname* name) +{ + if(!name) + return; + + if(name->local_name) + RAPTOR_FREE(char*, name->local_name); + + if(name->uri && name->nspace) + raptor_free_uri(name->uri); + + if(name->value) + RAPTOR_FREE(char*, name->value); + RAPTOR_FREE(raptor_qname, name); +} + + +/** + * raptor_qname_equal: + * @name1: first #raptor_qname + * @name2: second #raptor_name + * + * Compare two XML Qnames for equality. + * + * Return value: non-0 if the qnames are equal. + **/ +int +raptor_qname_equal(raptor_qname *name1, raptor_qname *name2) +{ + if(name1->nspace != name2->nspace) + return 0; + if(name1->local_name_length != name2->local_name_length) + return 0; + if(strcmp((char*)name1->local_name, (char*)name2->local_name)) + return 0; + return 1; +} + + + +/** + * raptor_qname_string_to_uri: + * @nstack: #raptor_namespace_stack to decode the namespace + * @name: QName string or NULL + * @name_len: QName string length + * + * Get the URI for a qname. + * + * Utility function to turn a string representing a QName in the + * N3 style, into a new URI representing it. A NULL name or name ":" + * returns the default namespace URI. A name "p:" returns + * namespace name (URI) for the namespace with prefix "p". + * + * Partially equivalent to + * qname = raptor_new_qname(nstack, name, NULL); + * uri = raptor_uri_copy(qname->uri); + * raptor_free_qname(qname) + * but without making the qname, and it also handles the NULL and + * ":" name cases as well as error checking. 
+ * + * Return value: new #raptor_uri object or NULL on failure + **/ +raptor_uri* +raptor_qname_string_to_uri(raptor_namespace_stack *nstack, + const unsigned char *name, size_t name_len) +{ + raptor_uri *uri = NULL; + const unsigned char *p; + const unsigned char *original_name = name; + const unsigned char *local_name = NULL; + unsigned int local_name_length = 0; + raptor_namespace* ns; + + /* Empty string is default namespace URI */ + if(!name) { + ns = raptor_namespaces_get_default_namespace(nstack); + } else { + /* If starts with :, it is relative to default namespace, so skip it */ + if(*name == ':') { + name++; + name_len--; + p = name + name_len; + } else { + for(p = name; *p && *p != ':'; p++) + ; + } + + /* If ends with :, it is the URI of a namespace */ + if(RAPTOR_GOOD_CAST(size_t, p-name) == (name_len - 1)) { + ns = raptor_namespaces_find_namespace(nstack, name, + RAPTOR_BAD_CAST(int, (name_len - 1))); + } else { + if(!*p) { + local_name = name; + local_name_length = (unsigned int)(p - name); + + /* pick up the default namespace if there is one */ + ns = raptor_namespaces_get_default_namespace(nstack); + } else { + /* There is a namespace prefix */ + unsigned int prefix_length = (unsigned int)(p - name); + p++; + + local_name = p; + local_name_length = (unsigned int)strlen((char*)p); + + /* Find the namespace */ + ns = raptor_namespaces_find_namespace(nstack, name, prefix_length); + } + } + } + + if(!ns) { + raptor_log_error_formatted(nstack->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "The namespace prefix in \"%s\" was not declared.", + original_name); + } + + + + /* If namespace has a URI and a local_name is defined, return the URI + * for this name + */ + if(ns && (uri = raptor_namespace_get_uri(ns))) { + if(local_name_length) + uri = raptor_new_uri_from_uri_local_name(nstack->world, uri, local_name); + else + uri = raptor_uri_copy(uri); + } + + return uri; +} + + +/** + * raptor_qname_write: + * @qname: QName to write + * @iostr: raptor iosteram + * + 
* Write a formatted qname to an iostream + * + * Return value: non-0 on failure + **/ +int +raptor_qname_write(raptor_qname *qname, raptor_iostream* iostr) +{ + if(qname->nspace && qname->nspace->prefix_length > 0) { + raptor_iostream_counted_string_write(qname->nspace->prefix, + qname->nspace->prefix_length, + iostr); + raptor_iostream_write_byte(':', iostr); + } + + raptor_iostream_counted_string_write(qname->local_name, + qname->local_name_length, + iostr); + return 0; +} + + +/** + * raptor_qname_to_counted_name: + * @qname: QName to write + * @length_p: pointer to variable to store length of name (or NULL) + * + * Get the string form of a QName name + * + * Return value: new string name or NULL on failure + **/ +unsigned char* +raptor_qname_to_counted_name(raptor_qname *qname, size_t* length_p) +{ + size_t len = qname->local_name_length; + unsigned char* s; + unsigned char *p; + + if(qname->nspace && qname->nspace->prefix_length > 0) + len+= 1 + qname->nspace->prefix_length; + + if(length_p) + *length_p=len; + + s = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!s) + return NULL; + + p = s; + if(qname->nspace && qname->nspace->prefix_length > 0) { + memcpy(p, qname->nspace->prefix, qname->nspace->prefix_length); /* Do not copy NUL */ + p += qname->nspace->prefix_length; + *p++ = ':'; + } + + memcpy(p, qname->local_name, qname->local_name_length + 1); /* Copy final NUL */ + + return s; +} + + +/** + * raptor_qname_get_namespace: + * @name: #raptor_qname object + * + * Get the #raptor_namespace of an XML QName. + * + * Return value: the namespace + **/ +const raptor_namespace* +raptor_qname_get_namespace(raptor_qname* name) +{ + return name->nspace; +} + + +/** + * raptor_qname_get_local_name: + * @name: #raptor_qname object + * + * Get the #raptor_local_name of an XML QName. 
+ * + * Return value: the local_name + **/ +const unsigned char* +raptor_qname_get_local_name(raptor_qname* name) +{ + return name->local_name; +} + + +/** + * raptor_qname_get_value: + * @name: #raptor_qname object + * + * Get the #raptor_value of an XML QName. + * + * Return value: the value + **/ +const unsigned char* +raptor_qname_get_value(raptor_qname* name) +{ + return name->value; +} + +/** + * raptor_qname_get_counted_value: + * @name: #raptor_qname object + * @length_p: pointer to variable to store length of name (or NULL) + * + * Get the #raptor_value of an XML QName. + * + * Return value: the value + **/ +const unsigned char* +raptor_qname_get_counted_value(raptor_qname* name, size_t* length_p) +{ + if(length_p) + *length_p=name->value_length; + return name->value; +} + + +/** + * raptor_qname_format_as_xml: + * @qname: qname object + * @length_p: pointer to length (or NULL) + * + * Format a qname in an XML style into a newly allocated string. + * + * Generates a string of the form a:b="value" or a="value" + * depending on the qname's prefix. Double quotes are always used. + * + * If @length_p is not NULL, the length of the string is + * stored in the address it points to. 
+ * + * Return value: qname formatted as newly allocated string or NULL on failure + **/ +unsigned char* +raptor_qname_format_as_xml(const raptor_qname *qname, size_t *length_p) +{ + size_t length; + unsigned char *buffer; + const char quote='"'; + unsigned char *p; + + length = qname->local_name_length + 3 /* ="" */; + if(qname->value_length) + length += raptor_xml_escape_string(qname->world, + qname->value, qname->value_length, + NULL, 0, quote); + + if(qname->nspace && qname->nspace->prefix_length > 0) + length += qname->nspace->prefix_length + 1; /* for : */ + + if(length_p) + *length_p = length; + + buffer = RAPTOR_MALLOC(unsigned char*, length + 1); + if(!buffer) + return NULL; + + p = buffer; + + if(qname->nspace && qname->nspace->prefix_length > 0) { + memcpy(p, qname->nspace->prefix, qname->nspace->prefix_length); + p += qname->nspace->prefix_length; + *p++ = ':'; + } + memcpy(p, qname->local_name, qname->local_name_length); + p += qname->local_name_length; + *p++ = '='; + *p++ = quote; + if(qname->value_length) { + p += raptor_xml_escape_string(qname->world, + qname->value, qname->value_length, + p, length, quote); + } + *p++ = quote; + /* *p used here since we never need to use value of p again [CLANG] */ + *p = '\0'; + + return buffer; +} + + diff --git a/src/raptor_rdfxml.c b/src/raptor_rdfxml.c new file mode 100644 index 0000000..2a742d0 --- /dev/null +++ b/src/raptor_rdfxml.c @@ -0,0 +1,3224 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_rdfxml.c - Raptor RDF/XML Parser + * + * Copyright (C) 2000-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* Define these for far too much output */ +#undef RAPTOR_DEBUG_VERBOSE +#undef RAPTOR_DEBUG_CDATA + + +/* Raptor structures */ + +typedef enum { + /* Catch uninitialised state */ + RAPTOR_STATE_INVALID = 0, + + /* Skipping current tree of elements - used to recover finding + * illegal content, when parsling permissively. + */ + RAPTOR_STATE_SKIPPING, + + /* Not in RDF grammar yet - searching for a start element. 
+ * + * This can be <rdf:RDF> (goto NODE_ELEMENT_LIST) but since it is optional, + * the start element can also be one of + * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementURIs + * + * If RDF content is assumed, go straight to OBJ + */ + RAPTOR_STATE_UNKNOWN, + + /* A list of node elements + * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList + */ + RAPTOR_STATE_NODE_ELEMENT_LIST, + + /* Found an <rdf:Description> */ + RAPTOR_STATE_DESCRIPTION, + + /* Found a property element + * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt + */ + RAPTOR_STATE_PROPERTYELT, + + /* A property element that is an ordinal - rdf:li, rdf:_n + */ + RAPTOR_STATE_MEMBER_PROPERTYELT, + + /* Found a node element + * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement + */ + RAPTOR_STATE_NODE_ELEMENT, + + /* A property element with rdf:parseType="Literal" + * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeLiteralPropertyElt + */ + RAPTOR_STATE_PARSETYPE_LITERAL, + + /* A property element with rdf:parseType="Resource" + * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt + */ + RAPTOR_STATE_PARSETYPE_RESOURCE, + + /* A property element with rdf:parseType="Collection" + * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt + * + * (This also handles daml:Collection) + */ + RAPTOR_STATE_PARSETYPE_COLLECTION, + + /* A property element with a rdf:parseType attribute and a value + * not "Literal" or "Resource" + * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeOtherPropertyElt + */ + RAPTOR_STATE_PARSETYPE_OTHER, + + RAPTOR_STATE_PARSETYPE_LAST = RAPTOR_STATE_PARSETYPE_OTHER + + +} raptor_state; + + +static const char* const raptor_state_names[RAPTOR_STATE_PARSETYPE_LAST+2] = { + "INVALID", + "SKIPPING", + "UNKNOWN", + "nodeElementList", + "propertyElt", + "Description", + "propertyElt", + "memberPropertyElt", + "nodeElement", + "parseTypeLiteral", + "parseTypeResource", + "parseTypeCollection", + "parseTypeOther" +}; + + +static 
const char * raptor_rdfxml_state_as_string(raptor_state state) +{ + if(state < 1 || state > RAPTOR_STATE_PARSETYPE_LAST) + state = (raptor_state)0; + return raptor_state_names[(int)state]; +} + + +/* + * raptor_rdfxml_check_propertyElement_name: + * @name: rdf namespace term + * + * Check if an rdf namespace name is allowed to be used as a Node Element. + * + * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed + */ +static int +raptor_rdfxml_check_nodeElement_name(const char *name) +{ + int i; + + if(*name == '_') + return 1; + + for(i = 0; raptor_rdf_ns_terms_info[i].name; i++) + if(!strcmp(raptor_rdf_ns_terms_info[i].name, name)) + return raptor_rdf_ns_terms_info[i].allowed_as_nodeElement; + + return -1; +} + + +/* + * raptor_rdfxml_check_propertyElement_name: + * @name: rdf namespace term + * + * Check if an rdf namespace name is allowed to be used as a Property Element. + * + * Return value: < 0 if unknown rdf namespace term, 0 if known and not allowed, > 0 if known and allowed + */ +static int +raptor_rdfxml_check_propertyElement_name(const char *name) +{ + int i; + + if(*name == '_') + return 1; + + for(i = 0; raptor_rdf_ns_terms_info[i].name; i++) + if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name)) + return raptor_rdf_ns_terms_info[i].allowed_as_propertyElement; + + return -1; +} + + +static int +raptor_rdfxml_check_propertyAttribute_name(const char *name) +{ + int i; + + if(*name == '_') + return 1; + + for(i = 0; raptor_rdf_ns_terms_info[i].name; i++) + if(!strcmp(raptor_rdf_ns_terms_info[i].name, (const char*)name)) + return raptor_rdf_ns_terms_info[i].allowed_as_propertyAttribute; + + return -1; +} + + +typedef enum { + /* undetermined yet - whitespace is stored */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN, + + /* literal content - no elements, cdata allowed, whitespace significant + * <propElement> blah </propElement> + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL, + + /* parseType 
literal content (WF XML) - all content preserved + * <propElement rdf:parseType="Literal"><em>blah</em></propElement> + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL, + + /* top-level nodes - 0+ elements expected, no cdata, whitespace ignored, + * any non-whitespace cdata is error + * only used for <rdf:RDF> or implict <rdf:RDF> + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES, + + /* properties - 0+ elements expected, no cdata, whitespace ignored, + * any non-whitespace cdata is error + * <nodeElement><prop1>blah</prop1> <prop2>blah</prop2> </nodeElement> + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES, + + /* property content - all content preserved + * any content type changes when first non-whitespace found + * <propElement>... + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT, + + /* resource URI given - no element, no cdata, whitespace ignored, + * any non-whitespace cdata is error + * <propElement rdf:resource="uri"/> + * <propElement rdf:resource="uri"></propElement> + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE, + + /* skipping content - all content is preserved + * Used when skipping content for unknown parseType-s, + * error recovery, some other reason + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED, + + /* parseType Collection - all content preserved + * Parsing of this determined by RDF/XML (Revised) closed collection rules + * <propElement rdf:parseType="Collection">...</propElement> + */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION, + + /* Like above but handles "daml:collection" */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION, + + /* dummy for use in strings below */ + RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST + +} raptor_rdfxml_element_content_type; + + +static const struct { + const char * name; + int whitespace_significant; + /* non-blank cdata */ + int cdata_allowed; + /* XML element content */ + int element_allowed; + /* Do RDF-specific processing? (property attributes, rdf: attributes, ...) 
*/ + int rdf_processing; +} rdf_content_type_info[RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST]={ + {"Unknown", 1, 1, 1, 0 }, + {"Literal", 1, 1, 0, 0 }, + {"XML Literal", 1, 1, 1, 0 }, + {"Nodes", 0, 0, 1, 1 }, + {"Properties", 0, 1, 1, 1 }, + {"Property Content",1, 1, 1, 1 }, + {"Resource", 0, 0, 0, 0 }, + {"Preserved", 1, 1, 1, 0 }, + {"Collection", 1, 1, 1, 1 }, + {"DAML Collection", 1, 1, 1, 1 }, +}; + + + +static const char * +raptor_rdfxml_element_content_type_as_string(raptor_rdfxml_element_content_type type) +{ + if(type >= RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST) + return "INVALID"; + + return rdf_content_type_info[type].name; +} + + + + + +/* + * Raptor Element/attributes on stack + */ +struct raptor_rdfxml_element_s { + raptor_world* world; + + raptor_xml_element *xml_element; + + /* NULL at bottom of stack */ + struct raptor_rdfxml_element_s *parent; + + /* attributes declared in M&S */ + const unsigned char * rdf_attr[RDF_NS_LAST + 1]; + /* how many of above seen */ + int rdf_attr_count; + + /* state that this production matches */ + raptor_state state; + + /* how to handle the content inside this XML element */ + raptor_rdfxml_element_content_type content_type; + + + /* starting state for children of this element */ + raptor_state child_state; + + /* starting content type for children of this element */ + raptor_rdfxml_element_content_type child_content_type; + + + /* Reified statement identifier */ + raptor_term* reified; + + unsigned const char* reified_id; + + /* Bag identifier */ + raptor_term* bag; + int last_bag_ordinal; /* starts at 0, so first predicate is rdf:_1 */ + + /* Subject identifier (URI/anon ID), type, source + * + * When the XML element represents a node, this is the identifier + */ + raptor_term* subject; + + /* Predicate URI + * + * When the XML element represents a node or predicate, + * this is the identifier of the predicate + */ + raptor_term* predicate; + + /* Object identifier (URI/anon ID), type, source + * + * When this XML 
element generates a statement that needs an object, + * possibly from a child element, this is the identifier of the object + */ + raptor_term* object; + + /* URI of datatype of literal */ + raptor_uri *object_literal_datatype; + + /* last ordinal used, so initialising to 0 works, emitting rdf:_1 first */ + int last_ordinal; + + /* If this element's parseType is a Collection + * this identifies the anon node of current tail of the collection(list). + */ + const unsigned char *tail_id; + + /* RDF/XML specific checks */ + + /* all cdata so far is whitespace */ + unsigned int content_cdata_all_whitespace; +}; + +typedef struct raptor_rdfxml_element_s raptor_rdfxml_element; + + +#define RAPTOR_RDFXML_N_CONCEPTS 5 + +/* + * Raptor parser object + */ +struct raptor_rdfxml_parser_s { + raptor_sax2 *sax2; + + /* stack of elements - elements add after current_element */ + raptor_rdfxml_element *root_element; + raptor_rdfxml_element *current_element; + + raptor_uri* concepts[RAPTOR_RDFXML_N_CONCEPTS]; + + /* set of seen rdf:ID / rdf:bagID values (with in-scope base URI) */ + raptor_id_set* id_set; + + void *xml_content; + size_t xml_content_length; + raptor_iostream* iostream; + + /* writer for building parseType="Literal" content */ + raptor_xml_writer* xml_writer; +}; + + + + +/* static variables */ + +#define RAPTOR_DAML_NS_URI(rdf_xml_parser) rdf_xml_parser->concepts[0] + +#define RAPTOR_DAML_List_URI(rdf_xml_parser) rdf_xml_parser->concepts[1] +#define RAPTOR_DAML_first_URI(rdf_xml_parser) rdf_xml_parser->concepts[2] +#define RAPTOR_DAML_rest_URI(rdf_xml_parser) rdf_xml_parser->concepts[3] +#define RAPTOR_DAML_nil_URI(rdf_xml_parser) rdf_xml_parser->concepts[4] + +/* RAPTOR_RDFXML_N_CONCEPTS defines size of array */ + + +/* prototypes for element functions */ +static raptor_rdfxml_element* raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_parser); +static void raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_parser, raptor_rdfxml_element* element); + +static int 
raptor_rdfxml_record_ID(raptor_parser *rdf_parser, raptor_rdfxml_element *element, const unsigned char *id); + +/* prototypes for grammar functions */ +static void raptor_rdfxml_start_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element); +static void raptor_rdfxml_end_element_grammar(raptor_parser *parser, raptor_rdfxml_element *element); +static void raptor_rdfxml_cdata_grammar(raptor_parser *parser, const unsigned char *s, int len, int is_cdata); + + +/* prototype for statement related functions */ +static void raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, raptor_term *subject, raptor_uri *predicate_uri, raptor_term *object, raptor_term *reified, raptor_rdfxml_element *bag_element); + + + +/* Prototypes for parsing data functions */ +static int raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name); +static void raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser); +static int raptor_rdfxml_parse_start(raptor_parser* rdf_parser); +static int raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, const unsigned char *buffer, size_t len, int is_end); +static void raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser); + +static raptor_uri* raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser); + + +static raptor_rdfxml_element* +raptor_rdfxml_element_pop(raptor_rdfxml_parser *rdf_xml_parser) +{ + raptor_rdfxml_element *element = rdf_xml_parser->current_element; + + if(!element) + return NULL; + + rdf_xml_parser->current_element = element->parent; + if(rdf_xml_parser->root_element == element) /* just deleted root */ + rdf_xml_parser->root_element = NULL; + + return element; +} + + +static void +raptor_rdfxml_element_push(raptor_rdfxml_parser *rdf_xml_parser, raptor_rdfxml_element* element) +{ + element->parent = rdf_xml_parser->current_element; + rdf_xml_parser->current_element = element; + if(!rdf_xml_parser->root_element) + rdf_xml_parser->root_element = element; +} + + +static void 
+raptor_free_rdfxml_element(raptor_rdfxml_element *element) +{ + int i; + + /* Free special RDF M&S attributes */ + for(i = 0; i <= RDF_NS_LAST; i++) + if(element->rdf_attr[i]) + RAPTOR_FREE(char*, element->rdf_attr[i]); + + if(element->subject) + raptor_free_term(element->subject); + if(element->predicate) + raptor_free_term(element->predicate); + if(element->object) + raptor_free_term(element->object); + if(element->bag) + raptor_free_term(element->bag); + if(element->reified) + raptor_free_term(element->reified); + + if(element->tail_id) + RAPTOR_FREE(char*, (char*)element->tail_id); + if(element->object_literal_datatype) + raptor_free_uri(element->object_literal_datatype); + + if(element->reified_id) + RAPTOR_FREE(char*, (char*)element->reified_id); + + RAPTOR_FREE(raptor_rdfxml_element, element); +} + + +static void +raptor_rdfxml_sax2_new_namespace_handler(void *user_data, + raptor_namespace* nspace) +{ + raptor_parser* rdf_parser; + const unsigned char* namespace_name; + size_t namespace_name_len; + raptor_uri* uri = raptor_namespace_get_uri(nspace); + + rdf_parser = (raptor_parser*)user_data; + raptor_parser_start_namespace(rdf_parser, nspace); + + if(!uri) + return; + + namespace_name = raptor_uri_as_counted_string(uri, &namespace_name_len); + + if(namespace_name_len == raptor_rdf_namespace_uri_len-1 && + !strncmp((const char*)namespace_name, + (const char*)raptor_rdf_namespace_uri, + namespace_name_len)) { + const unsigned char *prefix = raptor_namespace_get_prefix(nspace); + raptor_parser_warning(rdf_parser, + "Declaring a namespace with prefix %s to URI %s - one letter short of the RDF namespace URI and probably a mistake.", + prefix, namespace_name); + } + + if(namespace_name_len > raptor_rdf_namespace_uri_len && + !strncmp((const char*)namespace_name, + (const char*)raptor_rdf_namespace_uri, + raptor_rdf_namespace_uri_len)) { + raptor_parser_error(rdf_parser, + "Declaring a namespace URI %s to which the RDF namespace URI is a prefix is forbidden.", + 
namespace_name); + } +} + + + +static void +raptor_rdfxml_start_element_handler(void *user_data, + raptor_xml_element* xml_element) +{ + raptor_parser* rdf_parser; + raptor_rdfxml_parser* rdf_xml_parser; + raptor_rdfxml_element* element; + int ns_attributes_count = 0; + raptor_qname** named_attrs = NULL; + int i; + int count_bumped = 0; + + rdf_parser = (raptor_parser*)user_data; + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + if(rdf_parser->failed) + return; + + raptor_rdfxml_update_document_locator(rdf_parser); + + /* Create new element structure */ + element = RAPTOR_CALLOC(raptor_rdfxml_element*, 1, sizeof(*element)); + if(!element) { + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + rdf_parser->failed = 1; + return; + } + element->world = rdf_parser->world; + element->xml_element = xml_element; + + raptor_rdfxml_element_push(rdf_xml_parser, element); + + named_attrs = raptor_xml_element_get_attributes(xml_element); + ns_attributes_count = raptor_xml_element_get_attributes_count(xml_element); + + /* RDF-specific processing of attributes */ + if(ns_attributes_count) { + raptor_qname** new_named_attrs; + int offset = 0; + raptor_rdfxml_element* parent_element; + + parent_element = element->parent; + + /* Allocate new array to move namespaced-attributes to if + * rdf processing is performed + */ + new_named_attrs = RAPTOR_CALLOC(raptor_qname**, ns_attributes_count, + sizeof(raptor_qname*)); + if(!new_named_attrs) { + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + rdf_parser->failed = 1; + return; + } + + for(i = 0; i < ns_attributes_count; i++) { + raptor_qname* attr = named_attrs[i]; + + /* If: + * 1 We are handling RDF content and RDF processing is allowed on + * this element + * OR + * 2 We are not handling RDF content and + * this element is at the top level (top level Desc. / typedNode) + * i.e. 
we have no parent + * then handle the RDF attributes + */ + if((parent_element && + rdf_content_type_info[parent_element->child_content_type].rdf_processing) || + !parent_element) { + + /* Save pointers to some RDF M&S attributes */ + + /* If RDF namespace-prefixed attributes */ + if(attr->nspace && attr->nspace->is_rdf_ms) { + const unsigned char *attr_name = attr->local_name; + int j; + + for(j = 0; j <= RDF_NS_LAST; j++) + if(!strcmp((const char*)attr_name, + raptor_rdf_ns_terms_info[j].name)) { + element->rdf_attr[j] = attr->value; + element->rdf_attr_count++; + /* Delete it if it was stored elsewhere */ +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("Found RDF namespace attribute '%s' URI %s\n", + (char*)attr_name, attr->value); +#endif + /* make sure value isn't deleted from qname structure */ + attr->value = NULL; + raptor_free_qname(attr); + attr = NULL; + break; + } + } /* end if RDF namespaced-prefixed attributes */ + + if(!attr) + continue; + + /* If non namespace-prefixed RDF attributes found on an element */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES) && + !attr->nspace) { + const unsigned char *attr_name = attr->local_name; + int j; + + for(j = 0; j <= RDF_NS_LAST; j++) + if(!strcmp((const char*)attr_name, + raptor_rdf_ns_terms_info[j].name)) { + element->rdf_attr[j] = attr->value; + element->rdf_attr_count++; + if(!raptor_rdf_ns_terms_info[j].allowed_unprefixed_on_attribute) + raptor_parser_warning(rdf_parser, + "Using rdf attribute '%s' without the RDF namespace has been deprecated.", + attr_name); + + /* Delete it if it was stored elsewhere */ + /* make sure value isn't deleted from qname structure */ + attr->value = NULL; + raptor_free_qname(attr); + attr = NULL; + break; + } + } /* end if non-namespace prefixed RDF attributes */ + + if(!attr) + continue; + + } /* end if leave literal XML alone */ + + if(attr) + new_named_attrs[offset++] = attr; + } + + /* new attribute count is set from attributes that haven't 
been skipped */ + ns_attributes_count = offset; + if(!ns_attributes_count) { + /* all attributes were deleted so delete the new array */ + RAPTOR_FREE(raptor_qname_array, new_named_attrs); + new_named_attrs = NULL; + } + + RAPTOR_FREE(raptor_qname_array, named_attrs); + named_attrs = new_named_attrs; + raptor_xml_element_set_attributes(xml_element, + named_attrs, ns_attributes_count); + } /* end if ns_attributes_count */ + + + /* start from unknown; if we have a parent, it may set this */ + element->state = RAPTOR_STATE_UNKNOWN; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN; + + if(element->parent && + element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN) { + element->content_type = element->parent->child_content_type; + + if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE && + element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && + element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { + raptor_qname* parent_el_name; + parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); + /* If parent has an rdf:resource, this element should not be here */ + raptor_parser_error(rdf_parser, + "property element '%s' has multiple object node elements, skipping.", + parent_el_name->local_name); + element->state = RAPTOR_STATE_SKIPPING; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; + + } else { + if(!element->parent->child_state) { + raptor_parser_fatal_error(rdf_parser, + "%s: Internal error: no parent element child_state set", + __FUNCTION__); + return; + } + + element->state = element->parent->child_state; + element->parent->xml_element->content_element_seen++; + count_bumped++; + + /* leave literal XML alone */ + if(!rdf_content_type_info[element->content_type].cdata_allowed) { + if(element->parent->xml_element->content_element_seen && + element->parent->xml_element->content_cdata_seen) { + raptor_qname* 
parent_el_name; + + parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); + /* Uh oh - mixed content, the parent element has cdata too */ + raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", + parent_el_name->local_name); + } + + /* If there is some existing all-whitespace content cdata + * before this node element, delete it + */ + if(element->parent->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES && + element->parent->xml_element->content_element_seen && + element->parent->content_cdata_all_whitespace && + element->parent->xml_element->content_cdata_length) { + + element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + + raptor_free_stringbuffer(element->parent->xml_element->content_cdata_sb); + element->parent->xml_element->content_cdata_sb = NULL; + element->parent->xml_element->content_cdata_length = 0; + } + + } /* end if leave literal XML alone */ + + } /* end if parent has no rdf:resource */ + + } /* end if element->parent */ + + +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Using content type %s\n", + rdf_content_type_info[element->content_type].name); + + fprintf(stderr, "raptor_rdfxml_start_element_handler: Start ns-element: "); + raptor_print_xml_element(xml_element, stderr); +#endif + + + /* Check for non namespaced stuff when not in a parseType literal, other */ + if(rdf_content_type_info[element->content_type].rdf_processing) { + const raptor_namespace* ns; + + ns = raptor_xml_element_get_name(xml_element)->nspace; + /* The element */ + + /* If has no namespace or the namespace has no name (xmlns="") */ + if((!ns || (ns && !raptor_namespace_get_uri(ns))) && element->parent) { + raptor_qname* parent_el_name; + + parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); + + raptor_parser_error(rdf_parser, + "Using an element '%s' without a namespace is forbidden.", + parent_el_name->local_name); + element->state = RAPTOR_STATE_SKIPPING; + /* Remove count 
above so that parent thinks this is empty */ + if(count_bumped) + element->parent->xml_element->content_element_seen--; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; + } + + + /* Check for any remaining non-namespaced attributes */ + if(named_attrs) { + for(i = 0; i < ns_attributes_count; i++) { + raptor_qname *attr = named_attrs[i]; + /* Check if any attributes are non-namespaced */ + if(!attr->nspace || + (attr->nspace && !raptor_namespace_get_uri(attr->nspace))) { + raptor_parser_error(rdf_parser, + "Using an attribute '%s' without a namespace is forbidden.", + attr->local_name); + raptor_free_qname(attr); + named_attrs[i] = NULL; + } + } + } + } + + + if(element->rdf_attr[RDF_NS_aboutEach] || + element->rdf_attr[RDF_NS_aboutEachPrefix]) { + raptor_parser_warning(rdf_parser, + "element '%s' has aboutEach / aboutEachPrefix, skipping.", + raptor_xml_element_get_name(xml_element)->local_name); + element->state = RAPTOR_STATE_SKIPPING; + /* Remove count above so that parent thinks this is empty */ + if(count_bumped) + element->parent->xml_element->content_element_seen--; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; + } + + /* Right, now ready to enter the grammar */ + raptor_rdfxml_start_element_grammar(rdf_parser, element); + + return; +} + + +static void +raptor_rdfxml_end_element_handler(void *user_data, + raptor_xml_element* xml_element) +{ + raptor_parser* rdf_parser; + raptor_rdfxml_parser* rdf_xml_parser; + raptor_rdfxml_element* element; + + rdf_parser = (raptor_parser*)user_data; + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + if(!rdf_parser->failed) { + raptor_rdfxml_update_document_locator(rdf_parser); + + raptor_rdfxml_end_element_grammar(rdf_parser, + rdf_xml_parser->current_element); + } + + element = raptor_rdfxml_element_pop(rdf_xml_parser); + if(element) { + if(element->parent) { + /* Do not change this; PROPERTYELT will turn into MEMBER if necessary + * See the switch case for 
MEMBER / PROPERTYELT where the test is done. + * + * PARSETYPE_RESOURCE should never be propogated up since it + * will turn the next child (node) element into a property + */ + if(element->state != RAPTOR_STATE_MEMBER_PROPERTYELT && + element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) + element->parent->child_state = element->state; + } + + raptor_free_rdfxml_element(element); + } +} + + +/* cdata (and ignorable whitespace for libxml). + * s 0 terminated is for libxml + */ +static void +raptor_rdfxml_characters_handler(void *user_data, + raptor_xml_element* xml_element, + const unsigned char *s, int len) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + + raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 0); +} + + +/* cdata (and ignorable whitespace for libxml). + * s is 0 terminated for libxml2 + */ +static void +raptor_rdfxml_cdata_handler(void *user_data, raptor_xml_element* xml_element, + const unsigned char *s, int len) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + + raptor_rdfxml_cdata_grammar(rdf_parser, s, len, 1); +} + + +/* comment handler + * s is 0 terminated + */ +static void +raptor_rdfxml_comment_handler(void *user_data, raptor_xml_element* xml_element, + const unsigned char *s) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + raptor_rdfxml_parser* rdf_xml_parser; + raptor_rdfxml_element* element; + + if(rdf_parser->failed || !xml_element) + return; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + element = rdf_xml_parser->current_element; + + if(element) { + if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL) + raptor_xml_writer_comment(rdf_xml_parser->xml_writer, s); + } + + +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("XML Comment '%s'\n", s); +#endif +} + + +static const unsigned char* const daml_namespace_uri_string = (const unsigned char*)"http://www.daml.org/2001/03/daml+oil#"; +static const int daml_namespace_uri_string_len = 37; + + +static int 
+raptor_rdfxml_parse_init(raptor_parser* rdf_parser, const char *name) +{ + raptor_rdfxml_parser* rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + raptor_sax2* sax2; + raptor_world* world = rdf_parser->world; + + /* Allocate sax2 object */ + sax2 = raptor_new_sax2(rdf_parser->world, &rdf_parser->locator, rdf_parser); + rdf_xml_parser->sax2 = sax2; + if(!sax2) + return 1; + + /* Initialize sax2 element handlers */ + raptor_sax2_set_start_element_handler(sax2, raptor_rdfxml_start_element_handler); + raptor_sax2_set_end_element_handler(sax2, raptor_rdfxml_end_element_handler); + raptor_sax2_set_characters_handler(sax2, raptor_rdfxml_characters_handler); + raptor_sax2_set_cdata_handler(sax2, raptor_rdfxml_cdata_handler); + raptor_sax2_set_comment_handler(sax2, raptor_rdfxml_comment_handler); + raptor_sax2_set_namespace_handler(sax2, raptor_rdfxml_sax2_new_namespace_handler); + + /* Allocate uris */ + RAPTOR_DAML_NS_URI(rdf_xml_parser) = raptor_new_uri_from_counted_string(world, + daml_namespace_uri_string, + daml_namespace_uri_string_len); + + RAPTOR_DAML_List_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"List"); + RAPTOR_DAML_first_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser) ,(const unsigned char *)"first"); + RAPTOR_DAML_rest_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"rest"); + RAPTOR_DAML_nil_URI(rdf_xml_parser) = raptor_new_uri_from_uri_local_name(world, RAPTOR_DAML_NS_URI(rdf_xml_parser), (const unsigned char *)"nil"); + + /* Check for uri allocation failures */ + if(!RAPTOR_DAML_NS_URI(rdf_xml_parser) || + !RAPTOR_DAML_List_URI(rdf_xml_parser) || + !RAPTOR_DAML_first_URI(rdf_xml_parser) || + !RAPTOR_DAML_rest_URI(rdf_xml_parser) || + !RAPTOR_DAML_nil_URI(rdf_xml_parser)) + return 1; + + /* Everything succeeded */ + return 0; +} + + +static 
int +raptor_rdfxml_parse_start(raptor_parser* rdf_parser) +{ + raptor_uri *uri = rdf_parser->base_uri; + raptor_rdfxml_parser* rdf_xml_parser; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + /* base URI required for RDF/XML */ + if(!uri) + return 1; + + /* Optionally normalize language to lowercase + * http://www.w3.org/TR/rdf-concepts/#dfn-language-identifier + */ + raptor_sax2_set_option(rdf_xml_parser->sax2, + RAPTOR_OPTION_NORMALIZE_LANGUAGE, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NORMALIZE_LANGUAGE)); + + /* Optionally forbid internal network and file requests in the XML parser */ + raptor_sax2_set_option(rdf_xml_parser->sax2, + RAPTOR_OPTION_NO_NET, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET)); + raptor_sax2_set_option(rdf_xml_parser->sax2, + RAPTOR_OPTION_NO_FILE, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_FILE)); + raptor_sax2_set_option(rdf_xml_parser->sax2, + RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES)); + if(rdf_parser->uri_filter) + raptor_sax2_set_uri_filter(rdf_xml_parser->sax2, rdf_parser->uri_filter, + rdf_parser->uri_filter_user_data); + + raptor_sax2_parse_start(rdf_xml_parser->sax2, uri); + + /* Delete any existing id_set */ + if(rdf_xml_parser->id_set) { + raptor_free_id_set(rdf_xml_parser->id_set); + rdf_xml_parser->id_set = NULL; + } + + /* Create a new id_set if needed */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID)) { + rdf_xml_parser->id_set = raptor_new_id_set(rdf_parser->world); + if(!rdf_xml_parser->id_set) + return 1; + } + + return 0; +} + + +static void +raptor_rdfxml_parse_terminate(raptor_parser *rdf_parser) +{ + raptor_rdfxml_parser* rdf_xml_parser; + raptor_rdfxml_element* element; + int i; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + if(rdf_xml_parser->sax2) { + raptor_free_sax2(rdf_xml_parser->sax2); + 
rdf_xml_parser->sax2 = NULL; + } + + while( (element = raptor_rdfxml_element_pop(rdf_xml_parser)) ) + raptor_free_rdfxml_element(element); + + + for(i = 0; i < RAPTOR_RDFXML_N_CONCEPTS; i++) { + raptor_uri* concept_uri = rdf_xml_parser->concepts[i]; + if(concept_uri) { + raptor_free_uri(concept_uri); + rdf_xml_parser->concepts[i] = NULL; + } + } + + if(rdf_xml_parser->id_set) { + raptor_free_id_set(rdf_xml_parser->id_set); + rdf_xml_parser->id_set = NULL; + } + + if (rdf_xml_parser->xml_writer) { + raptor_free_xml_writer(rdf_xml_parser->xml_writer); + rdf_xml_parser->xml_writer = NULL; + } + + if (rdf_xml_parser->iostream) { + raptor_free_iostream(rdf_xml_parser->iostream); + rdf_xml_parser->iostream = NULL; + } + + if (rdf_xml_parser->xml_content) { + RAPTOR_FREE(char*, rdf_xml_parser->xml_content); + rdf_xml_parser->xml_content = NULL; + rdf_xml_parser->xml_content_length = 0; + } +} + + +static int +raptor_rdfxml_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score = 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "rdf") || + !strcmp((const char*)suffix, "rdfs") || + !strcmp((const char*)suffix, "foaf") || + !strcmp((const char*)suffix, "doap") || + !strcmp((const char*)suffix, "owl") || + !strcmp((const char*)suffix, "daml")) + score = 9; + if(!strcmp((const char*)suffix, "rss")) + score = 3; + } + + if(identifier) { + if(strstr((const char*)identifier, "rss1")) + score += 5; + else if(!suffix && strstr((const char*)identifier, "rss")) + score += 3; + else if(!suffix && strstr((const char*)identifier, "rdf")) + score += 2; + else if(!suffix && strstr((const char*)identifier, "RDF")) + score += 2; + } + + if(mime_type) { + if(strstr((const char*)mime_type, "html")) + score -= 4; + else if(!strcmp((const char*)mime_type, "text/rdf")) + score += 7; + else if(!strcmp((const char*)mime_type, "application/xml")) 
+ score += 5; + } + + if(buffer && len) { + /* Check it's an XML namespace declared and not N3 or Turtle which + * mention the namespace URI but not in this form. + */ +#define HAS_RDF_XMLNS1 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) +#define HAS_RDF_XMLNS2 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) +#define HAS_RDF_XMLNS3 (raptor_memstr((const char*)buffer, len, "xmlns=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) +#define HAS_RDF_XMLNS4 (raptor_memstr((const char*)buffer, len, "xmlns='http://www.w3.org/1999/02/22-rdf-syntax-ns#") != NULL) +#define HAS_RDF_ENTITY1 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'") != NULL) +#define HAS_RDF_ENTITY2 (raptor_memstr((const char*)buffer, len, "!ENTITY rdf \"http://www.w3.org/1999/02/22-rdf-syntax-ns#\"") != NULL) +#define HAS_RDF_ENTITY3 (raptor_memstr((const char*)buffer, len, "xmlns:rdf=\"&rdf;\"") != NULL) +#define HAS_RDF_ENTITY4 (raptor_memstr((const char*)buffer, len, "xmlns:rdf='&rdf;'") != NULL) +#define HAS_HTML_NS (raptor_memstr((const char*)buffer, len, "http://www.w3.org/1999/xhtml") != NULL) +#define HAS_HTML_ROOT (raptor_memstr((const char*)buffer, len, "<html") != NULL) + + if(!HAS_HTML_NS && !HAS_HTML_ROOT && + (HAS_RDF_XMLNS1 || HAS_RDF_XMLNS2 || HAS_RDF_XMLNS3 || HAS_RDF_XMLNS4 || + HAS_RDF_ENTITY1 || HAS_RDF_ENTITY2 || HAS_RDF_ENTITY3 || HAS_RDF_ENTITY4) + ) { + int has_rdf_RDF = (raptor_memstr((const char*)buffer, len, "<rdf:RDF") != NULL); + int has_rdf_Description = (raptor_memstr((const char*)buffer, len, "rdf:Description") != NULL); + int has_rdf_about = (raptor_memstr((const char*)buffer, len, "rdf:about") != NULL); + + score += 7; + if(has_rdf_RDF) + score++; + if(has_rdf_Description) + score++; + if(has_rdf_about) + score++; + } + } + + return score; +} + + + +static int 
+raptor_rdfxml_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *buffer, + size_t len, int is_end) +{ + raptor_rdfxml_parser* rdf_xml_parser; + int rc; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + if(rdf_parser->failed) + return 1; + + rc = raptor_sax2_parse_chunk(rdf_xml_parser->sax2, buffer, len, is_end); + + if(is_end) { + if(rdf_parser->emitted_default_graph) { + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + } + + return rc; +} + + +static void +raptor_rdfxml_generate_statement(raptor_parser *rdf_parser, + raptor_term *subject_term, + raptor_uri *predicate_uri, + raptor_term *object_term, + raptor_term *reified_term, + raptor_rdfxml_element* bag_element) +{ + raptor_statement *statement = &rdf_parser->statement; + raptor_term* predicate_term = NULL; + int free_reified_term = 0; + + if(rdf_parser->failed) + return; + +#ifdef RAPTOR_DEBUG_VERBOSE + if(!subject_term) + RAPTOR_FATAL1("Statement has no subject\n"); + + if(!predicate_uri) + RAPTOR_FATAL1("Statement has no predicate\n"); + + if(!object_term) + RAPTOR_FATAL1("Statement has no object\n"); + +#endif + + predicate_term = raptor_new_term_from_uri(rdf_parser->world, predicate_uri); + if(!predicate_term) + return; + + statement->subject = subject_term; + statement->predicate = predicate_term; + statement->object = object_term; + +#ifdef RAPTOR_DEBUG_VERBOSE + fprintf(stderr, "raptor_rdfxml_generate_statement: Generating statement: "); + raptor_statement_print(statement, stderr); + fputc('\n', stderr); +#endif + + if(!rdf_parser->emitted_default_graph) { + raptor_parser_start_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph++; + } + + if(!rdf_parser->statement_handler) + goto generate_tidy; + + /* Generate the statement; or is it a fact? 
*/ + (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); + + + /* the bagID mess */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID) && + bag_element && bag_element->bag) { + raptor_term* bag = bag_element->bag; + raptor_uri* bag_predicate_uri = NULL; + raptor_term* bag_predicate_term = NULL; + + statement->subject = bag; + + bag_element->last_bag_ordinal++; + + /* new URI object */ + bag_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, + bag_element->last_bag_ordinal); + if(!bag_predicate_uri) + goto generate_tidy; + + bag_predicate_term = raptor_new_term_from_uri(rdf_parser->world, + bag_predicate_uri); + raptor_free_uri(bag_predicate_uri); + + if(!bag_predicate_term) + goto generate_tidy; + + statement->predicate = bag_predicate_term; + + if(!reified_term || !reified_term->value.blank.string) { + unsigned char *reified_id = NULL; + + /* reified_term is NULL so generate a bag ID */ + reified_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!reified_id) + goto generate_tidy; + + reified_term = raptor_new_term_from_blank(rdf_parser->world, reified_id); + RAPTOR_FREE(char*, reified_id); + + if(!reified_term) + goto generate_tidy; + free_reified_term = 1; + } + + statement->object = reified_term; + (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); + + if(bag_predicate_term) + raptor_free_term(bag_predicate_term); + } + + + /* return if is there no reified ID (that is valid) */ + if(!reified_term || !reified_term->value.blank.string) + goto generate_tidy; + + + /* otherwise generate reified statements */ + + statement->subject = reified_term; + statement->predicate = RAPTOR_RDF_type_term(rdf_parser->world); + statement->object = RAPTOR_RDF_Statement_term(rdf_parser->world); + (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); + + /* statement->subject = reified_term; */ + statement->predicate = RAPTOR_RDF_subject_term(rdf_parser->world); + statement->object = 
subject_term; + (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); + + + /* statement->subject = reified_term; */ + statement->predicate = RAPTOR_RDF_predicate_term(rdf_parser->world); + statement->object = predicate_term; + (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); + + /* statement->subject = reified_term; */ + statement->predicate = RAPTOR_RDF_object_term(rdf_parser->world); + statement->object = object_term; + (*rdf_parser->statement_handler)(rdf_parser->user_data, statement); + + + generate_tidy: + /* Tidy up things allocated here */ + if(predicate_term) + raptor_free_term(predicate_term); + if(free_reified_term && reified_term) + raptor_free_term(reified_term); +} + + + +/** + * raptor_rdfxml_element_has_property_attributes: + * @element: element with the property attributes + * + * Return true if the element has at least one property attribute. + * + **/ +static int +raptor_rdfxml_element_has_property_attributes(raptor_rdfxml_element *element) +{ + int i; + + if(element->xml_element->attribute_count > 0) + return 1; + + /* look for rdf: properties */ + for(i = 0; i <= RDF_NS_LAST; i++) { + if(element->rdf_attr[i] && + raptor_rdf_ns_terms_info[i].type != RAPTOR_TERM_TYPE_UNKNOWN) + return 1; + } + return 0; +} + + +/** + * raptor_rdfxml_process_property_attributes: + * @rdf_parser: Raptor parser object + * @attributes_element: element with the property attributes + * @resource_element: element that defines the resource URI + * subject->value etc. + * @property_node_identifier: Use this identifier for the resource URI + * and count any ordinals for it locally + * + * Process the property attributes for an element for a given resource. 
+ * + **/ +static int +raptor_rdfxml_process_property_attributes(raptor_parser *rdf_parser, + raptor_rdfxml_element *attributes_element, + raptor_rdfxml_element *resource_element, + raptor_term *property_node_identifier) +{ + unsigned int i; + raptor_term *resource_identifier; + + resource_identifier = property_node_identifier ? property_node_identifier : resource_element->subject; + + + /* Process attributes as propAttr* = * (propName="string")* + */ + for(i = 0; i < attributes_element->xml_element->attribute_count; i++) { + raptor_qname* attr = attributes_element->xml_element->attributes[i]; + const unsigned char *name; + const unsigned char *value; + int handled = 0; + + if(!attr) + continue; + + name = attr->local_name; + value = attr->value; + + if(!attr->nspace) { + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_error(rdf_parser, + "Using property attribute '%s' without a namespace is forbidden.", + name); + continue; + } + + + if(!raptor_unicode_check_utf8_nfc_string(value, strlen((const char*)value))) { + raptor_log_level l; + + raptor_rdfxml_update_document_locator(rdf_parser); + l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? 
RAPTOR_LOG_LEVEL_ERROR : + RAPTOR_LOG_LEVEL_WARN; + raptor_parser_log_error(rdf_parser, l, + "Property attribute '%s' has a string not in Unicode Normal Form C: %s", + name, value); + continue; + } + + + /* Generate the property statement using one of these properties: + * 1) rdf:_n + * 2) the URI from the rdf:* attribute where allowed + * 3) otherwise forbidden (including rdf:li) + */ + if(attr->nspace->is_rdf_ms) { + /* is rdf: namespace */ + + if(*name == '_') { + int ordinal; + + /* recognise rdf:_ */ + name++; + ordinal = raptor_check_ordinal(name); + if(ordinal < 1) { + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_error(rdf_parser, + "Illegal ordinal value %d in property attribute '%s' seen on containing element '%s'.", + ordinal, attr->local_name, name); + } + } else { + int rc; + + raptor_rdfxml_update_document_locator(rdf_parser); + + rc = raptor_rdfxml_check_propertyAttribute_name((const char*)name); + if(!rc) + raptor_parser_error(rdf_parser, + "RDF term %s is forbidden as a property attribute.", + name); + else if(rc < 0) + raptor_parser_warning(rdf_parser, + "Unknown RDF namespace property attribute '%s'.", + name); + } + + } /* end is RDF namespace property */ + + + if(!handled) { + raptor_term* object_term; + + object_term = raptor_new_term_from_literal(rdf_parser->world, + (unsigned char*)value, + NULL, NULL); + + /* else not rdf: namespace or unknown in rdf: namespace so + * generate a statement with a literal object + */ + raptor_rdfxml_generate_statement(rdf_parser, + resource_identifier, + attr->uri, + object_term, + NULL, /* Property attributes are never reified*/ + resource_element); + + raptor_free_term(object_term); + } + + } /* end for ... 
attributes */ + + + /* Handle rdf property attributes + * (only rdf:type and rdf:value at present) + */ + for(i = 0; i <= RDF_NS_LAST; i++) { + const unsigned char *value = attributes_element->rdf_attr[i]; + size_t value_len; + int object_is_literal; + raptor_uri *property_uri; + raptor_term* object_term; + + if(!value) + continue; + + value_len = strlen((const char*)value); + + object_is_literal = (raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_LITERAL); + + if(raptor_rdf_ns_terms_info[i].type == RAPTOR_TERM_TYPE_UNKNOWN) { + const char *name = raptor_rdf_ns_terms_info[i].name; + int rc = raptor_rdfxml_check_propertyAttribute_name(name); + if(!rc) { + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_error(rdf_parser, + "RDF term %s is forbidden as a property attribute.", + name); + continue; + } else if(rc < 0) + raptor_parser_warning(rdf_parser, + "Unknown RDF namespace property attribute '%s'.", + name); + } + + if(object_is_literal && + !raptor_unicode_check_utf8_nfc_string(value, value_len)) { + raptor_log_level l; + + raptor_rdfxml_update_document_locator(rdf_parser); + l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? 
RAPTOR_LOG_LEVEL_ERROR : + RAPTOR_LOG_LEVEL_WARN; + + raptor_parser_log_error(rdf_parser, l, + "Property attribute '%s' has a string not in Unicode Normal Form C: %s", + raptor_rdf_ns_terms_info[i].name, value); + continue; + } + + property_uri = raptor_new_uri_for_rdf_concept(rdf_parser->world, + (const unsigned char*)raptor_rdf_ns_terms_info[i].name); + + if(object_is_literal) { + object_term = raptor_new_term_from_literal(rdf_parser->world, + (unsigned char*)value, + NULL, NULL); + } else { + raptor_uri *base_uri; + raptor_uri *object_uri; + base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser); + object_uri = raptor_new_uri_relative_to_base(rdf_parser->world, + base_uri, value); + object_term = raptor_new_term_from_uri(rdf_parser->world, object_uri); + raptor_free_uri(object_uri); + } + + raptor_rdfxml_generate_statement(rdf_parser, + resource_identifier, + property_uri, + object_term, + NULL, /* Property attributes are never reified*/ + resource_element); + + raptor_free_term(object_term); + + raptor_free_uri(property_uri); + + } /* end for rdf:property values */ + + return 0; +} + + +static void +raptor_rdfxml_start_element_grammar(raptor_parser *rdf_parser, + raptor_rdfxml_element *element) +{ + raptor_rdfxml_parser *rdf_xml_parser; + int finished; + raptor_state state; + raptor_xml_element* xml_element; + raptor_qname* el_qname; + const unsigned char *el_name; + int element_in_rdf_ns; + int rc = 0; + raptor_uri* base_uri; + raptor_uri* element_name_uri; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + xml_element = element->xml_element; + el_qname = raptor_xml_element_get_name(xml_element); + el_name = el_qname->local_name; + element_in_rdf_ns = (el_qname->nspace && el_qname->nspace->is_rdf_ms); + base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser); + element_name_uri = el_qname->uri; + + state = element->state; +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state)); +#endif + + 
finished = 0; + while(!finished) { + + switch(state) { + case RAPTOR_STATE_SKIPPING: + element->child_state = state; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED; + finished = 1; + break; + + case RAPTOR_STATE_UNKNOWN: + /* found <rdf:RDF> ? */ + + if(element_in_rdf_ns) { + if(raptor_uri_equals(element_name_uri, + RAPTOR_RDF_RDF_URI(rdf_parser->world))) { + element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES; + /* Yes - need more content before can continue, + * so wait for another element + */ + finished = 1; + break; + } + if(raptor_uri_equals(element_name_uri, + RAPTOR_RDF_Description_URI(rdf_parser->world))) { + state = RAPTOR_STATE_DESCRIPTION; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; + /* Yes - found something so move immediately to description */ + break; + } + + if(element_in_rdf_ns) { + rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name); + if(!rc) { + raptor_parser_error(rdf_parser, + "rdf:%s is forbidden as a node element.", + el_name); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } else if(rc < 0) { + raptor_parser_warning(rdf_parser, + "rdf:%s is an unknown RDF namespaced element.", + el_name); + } + } + } + + /* If scanning for element, can continue */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) { + finished = 1; + break; + } + + /* Otherwise the choice of the next state can be made + * from the current element by the OBJ state + */ + state = RAPTOR_STATE_NODE_ELEMENT_LIST; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES; + break; + + + case RAPTOR_STATE_NODE_ELEMENT_LIST: + /* Handling + * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElementList + * + * Everything goes to nodeElement + */ + + state = RAPTOR_STATE_NODE_ELEMENT; + + element->content_type = 
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; + + break; + + + + case RAPTOR_STATE_DESCRIPTION: + case RAPTOR_STATE_NODE_ELEMENT: + case RAPTOR_STATE_PARSETYPE_RESOURCE: + case RAPTOR_STATE_PARSETYPE_COLLECTION: + /* Handling <rdf:Description> or other node element + * http://www.w3.org/TR/rdf-syntax-grammar/#nodeElement + * + * or a property element acting as a node element for + * rdf:parseType="Resource" + * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeResourcePropertyElt + * or rdf:parseType="Collection" (and daml:Collection) + * http://www.w3.org/TR/rdf-syntax-grammar/#parseTypeCollectionPropertyElt + * + * Only create a bag if bagID given + */ + + if(!element_name_uri) { + /* We cannot handle this */ + raptor_parser_warning(rdf_parser, "Using node element '%s' without a namespace is forbidden.", + el_qname->local_name); + raptor_rdfxml_update_document_locator(rdf_parser); + element->state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + + if(element_in_rdf_ns) { + rc = raptor_rdfxml_check_nodeElement_name((const char*)el_name); + if(!rc) { + raptor_parser_error(rdf_parser, + "rdf:%s is forbidden as a node element.", + el_name); + state = RAPTOR_STATE_SKIPPING; + element->state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } else if(rc < 0) { + raptor_parser_warning(rdf_parser, + "rdf:%s is an unknown RDF namespaced element.", + el_name); + } + } + + if(element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && + element->content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION && + element->parent && + (element->parent->state == RAPTOR_STATE_PROPERTYELT || + element->parent->state == RAPTOR_STATE_MEMBER_PROPERTYELT) && + element->parent->xml_element->content_element_seen > 1) { + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_error(rdf_parser, "The enclosing property already has an object"); + state = 
RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + + if(state == RAPTOR_STATE_NODE_ELEMENT || + state == RAPTOR_STATE_DESCRIPTION || + state == RAPTOR_STATE_PARSETYPE_COLLECTION) { + if(element_in_rdf_ns && + raptor_uri_equals(element_name_uri, + RAPTOR_RDF_Description_URI(rdf_parser->world))) + state = RAPTOR_STATE_DESCRIPTION; + else + state = RAPTOR_STATE_NODE_ELEMENT; + } + + + if((element->rdf_attr[RDF_NS_ID]!=NULL) + + (element->rdf_attr[RDF_NS_about]!=NULL) + + (element->rdf_attr[RDF_NS_nodeID]!=NULL) > 1) { + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_error(rdf_parser, "Multiple attributes of rdf:ID, rdf:about and rdf:nodeID on element '%s' - only one allowed.", el_name); + } + + if(element->rdf_attr[RDF_NS_ID]) { + unsigned char* subject_id; + raptor_uri* subject_uri; + + subject_id = (unsigned char*)element->rdf_attr[RDF_NS_ID]; + + if(!raptor_valid_xml_ID(rdf_parser, subject_id)) { + raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", + subject_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + if(raptor_rdfxml_record_ID(rdf_parser, element, subject_id)) { + raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", + subject_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + + /* after this, subject_id is the owner of the ID string */ + element->rdf_attr[RDF_NS_ID] = NULL; + + subject_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, + subject_id); + RAPTOR_FREE(char*, subject_id); + + if(!subject_uri) + goto oom; + element->subject = raptor_new_term_from_uri(rdf_parser->world, + subject_uri); + raptor_free_uri(subject_uri); + + if(!element->subject) + goto oom; + + } else if(element->rdf_attr[RDF_NS_about]) { + raptor_uri* subject_uri; + + subject_uri = raptor_new_uri_relative_to_base(rdf_parser->world, + base_uri, + (const 
unsigned char*)element->rdf_attr[RDF_NS_about]); + if(!subject_uri) + goto oom; + + element->subject = raptor_new_term_from_uri(rdf_parser->world, + subject_uri); + raptor_free_uri(subject_uri); + + RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_about]); + element->rdf_attr[RDF_NS_about] = NULL; + if(!element->subject) + goto oom; + + } else if(element->rdf_attr[RDF_NS_nodeID]) { + unsigned char* subject_id; + subject_id = raptor_world_internal_generate_id(rdf_parser->world, + (unsigned char*)element->rdf_attr[RDF_NS_nodeID]); + if(!subject_id) + goto oom; + + element->subject = raptor_new_term_from_blank(rdf_parser->world, + subject_id); + RAPTOR_FREE(char*, subject_id); + + element->rdf_attr[RDF_NS_nodeID] = NULL; + if(!element->subject) + goto oom; + + if(!raptor_valid_xml_ID(rdf_parser, element->subject->value.blank.string)) { + raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", + (const char*)element->subject->value.blank.string); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + } else if(element->parent && + element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION && + element->parent->child_content_type != RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION && + element->parent->object) { + /* copy from parent (property element), it has a URI for us */ + element->subject = raptor_term_copy(element->parent->object); + } else { + unsigned char* subject_id; + subject_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!subject_id) + goto oom; + + element->subject = raptor_new_term_from_blank(rdf_parser->world, + subject_id); + RAPTOR_FREE(char*, subject_id); + + if(!element->subject) + goto oom; + } + + + if(element->rdf_attr[RDF_NS_bagID]) { + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) { + unsigned char* bag_id; + raptor_uri* bag_uri = NULL; + + bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID]; + 
element->rdf_attr[RDF_NS_bagID] = NULL; + + bag_uri = raptor_new_uri_from_id(rdf_parser->world, + base_uri, bag_id); + if(!bag_uri) { + RAPTOR_FREE(char*, bag_id); + goto oom; + } + + element->bag = raptor_new_term_from_uri(rdf_parser->world, bag_uri); + raptor_free_uri(bag_uri); + + if(!raptor_valid_xml_ID(rdf_parser, bag_id)) { + raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", + bag_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + RAPTOR_FREE(char*, bag_id); + break; + } + if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) { + raptor_parser_error(rdf_parser, "Duplicated rdf:bagID value '%s'", + bag_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + RAPTOR_FREE(char*, bag_id); + break; + } + + RAPTOR_FREE(char*, bag_id); + raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated."); + + + raptor_rdfxml_generate_statement(rdf_parser, + element->bag, + RAPTOR_RDF_type_URI(rdf_parser->world), + RAPTOR_RDF_Bag_term(rdf_parser->world), + NULL, + NULL); + } else { + /* bagID forbidden */ + raptor_parser_error(rdf_parser, "rdf:bagID is forbidden."); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + } + + + if(element->parent) { + + /* In a rdf:parseType="Collection" the resources are appended + * to the list at the genid element->parent->tail_id + */ + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION || + element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { + /* <idList> rdf:type rdf:List */ + const unsigned char * idList; + raptor_uri *predicate_uri; + raptor_term* idList_term; + raptor_term* object_term; + + idList = raptor_world_generate_bnodeid(rdf_parser->world); + if(!idList) + goto oom; + /* idList string is saved below in element->parent->tail_id */ + + idList_term = raptor_new_term_from_blank(rdf_parser->world, idList); + 
if(!idList_term) { + RAPTOR_FREE(char*, idList); + goto oom; + } + + if((element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) || + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST)) { + raptor_uri* class_uri = NULL; + + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { + class_uri = RAPTOR_DAML_List_URI(rdf_xml_parser); + object_term = raptor_new_term_from_uri(rdf_parser->world, + class_uri); + } else + object_term = raptor_term_copy(RAPTOR_RDF_List_term(rdf_parser->world)); + + raptor_rdfxml_generate_statement(rdf_parser, + idList_term, + RAPTOR_RDF_type_URI(rdf_parser->world), + object_term, + NULL, + element); + raptor_free_term(object_term); + } + + predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? RAPTOR_DAML_first_URI(rdf_xml_parser) : RAPTOR_RDF_first_URI(rdf_parser->world); + + /* <idList> rdf:first <element->uri> */ + raptor_rdfxml_generate_statement(rdf_parser, + idList_term, + predicate_uri, + element->subject, + NULL, + NULL); + + /* If there is no rdf:parseType="Collection" */ + if(!element->parent->tail_id) { + /* Free any existing object still around. + * I suspect this can never happen. + */ + if(element->parent->object) + raptor_free_term(element->parent->object); + + element->parent->object = raptor_new_term_from_blank(rdf_parser->world, + idList); + } else { + raptor_term* tail_id_term; + + tail_id_term = raptor_new_term_from_blank(rdf_parser->world, + element->parent->tail_id); + + predicate_uri = (element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) ? 
RAPTOR_DAML_rest_URI(rdf_xml_parser) : RAPTOR_RDF_rest_URI(rdf_parser->world); + + /* _:tail_id rdf:rest _:listRest */ + raptor_rdfxml_generate_statement(rdf_parser, + tail_id_term, + predicate_uri, + idList_term, + NULL, + NULL); + + raptor_free_term(tail_id_term); + } + + /* update new tail */ + if(element->parent->tail_id) + RAPTOR_FREE(char*, (char*)element->parent->tail_id); + + element->parent->tail_id = idList; + + raptor_free_term(idList_term); + } else if(element->parent->state != RAPTOR_STATE_UNKNOWN && + element->state != RAPTOR_STATE_PARSETYPE_RESOURCE) { + /* If there is a parent element (property) containing this + * element (node) and it has no object, set it from this subject + */ + + if(element->parent->object) { + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_error(rdf_parser, + "Tried to set multiple objects of a statement"); + } else { + /* Store URI of this node in our parent as the property object */ + element->parent->object = raptor_term_copy(element->subject); + element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + } + + } + } + + + /* If this is a node element, generate the rdf:type statement + * from this node + */ + if(state == RAPTOR_STATE_NODE_ELEMENT) { + raptor_term* el_name_term; + + el_name_term = raptor_new_term_from_uri(rdf_parser->world, + element_name_uri); + + raptor_rdfxml_generate_statement(rdf_parser, + element->subject, + RAPTOR_RDF_type_URI(rdf_parser->world), + el_name_term, + element->reified, + element); + + raptor_free_term(el_name_term); + } + + if(raptor_rdfxml_process_property_attributes(rdf_parser, element, + element, NULL)) + goto oom; + + /* for both productions now need some more content or + * property elements before can do any more work. 
+ */ + + element->child_state = RAPTOR_STATE_PROPERTYELT; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; + finished = 1; + break; + + + case RAPTOR_STATE_PARSETYPE_OTHER: + /* FALLTHROUGH */ + + case RAPTOR_STATE_PARSETYPE_LITERAL: + raptor_xml_writer_start_element(rdf_xml_parser->xml_writer, xml_element); + element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; + + finished = 1; + break; + + /* Handle all the detail of the various options of property element + * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt + * + * All the attributes must be scanned here to see what additional + * property element work is needed. No triples are generated + * until the end of this element, until it is clear if the + * element was empty. + */ + case RAPTOR_STATE_MEMBER_PROPERTYELT: + case RAPTOR_STATE_PROPERTYELT: + + if(!element_name_uri) { + raptor_parser_error(rdf_parser, "Using property element '%s' without a namespace is forbidden.", + raptor_xml_element_get_name(element->parent->xml_element)->local_name); + raptor_rdfxml_update_document_locator(rdf_parser); + element->state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + + /* Handling rdf:li as a property, noting special processing */ + if(element_in_rdf_ns && + raptor_uri_equals(element_name_uri, + RAPTOR_RDF_li_URI(rdf_parser->world))) { + state = RAPTOR_STATE_MEMBER_PROPERTYELT; + } + + + if(element_in_rdf_ns) { + rc = raptor_rdfxml_check_propertyElement_name((const char*)el_name); + if(!rc) { + raptor_parser_error(rdf_parser, + "rdf:%s is forbidden as a property element.", + el_name); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } else if(rc < 0) { + raptor_parser_warning(rdf_parser, + "rdf:%s is an unknown RDF namespaced element.", + el_name); + } + } + + + /* rdf:ID on a property element 
- reify a statement. + * Allowed on all property element forms + */ + if(element->rdf_attr[RDF_NS_ID]) { + raptor_uri *reified_uri; + + element->reified_id = element->rdf_attr[RDF_NS_ID]; + element->rdf_attr[RDF_NS_ID] = NULL; + reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, + element->reified_id); + if(!reified_uri) + goto oom; + + element->reified = raptor_new_term_from_uri(rdf_parser->world, + reified_uri); + raptor_free_uri(reified_uri); + + if(!element->reified) + goto oom; + + if(!raptor_valid_xml_ID(rdf_parser, element->reified_id)) { + raptor_parser_error(rdf_parser, "Illegal rdf:ID value '%s'", + element->reified_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + if(raptor_rdfxml_record_ID(rdf_parser, element, element->reified_id)) { + raptor_parser_error(rdf_parser, "Duplicated rdf:ID value '%s'", + element->reified_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + } + + /* rdf:datatype on a property element. 
+ * Only allowed for + * http://www.w3.org/TR/rdf-syntax-grammar/#literalPropertyElt + */ + if(element->rdf_attr[RDF_NS_datatype]) { + raptor_uri *datatype_uri; + + datatype_uri = raptor_new_uri_relative_to_base(rdf_parser->world, + base_uri, + (const unsigned char*)element->rdf_attr[RDF_NS_datatype]); + element->object_literal_datatype = datatype_uri; + RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_datatype]); + element->rdf_attr[RDF_NS_datatype] = NULL; + if(!element->object_literal_datatype) + goto oom; + } + + if(element->rdf_attr[RDF_NS_bagID]) { + + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) { + + if(element->rdf_attr[RDF_NS_resource] || + element->rdf_attr[RDF_NS_parseType]) { + + raptor_parser_error(rdf_parser, "rdf:bagID is forbidden on property element '%s' with an rdf:resource or rdf:parseType attribute.", el_name); + /* prevent this being used later either */ + RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_bagID]); + element->rdf_attr[RDF_NS_bagID] = NULL; + } else { + unsigned char* bag_id; + raptor_uri* bag_uri; + + bag_id = (unsigned char*)element->rdf_attr[RDF_NS_bagID]; + element->rdf_attr[RDF_NS_bagID] = NULL; + bag_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, + bag_id); + if(!bag_uri) { + RAPTOR_FREE(char*, bag_id); + goto oom; + } + + element->bag = raptor_new_term_from_uri(rdf_parser->world, + bag_uri); + raptor_free_uri(bag_uri); + + if(!element->bag) { + RAPTOR_FREE(char*, bag_id); + goto oom; + } + + if(!raptor_valid_xml_ID(rdf_parser, bag_id)) { + raptor_parser_error(rdf_parser, "Illegal rdf:bagID value '%s'", + bag_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + RAPTOR_FREE(char*, bag_id); + break; + } + if(raptor_rdfxml_record_ID(rdf_parser, element, bag_id)) { + raptor_parser_error(rdf_parser, + "Duplicated rdf:bagID value '%s'", bag_id); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + RAPTOR_FREE(char*, bag_id); 
+ finished = 1; + break; + } + + RAPTOR_FREE(char*, bag_id); + raptor_parser_warning(rdf_parser, "rdf:bagID is deprecated."); + } + } else { + /* bagID forbidden */ + raptor_parser_error(rdf_parser, "rdf:bagID is forbidden."); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + } /* if rdf:bagID on property element */ + + + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT; + + if(element->rdf_attr[RDF_NS_parseType]) { + const unsigned char *parse_type; + int i; + int is_parseType_Literal = 0; + + parse_type = element->rdf_attr[RDF_NS_parseType]; + + if(raptor_rdfxml_element_has_property_attributes(element)) { + raptor_parser_error(rdf_parser, "Property attributes cannot be used with rdf:parseType='%s'", parse_type); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + finished = 1; + break; + } + + /* Check for bad combinations of things with parseType */ + for(i = 0; i <= RDF_NS_LAST; i++) + if(element->rdf_attr[i] && i != RDF_NS_parseType) { + raptor_parser_error(rdf_parser, "Attribute '%s' cannot be used with rdf:parseType='%s'", raptor_rdf_ns_terms_info[i].name, parse_type); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + break; + } + + + if(!strcmp((char*)parse_type, "Literal")) + is_parseType_Literal = 1; + else if(!strcmp((char*)parse_type, "Resource")) { + unsigned char* subject_id; + + state = RAPTOR_STATE_PARSETYPE_RESOURCE; + element->child_state = RAPTOR_STATE_PROPERTYELT; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; + + /* create a node for the subject of the contained properties */ + subject_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!subject_id) + goto oom; + + element->subject = raptor_new_term_from_blank(rdf_parser->world, + subject_id); + RAPTOR_FREE(char*, subject_id); + + if(!element->subject) + goto oom; + } else 
if(!strcmp((char*)parse_type, "Collection")) { + /* An rdf:parseType="Collection" appears as a single node */ + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION; + } else { + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES) && + !raptor_strcasecmp((char*)parse_type, "daml:collection")) { + /* A DAML collection appears as a single node */ + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + element->child_state = RAPTOR_STATE_PARSETYPE_COLLECTION; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION; + } else { + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_WARN_OTHER_PARSETYPES)) { + raptor_parser_warning(rdf_parser, "Unknown rdf:parseType value '%s' taken as 'Literal'", parse_type); + } + is_parseType_Literal = 1; + } + + } + + if(is_parseType_Literal) { + raptor_xml_writer* xml_writer; + + /* rdf:parseType="Literal" - explicitly or default + * if the parseType value is not recognised + */ + rdf_xml_parser->xml_content = NULL; + rdf_xml_parser->xml_content_length = 0; + rdf_xml_parser->iostream = + raptor_new_iostream_to_string(rdf_parser->world, + &rdf_xml_parser->xml_content, + &rdf_xml_parser->xml_content_length, + raptor_alloc_memory); + if(!rdf_xml_parser->iostream) + goto oom; + xml_writer = raptor_new_xml_writer(rdf_parser->world, NULL, + rdf_xml_parser->iostream); + rdf_xml_parser->xml_writer = xml_writer; + if(!rdf_xml_parser->xml_writer) + goto oom; + + raptor_xml_writer_set_option(rdf_xml_parser->xml_writer, + RAPTOR_OPTION_WRITER_XML_DECLARATION, + NULL, 0); + + element->child_state = RAPTOR_STATE_PARSETYPE_LITERAL; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; + } + } else { + + /* Can only be the 
empty property element case + * http://www.w3.org/TR/rdf-syntax-grammar/#emptyPropertyElt + */ + + /* The presence of the rdf:resource or rdf:nodeID + * attributes is checked at element close time + */ + + /* + * Assign reified URI here so we don't reify property attributes + * using this id + */ + if(element->reified_id && !element->reified) { + raptor_uri* reified_uri; + reified_uri = raptor_new_uri_from_id(rdf_parser->world, base_uri, + element->reified_id); + if(!reified_uri) + goto oom; + element->reified = raptor_new_term_from_uri(rdf_parser->world, + reified_uri); + raptor_free_uri(reified_uri); + + if(!element->reified) + goto oom; + } + + if(element->rdf_attr[RDF_NS_resource] || + element->rdf_attr[RDF_NS_nodeID]) { + /* Done - wait for end of this element to end in order to + * check the element was empty as expected */ + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + } else { + /* Otherwise process content in obj (value) state */ + element->child_state = RAPTOR_STATE_NODE_ELEMENT_LIST; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT; + } + } + + finished = 1; + + break; + + + case RAPTOR_STATE_INVALID: + default: + raptor_parser_fatal_error(rdf_parser, + "%s Internal error - unexpected parser state %u - %s", + __FUNCTION__, + state, raptor_rdfxml_state_as_string(state)); + finished = 1; + + } /* end switch */ + + if(state != element->state) { + element->state = state; +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("Moved to state %d - %s\n", state, + raptor_rdfxml_state_as_string(state)); +#endif + } + + } /* end while */ + +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); +#endif + + return; + + oom: + raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping"); + element->state = RAPTOR_STATE_SKIPPING; +} + + +static void +raptor_rdfxml_end_element_grammar(raptor_parser *rdf_parser, + raptor_rdfxml_element *element) +{ + 
raptor_rdfxml_parser *rdf_xml_parser; + raptor_state state; + int finished; + raptor_xml_element* xml_element = element->xml_element; + raptor_qname* el_qname; + const unsigned char *el_name; + int element_in_rdf_ns; + raptor_uri* element_name_uri; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + el_qname = raptor_xml_element_get_name(xml_element); + el_name = el_qname->local_name; + element_in_rdf_ns= (el_qname->nspace && el_qname->nspace->is_rdf_ms); + element_name_uri = el_qname->uri; + + + state = element->state; +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Starting in state %s\n", raptor_rdfxml_state_as_string(state)); +#endif + + finished= 0; + while(!finished) { + switch(state) { + case RAPTOR_STATE_SKIPPING: + finished = 1; + break; + + case RAPTOR_STATE_UNKNOWN: + finished = 1; + break; + + case RAPTOR_STATE_NODE_ELEMENT_LIST: + if(element_in_rdf_ns && + raptor_uri_equals(element_name_uri, + RAPTOR_RDF_RDF_URI(rdf_parser->world))) { + /* end of RDF - boo hoo */ + state = RAPTOR_STATE_UNKNOWN; + finished = 1; + break; + } + /* When scanning, another element ending is outside the RDF + * world so this can happen without further work + */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) { + state = RAPTOR_STATE_UNKNOWN; + finished = 1; + break; + } + /* otherwise found some junk after RDF content in an RDF-only + * document (probably never get here since this would be + * a mismatched XML tag and cause an error earlier) + */ + raptor_rdfxml_update_document_locator(rdf_parser); + raptor_parser_warning(rdf_parser, + "Element '%s' ended, expected end of RDF element", + el_name); + state = RAPTOR_STATE_UNKNOWN; + finished = 1; + break; + + + case RAPTOR_STATE_DESCRIPTION: + case RAPTOR_STATE_NODE_ELEMENT: + case RAPTOR_STATE_PARSETYPE_RESOURCE: + + /* If there is a parent element containing this element and + * the parent isn't a description, has an identifier, + * create the statement between this node using parent 
property + * (Need to check for identifier so that top-level typed nodes + * don't get connect to <rdf:RDF> parent element) + */ + if(state == RAPTOR_STATE_NODE_ELEMENT && + element->parent && element->parent->subject) { + raptor_rdfxml_generate_statement(rdf_parser, + element->parent->subject, + element_name_uri, + element->subject, + NULL, + element); + } else if(state == RAPTOR_STATE_PARSETYPE_RESOURCE && + element->parent && element->parent->subject) { + /* Handle rdf:li as the rdf:parseType="resource" property */ + if(element_in_rdf_ns && + raptor_uri_equals(element_name_uri, + RAPTOR_RDF_li_URI(rdf_parser->world))) { + raptor_uri* ordinal_predicate_uri; + + element->parent->last_ordinal++; + ordinal_predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal); + + raptor_rdfxml_generate_statement(rdf_parser, + element->parent->subject, + ordinal_predicate_uri, + element->subject, + element->reified, + element->parent); + raptor_free_uri(ordinal_predicate_uri); + } else { + raptor_rdfxml_generate_statement(rdf_parser, + element->parent->subject, + element_name_uri, + element->subject, + element->reified, + element->parent); + } + } + finished = 1; + break; + + case RAPTOR_STATE_PARSETYPE_COLLECTION: + + finished = 1; + break; + + case RAPTOR_STATE_PARSETYPE_OTHER: + /* FALLTHROUGH */ + + case RAPTOR_STATE_PARSETYPE_LITERAL: + element->parent->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL; + + raptor_xml_writer_end_element(rdf_xml_parser->xml_writer, xml_element); + + finished = 1; + break; + + + case RAPTOR_STATE_PROPERTYELT: + case RAPTOR_STATE_MEMBER_PROPERTYELT: + /* A property element + * http://www.w3.org/TR/rdf-syntax-grammar/#propertyElt + * + * Literal content part is handled here. + * The element content is handled in the internal states + * Empty content is checked here. 
+ */ + + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) { + if(xml_element->content_cdata_seen) + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; + else if(xml_element->content_element_seen) + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES; + else { + /* Empty Literal */ + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; + } + + } + + + /* Handle terminating a rdf:parseType="Collection" list */ + if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION || + element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { + raptor_term* nil_term; + + if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) { + raptor_uri* nil_uri = RAPTOR_DAML_nil_URI(rdf_xml_parser); + nil_term = raptor_new_term_from_uri(rdf_parser->world, nil_uri); + } else { + nil_term = raptor_term_copy(RAPTOR_RDF_nil_term(rdf_parser->world)); + } + + if(!element->tail_id) { + /* If No List: set object of statement to rdf:nil */ + element->object = raptor_term_copy(nil_term); + } else { + raptor_uri* rest_uri = NULL; + raptor_term* tail_id_term; + + if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION) + rest_uri = RAPTOR_DAML_rest_URI(rdf_xml_parser); + else + rest_uri = RAPTOR_RDF_rest_URI(rdf_parser->world); + + tail_id_term = raptor_new_term_from_blank(rdf_parser->world, + element->tail_id); + + /* terminate the list */ + raptor_rdfxml_generate_statement(rdf_parser, + tail_id_term, + rest_uri, + nil_term, + NULL, + NULL); + + raptor_free_term(tail_id_term); + } + + raptor_free_term(nil_term); + + } /* end rdf:parseType="Collection" termination */ + + +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("Content type %s (%d)\n", + raptor_rdfxml_element_content_type_as_string(element->content_type), + element->content_type); +#endif + + switch(element->content_type) { + case 
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE: + + if(raptor_rdfxml_element_has_property_attributes(element) && + element->child_state == RAPTOR_STATE_DESCRIPTION) { + raptor_parser_error(rdf_parser, + "Property element '%s' has both property attributes and a node element content", + el_name); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + break; + } + + if(!element->object) { + if(element->rdf_attr[RDF_NS_resource]) { + raptor_uri* resource_uri; + resource_uri = raptor_new_uri_relative_to_base(rdf_parser->world, + raptor_rdfxml_inscope_base_uri(rdf_parser), + (const unsigned char*)element->rdf_attr[RDF_NS_resource]); + if(!resource_uri) + goto oom; + + element->object = raptor_new_term_from_uri(rdf_parser->world, + resource_uri); + raptor_free_uri(resource_uri); + + RAPTOR_FREE(char*, element->rdf_attr[RDF_NS_resource]); + element->rdf_attr[RDF_NS_resource] = NULL; + if(!element->object) + goto oom; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + } else if(element->rdf_attr[RDF_NS_nodeID]) { + unsigned char* resource_id; + resource_id = raptor_world_internal_generate_id(rdf_parser->world, + (unsigned char*)element->rdf_attr[RDF_NS_nodeID]); + if(!resource_id) + goto oom; + + element->object = raptor_new_term_from_blank(rdf_parser->world, + resource_id); + RAPTOR_FREE(char*, resource_id); + element->rdf_attr[RDF_NS_nodeID] = NULL; + if(!element->object) + goto oom; + + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + if(!raptor_valid_xml_ID(rdf_parser, + element->object->value.blank.string)) { + raptor_parser_error(rdf_parser, "Illegal rdf:nodeID value '%s'", (const char*)element->object->value.blank.string); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + break; + } + } else { + unsigned char* resource_id; + resource_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!resource_id) + goto oom; + + element->object = 
raptor_new_term_from_blank(rdf_parser->world, + resource_id); + RAPTOR_FREE(char*, resource_id); + + if(!element->object) + goto oom; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + } + + if(raptor_rdfxml_process_property_attributes(rdf_parser, element, + element->parent, + element->object)) + goto oom; + + } + + /* We know object is a resource, so delete any unsignficant + * whitespace so that FALLTHROUGH code below finds the object. + */ + if(xml_element->content_cdata_length) { + raptor_free_stringbuffer(xml_element->content_cdata_sb); + xml_element->content_cdata_sb = NULL; + xml_element->content_cdata_length = 0; + } + + /* FALLTHROUGH */ + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL: + + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) { + + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_ALLOW_BAGID)) { + /* Only an empty literal can have a rdf:bagID */ + if(element->bag) { + if(xml_element->content_cdata_length > 0) { + raptor_parser_error(rdf_parser, + "rdf:bagID is forbidden on a literal property element '%s'.", + el_name); + + /* prevent this being used later either */ + element->rdf_attr[RDF_NS_bagID] = NULL; + } else { + raptor_rdfxml_generate_statement(rdf_parser, + element->bag, + RAPTOR_RDF_type_URI(rdf_parser->world), + RAPTOR_RDF_Bag_term(rdf_parser->world), + NULL, + NULL); + } + } + } /* if rdf:bagID */ + + /* If there is empty literal content with properties + * generate a node to hang properties off + */ + if(raptor_rdfxml_element_has_property_attributes(element) && + xml_element->content_cdata_length > 0) { + raptor_parser_error(rdf_parser, + "Literal property element '%s' has property attributes", + el_name); + state = RAPTOR_STATE_SKIPPING; + element->child_state = RAPTOR_STATE_SKIPPING; + break; + } + + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL && + raptor_rdfxml_element_has_property_attributes(element) && + !element->object) { + unsigned char* 
object_id; + object_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!object_id) + goto oom; + + element->object = raptor_new_term_from_blank(rdf_parser->world, + object_id); + RAPTOR_FREE(char*, object_id); + + if(!element->object) + goto oom; + element->content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_RESOURCE; + } + + if(raptor_rdfxml_process_property_attributes(rdf_parser, element, + element, + element->object)) + goto oom; + } + + + /* just be friendly to older compilers and don't declare + * variables in the middle of a block + */ + if(1) { + raptor_uri *predicate_uri = NULL; + int predicate_ordinal = -1; + raptor_term* object_term = NULL; + + if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) { + predicate_ordinal = ++element->parent->last_ordinal; + predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, + predicate_ordinal); + + } else { + predicate_uri = element_name_uri; + } + + + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL) { + unsigned char* literal = NULL; + raptor_uri* literal_datatype; + unsigned char* literal_language = NULL; + + /* an empty stringbuffer - empty CDATA - is OK */ + if(raptor_stringbuffer_length(xml_element->content_cdata_sb)) { + literal = raptor_stringbuffer_as_string(xml_element->content_cdata_sb); + if(!literal) + goto oom; + } + + literal_datatype = element->object_literal_datatype; + if(!literal_datatype) + literal_language = (unsigned char*)raptor_sax2_inscope_xml_language(rdf_xml_parser->sax2); + + if(!literal_datatype && literal && + !raptor_unicode_check_utf8_nfc_string(literal, + xml_element->content_cdata_length)) { + raptor_log_level l; + + raptor_rdfxml_update_document_locator(rdf_parser); + l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? 
RAPTOR_LOG_LEVEL_ERROR : + RAPTOR_LOG_LEVEL_WARN; + + raptor_parser_log_error(rdf_parser, l, + "Property element '%s' has a string not in Unicode Normal Form C: %s", + el_name, literal); + } + + object_term = raptor_new_term_from_literal(rdf_parser->world, + literal, + literal_datatype, + literal_language); + } else { + object_term = raptor_term_copy(element->object); + } + + raptor_rdfxml_generate_statement(rdf_parser, + element->parent->subject, + predicate_uri, + object_term, + element->reified, + element->parent); + + if(predicate_ordinal >= 0) + raptor_free_uri(predicate_uri); + + raptor_free_term(object_term); + } + + break; + + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PRESERVED: + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL: + { + unsigned char *buffer; + size_t length; + raptor_term* xmlliteral_term = NULL; + + if(rdf_xml_parser->xml_writer) { + raptor_xml_writer_flush(rdf_xml_parser->xml_writer); + + raptor_free_iostream(rdf_xml_parser->iostream); + rdf_xml_parser->iostream = NULL; + + buffer = (unsigned char*)rdf_xml_parser->xml_content; + length = rdf_xml_parser->xml_content_length; + } else { + buffer = raptor_stringbuffer_as_string(xml_element->content_cdata_sb); + length = xml_element->content_cdata_length; + } + + if(!raptor_unicode_check_utf8_nfc_string(buffer, length)) { + raptor_log_level l; + + raptor_rdfxml_update_document_locator(rdf_parser); + l = (RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NON_NFC_FATAL)) ? 
RAPTOR_LOG_LEVEL_ERROR : + RAPTOR_LOG_LEVEL_WARN; + + raptor_parser_log_error(rdf_parser, l, + "Property element '%s' has XML literal content not in Unicode Normal Form C: %s", + el_name, buffer); + } + + xmlliteral_term = raptor_new_term_from_literal(rdf_parser->world, + buffer, + RAPTOR_RDF_XMLLiteral_URI(rdf_parser->world), + NULL); + + if(state == RAPTOR_STATE_MEMBER_PROPERTYELT) { + raptor_uri* predicate_uri; + + element->parent->last_ordinal++; + predicate_uri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, element->parent->last_ordinal); + + raptor_rdfxml_generate_statement(rdf_parser, + element->parent->subject, + predicate_uri, + xmlliteral_term, + element->reified, + element->parent); + + raptor_free_uri(predicate_uri); + } else { + raptor_rdfxml_generate_statement(rdf_parser, + element->parent->subject, + element_name_uri, + xmlliteral_term, + element->reified, + element->parent); + } + + raptor_free_term(xmlliteral_term); + + /* Finish the xml writer iostream for parseType="Literal" */ + if(rdf_xml_parser->xml_writer) { + raptor_free_xml_writer(rdf_xml_parser->xml_writer); + rdf_xml_parser->xml_writer = NULL; + RAPTOR_FREE(char*, rdf_xml_parser->xml_content); + rdf_xml_parser->xml_content = NULL; + rdf_xml_parser->xml_content_length = 0; + } + } + + break; + + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_COLLECTION: + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_DAML_COLLECTION: + + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_NODES: + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES: + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT: + + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_UNKNOWN: + case RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LAST: + default: + raptor_parser_fatal_error(rdf_parser, + "%s: Internal error in state RAPTOR_STATE_PROPERTYELT - got unexpected content type %s (%u)", + __FUNCTION__, + raptor_rdfxml_element_content_type_as_string(element->content_type), + element->content_type); + } /* end switch */ + + finished = 1; + break; + + case 
RAPTOR_STATE_INVALID: + default: + raptor_parser_fatal_error(rdf_parser, + "%s: Internal error - unexpected parser state %u - %s", + __FUNCTION__, + state, + raptor_rdfxml_state_as_string(state)); + finished = 1; + + } /* end switch */ + + if(state != element->state) { + element->state = state; +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("Moved to state %d - %s\n", state, + raptor_rdfxml_state_as_string(state)); +#endif + } + + } /* end while */ + +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); +#endif + + return; + + oom: + raptor_parser_fatal_error(rdf_parser, "Out of memory, skipping"); + element->state = RAPTOR_STATE_SKIPPING; +} + + + +static void +raptor_rdfxml_cdata_grammar(raptor_parser *rdf_parser, + const unsigned char *s, int len, + int is_cdata) +{ + raptor_rdfxml_parser* rdf_xml_parser; + raptor_rdfxml_element* element; + raptor_xml_element* xml_element; + raptor_state state; + int all_whitespace = 1; + int i; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + if(rdf_parser->failed) + return; + +#ifdef RAPTOR_DEBUG_CDATA + RAPTOR_DEBUG2("Adding characters (is_cdata=%d): '", is_cdata); + (void)fwrite(s, 1, len, stderr); + fprintf(stderr, "' (%d bytes)\n", len); +#endif + + for(i = 0; i < len; i++) + if(!isspace(s[i])) { + all_whitespace = 0; + break; + } + + element = rdf_xml_parser->current_element; + + /* this file is very broke - probably not XML, whatever */ + if(!element) + return; + + xml_element = element->xml_element; + + raptor_rdfxml_update_document_locator(rdf_parser); + + /* cdata never changes the parser state + * and the containing element state always determines what to do. 
+ * Use the child_state first if there is one, since that applies + */ + state = element->child_state; +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Working in state %s\n", raptor_rdfxml_state_as_string(state)); +#endif + + +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("Content type %s (%d)\n", + raptor_rdfxml_element_content_type_as_string(element->content_type), + element->content_type); +#endif + + + + if(state == RAPTOR_STATE_SKIPPING) + return; + + if(state == RAPTOR_STATE_UNKNOWN) { + /* Ignore all cdata if still looking for RDF */ + if(RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_SCANNING)) + return; + + /* Ignore all whitespace cdata before first element */ + if(all_whitespace) + return; + + /* This probably will never happen since that would make the + * XML not be well-formed + */ + raptor_parser_warning(rdf_parser, "Character data before RDF element."); + } + + + if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTIES) { + /* If found non-whitespace content, move to literal content */ + if(!all_whitespace) + element->child_content_type = RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; + } + + + if(!rdf_content_type_info[element->child_content_type].whitespace_significant) { + + /* Whitespace is ignored except for literal or preserved content types */ + if(all_whitespace) { +#ifdef RAPTOR_DEBUG_CDATA + RAPTOR_DEBUG2("Ignoring whitespace cdata inside element '%s'\n", + raptor_xml_element_get_name(element->parent->xml_element)->local_name); +#endif + return; + } + + if(xml_element->content_cdata_seen && xml_element->content_element_seen) { + raptor_qname* parent_el_name; + + parent_el_name = raptor_xml_element_get_name(element->parent->xml_element); + /* Uh oh - mixed content, this element has elements too */ + raptor_parser_warning(rdf_parser, "element '%s' has mixed content.", + parent_el_name->local_name); + } + } + + + if(element->content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_PROPERTY_CONTENT) { + element->content_type = 
RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_LITERAL; +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG3("Content type changed to %s (%d)\n", + raptor_rdfxml_element_content_type_as_string(element->content_type), + element->content_type); +#endif + } + + if(element->child_content_type == RAPTOR_RDFXML_ELEMENT_CONTENT_TYPE_XML_LITERAL) + raptor_xml_writer_cdata_counted(rdf_xml_parser->xml_writer, s, len); + else { + raptor_stringbuffer_append_counted_string(xml_element->content_cdata_sb, + s, len, 1); + element->content_cdata_all_whitespace &= all_whitespace; + + /* adjust stored length */ + xml_element->content_cdata_length += len; + } + + +#ifdef RAPTOR_DEBUG_CDATA + RAPTOR_DEBUG3("Content cdata now: %d bytes\n", + xml_element->content_cdata_length); +#endif +#ifdef RAPTOR_DEBUG_VERBOSE + RAPTOR_DEBUG2("Ending in state %s\n", raptor_rdfxml_state_as_string(state)); +#endif +} + + + +/** + * raptor_rdfxml_inscope_base_uri: + * @rdf_parser: Raptor parser object + * + * Return the in-scope base URI. + * + * Looks for the innermost xml:base on an element or document URI + * + * Return value: The URI string value or NULL on failure. + **/ +static raptor_uri* +raptor_rdfxml_inscope_base_uri(raptor_parser *rdf_parser) +{ + raptor_rdfxml_parser* rdf_xml_parser; + raptor_uri* base_uri; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + base_uri = raptor_sax2_inscope_base_uri(rdf_xml_parser->sax2); + if(!base_uri) + base_uri = rdf_parser->base_uri; + + return base_uri; +} + + +/** + * raptor_rdfxml_record_ID: + * @rdf_parser: Raptor parser object + * @element: Current element + * @id: ID string + * + * Record an rdf:ID / rdf:bagID value (with xml base) and check it hasn't been seen already. + * + * Record and check the ID values, if they have been seen already. + * per in-scope-base URI. 
+ * + * Return value: non-zero if already seen, or failure + **/ +static int +raptor_rdfxml_record_ID(raptor_parser *rdf_parser, + raptor_rdfxml_element *element, + const unsigned char *id) +{ + raptor_rdfxml_parser *rdf_xml_parser; + raptor_uri* base_uri; + size_t id_len; + int rc; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + if(!RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_CHECK_RDF_ID)) + return 0; + + base_uri = raptor_rdfxml_inscope_base_uri(rdf_parser); + + id_len = strlen((const char*)id); + + rc = raptor_id_set_add(rdf_xml_parser->id_set, base_uri, id, id_len); + + return (rc != 0); +} + + + +static void +raptor_rdfxml_update_document_locator(raptor_parser *rdf_parser) +{ + raptor_rdfxml_parser *rdf_xml_parser; + + rdf_xml_parser = (raptor_rdfxml_parser*)rdf_parser->context; + + raptor_sax2_update_document_locator(rdf_xml_parser->sax2, + &rdf_parser->locator); +} + + + +static void +raptor_rdfxml_parse_finish_factory(raptor_parser_factory* factory) +{ +} + + +static const char* const rdfxml_names[3] = { "rdfxml", "raptor", NULL}; + +static const char* const rdfxml_uri_strings[3] = { + "http://www.w3.org/ns/formats/RDF_XML", + "http://www.w3.org/TR/rdf-syntax-grammar", + NULL +}; + +#define RDFXML_TYPES_COUNT 2 +static const raptor_type_q rdfxml_types[RDFXML_TYPES_COUNT + 1] = { + { "application/rdf+xml", 19, 10}, + { "text/rdf", 8, 6}, + { NULL, 0, 0} +}; + +static int +raptor_rdfxml_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = rdfxml_names; + + factory->desc.mime_types = rdfxml_types; + + factory->desc.label = "RDF/XML"; + factory->desc.uri_strings = rdfxml_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_rdfxml_parser); + + factory->init = raptor_rdfxml_parse_init; + factory->terminate = raptor_rdfxml_parse_terminate; + factory->start = raptor_rdfxml_parse_start; + factory->chunk = raptor_rdfxml_parse_chunk; 
+ factory->finish_factory = raptor_rdfxml_parse_finish_factory; + factory->recognise_syntax = raptor_rdfxml_parse_recognise_syntax; + + return rc; +} + + +int +raptor_init_parser_rdfxml(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_rdfxml_parser_register_factory); +} + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +void +raptor_rdfxml_parser_stats_print(raptor_rdfxml_parser* rdf_xml_parser, + FILE *stream) +{ + fputs("rdf:ID set ", stream); + raptor_id_set_stats_print(rdf_xml_parser->id_set, stream); +} +#endif diff --git a/src/raptor_rfc2396.c b/src/raptor_rfc2396.c new file mode 100644 index 0000000..89183d9 --- /dev/null +++ b/src/raptor_rfc2396.c @@ -0,0 +1,881 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_rfc2396.c - Raptor URI resolving from RFC2396 and RFC3986 + * + * Copyright (C) 2004-2009, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +/** + * raptor_new_uri_detail: + * @uri_string: The URI string to split + * + * Create a URI detailed structure from a URI string. + * + **/ +raptor_uri_detail* +raptor_new_uri_detail(const unsigned char *uri_string) +{ + const unsigned char *s = NULL; + unsigned char *b = NULL; + raptor_uri_detail *ud; + size_t uri_len; + + if(!uri_string) + return NULL; + + uri_len = strlen((const char*)uri_string); + + /* The extra +5 is for the 5 \0s that may be added for each component + * even if the entire URI is empty + */ + ud = RAPTOR_CALLOC(raptor_uri_detail*, 1, sizeof(*ud) + uri_len + 5 + 1); + if(!ud) + return NULL; + ud->uri_len = uri_len; + ud->buffer = (unsigned char*)((unsigned char*)ud + sizeof(raptor_uri_detail)); + + s = uri_string; + b = ud->buffer; + + + /* Split the URI into it's syntactic components */ + + /* + * scheme is checked in more detail since it is important + * to recognise absolute URIs for resolving, and it is easy to do. + * + * scheme = alpha *( alpha | digit | "+" | "-" | "." ) + * RFC 2396 section 3.1 Scheme Component + */ + if(*s && isalpha((int)*s)) { + s++; + + while(*s && (isalnum((int)*s) || + (*s == '+') || (*s == '-') || (*s == '.'))) + s++; + + if(*s == ':') { + /* it matches the URI scheme grammar, so store this as a scheme */ + ud->scheme = b; + ud->scheme_len = s-uri_string; + + while(*uri_string != ':') + *b++ = *uri_string++; + + *b++ = '\0'; + + /* and move past the : */ + s++; + } else + s = uri_string; + } + + + /* authority */ + if(*s && s[1] && *s == '/' && s[1] == '/') { + ud->authority = b; + + s += 2; /* skip "//" */ + + while(*s && *s != '/' && *s != '?' 
&& *s != '#') + *b++ = *s++; + + ud->authority_len = b-ud->authority; + + *b++ = '\0'; + } + + + /* path */ + if(*s && *s != '?' && *s != '#') { + ud->path = b; + + while(*s && *s != '?' && *s != '#') + *b++ = *s++; + + ud->path_len = b-ud->path; + + *b++ = '\0'; + } + + + /* query */ + if(*s && *s == '?') { + ud->query = b; + + s++; + + while(*s && *s != '#') + *b++ = *s++; + + ud->query_len = b-ud->query; + + *b++ = '\0'; + } + + + /* fragment identifier - RFC2396 Section 4.1 */ + if(*s && *s == '#') { + ud->fragment = b; + + s++; + + while(*s) + *b++ = *s++; + + ud->fragment_len = b-ud->fragment; + + *b='\0'; + } + + ud->is_hierarchical = (ud->path && *ud->path == '/'); + + return ud; +} + + +void +raptor_free_uri_detail(raptor_uri_detail* uri_detail) +{ + /* Also frees the uri_detail->buffer allocated in raptor_uri_parse() */ + RAPTOR_FREE(raptor_uri_detail, uri_detail); +} + + +unsigned char* +raptor_uri_detail_to_string(raptor_uri_detail *ud, size_t* len_p) +{ + size_t len = 0; + unsigned char *buffer, *p; + + if(ud->scheme) + len+= ud->scheme_len+1; /* : */ + if(ud->authority) + len+= 2 + ud->authority_len; /* // */ + if(ud->path) + len+= ud->path_len; + if(ud->fragment) + len+= 1 + ud->fragment_len; /* # */ + if(ud->query) + len+= 1 + ud->query_len; /* ? 
/*
 * raptor_uri_normalize_path:
 * @path_buffer: URI/file path, NUL terminated; rewritten in place
 * @path_len: length of above
 *
 * INTERNAL - normalize a URI path (POSIX path too)
 *
 * Works in three passes over the buffer:
 *  1. remove "./" path components and a trailing "."
 *  2. remove "<component>/../" pairs and a trailing "<component>/.."
 *  3. remove leading "/../" and "/./" (RFC3986 5.4.2 abnormal examples)
 *
 * The path only ever shrinks, so no allocation is done.
 *
 * Return value: new path length or 0 on failure (@path_buffer is NULL)
 */
size_t
raptor_uri_normalize_path(unsigned char* path_buffer, size_t path_len)
{
  unsigned char *p, *cur, *prev, *s;
  unsigned char last_char;

  if(!path_buffer)
    return 0;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
  RAPTOR_DEBUG3("Input path \"%s\" (%zu)\n", (const char*)path_buffer, path_len);
#endif

  /* remove all "./" path components */
  for(p = (prev = path_buffer); *p; p++) {
    if(*p != '/')
      continue;

    if(p == (prev+1) && *prev == '.') {
      unsigned char *dest = prev;

      /* shift the rest of the path down over the "./" */
      p++;
      while(*p)
        *dest++ = *p++;
      *dest= '\0';

      p = prev;
      path_len -= 2;
      if(!*p)
        break;
    } else {
      prev = p+1;
    }
  }

  if(p == (prev+1) && *prev == '.') {
    /* Remove "." at the end of a path */
    *prev = '\0';
    path_len--;
  }


#if defined(RAPTOR_DEBUG)
  if(path_len != strlen((const char*)path_buffer))
    RAPTOR_FATAL4("Path '%s' length %ld does not match calculated %ld.", (const char*)path_buffer, (long)strlen((const char*)path_buffer), (long)path_len);
#endif

  /* Remove all "<component>/../" path components */

  /*
   * The pointers:
   *   <component>/../<next>
   *   prev-^       cur-^
   * and p points to the previous prev (can be NULL)
   */
  prev = NULL;
  cur = NULL;
  p = NULL;
  last_char='\0';

  s = path_buffer;
  while(*s) {

    /* find the path components */
    if(*s != '/') {
      /* If it is the start or following a /, record a new path component */
      if(!last_char || last_char == '/') {
        /* Store 2 path components */
        if(!prev)
          prev = s;
        else if(!cur)
          cur = s;
      }
      last_char = *s++;
      continue;
    }


    /* Wait till there are two path components */
    if(!prev || !cur) {
      last_char = *s++;
      continue;
    }

#if defined(RAPTOR_DEBUG)
    if(path_len != strlen((const char*)path_buffer))
      RAPTOR_FATAL3("Path length %ld does not match calculated %ld.", (long)strlen((const char*)path_buffer), (long)path_len);
#endif

    /* If the current one is '..' */
    if(s == (cur+2) && cur[0] == '.' && cur[1] == '.') {

      /* and if the previous one isn't '..'
       * (which means it is beyond the root such as a path "/foo/../..")
       */
      if(cur != (prev+3) || prev[0] != '.' || prev[1] != '.') {
        unsigned char *dest = prev;
        unsigned char *src = s + 1;

        /* remove the <component>/../<next>
         *            prev-^    cur-^ ^-s
         */
        size_t len = (size_t)(s - prev) + 1; /* length of path component we are removing */

        while(*src)
          *dest++ = *src++;
        *dest = '\0';
        path_len -= len;

        if(p && p < prev) {
          /* We know the previous prev path component and we didn't do
           * two adjustments in a row, so can adjust the
           * pointers to continue the newly shortened path:
           * s to the / before <next> (stepped over just below)
           * prev to the previous prev path component
           * cur to NULL.  Will be set by the next loop iteration since s
           * points to a '/', last_char will be set to *s.
           */
          s = prev-1;
          prev = p;
          cur = NULL;
          p = NULL;
        } else {
          /* Otherwise must start from the beginning again.  The
           * first byte has to be looked at again as a possible
           * component start, so do not step over it.
           */
          prev = NULL;
          cur = NULL;
          p = NULL;
          s = path_buffer;
          last_char = '\0';
          continue;
        }

      }

    } else {
      /* otherwise this is not a special path component so
       * shift the path components stack
       */
      p = prev;
      prev = cur;
      cur = NULL;
    }

    last_char = *s++;
  }


  /* cur is checked before use: it is NULL when fewer than two
   * components are being tracked
   */
  if(prev && cur && s == (cur+2) && cur[0] == '.' && cur[1] == '.') {
    /* Remove <component>/.. at the end of the path */
    *prev = '\0';
    path_len -= (size_t)(s-prev);
  }


#if defined(RAPTOR_DEBUG)
  if(path_len != strlen((const char*)path_buffer))
    RAPTOR_FATAL3("Path length %ld does not match calculated %ld.", (long)strlen((const char*)path_buffer), (long)path_len);
#endif

  /* RFC3986 Appendix C.2 / 5.4.2 Abnormal Examples
   * Remove leading /../ and /./
   */
  p = path_buffer;
  for(;;) {
    if(!strncmp((const char *)p, "/../", 4)) {
      path_len -= 3;
      memmove(p, p+3, path_len+1);
    } else if(!strncmp((const char *)p, "/./", 3)) {
      path_len -= 2;
      memmove(p, p+2, path_len+1);
    } else
      break;
  }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
  fprintf(stderr, " Normalized path \"%s\" (%zu)\n", path_buffer, path_len);
#endif

  return path_len;
}
+ * + * Return value: length of resolved string or 0 on failure (such as @buffer too small) + **/ +size_t +raptor_uri_resolve_uri_reference(const unsigned char *base_uri, + const unsigned char *reference_uri, + unsigned char *buffer, size_t length) +{ + raptor_uri_detail *ref = NULL; + raptor_uri_detail *base = NULL; + raptor_uri_detail result; /* static - pointers go to inside ref or base */ + unsigned char *path_buffer = NULL; + unsigned char *p; + size_t result_len = 0; + size_t l; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG4("base uri='%s', reference_uri='%s, buffer size %d\n", + (base_uri ? (const char*)base_uri : "NULL"), + (reference_uri ? (const char*)reference_uri : "NULL"), + (int)length); +#endif + + *buffer = '\0'; + memset(&result, 0, sizeof(result)); + + ref = raptor_new_uri_detail(reference_uri); + if(!ref) + goto resolve_tidy; + + + /* is reference URI "" or "#frag"? */ + if(!ref->scheme && !ref->authority && !ref->path && !ref->query) { + unsigned char c; + + /* Copy base URI to result up to '\0' or '#' */ + for(p = buffer, l = length; + (c = *base_uri) && c != '#' && l; + p++, base_uri++, l--) + *p = c; + + if(!l) { + result_len = 0; + goto resolve_tidy; + } + *p = '\0'; + + if(ref->fragment) { + unsigned char *src = ref->fragment; + /* Append any fragment */ + *p++ = '#'; + while(*src && l) { + *p++ = *src++; + l--; + } + if(!l) { + result_len = 0; + goto resolve_tidy; + } + *p = '\0'; + } + + result_len = p - buffer; + goto resolve_tidy; + } + + /* reference has a scheme - is an absolute URI */ + if(ref->scheme) { + /* Copy over schema and authority */ + result.scheme = ref->scheme; + result.scheme_len = ref->scheme_len; + result.authority = ref->authority; + result.authority_len = ref->authority_len; + + /* Allocate path so it can be normalized below */ + result.path_len = ref->path_len; + path_buffer = RAPTOR_MALLOC(unsigned char*, result.path_len + 1); + if(!path_buffer) { + result_len = 0; + goto resolve_tidy; + } + 
if(ref->path_len) + memcpy(path_buffer, ref->path, ref->path_len); + path_buffer[result.path_len] = '\0'; + result.path = path_buffer; + + goto normalize; + } + + + /* now the reference URI must be schemeless, i.e. relative */ + base = raptor_new_uri_detail(base_uri); + if(!base) + goto resolve_tidy; + + /* result URI must be of the base URI scheme */ + result.scheme = base->scheme; + result.scheme_len = base->scheme_len; + + /* an authority is given ( [user:pass@]hostname[:port] for http) + * so the reference URI is like //authority + */ + if(ref->authority) { + result.authority = ref->authority; + result.authority_len = ref->authority_len; + result.path = ref->path; + result.path_len = ref->path_len; + goto resolve_end; + } + + /* no - so now we have path (maybe with query, fragment) relative to base */ + result.authority = base->authority; + result.authority_len = base->authority_len; + + + if(ref->is_hierarchical || !base->is_hierarchical) { + /* if the reference path is absolute OR the base URI + * is a non-hierarchical URI then just copy the reference path + * to the result and normalize. 
+ */ + path_buffer = RAPTOR_MALLOC(unsigned char*, ref->path_len + 1); + if(!path_buffer) { + result_len = 0; + goto resolve_tidy; + } + result.path = path_buffer; + result.path_len = ref->path_len; + if(ref->path) + memcpy(path_buffer, ref->path, result.path_len); + path_buffer[result.path_len] = '\0'; + goto normalize; + } + + + /* need to resolve relative path */ + + /* Build the result path in path_buffer */ + result.path_len = 0; + + if(base->path) + result.path_len += base->path_len; + else { + /* Add a missing path - makes the base URI 1 character longer */ + base->path = (unsigned char*)"/"; /* static, but copied and not free()d */ + base->path_len = 1; + base->uri_len++; + result.path_len++; + } + + if(ref->path) + result.path_len += ref->path_len; + + /* the resulting path can be no longer than result.path_len */ + path_buffer = RAPTOR_MALLOC(unsigned char*, result.path_len + 1); + if(!path_buffer) { + result_len = 0; + goto resolve_tidy; + } + result.path = path_buffer; + *path_buffer = '\0'; + + if(!ref->path) { + /* If there is no reference path, copy the full base over */ + result.path_len = base->path_len; + memcpy(path_buffer, base->path, result.path_len); + } else { + /** Otherwise copy base path up to previous / and append ref path */ + for(p = base->path + base->path_len - 1; p > base->path && *p != '/'; p--) + ; + + if(p >= base->path) { + result.path_len = p-base->path + 1; + + /* Found a /, copy everything before that to path_buffer */ + memcpy(path_buffer, base->path, result.path_len); + path_buffer[result.path_len] = '\0'; + } + + memcpy(path_buffer + result.path_len, ref->path, ref->path_len + 1); + result.path_len += ref->path_len; + } + path_buffer[result.path_len] = '\0'; + + normalize: + + result.path_len = raptor_uri_normalize_path(path_buffer, result.path_len); + + resolve_end: + + if(ref->query) { + result.query = ref->query; + result.query_len = ref->query_len; + } + + if(ref->fragment) { + result.fragment = ref->fragment; + 
result.fragment_len = ref->fragment_len; + } + + l = 0; + if(result.scheme) + l = result.scheme_len + 1; + if(result.authority) + l += 2 + result.authority_len; + if(result.path) + l += result.path_len; + if(result.query) + l += 1 + result.query_len; + if(result.fragment) + l += 1 + result.fragment_len; + + if(l > length) { + /* Output buffer is too small */ + result_len = 0; + goto resolve_tidy; + } + + p = buffer; + if(result.scheme) { + memcpy(p, result.scheme, result.scheme_len); + p += result.scheme_len; + *p++ = ':'; + } + + if(result.authority) { + *p++ = '/'; + *p++ = '/'; + memcpy(p, result.authority, result.authority_len); + p+= result.authority_len; + } + + if(result.path) { + memcpy(p, result.path, result.path_len); + p+= result.path_len; + } + + if(result.query) { + *p++ = '?'; + memcpy(p, result.query, result.query_len); + p+= result.query_len; + } + + if(result.fragment) { + *p++ = '#'; + memcpy(p, result.fragment, result.fragment_len); + p+= result.fragment_len; + } + *p = '\0'; + + result_len = p - buffer; + + resolve_tidy: + if(path_buffer) + RAPTOR_FREE(char*, path_buffer); + if(base) + raptor_free_uri_detail(base); + if(ref) + raptor_free_uri_detail(ref); + +#ifdef RAPTOR_DEBUG + RAPTOR_ASSERT(result_len && strlen((const char*)buffer) != result_len, + "URI string is not declared length"); +#endif + + return result_len; +} + +#endif + + + +#ifdef STANDALONE + +#include <stdio.h> + +/* one more prototype */ +int main(int argc, char *argv[]); + +static const char *program; + + +static int +check_resolve(const char *base_uri, const char *reference_uri, + const char *result_uri) +{ + unsigned char buffer[1024]; + + raptor_uri_resolve_uri_reference((const unsigned char*)base_uri, + (const unsigned char*)reference_uri, + buffer, sizeof(buffer)); + + if(strcmp((const char*)buffer, result_uri)) { + fprintf(stderr, + "%s: raptor_uri_resolve_uri_reference(%s, %s) FAILED giving '%s' != '%s'\n", + program, base_uri, reference_uri, + buffer, result_uri); + 
return 1; + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + fprintf(stderr, + "%s: raptor_uri_resolve_uri_reference(%s, %s) OK giving '%s'\n", + program, base_uri, reference_uri, + buffer); +#endif + return 0; +} + + +static int +check_parses(const char *uri_string) { + raptor_uri_detail* ud; + ud = raptor_new_uri_detail((unsigned const char*)uri_string); + if(!ud) { + fprintf(stderr, "%s: raptor_new_uri_detail(%s) FAILED to parse\n", + program, uri_string); + return 1; + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + fprintf(stderr, "%s: raptor_new_uri_detail(%s) OK\n", + program, uri_string); +#endif + raptor_free_uri_detail(ud); + return 0; +} + + +int +main(int argc, char *argv[]) +{ + const char *base_uri="http://example.org/bpath/cpath/d;p?querystr#frag"; + int failures = 0; + + program = raptor_basename(argv[0]); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Using base URI '%s'\n", program, base_uri); +#endif + + /* Tests from RFC2396 Appendix C + * and RFC3986 Section 5 + * + * Modifications: + * - add 'path' when items are path components to make easier to read + * - use example.org instead of 'a' for the authority + * - results are against the base_uri above + */ + + /* Appendix C.1 / 5.4.1 Normal Examples */ + failures += check_resolve(base_uri, "g:h", "g:h"); + failures += check_resolve(base_uri, "gpath", "http://example.org/bpath/cpath/gpath"); + failures += check_resolve(base_uri, "./gpath", "http://example.org/bpath/cpath/gpath"); + failures += check_resolve(base_uri, "gpath/", "http://example.org/bpath/cpath/gpath/"); + failures += check_resolve(base_uri, "/gpath", "http://example.org/gpath"); + failures += check_resolve(base_uri, "//gpath", "http://gpath"); + failures += check_resolve(base_uri, "?y", "http://example.org/bpath/cpath/d;p?y"); + failures += check_resolve(base_uri, "gpath?y", "http://example.org/bpath/cpath/gpath?y"); + failures += check_resolve(base_uri, "#s", 
"http://example.org/bpath/cpath/d;p?querystr#s"); + failures += check_resolve(base_uri, "gpath#s", "http://example.org/bpath/cpath/gpath#s"); + failures += check_resolve(base_uri, "gpath?y#s", "http://example.org/bpath/cpath/gpath?y#s"); + failures += check_resolve(base_uri, ";x", "http://example.org/bpath/cpath/;x"); + failures += check_resolve(base_uri, "gpath;x", "http://example.org/bpath/cpath/gpath;x"); + failures += check_resolve(base_uri, "gpath;x?y#s", "http://example.org/bpath/cpath/gpath;x?y#s"); + failures += check_resolve(base_uri, ".", "http://example.org/bpath/cpath/"); + failures += check_resolve(base_uri, "./", "http://example.org/bpath/cpath/"); + failures += check_resolve(base_uri, "..", "http://example.org/bpath/"); + failures += check_resolve(base_uri, "../", "http://example.org/bpath/"); + failures += check_resolve(base_uri, "../gpath", "http://example.org/bpath/gpath"); + failures += check_resolve(base_uri, "../..", "http://example.org/"); + failures += check_resolve(base_uri, "../../", "http://example.org/"); + failures += check_resolve(base_uri, "../../gpath", "http://example.org/gpath"); + + + /* Appendix C.2 / 5.4.2 Abnormal Examples */ + failures += check_resolve(base_uri, "", "http://example.org/bpath/cpath/d;p?querystr"); /* This is a Normal Example in RFC 3986 */ + + failures += check_resolve(base_uri, "../../../gpath", "http://example.org/gpath"); /* RFC 3986 changed the answer here */ + failures += check_resolve(base_uri, "../../../../gpath", "http://example.org/gpath"); /* RFC 3986 changed the answer here */ + + failures += check_resolve(base_uri, "/./gpath", "http://example.org/gpath"); /* RFC 3986 changed the answer here */ + failures += check_resolve(base_uri, "/../gpath", "http://example.org/gpath"); /* RFC 3986 changed the answer here */ + failures += check_resolve(base_uri, "gpath.", "http://example.org/bpath/cpath/gpath."); + failures += check_resolve(base_uri, ".gpath", "http://example.org/bpath/cpath/.gpath"); + failures += 
check_resolve(base_uri, "gpath..", "http://example.org/bpath/cpath/gpath.."); + failures += check_resolve(base_uri, "..gpath", "http://example.org/bpath/cpath/..gpath"); + + failures += check_resolve(base_uri, "./../gpath", "http://example.org/bpath/gpath"); + failures += check_resolve(base_uri, "./gpath/.", "http://example.org/bpath/cpath/gpath/"); + failures += check_resolve(base_uri, "gpath/./hpath", "http://example.org/bpath/cpath/gpath/hpath"); + failures += check_resolve(base_uri, "gpath/../hpath", "http://example.org/bpath/cpath/hpath"); + failures += check_resolve(base_uri, "gpath;x = 1/./y", "http://example.org/bpath/cpath/gpath;x = 1/y"); + failures += check_resolve(base_uri, "gpath;x = 1/../y", "http://example.org/bpath/cpath/y"); + + failures += check_resolve(base_uri, "gpath?y/./x", "http://example.org/bpath/cpath/gpath?y/./x"); + failures += check_resolve(base_uri, "gpath?y/../x", "http://example.org/bpath/cpath/gpath?y/../x"); + failures += check_resolve(base_uri, "gpath#s/./x", "http://example.org/bpath/cpath/gpath#s/./x"); + failures += check_resolve(base_uri, "gpath#s/../x", "http://example.org/bpath/cpath/gpath#s/../x"); + + /* RFC 3986 makes this the strict answer but also allows + * http://example.org/bpath/cpath/gauthority + * for backward compatibility + */ + failures += check_resolve(base_uri, "http:gauthority", "http:gauthority"); + + + /* Examples from 1.3 */ + failures += check_parses("ftp://ftp.is.co.za/rfc/rfc1808.txt"); + failures += check_parses("gopher://spinaltap.micro.umn.edu/00/Weather/California/Los%20Angeles"); + failures += check_parses("http://www.math.uio.no/faq/compression-faq/part1.html"); + failures += check_parses("mailto:mduerst@ifi.unizh.ch"); + failures += check_parses("news:comp.infosystems.www.servers.unix"); + failures += check_parses("telnet://melvyl.ucop.edu/"); + failures += check_parses(""); + + /* This is a not-crashing test */ + raptor_new_uri_detail(NULL); + + /* Extra checks not in RFC2396 */ + + /* RDF 
xml:base check that fragments and query strings are removed */ + failures += check_resolve(base_uri, "gpath/../../../hpath", "http://example.org/hpath"); + + /* RFC3986 changed the answer to this test + * Was "RDF xml:base check that extra ../ are not lost" + * with answer "http://example.org/../../../absfile" + */ + failures += check_resolve("http://example.org/dir/file", "../../../absfile", "http://example.org/absfile"); + + /* RDF xml:base check that an absolute URI replaces */ + failures += check_resolve("http://example.org/dir/file", "http://another.example.org/dir2/file2", "http://another.example.org/dir2/file2"); + + /* base URI and relative URI with no absolute path works */ + failures += check_resolve("foo:", "not_scheme:blah", "foo:not_scheme:blah"); + + /* Issue#000177 http://bugs.librdf.org/mantis/view.php?id=177 */ + failures += check_resolve("foo:1234", "9999", "foo:9999"); + + /* RDFa 1.1 test 0114 */ + failures += check_resolve("http://example.org/file", + "?foo=bar../baz", + "http://example.org/file?foo=bar../baz"); + + /* BUG 556 - http://bugs.librdf.org/mantis/view.php?id=556 */ + failures += check_resolve("http://example.com/folder1/folder2/", + "http://example.com/folder1/folder2/../folder1/../entity1", + "http://example.com/folder1/entity1"); + + return failures; +} + +#endif diff --git a/src/raptor_rss.c b/src/raptor_rss.c new file mode 100644 index 0000000..ebbbbbb --- /dev/null +++ b/src/raptor_rss.c @@ -0,0 +1,1733 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_rss.c - Raptor Feeds (RSS and Atom) tag soup parser + * + * Copyright (C) 2003-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. 
GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif + + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" +#include "raptor_rss.h" + + +/* local prototypes */ + +static void raptor_rss_uplift_items(raptor_parser* rdf_parser); +static int raptor_rss_emit(raptor_parser* rdf_parser); + +static void raptor_rss_start_element_handler(void *user_data, raptor_xml_element* xml_element); +static void raptor_rss_end_element_handler(void *user_data, raptor_xml_element* xml_element); +static void raptor_rss_cdata_handler(void *user_data, raptor_xml_element* xml_element, const unsigned char *s, int len); +static void raptor_rss_comment_handler(void *user_data, raptor_xml_element* xml_element, const unsigned char *s); +static void raptor_rss_sax2_new_namespace_handler(void *user_data, raptor_namespace* nspace); + +/* + * RSS parser object + */ +struct raptor_rss_parser_s { + /* static model */ + raptor_rss_model model; + + /* current line */ + char *line; + /* current line length */ + int line_length; + /* current char in line buffer */ + int offset; + + /* static statement for use in passing to user code */ + raptor_statement statement; + + raptor_sax2 *sax2; + + /* rss node type of current CONTAINER item */ + raptor_rss_type current_type; + + /* one place stack */ + raptor_rss_type prev_type; + raptor_rss_fields_type 
current_field; + + /* emptyness of current element */ + int element_is_empty; + + /* stack of namespaces */ + raptor_namespace_stack *nstack; + + /* non-0 if this is an atom 1.0 parser */ + int is_atom; + + /* namespaces declared here */ + raptor_namespace* nspaces[RAPTOR_RSS_NAMESPACES_SIZE]; + + /* namespaces seen during parsing or creating output model */ + char nspaces_seen[RAPTOR_RSS_NAMESPACES_SIZE]; + + /* current BLOCK pointer (inside CONTAINER of type current_type) */ + raptor_rss_block *current_block; +}; + +typedef struct raptor_rss_parser_s raptor_rss_parser; + + +typedef enum { + RAPTOR_RSS_CONTENT_TYPE_NONE, + RAPTOR_RSS_CONTENT_TYPE_XML, + RAPTOR_RSS_CONTENT_TYPE_TEXT +} raptor_rss_content_type; + + +struct raptor_rss_element_s +{ + raptor_world* world; + + raptor_uri* uri; + + /* Two types of content */ + raptor_rss_content_type type; + + /* 1) XML */ + raptor_xml_writer* xml_writer; + /* XML written to this iostream to the xml_content string */ + raptor_iostream* iostream; + /* ends up here */ + void *xml_content; + size_t xml_content_length; + + /* 2) cdata */ + raptor_stringbuffer* sb; +}; + +typedef struct raptor_rss_element_s raptor_rss_element; + + +static void +raptor_free_rss_element(raptor_rss_element *rss_element) +{ + if(rss_element->uri) + raptor_free_uri(rss_element->uri); + if(rss_element->type == RAPTOR_RSS_CONTENT_TYPE_XML) { + if(rss_element->xml_writer) + raptor_free_xml_writer(rss_element->xml_writer); + if(rss_element->iostream) + raptor_free_iostream(rss_element->iostream); + if(rss_element->xml_content) + raptor_free_memory(rss_element->xml_content); + } + if(rss_element->sb) + raptor_free_stringbuffer(rss_element->sb); + + RAPTOR_FREE(raptor_rss_element, rss_element); +} + + +static int +raptor_rss_parse_init(raptor_parser* rdf_parser, const char *name) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + raptor_sax2* sax2; + int n; + + raptor_rss_common_init(rdf_parser->world); + + 
raptor_rss_model_init(rdf_parser->world, &rss_parser->model); + + rss_parser->prev_type = RAPTOR_RSS_NONE; + rss_parser->current_field = RAPTOR_RSS_FIELD_NONE; + rss_parser->current_type = RAPTOR_RSS_NONE; + rss_parser->current_block = NULL; + + if(rss_parser->sax2) { + raptor_free_sax2(rss_parser->sax2); + rss_parser->sax2 = NULL; + } + + rss_parser->nstack = raptor_new_namespaces(rdf_parser->world, 1); + + /* Initialise the namespaces */ + for(n = 0; n < RAPTOR_RSS_NAMESPACES_SIZE; n++) { + unsigned const char* prefix; + raptor_uri* uri; + raptor_namespace* nspace = NULL; + + prefix = (unsigned const char*)raptor_rss_namespaces_info[n].prefix; + uri = rdf_parser->world->rss_namespaces_info_uris[n]; + if(prefix && uri) + nspace = raptor_new_namespace_from_uri(rss_parser->nstack, + prefix, uri, 0); + rss_parser->nspaces[n] = nspace; + } + + sax2 = raptor_new_sax2(rdf_parser->world, &rdf_parser->locator, rdf_parser); + rss_parser->sax2 = sax2; + + raptor_sax2_set_start_element_handler(sax2, raptor_rss_start_element_handler); + raptor_sax2_set_end_element_handler(sax2, raptor_rss_end_element_handler); + raptor_sax2_set_characters_handler(sax2, raptor_rss_cdata_handler); + raptor_sax2_set_cdata_handler(sax2, raptor_rss_cdata_handler); + raptor_sax2_set_comment_handler(sax2, raptor_rss_comment_handler); + raptor_sax2_set_namespace_handler(sax2, raptor_rss_sax2_new_namespace_handler); + + raptor_statement_init(&rss_parser->statement, rdf_parser->world); + + return 0; +} + + +static void +raptor_rss_parse_terminate(raptor_parser *rdf_parser) +{ + raptor_rss_parser *rss_parser = (raptor_rss_parser*)rdf_parser->context; + int n; + + if(rss_parser->sax2) + raptor_free_sax2(rss_parser->sax2); + + raptor_rss_model_clear(&rss_parser->model); + + for(n = 0; n < RAPTOR_RSS_NAMESPACES_SIZE; n++) { + if(rss_parser->nspaces[n]) + raptor_free_namespace(rss_parser->nspaces[n]); + } + + if(rss_parser->nstack) + raptor_free_namespaces(rss_parser->nstack); + + 
raptor_rss_common_terminate(rdf_parser->world); +} + + +static int +raptor_rss_parse_start(raptor_parser *rdf_parser) +{ + raptor_uri *uri = rdf_parser->base_uri; + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + int n; + + /* base URI required for RSS */ + if(!uri) + return 1; + + for(n = 0; n < RAPTOR_RSS_NAMESPACES_SIZE; n++) + rss_parser->nspaces_seen[n] = 'N'; + + /* Optionally forbid internal network and file requests in the XML parser */ + raptor_sax2_set_option(rss_parser->sax2, + RAPTOR_OPTION_NO_NET, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_NET)); + raptor_sax2_set_option(rss_parser->sax2, + RAPTOR_OPTION_NO_FILE, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_NO_FILE)); + raptor_sax2_set_option(rss_parser->sax2, + RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(rdf_parser, RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES)); + if(rdf_parser->uri_filter) + raptor_sax2_set_uri_filter(rss_parser->sax2, rdf_parser->uri_filter, + rdf_parser->uri_filter_user_data); + + raptor_sax2_parse_start(rss_parser->sax2, uri); + + return 0; +} + + + +static int +raptor_rss_add_container(raptor_rss_parser *rss_parser, const char *name) +{ + raptor_rss_type type = RAPTOR_RSS_NONE; + + if(!strcmp(name, "rss") || !strcmp(name, "rdf") || !strcmp(name, "RDF")) { + /* rss */ + } else if(!raptor_strcasecmp(name, "channel")) { + /* rss or atom 0.3 channel */ + type = RAPTOR_RSS_CHANNEL; + } else if(!strcmp(name, "feed")) { + /* atom 1.0 feed */ + type = RAPTOR_RSS_CHANNEL; + rss_parser->is_atom = 1; + } else if(!strcmp(name, "item")) { + type = RAPTOR_RSS_ITEM; + } else if(!strcmp(name, "entry")) { + type = RAPTOR_RSS_ITEM; + rss_parser->is_atom = 1; + } else { + int i; + for(i = 0; i < RAPTOR_RSS_COMMON_SIZE; i++) { + if(!(raptor_rss_items_info[i].flags & RAPTOR_RSS_ITEM_CONTAINER)) + continue; + + if(!strcmp(name, raptor_rss_items_info[i].name)) { + /* rss and atom clash on the author name field (rss) 
or type (atom) */ + if(i != RAPTOR_ATOM_AUTHOR || + (i == RAPTOR_ATOM_AUTHOR && rss_parser->is_atom)) { + type = (raptor_rss_type)i; + break; + } + } + } + } + + if(type != RAPTOR_RSS_NONE) { + if(type == RAPTOR_RSS_ITEM) + raptor_rss_model_add_item(&rss_parser->model); + else + raptor_rss_model_add_common(&rss_parser->model, type); + + /* Inner container - push the current type onto a 1-place stack */ + if(rss_parser->current_type != RAPTOR_RSS_NONE) + rss_parser->prev_type = rss_parser->current_type; + + rss_parser->current_type = type; + } + + return (type == RAPTOR_RSS_NONE); +} + + +static raptor_uri* +raptor_rss_promote_namespace_uri(raptor_world *world, raptor_uri* nspace_URI) +{ + /* RSS 0.9 and RSS 1.1 namespaces => RSS 1.0 namespace */ + if((raptor_uri_equals(nspace_URI, + world->rss_namespaces_info_uris[RSS0_9_NS]) || + raptor_uri_equals(nspace_URI, + world->rss_namespaces_info_uris[RSS1_1_NS]))) { + nspace_URI = world->rss_namespaces_info_uris[RSS1_0_NS]; + } + + /* Atom 0.3 namespace => Atom 1.0 namespace */ + if(raptor_uri_equals(nspace_URI, + world->rss_namespaces_info_uris[ATOM0_3_NS])) { + nspace_URI = world->rss_namespaces_info_uris[ATOM1_0_NS]; + } + + return nspace_URI; +} + + + +static raptor_rss_item* +raptor_rss_get_current_item(raptor_rss_parser *rss_parser) +{ + raptor_rss_item* item; + + if(rss_parser->current_type == RAPTOR_RSS_ITEM) + item = rss_parser->model.last; + else + item = raptor_rss_model_get_common(&rss_parser->model, + rss_parser->current_type); + return item; +} + + +static int +raptor_rss_block_set_field(raptor_world *world, raptor_uri *base_uri, + raptor_rss_block *block, + const raptor_rss_block_field_info *bfi, + const char *string) +{ + int attribute_type = bfi->attribute_type; + int offset = bfi->offset; + if(attribute_type == RSS_BLOCK_FIELD_TYPE_URL) { + raptor_uri* uri; + uri = raptor_new_uri_relative_to_base(world, base_uri, + (const unsigned char*)string); + if(!uri) + return 1; + + block->urls[offset] = uri; + } 
else if(attribute_type == RSS_BLOCK_FIELD_TYPE_STRING) { + size_t len = strlen(string); + block->strings[offset] = RAPTOR_MALLOC(char*, len + 1); + if(!block->strings[offset]) + return 1; + + memcpy(block->strings[offset], string, len+1); + } else { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL2("Found unknown attribute_type %d\n", attribute_type); +#else + return 1; +#endif + } + + return 0; +} + + +static void +raptor_rss_start_element_handler(void *user_data, + raptor_xml_element* xml_element) +{ + raptor_parser *rdf_parser; + raptor_rss_parser *rss_parser; + raptor_rss_block *block = NULL; + raptor_uri* base_uri; + raptor_qname *el_qname; + const unsigned char *name; + int ns_attributes_count; + raptor_qname** named_attrs; + const raptor_namespace* el_nspace; + raptor_rss_element* rss_element; + int i; + + rdf_parser = (raptor_parser*)user_data; + rss_parser = (raptor_rss_parser*)rdf_parser->context; + + rss_element = RAPTOR_CALLOC(raptor_rss_element*, 1, sizeof(*rss_element)); + if(!rss_element) { + rdf_parser->failed = 1; + return; + } + + rss_element->world = rdf_parser->world; + rss_element->sb = raptor_new_stringbuffer(); + + xml_element->user_data = rss_element; + + if(xml_element->parent) { + raptor_rss_element* parent_rss_element; + parent_rss_element = (raptor_rss_element*)(xml_element->parent->user_data); + if(parent_rss_element->xml_writer) + rss_element->xml_writer = parent_rss_element->xml_writer; + } + + if(rss_element->xml_writer) { + raptor_xml_writer_start_element(rss_element->xml_writer, xml_element); + return; + } + + el_qname = raptor_xml_element_get_name(xml_element); + name = el_qname->local_name; + el_nspace = el_qname->nspace; + + named_attrs = raptor_xml_element_get_attributes(xml_element); + ns_attributes_count = raptor_xml_element_get_attributes_count(xml_element); + + base_uri = raptor_sax2_inscope_base_uri(rss_parser->sax2); + + + /* No container type - identify and record in rss_parser->current_type + * either as a top-level container or an 
inner-container */ + if(!raptor_rss_add_container(rss_parser, (const char*)name)) { +#ifdef RAPTOR_DEBUG + if(1) { + raptor_rss_type old_type = rss_parser->prev_type; + + if(old_type != rss_parser->current_type && old_type != RAPTOR_RSS_NONE) + RAPTOR_DEBUG5("FOUND inner container type %u - %s INSIDE current container type %u - %s\n", + rss_parser->current_type, + raptor_rss_items_info[rss_parser->current_type].name, + old_type, raptor_rss_items_info[old_type].name); + else + RAPTOR_DEBUG3("FOUND container type %u - %s\n", + rss_parser->current_type, + raptor_rss_items_info[rss_parser->current_type].name); + } +#endif + + /* check a few container attributes */ + if(named_attrs) { + raptor_rss_item* update_item = raptor_rss_get_current_item(rss_parser); + + for(i = 0; i < ns_attributes_count; i++) { + raptor_qname* attr = named_attrs[i]; + const char* attrName = (const char*)attr->local_name; + const unsigned char* attrValue = attr->value; + + RAPTOR_DEBUG3(" container attribute %s=%s\n", attrName, attrValue); + if(!strcmp(attrName, "about")) { + if(update_item) { + update_item->uri = raptor_new_uri(rdf_parser->world, attrValue); + update_item->term = raptor_new_term_from_uri(rdf_parser->world, + update_item->uri); + } + } + } + } + return; + } else if(rss_parser->current_type == RAPTOR_RSS_NONE) { + RAPTOR_DEBUG2("Unknown container element named %s\n", name); + /* Nothing more that can be done with unknown element - skip it */ + return; + } + + + /* have container (current_type) so this element is inside it is either: + * 1. a metadata block element (such as rss:enclosure) + * 2. 
a field (such as atom:title) + */ + + /* Find field ID */ + rss_parser->current_field = RAPTOR_RSS_FIELD_UNKNOWN; + for(i = 0; i < RAPTOR_RSS_FIELDS_SIZE; i++) { + raptor_uri* nspace_URI; + raptor_uri* field_nspace_URI; + rss_info_namespace nsid = raptor_rss_fields_info[i].nspace; + + if(strcmp((const char*)name, raptor_rss_fields_info[i].name)) + continue; + + if(!el_nspace) { + if(nsid != RSS_NO_NS && nsid != RSS1_0_NS && nsid != RSS0_91_NS && + nsid != RSS0_9_NS && nsid != RSS1_1_NS) + continue; + + /* Matches if the element has no namespace and field is not atom */ + rss_parser->current_field = (raptor_rss_fields_type)i; + break; + } + + /* Promote element namespaces */ + nspace_URI = raptor_rss_promote_namespace_uri(rdf_parser->world, + raptor_namespace_get_uri(el_nspace)); + field_nspace_URI = rdf_parser->world->rss_namespaces_info_uris[raptor_rss_fields_info[i].nspace]; + + if(raptor_uri_equals(nspace_URI, + field_nspace_URI)) { + rss_parser->current_field = (raptor_rss_fields_type)i; + break; + } + } + + if(rss_parser->current_field == RAPTOR_RSS_FIELD_UNKNOWN) { + RAPTOR_DEBUG3("Unknown field element named %s inside type %s\n", name, + raptor_rss_items_info[rss_parser->current_type].name); + return; + } + + + /* Found a block element to process */ + if(raptor_rss_fields_info[rss_parser->current_field].flags & + RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE) { + raptor_rss_type block_type; + raptor_rss_item* update_item; + const unsigned char *id; + raptor_term* block_term; + + block_type = raptor_rss_fields_info[rss_parser->current_field].block_type; + + RAPTOR_DEBUG3("FOUND new block type %u - %s\n", block_type, + raptor_rss_items_info[block_type].name); + + update_item = raptor_rss_get_current_item(rss_parser); + + id = raptor_world_generate_bnodeid(rdf_parser->world); + block_term = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + + block = raptor_new_rss_block(rdf_parser->world, block_type, block_term); + raptor_free_term(block_term); 
+ + raptor_rss_item_add_block(update_item, block); + rss_parser->current_block = block; + + rss_parser->nspaces_seen[raptor_rss_items_info[block_type].nspace] = 'Y'; + + /* Now check block attributes */ + if(named_attrs) { + for(i = 0; i < ns_attributes_count; i++) { + raptor_qname* attr = named_attrs[i]; + const char* attrName = (const char*)attr->local_name; + const unsigned char* attrValue = attr->value; + const raptor_rss_block_field_info *bfi; + int offset = -1; + + for(bfi = &raptor_rss_block_fields_info[0]; + bfi->type != RAPTOR_RSS_NONE; + bfi++) { + if(!bfi->attribute) + continue; + + if(bfi->type == block_type && !strcmp(attrName, bfi->attribute)) { + offset = bfi->offset; + break; + } + } + + if(offset < 0) + continue; + + /* Found attribute for this block type */ + RAPTOR_DEBUG3(" found block attribute %s=%s\n", attrName, attrValue); + if(raptor_rss_block_set_field(rdf_parser->world, base_uri, + block, bfi, (const char*)attrValue)) { + rdf_parser->failed = 1; + return; + } + + } + + } + + return; + } + + + /* Process field */ + RAPTOR_DEBUG4("FOUND field %u - %s inside type %s\n", + rss_parser->current_field, + raptor_rss_fields_info[rss_parser->current_field].name, + raptor_rss_items_info[rss_parser->current_type].name); + + /* Mark namespace seen in new field */ + if(1) { + rss_info_namespace ns_index; + ns_index = raptor_rss_fields_info[rss_parser->current_field].nspace; + rss_parser->nspaces_seen[ns_index] = 'Y'; + } + + + /* Now check for field attributes */ + if(named_attrs) { + for(i = 0; i < ns_attributes_count; i++) { + raptor_qname* attr = named_attrs[i]; + const unsigned char* attrName = attr->local_name; + const unsigned char* attrValue = attr->value; + + RAPTOR_DEBUG3(" attribute %s=%s\n", attrName, attrValue); + + /* Pick a few attributes to care about */ + if(!strcmp((const char*)attrName, "isPermaLink")) { + raptor_rss_item* update_item = rss_parser->model.last; + if(!strcmp((const char*)name, "guid")) { + /* <guid isPermaLink="..."> */ 
+ if(update_item) { + raptor_rss_field* field = raptor_rss_new_field(rdf_parser->world); + RAPTOR_DEBUG1("fa1 - "); + raptor_rss_item_add_field(update_item, RAPTOR_RSS_FIELD_GUID, field); + if(!strcmp((const char*)attrValue, "true")) { + RAPTOR_DEBUG2(" setting guid to URI '%s'\n", attrValue); + field->uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, + (const unsigned char*)attrValue); + } else { + size_t len = strlen((const char*)attrValue); + RAPTOR_DEBUG2(" setting guid to string '%s'\n", attrValue); + field->value = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!field->value) { + rdf_parser->failed = 1; + return; + } + memcpy(field->value, attrValue, len + 1); + } + } + } + } else if(!strcmp((const char*)attrName, "href")) { + if(rss_parser->current_field == RAPTOR_RSS_FIELD_LINK || + rss_parser->current_field == RAPTOR_RSS_FIELD_ATOM_LINK) { + RAPTOR_DEBUG2(" setting href as URI string for type %s\n", raptor_rss_items_info[rss_parser->current_type].name); + if(rss_element->uri) + raptor_free_uri(rss_element->uri); + rss_element->uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, + (const unsigned char*)attrValue); + } + } else if(!strcmp((const char*)attrName, "type")) { + if(rss_parser->current_field == RAPTOR_RSS_FIELD_ATOM_LINK) { + /* do nothing with atom link attribute type */ + } else if(rss_parser->is_atom) { + /* Atom only typing */ + if(!strcmp((const char*)attrValue, "xhtml") || + !strcmp((const char*)attrValue, "xml") || + strstr((const char*)attrValue, "+xml")) { + + RAPTOR_DEBUG2(" found type '%s', making an XML writer\n", + attrValue); + + rss_element->type = RAPTOR_RSS_CONTENT_TYPE_XML; + rss_element->iostream = raptor_new_iostream_to_string(rdf_parser->world, + &rss_element->xml_content, + &rss_element->xml_content_length, + raptor_alloc_memory); + rss_element->xml_writer = raptor_new_xml_writer(rdf_parser->world, + NULL, + rss_element->iostream); + raptor_xml_writer_set_option(rss_element->xml_writer, + 
RAPTOR_OPTION_WRITER_XML_DECLARATION, + NULL, 0); + + raptor_free_stringbuffer(rss_element->sb); + rss_element->sb = NULL; + + } + } + } else if(!strcmp((const char*)attrName, "version")) { + if(!raptor_strcasecmp((const char*)name, "feed")) { + if(!strcmp((const char*)attrValue, "0.3")) + rss_parser->is_atom = 1; + } + } + } + } /* if have field attributes */ + +} + + +static void +raptor_rss_end_element_handler(void *user_data, + raptor_xml_element* xml_element) +{ + raptor_parser* rdf_parser; + raptor_rss_parser* rss_parser; +#ifdef RAPTOR_DEBUG + const unsigned char* name = raptor_xml_element_get_name(xml_element)->local_name; +#endif + raptor_rss_element* rss_element; + size_t cdata_len = 0; + unsigned char* cdata = NULL; + + rss_element = (raptor_rss_element*)xml_element->user_data; + + rdf_parser = (raptor_parser*)user_data; + rss_parser = (raptor_rss_parser*)rdf_parser->context; + + if(rss_element->xml_writer) { + if(rss_element->type != RAPTOR_RSS_CONTENT_TYPE_XML) { + raptor_xml_writer_end_element(rss_element->xml_writer, xml_element); + goto tidy_end_element; + } + + /* otherwise we are done making XML */ + raptor_free_iostream(rss_element->iostream); + rss_element->iostream = NULL; + cdata = (unsigned char*)rss_element->xml_content; + cdata_len = rss_element->xml_content_length; + } + + if(rss_element->sb) { + cdata_len = raptor_stringbuffer_length(rss_element->sb); + cdata = raptor_stringbuffer_as_string(rss_element->sb); + } + + if(cdata) { + raptor_uri* base_uri = NULL; + + base_uri = raptor_sax2_inscope_base_uri(rss_parser->sax2); + + if(rss_parser->current_block) { + const raptor_rss_block_field_info *bfi; +#ifdef RAPTOR_DEBUG + int handled = 0; +#endif + /* in a block, maybe store the CDATA there */ + + for(bfi = &raptor_rss_block_fields_info[0]; + bfi->type != RAPTOR_RSS_NONE; + bfi++) { + + if(bfi->type != rss_parser->current_block->rss_type || + bfi->attribute != NULL) + continue; + + /* Set author name from element */ + 
if(raptor_rss_block_set_field(rdf_parser->world, base_uri, + rss_parser->current_block, + bfi, (const char*)cdata)) { + rdf_parser->failed = 1; + return; + } + +#ifdef RAPTOR_DEBUG + handled = 1; +#endif + break; + } + +#ifdef RAPTOR_DEBUG + if(!handled) { + raptor_rss_type block_type = rss_parser->current_block->rss_type; + RAPTOR_DEBUG3("Ignoring cdata for block %u - %s\n", + block_type, raptor_rss_items_info[block_type].name); + } +#endif + rss_parser->current_block = NULL; + goto do_end_element; + } + + if(rss_parser->current_type == RAPTOR_RSS_NONE || + (rss_parser->current_field == RAPTOR_RSS_FIELD_NONE || + rss_parser->current_field == RAPTOR_RSS_FIELD_UNKNOWN)) { + unsigned char *p = cdata; + size_t i; + for(i = cdata_len; i > 0 && *p; i--) { + if(!isspace(*p)) + break; + p++; + } + if(i > 0 && *p) { + RAPTOR_DEBUG4("IGNORING non-whitespace text '%s' inside type %s, field %s\n", cdata, + raptor_rss_items_info[rss_parser->current_type].name, + raptor_rss_fields_info[rss_parser->current_field].name); + } + + goto do_end_element; + } + + if(rss_parser->current_type >= RAPTOR_RSS_COMMON_IGNORED) { + /* skipHours, skipDays common but IGNORED */ + RAPTOR_DEBUG2("Ignoring fields for type %s\n", raptor_rss_items_info[rss_parser->current_type].name); + } else { + raptor_rss_item* update_item = raptor_rss_get_current_item(rss_parser); + raptor_rss_field* field = raptor_rss_new_field(rdf_parser->world); + + /* if value is always an uri, make it so */ + if(raptor_rss_fields_info[rss_parser->current_field].flags & + RAPTOR_RSS_INFO_FLAG_URI_VALUE) { + RAPTOR_DEBUG4("Added URI %s to field %s of type %s\n", cdata, raptor_rss_fields_info[rss_parser->current_field].name, raptor_rss_items_info[rss_parser->current_type].name); + field->uri = raptor_new_uri_relative_to_base(rdf_parser->world, base_uri, cdata); + } else { + RAPTOR_DEBUG4("Added text '%s' to field %s of type %s\n", cdata, raptor_rss_fields_info[rss_parser->current_field].name, 
raptor_rss_items_info[rss_parser->current_type].name); + field->uri = NULL; + field->value = RAPTOR_MALLOC(unsigned char*, cdata_len + 1); + if(!field->value) { + rdf_parser->failed = 1; + raptor_rss_field_free(field); + return; + } + + memcpy(field->value, cdata, cdata_len); + field->value[cdata_len] = '\0'; + } + + RAPTOR_DEBUG1("fa3 - "); + raptor_rss_item_add_field(update_item, rss_parser->current_field, field); + } + } /* end if contained cdata */ + + if(raptor_xml_element_is_empty(xml_element)) { + /* Empty element, so consider adding one of the attributes as + * literal or URI content + */ + if(rss_parser->current_type >= RAPTOR_RSS_COMMON_IGNORED) { + /* skipHours, skipDays common but IGNORED */ + RAPTOR_DEBUG3("Ignoring empty element %s for type %s\n", name, raptor_rss_items_info[rss_parser->current_type].name); + } else if(rss_element->uri) { + raptor_rss_item* update_item = raptor_rss_get_current_item(rss_parser); + raptor_rss_field* field = raptor_rss_new_field(rdf_parser->world); + + if(rss_parser->current_field == RAPTOR_RSS_FIELD_UNKNOWN) { + RAPTOR_DEBUG2("Cannot add URI from alternate attribute to type %s unknown field\n", raptor_rss_items_info[rss_parser->current_type].name); + raptor_rss_field_free(field); + } else { + RAPTOR_DEBUG3("Added URI to field %s of type %s\n", raptor_rss_fields_info[rss_parser->current_field].name, raptor_rss_items_info[rss_parser->current_type].name); + field->uri = rss_element->uri; + rss_element->uri = NULL; + RAPTOR_DEBUG1("fa2 - "); + raptor_rss_item_add_field(update_item, rss_parser->current_field, field); + } + } + + } + + do_end_element: + if(rss_parser->current_type != RAPTOR_RSS_NONE) { + if(rss_parser->current_field != RAPTOR_RSS_FIELD_NONE) { + RAPTOR_DEBUG3("Ending element %s field %s\n", name, raptor_rss_fields_info[rss_parser->current_field].name); + rss_parser->current_field = RAPTOR_RSS_FIELD_NONE; + } else { + RAPTOR_DEBUG3("Ending element %s type %s\n", name, 
raptor_rss_items_info[rss_parser->current_type].name); + if(rss_parser->prev_type != RAPTOR_RSS_NONE) { + rss_parser->current_type = rss_parser->prev_type; + rss_parser->prev_type = RAPTOR_RSS_NONE; + RAPTOR_DEBUG3("Returning to type %u - %s\n", rss_parser->current_type, raptor_rss_items_info[rss_parser->current_type].name); + } else + rss_parser->current_type = RAPTOR_RSS_NONE; + } + } + + if(rss_parser->current_block) { +#ifdef RAPTOR_DEBUG + raptor_rss_type block_type = rss_parser->current_block->rss_type; + RAPTOR_DEBUG3("Ending current block %u - %s\n", + block_type, raptor_rss_items_info[block_type].name); +#endif + rss_parser->current_block = NULL; + } + + + tidy_end_element: + + raptor_free_rss_element(rss_element); + +} + + + +static void +raptor_rss_cdata_handler(void *user_data, raptor_xml_element* xml_element, + const unsigned char *s, int len) +{ + raptor_rss_element* rss_element; + + rss_element = (raptor_rss_element*)xml_element->user_data; + + if(rss_element->xml_writer) { + raptor_xml_writer_cdata_counted(rss_element->xml_writer, s, len); + return; + } + + raptor_stringbuffer_append_counted_string(rss_element->sb, s, len, 1); +} + + +static void +raptor_rss_comment_handler(void *user_data, raptor_xml_element* xml_element, + const unsigned char *s) +{ + raptor_rss_element* rss_element; + + if(!xml_element) + return; + + rss_element = (raptor_rss_element*)xml_element->user_data; + + if(rss_element->xml_writer) { + raptor_xml_writer_comment(rss_element->xml_writer, s); + return; + } +} + + +static void +raptor_rss_sax2_new_namespace_handler(void *user_data, + raptor_namespace* nspace) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + raptor_rss_parser* rss_parser; + int n; + + rss_parser = (raptor_rss_parser*)rdf_parser->context; + for(n = 0; n < RAPTOR_RSS_NAMESPACES_SIZE; n++) { + raptor_uri* ns_uri = rdf_parser->world->rss_namespaces_info_uris[n]; + if(!ns_uri) + continue; + + if(!raptor_uri_equals(ns_uri, nspace->uri)) { + 
rss_parser->nspaces_seen[n] = 'Y'; + break; + } + } + +} + + +/* Add an rss:link from string contents of either: + * atom:id + * atom:link[@rel="self"]/@href + */ +static int +raptor_rss_insert_rss_link(raptor_parser* rdf_parser, + raptor_rss_item* item) +{ + raptor_rss_block *block; + raptor_rss_field* id_field; + raptor_rss_field* field = NULL; + + /* Try atom:id first */ + id_field = item->fields[RAPTOR_RSS_FIELD_ATOM_ID]; + if(id_field && id_field->value) { + const char *value = (const char*)id_field->value; + size_t len = strlen(value); + + field = raptor_rss_new_field(item->world); + if(!field) + return 1; + + field->value = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!field->value) { + raptor_rss_field_free(field); + return 1; + } + + memcpy(field->value, value, len + 1); + raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_LINK, field); + + return 0; + } + + + for(block = item->blocks; block; block = block->next) { + if(block->rss_type != RAPTOR_ATOM_LINK) + continue; + + /* <link @href> is url at offset RAPTOR_RSS_LINK_HREF_URL_OFFSET + * <link @rel> is string at offset RAPTOR_RSS_LINK_REL_STRING_OFFSET + * The raptor_rss_block_fields_info structure records this + */ + if(!block->urls[RAPTOR_RSS_LINK_HREF_URL_OFFSET] || + (block->strings[RAPTOR_RSS_LINK_REL_STRING_OFFSET] && + strcmp(block->strings[RAPTOR_RSS_LINK_REL_STRING_OFFSET], "self")) + ) + continue; + + /* set the field rss:link to the string value of the @href */ + field = raptor_rss_new_field(item->world); + field->value = raptor_uri_to_string(block->urls[0]); + + raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_LINK, field); + return 0; + } + + return 0; +} + + +static int +raptor_rss_insert_identifiers(raptor_parser* rdf_parser) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + int i; + raptor_rss_item* item; + + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + for(item = rss_parser->model.common[i]; item; item = item->next) { + if(!item->fields_count) + continue; + 
+ RAPTOR_DEBUG3("Inserting identifiers in common type %d - %s\n", i, raptor_rss_items_info[i].name); + + if(item->uri) { + item->term = raptor_new_term_from_uri(rdf_parser->world, item->uri); + } else { + int url_fields[2]; + int url_fields_count = 1; + int f; + + url_fields[0] = (i== RAPTOR_RSS_IMAGE) ? RAPTOR_RSS_FIELD_URL : + RAPTOR_RSS_FIELD_LINK; + if(i == RAPTOR_RSS_CHANNEL) { + url_fields[1] = RAPTOR_RSS_FIELD_ATOM_ID; + url_fields_count++; + } + + for(f = 0; f < url_fields_count; f++) { + raptor_rss_field* field; + + for(field = item->fields[url_fields[f]]; field; field = field->next) { + raptor_uri *new_uri = NULL; + if(field->value) + new_uri = raptor_new_uri(rdf_parser->world, + (const unsigned char*)field->value); + else if(field->uri) + new_uri = raptor_uri_copy(field->uri); + + if(new_uri) { + item->term = raptor_new_term_from_uri(rdf_parser->world, new_uri); + raptor_free_uri(new_uri); + if(!item->term) + return 1; + break; + } + } + } + + if(!item->term) { + const unsigned char *id; + + /* need to make bnode */ + id = raptor_world_generate_bnodeid(rdf_parser->world); + item->term = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + } + } + + /* Try to add an rss:link if missing */ + if(i == RAPTOR_RSS_CHANNEL && !item->fields[RAPTOR_RSS_FIELD_LINK]) { + if(raptor_rss_insert_rss_link(rdf_parser, item)) + return 1; + } + + item->node_type = &raptor_rss_items_info[i]; + item->node_typei = i; + } + } + /* sequence of rss:item */ + for(item = rss_parser->model.items; item; item = item->next) { + raptor_rss_block *block; + raptor_uri* uri = NULL; + + if(!item->fields[RAPTOR_RSS_FIELD_LINK]) { + if(raptor_rss_insert_rss_link(rdf_parser, item)) + return 1; + } + + + if(item->uri) { + uri = raptor_uri_copy(item->uri); + } else { + if(item->fields[RAPTOR_RSS_FIELD_LINK]) { + if(item->fields[RAPTOR_RSS_FIELD_LINK]->value) + uri = raptor_new_uri(rdf_parser->world, + (const unsigned char*)item->fields[RAPTOR_RSS_FIELD_LINK]->value); + 
else if(item->fields[RAPTOR_RSS_FIELD_LINK]->uri) + uri = raptor_uri_copy(item->fields[RAPTOR_RSS_FIELD_LINK]->uri); + } else if(item->fields[RAPTOR_RSS_FIELD_ATOM_ID]) { + if(item->fields[RAPTOR_RSS_FIELD_ATOM_ID]->value) + uri = raptor_new_uri(rdf_parser->world, + (const unsigned char*)item->fields[RAPTOR_RSS_FIELD_ATOM_ID]->value); + else if(item->fields[RAPTOR_RSS_FIELD_ATOM_ID]->uri) + uri = raptor_uri_copy(item->fields[RAPTOR_RSS_FIELD_ATOM_ID]->uri); + } + } + + if(!uri) + continue; + + item->term = raptor_new_term_from_uri(rdf_parser->world, uri); + raptor_free_uri(uri); + uri = NULL; + + for(block = item->blocks; block; block = block->next) { + if(!block->identifier) { + const unsigned char *id; + /* need to make bnode */ + id = raptor_world_generate_bnodeid(rdf_parser->world); + item->term = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + } + } + + item->node_type = &raptor_rss_items_info[RAPTOR_RSS_ITEM]; + item->node_typei = RAPTOR_RSS_ITEM; + } + + return 0; +} + + +static int +raptor_rss_emit_type_triple(raptor_parser* rdf_parser, + raptor_term *resource, + raptor_uri *type_uri) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + raptor_term *predicate_term; + raptor_term *object_term; + + if(!resource) { + raptor_parser_error(rdf_parser, "RSS node has no identifier"); + return 1; + } + + rss_parser->statement.subject = resource; + + predicate_term = raptor_new_term_from_uri(rdf_parser->world, + RAPTOR_RDF_type_URI(rdf_parser->world)); + rss_parser->statement.predicate = predicate_term; + + object_term = raptor_new_term_from_uri(rdf_parser->world, type_uri); + rss_parser->statement.object = object_term; + + /* Generate the statement */ + (*rdf_parser->statement_handler)(rdf_parser->user_data, &rss_parser->statement); + + raptor_free_term(predicate_term); + raptor_free_term(object_term); + + return 0; +} + + +static int +raptor_rss_emit_block(raptor_parser* rdf_parser, + raptor_term *resource, 
+ raptor_rss_block *block) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + raptor_rss_type block_type = block->rss_type; + raptor_uri *predicate_uri; + raptor_term *predicate_term = NULL; + const raptor_rss_block_field_info *bfi; + raptor_rss_fields_type predicate_field; + + if(!block->identifier) { + raptor_parser_error(rdf_parser, "Block has no identifier"); + return 1; + } + + predicate_field = raptor_rss_items_info[block_type].predicate; + predicate_uri = rdf_parser->world->rss_fields_info_uris[predicate_field]; + predicate_term = raptor_new_term_from_uri(rdf_parser->world, + predicate_uri); + + rss_parser->statement.subject = resource; + rss_parser->statement.predicate = predicate_term; + rss_parser->statement.object = block->identifier; + (*rdf_parser->statement_handler)(rdf_parser->user_data, + &rss_parser->statement); + + raptor_free_term(predicate_term); predicate_term = NULL; + + if(raptor_rss_emit_type_triple(rdf_parser, block->identifier, + block->node_type)) + return 1; + + + for(bfi = &raptor_rss_block_fields_info[0]; + bfi->type != RAPTOR_RSS_NONE; + bfi++) { + int attribute_type; + int offset; + + if(bfi->type != block_type || !bfi->attribute) + continue; + + attribute_type = bfi->attribute_type; + offset = bfi->offset; + predicate_uri = rdf_parser->world->rss_fields_info_uris[bfi->field]; + + predicate_term = raptor_new_term_from_uri(rdf_parser->world, + predicate_uri); + rss_parser->statement.predicate = predicate_term; + + if(attribute_type == RSS_BLOCK_FIELD_TYPE_URL) { + raptor_uri *uri = block->urls[offset]; + if(uri) { + raptor_term* object_term; + + object_term = raptor_new_term_from_uri(rdf_parser->world, uri); + rss_parser->statement.object = object_term; + (*rdf_parser->statement_handler)(rdf_parser->user_data, + &rss_parser->statement); + raptor_free_term(object_term); + } + } else if(attribute_type == RSS_BLOCK_FIELD_TYPE_STRING) { + const char *str = block->strings[offset]; + if(str) { + raptor_term* 
object_term; + + object_term = raptor_new_term_from_literal(rdf_parser->world, + (const unsigned char*)str, + NULL, NULL); + rss_parser->statement.object = object_term; + (*rdf_parser->statement_handler)(rdf_parser->user_data, + &rss_parser->statement); + raptor_free_term(object_term); + } + } else { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL2("Found unknown attribute_type %d\n", attribute_type); +#endif + } + + raptor_free_term(predicate_term); predicate_term = NULL; + } + + return 0; +} + + +static int +raptor_rss_emit_item(raptor_parser* rdf_parser, raptor_rss_item *item) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + int f; + raptor_rss_block *block; + raptor_uri *type_uri; + + if(!item->fields_count) + return 0; + + /* HACK - FIXME - set correct atom output class type */ + if(item->node_typei == RAPTOR_ATOM_AUTHOR) + type_uri = rdf_parser->world->rss_fields_info_uris[RAPTOR_RSS_RDF_ATOM_AUTHOR_CLASS]; + else + type_uri = rdf_parser->world->rss_types_info_uris[item->node_typei]; + + if(raptor_rss_emit_type_triple(rdf_parser, item->term, type_uri)) + return 1; + + for(f = 0; f< RAPTOR_RSS_FIELDS_SIZE; f++) { + raptor_rss_field* field; + raptor_uri* predicate_uri = NULL; + raptor_term* predicate_term = NULL; + + /* This is only made by a connection */ + if(f == RAPTOR_RSS_FIELD_ITEMS) + continue; + + /* skip predicates with no URI (no namespace e.g. 
RSS 2) */ + predicate_uri = rdf_parser->world->rss_fields_info_uris[f]; + if(!predicate_uri) + continue; + + predicate_term = raptor_new_term_from_uri(rdf_parser->world, + predicate_uri); + if(!predicate_term) + continue; + + rss_parser->statement.predicate = predicate_term; + + for(field = item->fields[f]; field; field = field->next) { + raptor_term* object_term; + + if(field->value) { + /* FIXME - should store and emit languages */ + object_term = raptor_new_term_from_literal(rdf_parser->world, + field->value, + NULL, NULL); + } else { + object_term = raptor_new_term_from_uri(rdf_parser->world, + field->uri); + } + rss_parser->statement.object = object_term; + + /* Generate the statement */ + (*rdf_parser->statement_handler)(rdf_parser->user_data, + &rss_parser->statement); + + raptor_free_term(object_term); + } + + raptor_free_term(predicate_term); + } + + for(block = item->blocks; block; block = block->next) { + raptor_rss_emit_block(rdf_parser, item->term, block); + } + + return 0; +} + + +static int +raptor_rss_emit_connection(raptor_parser* rdf_parser, + raptor_term *subject_identifier, + raptor_uri* predicate_uri, int predicate_ordinal, + raptor_term *object_identifier) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + raptor_uri *puri = NULL; + raptor_term *predicate_term = NULL; + + if(!subject_identifier) { + raptor_parser_error(rdf_parser, "Connection subject has no identifier"); + return 1; + } + + rss_parser->statement.subject = subject_identifier; + + if(!predicate_uri) { + /* new URI object */ + puri = raptor_new_uri_from_rdf_ordinal(rdf_parser->world, predicate_ordinal); + predicate_uri = puri; + } + predicate_term = raptor_new_term_from_uri(rdf_parser->world, + predicate_uri); + rss_parser->statement.predicate = predicate_term; + rss_parser->statement.object = object_identifier; + + /* Generate the statement */ + (*rdf_parser->statement_handler)(rdf_parser->user_data, + &rss_parser->statement); + + 
raptor_free_term(predicate_term); + + if(puri) + raptor_free_uri(puri); + + return 0; +} + + +static int +raptor_rss_emit(raptor_parser* rdf_parser) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + int i; + raptor_rss_item* item; + int rc = 0; + + if(!rss_parser->model.common[RAPTOR_RSS_CHANNEL]) { + raptor_parser_error(rdf_parser, "No RSS channel item present"); + return 1; + } + + if(!rss_parser->model.common[RAPTOR_RSS_CHANNEL]->term) { + raptor_parser_error(rdf_parser, "RSS channel has no identifier"); + return 1; + } + + /* Emit start default graph mark */ + raptor_parser_start_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph++; + + + /* Emit all the common type blocks (channel, author, ...) */ + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + for(item = rss_parser->model.common[i]; item; item = item->next) { + if(!item->fields_count) + continue; + + RAPTOR_DEBUG3("Emitting type %i - %s\n", i, raptor_rss_items_info[i].name); + + if(!item->term) { + raptor_parser_error(rdf_parser, "RSS %s has no identifier", + raptor_rss_items_info[i].name); + rc = 1; + goto tidy; + } + + if(raptor_rss_emit_item(rdf_parser, item)) { + rc = 1; + goto tidy; + } + + /* Add connections to channel */ + if(i != RAPTOR_RSS_CHANNEL) { + if(raptor_rss_emit_connection(rdf_parser, + rss_parser->model.common[RAPTOR_RSS_CHANNEL]->term, + rdf_parser->world->rss_types_info_uris[i], 0, + item->term)) { + rc = 1; + goto tidy; + } + } + } + } + + + /* Emit the feed item blocks */ + if(rss_parser->model.items_count) { + const unsigned char* id; + raptor_term *items; + + id = raptor_world_generate_bnodeid(rdf_parser->world); + + /* make a new genid for the <rdf:Seq> node */ + items = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + + /* _:genid1 rdf:type rdf:Seq . 
*/ + if(raptor_rss_emit_type_triple(rdf_parser, items, + RAPTOR_RDF_Seq_URI(rdf_parser->world))) { + raptor_free_term(items); + rc = 1; + goto tidy; + } + + /* <channelURI> rss:items _:genid1 . */ + if(raptor_rss_emit_connection(rdf_parser, + rss_parser->model.common[RAPTOR_RSS_CHANNEL]->term, + rdf_parser->world->rss_fields_info_uris[RAPTOR_RSS_FIELD_ITEMS], 0, + items)) { + raptor_free_term(items); + rc= 1; + goto tidy; + } + + /* sequence of rss:item */ + for(i = 1, item = rss_parser->model.items; item; item = item->next, i++) { + + if(raptor_rss_emit_item(rdf_parser, item) || + raptor_rss_emit_connection(rdf_parser, items, NULL, i,item->term)) { + raptor_free_term(items); + rc = 1; + goto tidy; + } + } + + raptor_free_term(items); + } + + tidy: + if(rdf_parser->emitted_default_graph) { + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + + return rc; +} + + +static int +raptor_rss_copy_field(raptor_rss_parser* rss_parser, + raptor_rss_item* item, + const raptor_field_pair* pair) +{ + raptor_rss_fields_type from_field = pair->from; + raptor_rss_fields_type to_field = pair->to; + raptor_rss_field* field = NULL; + + if(!(item->fields[from_field] && item->fields[from_field]->value)) + return 1; + + if(from_field == to_field) { + field = item->fields[from_field]; + } else { + if(item->fields[to_field] && item->fields[to_field]->value) + return 1; + + field = raptor_rss_new_field(item->world); + field->is_mapped = 1; + raptor_rss_item_add_field(item, to_field, field); + } + + /* Ensure output namespace is declared */ + rss_parser->nspaces_seen[raptor_rss_fields_info[to_field].nspace] = 'Y'; + + if(!field->value) { + if(pair->conversion) + pair->conversion(item->fields[from_field], field); + else { + size_t len; + + /* Otherwise default action is to copy from_field value */ + len = strlen((const char*)item->fields[from_field]->value); + + field->value = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!field->value) + return 1; + + 
memcpy(field->value, item->fields[from_field]->value, len + 1); + } + } + + return 0; +} + + +static void +raptor_rss_uplift_fields(raptor_rss_parser* rss_parser, raptor_rss_item* item) +{ + int i; + + /* COPY some fields from atom to rss/dc */ + for(i = 0; raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; i++) { +#ifdef RAPTOR_DEBUG + raptor_rss_fields_type from_field = raptor_atom_to_rss[i].from; + raptor_rss_fields_type to_field = raptor_atom_to_rss[i].to; +#endif + + if(raptor_rss_copy_field(rss_parser, item, &raptor_atom_to_rss[i])) + continue; + RAPTOR_DEBUG3("Copied field %s to rss field %s\n", + raptor_rss_fields_info[from_field].name, + raptor_rss_fields_info[to_field].name); + } +} + + +static void +raptor_rss_uplift_items(raptor_parser* rdf_parser) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + int i; + raptor_rss_item* item; + + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + for(item = rss_parser->model.common[i]; item; item = item->next) { + raptor_rss_uplift_fields(rss_parser, item); + } + } + + for(item = rss_parser->model.items; item; item = item->next) { + raptor_rss_uplift_fields(rss_parser, item); + } + +} + + +static void +raptor_rss_start_namespaces(raptor_parser* rdf_parser) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + int i; + int n; + + /* for each item type (channel, item, ...) 
*/ + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + raptor_rss_item* item; + + /* for each item instance of a type */ + for(item = rss_parser->model.common[i]; item; item = item->next) { + int f; + if(!item->fields_count) + continue; + + /* for each field */ + for(f = 0; f< RAPTOR_RSS_FIELDS_SIZE; f++) { + raptor_rss_field* field = item->fields[f]; + if(field) { + /* knowing there is one value is enough */ + rss_info_namespace ns_index = raptor_rss_fields_info[f].nspace; + rss_parser->nspaces_seen[ns_index] = 'Y'; + } + } + } + } + + /* start the namespaces */ + for(n = 0; n < RAPTOR_RSS_NAMESPACES_SIZE; n++) { + if(rss_parser->nspaces[n] && rss_parser->nspaces_seen[n] == 'Y') + raptor_parser_start_namespace(rdf_parser, rss_parser->nspaces[n]); + } +} + + +static int +raptor_rss_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_rss_parser* rss_parser = (raptor_rss_parser*)rdf_parser->context; + + if(rdf_parser->failed) + return 1; + + raptor_sax2_parse_chunk(rss_parser->sax2, s, len, is_end); + + if(!is_end) + return 0; + + if(rdf_parser->failed) + return 1; + + /* turn strings into URIs, move things around if needed */ + if(raptor_rss_insert_identifiers(rdf_parser)) { + rdf_parser->failed = 1; + return 1; + } + + /* add some new fields */ + raptor_rss_uplift_items(rdf_parser); + + /* find out what namespaces to declare and start them */ + raptor_rss_start_namespaces(rdf_parser); + + /* generate the triples */ + raptor_rss_emit(rdf_parser); + + return 0; +} + + +static int +raptor_rss_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score = 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "rss")) + score = 7; + if(!strcmp((const char*)suffix, "atom")) + score = 5; + if(!strcmp((const char*)suffix, "xml")) + score = 4; + } + + if(identifier) { + 
if(!strncmp((const char*)identifier, "http://feed", 11)) + score += 5; + else if(strstr((const char*)identifier, "feed")) + score += 3; + + if(strstr((const char*)identifier, "rss2")) + score += 5; + else if(!suffix && strstr((const char*)identifier, "rss")) + score += 4; + else if(!suffix && strstr((const char*)identifier, "atom")) + score += 4; + else if(strstr((const char*)identifier, "rss.xml")) + score += 4; + else if(strstr((const char*)identifier, "atom.xml")) + score += 4; + } + + if(mime_type) { + if(!strstr((const char*)mime_type, "html")) { + if(strstr((const char*)mime_type, "rss")) + score += 4; + else if(strstr((const char*)mime_type, "xml")) + score += 4; + else if(strstr((const char*)mime_type, "atom")) + score += 4; + } + } + + return score; +} + + +static const char* const rss_tag_soup_names[2] = { "rss-tag-soup", NULL }; + +#define RSS_TAG_SOUP_TYPES_COUNT 6 +static const raptor_type_q rss_tag_soup_types[RSS_TAG_SOUP_TYPES_COUNT + 1] = { + { "application/rss", 15, 8}, + { "application/rss+xml", 19, 8}, + { "text/rss", 8, 8}, + { "application/xml", 15, 3}, + { "text/xml", 8, 3}, + { "application/atom+xml", 20, 3}, + { NULL, 0, 0} +}; + +static int +raptor_rss_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = rss_tag_soup_names; + + factory->desc.mime_types = rss_tag_soup_types; + + factory->desc.label = "RSS Tag Soup"; + factory->desc.uri_strings = NULL; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_rss_parser); + + factory->init = raptor_rss_parse_init; + factory->terminate = raptor_rss_parse_terminate; + factory->start = raptor_rss_parse_start; + factory->chunk = raptor_rss_parse_chunk; + factory->recognise_syntax = raptor_rss_parse_recognise_syntax; + + return rc; +} + + +int +raptor_init_parser_rss(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_rss_parser_register_factory); +} diff --git 
a/src/raptor_rss.h b/src/raptor_rss.h new file mode 100644 index 0000000..0e53369 --- /dev/null +++ b/src/raptor_rss.h @@ -0,0 +1,448 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_rss.h - Redland Parser Toolkit Internal RSS Model and API + * + * Copyright (C) 2004-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
/*
 * raptor_rss_type - kinds of typed node a feed can contain.
 *
 * The values index raptor_rss_items_info[] and the common[] array of
 * raptor_rss_model, so the order here must stay in step with the
 * raptor_rss_items_info[] table in raptor_rss_common.c.
 */
typedef enum {
/* CONTAINERs */
  /* common */
  RAPTOR_RSS_CHANNEL,
  RAPTOR_RSS_IMAGE,
  RAPTOR_RSS_TEXTINPUT,

  /* list items */
  RAPTOR_RSS_ITEM,

  /* atom author */
  RAPTOR_ATOM_AUTHOR,
  /* atom link */
  RAPTOR_ATOM_LINK,

  /* itunes owner */
  RAPTOR_ITUNES_OWNER,

  /* containers but IGNORED */
  RAPTOR_RSS_SKIPHOURS,
  RAPTOR_RSS_SKIPDAYS,

/* metadata BLOCKs */
  RAPTOR_RSS_ENCLOSURE,
  RAPTOR_ATOM_CATEGORY,
  RAPTOR_RSS_SOURCE,

/* serializing containers */
  RAPTOR_ATOM_FEED,
  RAPTOR_ATOM_ENTRY,

  /* nothing found yet; also the terminator entry of the info tables */
  RAPTOR_RSS_NONE,

  /* deliberately not counting NONE: number of real types above */
  RAPTOR_RSS_COMMON_SIZE = RAPTOR_RSS_NONE - RAPTOR_RSS_CHANNEL,
  /* alias for the first of the "containers but IGNORED" types */
  RAPTOR_RSS_COMMON_IGNORED = RAPTOR_RSS_SKIPHOURS
} raptor_rss_type;
CONTENT_NS = 9, + ATOM1_0_NS = 10, + RDF_NS = 11, + ATOMTRIPLES_NS = 12, + ITUNES_NS = 13, + + RAPTOR_RSS_NAMESPACES_SIZE = ITUNES_NS + 1 +} rss_info_namespace; + + +typedef struct { + const char *uri_string; + const char *prefix; +} raptor_rss_namespace_info; + + +extern const raptor_rss_namespace_info raptor_rss_namespaces_info[RAPTOR_RSS_NAMESPACES_SIZE]; + +#define RAPTOR_RSS_INFO_FLAG_URI_VALUE 1 +#define RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE 2 + +/* Namespaced elements used in feeds */ +typedef struct { + const char* name; + rss_info_namespace nspace; + int flags; + raptor_rss_type block_type; +} raptor_rss_field_info; + +/* Fields of typed nodes used in RSS */ +typedef enum { + RAPTOR_RSS_FIELD_TITLE, + RAPTOR_RSS_FIELD_LINK, + RAPTOR_RSS_FIELD_DESCRIPTION, + RAPTOR_RSS_FIELD_URL, /* image */ + RAPTOR_RSS_FIELD_NAME, /* textinput */ + RAPTOR_RSS_FIELD_LANGUAGE, /* channel 0.91 */ + RAPTOR_RSS_FIELD_RATING, /* channel 0.91 */ + RAPTOR_RSS_FIELD_COPYRIGHT, /* channel 0.91 */ + RAPTOR_RSS_FIELD_PUBDATE, /* channel 0.91, item 2.0 */ + RAPTOR_RSS_FIELD_LASTBUILDDATE, /* channel 0.91 */ + RAPTOR_RSS_FIELD_DOCS, /* channel 0.91 */ + RAPTOR_RSS_FIELD_MANAGINGEDITOR,/* channel 0.91 */ + RAPTOR_RSS_FIELD_WEBMASTER, /* channel 0.91 */ + RAPTOR_RSS_FIELD_CLOUD, /* channel 0.92, 2.0 */ + RAPTOR_RSS_FIELD_TTL, /* channel 2.0 */ + RAPTOR_RSS_FIELD_WIDTH, /* image 0.91 */ + RAPTOR_RSS_FIELD_HEIGHT, /* image 0.91 */ + RAPTOR_RSS_FIELD_HOUR, /* skipHours 0.91 */ + RAPTOR_RSS_FIELD_DAY, /* skipDays 0.91 */ + RAPTOR_RSS_FIELD_GENERATOR, /* channel 0.92, 2.0 */ + RAPTOR_RSS_FIELD_SOURCE, /* item 0.92, 2.0 */ + RAPTOR_RSS_FIELD_AUTHOR, /* item 2.0 */ + RAPTOR_RSS_FIELD_GUID, /* item 2.0 */ + RAPTOR_RSS_FIELD_ENCLOSURE, /* item 0.92, 2.0 */ + RAPTOR_RSS_RDF_ENCLOSURE, /* In RDF output, not an RSS field */ + RAPTOR_RSS_RDF_ENCLOSURE_CLASS, /* In RDF output, not an RSS field */ + RAPTOR_RSS_RDF_ENCLOSURE_URL, /* In RDF output, not an RSS field */ + RAPTOR_RSS_RDF_ENCLOSURE_LENGTH, /* In 
RDF output, not an RSS field */ + RAPTOR_RSS_RDF_ENCLOSURE_TYPE, /* In RDF output, not an RSS field */ + RAPTOR_RSS_FIELD_LENGTH, /* item 0.92, 2.0 */ + RAPTOR_RSS_FIELD_TYPE, /* item 0.92, 2.0 */ + RAPTOR_RSS_FIELD_CATEGORY, /* item 0.92, 2.0, channel 2.0 */ + RAPTOR_RSS_FIELD_COMMENTS, /* comments v? */ + RAPTOR_RSS_FIELD_ITEMS, /* rss 1.0 items */ + RAPTOR_RSS_FIELD_IMAGE, /* rss 1.0 property from channel->image) */ + RAPTOR_RSS_FIELD_TEXTINPUT, /* rss 1.0 property from channel->textinput */ + + RAPTOR_RSS_FIELD_ATOM_COPYRIGHT, /* atom 0.3 copyright */ + RAPTOR_RSS_FIELD_ATOM_CREATED, /* atom 0.3 created */ + RAPTOR_RSS_FIELD_ATOM_ISSUED, /* atom 0.3 issued */ + RAPTOR_RSS_FIELD_ATOM_MODIFIED, /* atom 0.3 modified */ + RAPTOR_RSS_FIELD_ATOM_TAGLINE, /* atom 0.3 tagline */ + + /* atom 1.0 required fields */ + RAPTOR_RSS_FIELD_ATOM_ID, /* atom 1.0 id */ + RAPTOR_RSS_FIELD_ATOM_TITLE, /* atom 1.0 title */ + RAPTOR_RSS_FIELD_ATOM_UPDATED, /* atom 1.0 updated */ + /* atom 1.0 optional fields */ + RAPTOR_RSS_FIELD_ATOM_AUTHOR, /* atom 1.0 author */ + RAPTOR_RSS_FIELD_ATOM_CATEGORY, /* atom 1.0 category */ + RAPTOR_RSS_FIELD_ATOM_CONTENT, /* atom 1.0 content */ + RAPTOR_RSS_FIELD_ATOM_CONTRIBUTOR, /* atom 1.0 contributor */ + RAPTOR_RSS_FIELD_ATOM_EMAIL, /* atom 1.0 email */ + RAPTOR_RSS_FIELD_ATOM_ENTRY, /* atom 1.0 entry */ + RAPTOR_RSS_FIELD_ATOM_FEED, /* atom 1.0 feed */ + RAPTOR_RSS_FIELD_ATOM_GENERATOR, /* atom 1.0 generator */ + RAPTOR_RSS_FIELD_ATOM_ICON, /* atom 1.0 icon */ + RAPTOR_RSS_FIELD_ATOM_LINK, /* atom 1.0 link */ + RAPTOR_RSS_FIELD_ATOM_LOGO, /* atom 1.0 logo */ + RAPTOR_RSS_FIELD_ATOM_NAME, /* atom 1.0 name */ + RAPTOR_RSS_FIELD_ATOM_PUBLISHED, /* atom 1.0 published */ + RAPTOR_RSS_FIELD_ATOM_RIGHTS, /* atom 1.0 rights */ + RAPTOR_RSS_FIELD_ATOM_SOURCE, /* atom 1.0 source */ + RAPTOR_RSS_FIELD_ATOM_SUBTITLE, /* atom 1.0 subtitle */ + RAPTOR_RSS_FIELD_ATOM_SUMMARY, /* atom 1.0 summary */ + RAPTOR_RSS_FIELD_ATOM_URI, /* atom 1.0 uri */ + + 
RAPTOR_RSS_RDF_ATOM_AUTHOR_CLASS, /* In RDF output, not atom field */ + RAPTOR_RSS_RDF_ATOM_CATEGORY_CLASS, /* In RDF output, not atom field */ + RAPTOR_RSS_RDF_ATOM_LINK_CLASS, /* In RDF output, not atom field */ + + RAPTOR_RSS_FIELD_ATOM_LABEL, /* atom 1.0 attribute label */ + RAPTOR_RSS_FIELD_ATOM_SCHEME, /* atom 1.0 attribute scheme */ + RAPTOR_RSS_FIELD_ATOM_TERM, /* atom 1.0 attribute term */ + RAPTOR_RSS_FIELD_ATOM_HREF, /* atom 1.0 attribute term */ + RAPTOR_RSS_FIELD_ATOM_REL, /* atom 1.0 attribute term */ + RAPTOR_RSS_FIELD_ATOM_TYPE, /* atom 1.0 attribute term */ + RAPTOR_RSS_FIELD_ATOM_HREFLANG, /* atom 1.0 attribute term */ + RAPTOR_RSS_FIELD_ATOM_LENGTH, /* atom 1.0 attribute term */ + + RAPTOR_RSS_FIELD_DC_TITLE, /* DC title */ + RAPTOR_RSS_FIELD_DC_CONTRIBUTOR, /* DC contributor */ + RAPTOR_RSS_FIELD_DC_CREATOR, /* DC creator */ + RAPTOR_RSS_FIELD_DC_PUBLISHER, /* DC publisher */ + RAPTOR_RSS_FIELD_DC_SUBJECT, /* DC subject */ + RAPTOR_RSS_FIELD_DC_DESCRIPTION, /* DC description */ + RAPTOR_RSS_FIELD_DC_DATE, /* DC date */ + RAPTOR_RSS_FIELD_DC_TYPE, /* DC type */ + RAPTOR_RSS_FIELD_DC_FORMAT, /* DC format */ + RAPTOR_RSS_FIELD_DC_IDENTIFIER, /* DC identifier */ + RAPTOR_RSS_FIELD_DC_LANGUAGE, /* DC language */ + RAPTOR_RSS_FIELD_DC_RELATION, /* DC relation */ + RAPTOR_RSS_FIELD_DC_SOURCE, /* DC source */ + RAPTOR_RSS_FIELD_DC_COVERAGE, /* DC coverage */ + RAPTOR_RSS_FIELD_DC_RIGHTS, /* DC rights */ + + + RAPTOR_RSS_FIELD_CONTENT_ENCODED, /* rss 1.0 module content:encoded */ + + RAPTOR_RSS_FIELD_AT_CONTENT_TYPE, /* at:contentType */ + + RAPTOR_RSS_FIELD_ITUNES_AUTHOR, + RAPTOR_RSS_FIELD_ITUNES_SUBTITLE, + RAPTOR_RSS_FIELD_ITUNES_SUMARY, + RAPTOR_RSS_FIELD_ITUNES_KEYWORDS, + RAPTOR_RSS_FIELD_ITUNES_EXPLICIT, + RAPTOR_RSS_FIELD_ITUNES_IMAGE, + RAPTOR_RSS_FIELD_ITUNES_NAME, + RAPTOR_RSS_FIELD_ITUNES_OWNER, + RAPTOR_RSS_FIELD_ITUNES_BLOCK, + RAPTOR_RSS_FIELD_ITUNES_CATEGORY, + RAPTOR_RSS_FIELD_ITUNES_EMAIL, + + RAPTOR_RSS_FIELD_UNKNOWN, + + 
RAPTOR_RSS_FIELD_NONE, + + RAPTOR_RSS_FIELDS_SIZE = RAPTOR_RSS_FIELD_UNKNOWN +} raptor_rss_fields_type; + +extern const raptor_rss_field_info raptor_rss_fields_info[RAPTOR_RSS_FIELDS_SIZE+2]; + +typedef struct raptor_rss_field_s raptor_rss_field; + +typedef int (*raptor_rss_field_conversion)(raptor_rss_field* from_field, raptor_rss_field* to_field); + +typedef struct { + raptor_rss_fields_type from; + raptor_rss_fields_type to; + raptor_rss_field_conversion conversion; +} raptor_field_pair; + +extern const raptor_field_pair raptor_atom_to_rss[]; + + +#define RAPTOR_RSS_LINK_HREF_URL_OFFSET 0 +#define RAPTOR_RSS_LINK_REL_STRING_OFFSET 0 + +#define RSS_BLOCK_FIELD_TYPE_URL 0 +#define RSS_BLOCK_FIELD_TYPE_STRING 1 + +#define RSS_BLOCK_MAX_URLS 1 +#define RSS_BLOCK_MAX_STRINGS 5 + +/* Feed metadata blocks support (was raptor_rss_enclosure) */ +struct raptor_rss_block_s +{ + raptor_rss_type rss_type; + + /* enclosure: subject node URI/blank node */ + raptor_term *identifier; + + /* enclosure: node RAPTOR_RSS_ENCLOSURE + category: node RAPTOR_ATOM_CATEGORY + person: node RAPTOR_ATOM_AUTHOR or RAPTOR_ATOM_CONTRIBUTOR + link: node RAPTOR_ATOM_LINK + */ + raptor_uri *node_type; + + /* enclosure: 0: where enclosure is located - @url attr (required) + atom category: 0: @scheme attr (optional) + rss category: 0: @domain attr (optional) + rss source: 0: @url attr (required) + person: 0: @atom:uri attr (optional) + link: 0: @href attr (required) + */ + raptor_uri *urls[RSS_BLOCK_MAX_URLS]; + + /* enclosure: 0: content length @length attr (required) + 1: content type @type attr (required) + atom category: 0: @term attr (required) + 1: @label attr (optional) + person: 0: @atom:name attr (required) + 1: @atom:email attr (optional) + link: 0: @length attr (optional) + 1: @type attr (optional) + 2: @rel attr (optional) + 3: @hreflang attr (optional) + 4: @title attr (optional) + */ + char *strings[RSS_BLOCK_MAX_STRINGS]; + + /* next in list */ + struct raptor_rss_block_s* next; +}; 
/* RSS items (instances of typed nodes) containing fields */
struct raptor_rss_item_s
{
  raptor_world* world;
  /* NOTE(review): presumably the item's URI before it is turned into
   * @term - confirm against raptor_rss_item_set_uri() */
  raptor_uri *uri;
  /* term used as the subject when the item's triples are emitted */
  raptor_term* term;
  /* entry of raptor_rss_items_info[] describing this item's type */
  const raptor_rss_item_info *node_type;
  /* the same type as an index (a raptor_rss_type value) */
  int node_typei;
  /* per-field linked lists (via field->next), indexed by
   * raptor_rss_fields_type */
  raptor_rss_field* fields[RAPTOR_RSS_FIELDS_SIZE];
  /* linked list (via block->next) of metadata blocks of this item */
  raptor_rss_block* blocks;
  /* items with a count of 0 are skipped when emitting */
  int fields_count;
  /* next item in the list this item belongs to */
  struct raptor_rss_item_s* next;
  /* Triples with this item as subject and do not fit in @fields */
  raptor_sequence* triples;
};
typedef struct raptor_rss_item_s raptor_rss_item;
/*
 * raptor_rss_model - in-memory model of one feed: the typed "common"
 * nodes plus the ordered list of feed items.
 */
typedef struct {
  raptor_world* world;

  /* One linked list (via item->next) per raptor_rss_type:
   * RAPTOR_RSS_CHANNEL, RAPTOR_RSS_IMAGE, RAPTOR_RSS_TEXTINPUT, ... */
  raptor_rss_item* common[RAPTOR_RSS_COMMON_SIZE];

  /* list of items RAPTOR_RSS_ITEM */
  raptor_rss_item* items;

  /* this points to the last one added, so we can append easy */
  raptor_rss_item* last;

  /* item count */
  int items_count;

  /* concept URIs; slot 0 is read through RAPTOR_RSS_RSS_items_URI() */
  raptor_uri* concepts[RAPTOR_RSS_N_CONCEPTS];

  raptor_namespace_stack *nstack;

} raptor_rss_model;
raptor_rss_date_uplift(raptor_rss_field* to_field, const unsigned char *date_string); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/src/raptor_rss_common.c b/src/raptor_rss_common.c new file mode 100644 index 0000000..b6db424 --- /dev/null +++ b/src/raptor_rss_common.c @@ -0,0 +1,732 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_rss_common.c - Raptor Feeds (RSS and Atom) common code + * + * Copyright (C) 2003-2009, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
/*
 * Namespace URI and preferred prefix for each rss_info_namespace value.
 *
 * The table is positional: entry N describes the namespace whose enum
 * value is N, so it must be kept in the same order as rss_info_namespace
 * in raptor_rss.h.  A NULL URI means there is no namespace; a NULL
 * prefix means no prefix is given here for that namespace.
 */
const raptor_rss_namespace_info raptor_rss_namespaces_info[RAPTOR_RSS_NAMESPACES_SIZE]={
  { NULL,                     NULL, },      /* RSS_UNKNOWN_NS */
  { NULL,                     NULL, },      /* RSS_NO_NS (also RSS 0.92, 2.0) */
  { RSS0_91_NAMESPACE_URI,    "rss091", },  /* RSS0_91_NS */
  { RSS0_9_NAMESPACE_URI,     NULL, },      /* RSS0_9_NS */
  { RSS1_0_NAMESPACE_URI,     "rss", },     /* RSS1_0_NS */
  { ATOM0_3_NAMESPACE_URI,    NULL, },      /* ATOM0_3_NS */
  { DC_NAMESPACE_URI,         "dc", },      /* DC_NS */
  { RSS2_0_ENC_NAMESPACE_URI, "enc", },     /* RSS2_0_ENC_NS */
  { RSS1_1_NAMESPACE_URI,     NULL, },      /* RSS1_1_NS */
  { CONTENT_NAMESPACE_URI,    "content", }, /* CONTENT_NS */
  { ATOM1_0_NAMESPACE_URI,    "atom", },    /* ATOM1_0_NS */
  { RDF_NAMESPACE_URI,        "rdf", },     /* RDF_NS */
  { ATOMTRIPLES_NAMESPACE_URI, "at", },     /* ATOMTRIPLES_NS */
  { ITUNES_NAMESPACE_URI,     "itunes", },  /* ITUNES_NS */
};
RAPTOR_RSS_FIELD_NONE, RAPTOR_RSS_FIELD_NONE }, + { "Enclosure", RSS2_0_ENC_NS, RAPTOR_RSS_ITEM_BLOCK, RAPTOR_RSS_RDF_ENCLOSURE_CLASS, RAPTOR_RSS_RDF_ENCLOSURE }, + { "category", ATOM1_0_NS, RAPTOR_RSS_ITEM_BLOCK, RAPTOR_RSS_RDF_ATOM_CATEGORY_CLASS, RAPTOR_RSS_FIELD_ATOM_CATEGORY }, + { "source" , RSS2_0_NS, RAPTOR_RSS_ITEM_BLOCK, RAPTOR_RSS_FIELD_SOURCE, RAPTOR_RSS_FIELD_NONE }, + { "feed", ATOM1_0_NS, RAPTOR_RSS_ITEM_CONTAINER, RAPTOR_RSS_FIELD_NONE, RAPTOR_RSS_FIELD_NONE }, + { "entry", ATOM1_0_NS, RAPTOR_RSS_ITEM_CONTAINER, RAPTOR_RSS_FIELD_NONE, RAPTOR_RSS_FIELD_NONE }, + { "<none>", RSS_UNKNOWN_NS, RAPTOR_RSS_ITEM_CONTAINER, RAPTOR_RSS_FIELD_NONE, RAPTOR_RSS_FIELD_NONE } +}; + + +const raptor_rss_field_info raptor_rss_fields_info[RAPTOR_RSS_FIELDS_SIZE+2]={ + { "title", RSS1_0_NS, 0 }, + { "link", RSS1_0_NS, 0 }, /* Actually a URI but RSS 1.0 spec wants this as an (XML & RDF) literal */ + { "description", RSS1_0_NS, 0 }, + { "url", RSS1_0_NS, 0 }, + { "name", RSS1_0_NS, 0 }, + { "language", RSS0_91_NS, 0 }, + { "rating", RSS0_91_NS, 0 }, + { "copyright", RSS0_91_NS, 0 }, + { "pubDate", RSS0_91_NS, 0 }, + { "lastBuildDate", RSS0_91_NS, 0 }, + { "docs", RSS0_91_NS, RAPTOR_RSS_INFO_FLAG_URI_VALUE }, + { "managingEditor", RSS0_91_NS, 0 }, + { "webMaster", RSS0_91_NS, 0 }, + { "cloud", RSS0_92_NS, 0 }, + { "ttl", RSS2_0_NS, 0 }, + { "width", RSS0_91_NS, 0 }, + { "height", RSS0_91_NS, 0 }, + { "hour", RSS0_91_NS, 0 }, + { "day", RSS0_91_NS, 0 }, + { "generator", RSS0_92_NS, 0 }, + { "source", RSS0_92_NS, 0 }, + { "author", RSS2_0_NS, 0 }, + { "guid", RSS2_0_NS, 0 }, + { "enclosure", RSS2_0_NS, RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE, RAPTOR_RSS_ENCLOSURE }, /* enclosure in RSS */ + { "enclosure", RSS2_0_ENC_NS, 0 }, /* RDF output predicate, not an RSS field */ + { "Enclosure", RSS2_0_ENC_NS, 0 }, /* RDF output class, not an RSS field */ + { "url", RSS2_0_ENC_NS, 0 }, /* In RDF output, not an RSS field */ + { "length", RSS2_0_ENC_NS, 0 }, /* In RDF output, not an RSS field 
*/ + { "type", RSS2_0_ENC_NS, 0 }, /* In RDF output, not an RSS field */ + { "length", RSS2_0_NS, 0 }, + { "type", RSS2_0_NS, 0 }, + { "category", RSS0_92_NS, 0 }, + { "comments", RSS0_92_NS, 0 }, + { "items", RSS1_0_NS, 0 }, + { "image", RSS1_0_NS, 0 }, + { "textinput", RSS1_0_NS, 0 }, + + { "copyright", ATOM0_3_NS, 0 }, + { "created", ATOM0_3_NS, 0 }, + { "issued", ATOM0_3_NS, 0 }, + { "modified", ATOM0_3_NS, 0 }, + { "tagline", ATOM0_3_NS, 0 }, + + /* atom 1.0 required fields */ + { "id", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_URI_VALUE }, + { "title", ATOM1_0_NS, 0 }, + { "updated", ATOM1_0_NS, 0 }, + /* atom 1.0 optional fields */ + { "author", ATOM1_0_NS, 0, RAPTOR_ATOM_AUTHOR }, + { "category", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE, RAPTOR_ATOM_CATEGORY }, + { "content", ATOM1_0_NS, 0 }, + { "contributor", ATOM1_0_NS, 0 }, + { "email", ATOM1_0_NS, 0 }, + { "entry", ATOM1_0_NS, 0 }, + { "feed", ATOM1_0_NS, 0 }, + { "generator", ATOM1_0_NS, 0 }, + { "icon", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_URI_VALUE }, + { "link", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE, RAPTOR_ATOM_LINK }, + { "logo", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_URI_VALUE }, + { "name", ATOM1_0_NS, 0 }, + { "published", ATOM1_0_NS, 0 }, + { "rights", ATOM1_0_NS, 0 }, + { "source", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_BLOCK_VALUE, RAPTOR_RSS_SOURCE }, + { "subtitle", ATOM1_0_NS, 0 }, + { "summary", ATOM1_0_NS, 0 }, + { "uri", ATOM1_0_NS, 0 }, + + { "Author", ATOM1_0_NS, 0 }, + { "Category", ATOM1_0_NS, 0 }, + { "Link", ATOM1_0_NS, 0 }, + + { "label", ATOM1_0_NS, 0 }, + { "scheme", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_URI_VALUE }, + { "term", ATOM1_0_NS, 0 }, + { "href", ATOM1_0_NS, RAPTOR_RSS_INFO_FLAG_URI_VALUE }, + { "rel", ATOM1_0_NS, 0 }, + { "type", ATOM1_0_NS, 0 }, + { "hreflang", ATOM1_0_NS, 0 }, + { "length", ATOM1_0_NS, 0 }, + + { "title", DC_NS, 0 }, + { "contributor", DC_NS, 0 }, + { "creator", DC_NS, 0 }, + { "publisher", DC_NS, 0 }, + { "subject", DC_NS, 0 }, + { "description", DC_NS, 0 }, + { 
"date", DC_NS, 0 }, + { "type", DC_NS, 0 }, + { "format", DC_NS, 0 }, + { "identifier", DC_NS, 0 }, + { "language", DC_NS, 0 }, + { "relation", DC_NS, 0 }, + { "source", DC_NS, 0 }, + { "coverage", DC_NS, 0 }, + { "rights", DC_NS, 0 }, + + { "encoded", CONTENT_NS, 0 }, + + { "contentType", ATOMTRIPLES_NS, 0 }, + + { "author", ITUNES_NS, 0 }, + { "subtitle", ITUNES_NS, 0 }, + { "summary", ITUNES_NS, 0 }, + { "keywords", ITUNES_NS, 0 }, + { "explicit", ITUNES_NS, 0 }, + { "image", ITUNES_NS, 0 }, + { "name", ITUNES_NS, 0 }, + { "owner", ITUNES_NS, 0 }, + { "block", ITUNES_NS, 0 }, + { "category", ITUNES_NS, 0 }, + { "email", ITUNES_NS, 0 }, + + + { "<unknown>", RSS_UNKNOWN_NS, 0 }, + { "<none>", RSS_UNKNOWN_NS, 0 } +}; + + +/* FIeld mappings from atom fields to RSS/DC */ +const raptor_field_pair raptor_atom_to_rss[]={ + /* rss clone of atom fields */ + { RAPTOR_RSS_FIELD_ATOM_SUMMARY, RAPTOR_RSS_FIELD_DESCRIPTION }, + { RAPTOR_RSS_FIELD_ATOM_ID, RAPTOR_RSS_FIELD_LINK }, + { RAPTOR_RSS_FIELD_ATOM_UPDATED, RAPTOR_RSS_FIELD_DC_DATE }, + { RAPTOR_RSS_FIELD_ATOM_RIGHTS, RAPTOR_RSS_FIELD_DC_RIGHTS }, + { RAPTOR_RSS_FIELD_ATOM_TITLE, RAPTOR_RSS_FIELD_TITLE }, + { RAPTOR_RSS_FIELD_ATOM_SUMMARY, RAPTOR_RSS_FIELD_CONTENT_ENCODED }, + + /* atom 0.3 to atom 1.0 */ + { RAPTOR_RSS_FIELD_ATOM_COPYRIGHT, RAPTOR_RSS_FIELD_ATOM_RIGHTS }, + { RAPTOR_RSS_FIELD_ATOM_TAGLINE, RAPTOR_RSS_FIELD_ATOM_SUBTITLE }, + +#if 0 + /* other old atom 0.3 fields - IGNORED */ + { RAPTOR_RSS_FIELD_ATOM_CREATED, RAPTOR_RSS_FIELD_UNKNOWN }, + { RAPTOR_RSS_FIELD_ATOM_ISSUED, RAPTOR_RSS_FIELD_UNKNOWN }, + { RAPTOR_RSS_FIELD_ATOM_MODIFIED, RAPTOR_RSS_FIELD_UNKNOWN }, +#endif + +#ifdef RAPTOR_PARSEDATE_FUNCTION + /* convert to ISO date */ + { RAPTOR_RSS_FIELD_PUBDATE, RAPTOR_RSS_FIELD_DC_DATE, + &raptor_rss_field_conversion_date_uplift }, +#endif + + /* rss content encoded */ + { RAPTOR_RSS_FIELD_DESCRIPTION, RAPTOR_RSS_FIELD_CONTENT_ENCODED }, + + { RAPTOR_RSS_FIELD_UNKNOWN, RAPTOR_RSS_FIELD_UNKNOWN } +}; + + 
+const raptor_rss_block_field_info raptor_rss_block_fields_info[RAPTOR_RSS_BLOCKS_SIZE+1] = { + /* + RSS 2 <enclosure> - optional element inside <item> + attributes: + url (required): where the enclosure is located. url + length (required): how big enclosure it is in bytes. integer + type (required): what enclosure type is as a standard MIME type. string + content: empty + */ + { RAPTOR_RSS_ENCLOSURE, "url", RSS_BLOCK_FIELD_TYPE_URL, 0, RAPTOR_RSS_RDF_ENCLOSURE_URL }, + { RAPTOR_RSS_ENCLOSURE, "length", RSS_BLOCK_FIELD_TYPE_STRING, 0, RAPTOR_RSS_RDF_ENCLOSURE_LENGTH }, + { RAPTOR_RSS_ENCLOSURE, "type", RSS_BLOCK_FIELD_TYPE_STRING, 1, RAPTOR_RSS_RDF_ENCLOSURE_TYPE }, + + /* + RSS 2 <source> - optional element inside <item> + attributes: + url (required): location of source. url + content: source name. string + */ + { RAPTOR_RSS_SOURCE, "url", RSS_BLOCK_FIELD_TYPE_URL, 0 }, + + /* + Atom <category> - optional element inside <entry> + attributes: + term (required): the category. string + scheme (optional): categorization scheme. url + label (optional): human-readable label. string + content: empty + */ + { RAPTOR_ATOM_CATEGORY, "term", RSS_BLOCK_FIELD_TYPE_STRING, 0, RAPTOR_RSS_FIELD_ATOM_TERM }, + { RAPTOR_ATOM_CATEGORY, "scheme", RSS_BLOCK_FIELD_TYPE_URL, 0, RAPTOR_RSS_FIELD_ATOM_SCHEME }, + { RAPTOR_ATOM_CATEGORY, "label", RSS_BLOCK_FIELD_TYPE_STRING, 1, RAPTOR_RSS_FIELD_ATOM_LABEL }, + + /* + Atom <link> - optional element inside <entry> + attributes: + href (required): . url + rel (optional): . string + type (optional): . string + hreflang (optional): . string + title (optional): . string + length (optional): . 
string + content: empty + */ + { RAPTOR_ATOM_LINK, "href", RSS_BLOCK_FIELD_TYPE_URL, RAPTOR_RSS_LINK_HREF_URL_OFFSET, RAPTOR_RSS_FIELD_ATOM_HREF }, + { RAPTOR_ATOM_LINK, "rel", RSS_BLOCK_FIELD_TYPE_STRING, RAPTOR_RSS_LINK_REL_STRING_OFFSET, RAPTOR_RSS_FIELD_ATOM_REL }, + { RAPTOR_ATOM_LINK, "type", RSS_BLOCK_FIELD_TYPE_STRING, 1, RAPTOR_RSS_FIELD_ATOM_TYPE }, + { RAPTOR_ATOM_LINK, "hreflang", RSS_BLOCK_FIELD_TYPE_STRING, 2, RAPTOR_RSS_FIELD_ATOM_HREFLANG }, + { RAPTOR_ATOM_LINK, "title", RSS_BLOCK_FIELD_TYPE_STRING, 3, RAPTOR_RSS_FIELD_ATOM_TITLE }, + { RAPTOR_ATOM_LINK, "length", RSS_BLOCK_FIELD_TYPE_STRING, 4, RAPTOR_RSS_FIELD_ATOM_LENGTH }, + { RAPTOR_ATOM_LINK, NULL, RSS_BLOCK_FIELD_TYPE_URL, 0, RAPTOR_RSS_FIELD_ATOM_HREF }, + + /* sentinel */ + { RAPTOR_RSS_NONE, NULL, 0, 0 } +}; + + +const unsigned char * const raptor_atom_namespace_uri = (const unsigned char *)"http://www.w3.org/2005/Atom"; + + + +int +raptor_rss_common_init(raptor_world* world) { + int i; + raptor_uri *namespace_uri; + + if(world->rss_common_initialised++) + return 0; + + world->rss_namespaces_info_uris = RAPTOR_CALLOC(raptor_uri**, + RAPTOR_RSS_NAMESPACES_SIZE, + sizeof(raptor_uri*)); + if(!world->rss_namespaces_info_uris) + return -1; + for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE;i++) { + const char *uri_string = raptor_rss_namespaces_info[i].uri_string; + if(uri_string) { + world->rss_namespaces_info_uris[i] = raptor_new_uri(world, (const unsigned char*)uri_string); + if(!world->rss_namespaces_info_uris[i]) + return -1; + } + } + + world->rss_types_info_uris = RAPTOR_CALLOC(raptor_uri**, + RAPTOR_RSS_COMMON_SIZE, + sizeof(raptor_uri*)); + if(!world->rss_types_info_uris) + return -1; + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + int n = raptor_rss_items_info[i].nspace; + namespace_uri = world->rss_namespaces_info_uris[n]; + if(namespace_uri) { + world->rss_types_info_uris[i] = raptor_new_uri_from_uri_local_name(world, namespace_uri, (const unsigned char*)raptor_rss_items_info[i].name); + 
if(!world->rss_types_info_uris[i]) + return -1; + } + } + + world->rss_fields_info_uris = RAPTOR_CALLOC(raptor_uri**, + RAPTOR_RSS_FIELDS_SIZE, + sizeof(raptor_uri*)); + if(!world->rss_fields_info_uris) + return -1; + for(i = 0; i< RAPTOR_RSS_FIELDS_SIZE; i++) { + namespace_uri = world->rss_namespaces_info_uris[raptor_rss_fields_info[i].nspace]; + if(namespace_uri) { + world->rss_fields_info_uris[i] = raptor_new_uri_from_uri_local_name(world, namespace_uri, + (const unsigned char*)raptor_rss_fields_info[i].name); + if(!world->rss_fields_info_uris[i]) + return -1; + } + } + + return 0; +} + + +void +raptor_rss_common_terminate(raptor_world* world) { + int i; + if(--world->rss_common_initialised) + return; + + if(world->rss_types_info_uris) { + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + if(world->rss_types_info_uris[i]) + raptor_free_uri(world->rss_types_info_uris[i]); + } + RAPTOR_FREE(raptor_uri* array, world->rss_types_info_uris); + world->rss_types_info_uris = NULL; + } + + if(world->rss_fields_info_uris) { + for(i = 0; i< RAPTOR_RSS_FIELDS_SIZE; i++) { + if(world->rss_fields_info_uris[i]) + raptor_free_uri(world->rss_fields_info_uris[i]); + } + RAPTOR_FREE(raptor_uri* array, world->rss_fields_info_uris); + world->rss_fields_info_uris = NULL; + } + + if(world->rss_namespaces_info_uris) { + for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE;i++) { + if(world->rss_namespaces_info_uris[i]) + raptor_free_uri(world->rss_namespaces_info_uris[i]); + } + RAPTOR_FREE(raptor_uri* array, world->rss_namespaces_info_uris); + world->rss_namespaces_info_uris = NULL; + } +} + + +void +raptor_rss_model_init(raptor_world* world, raptor_rss_model* rss_model) +{ + memset(rss_model->common, 0, + sizeof(raptor_rss_item*) * RAPTOR_RSS_COMMON_SIZE); + + rss_model->world = world; + + rss_model->last = rss_model->items = NULL; + rss_model->items_count = 0; + + RAPTOR_RSS_RSS_items_URI(rss_model) = raptor_new_uri_relative_to_base(world, world->rss_namespaces_info_uris[RSS1_0_NS], (const 
unsigned char*)"items"); +} + + +void +raptor_rss_model_clear(raptor_rss_model* rss_model) +{ + int i; + raptor_rss_item* item; + + for(i = 0; i< RAPTOR_RSS_COMMON_SIZE; i++) { + item = rss_model->common[i]; + while(item) { + raptor_rss_item *next = item->next; + raptor_free_rss_item(item); + item = next; + } + } + + item = rss_model->items; + while(item) { + raptor_rss_item *next = item->next; + + raptor_free_rss_item(item); + item = next; + } + rss_model->last = rss_model->items = NULL; + + for(i = 0; i< RAPTOR_RSS_N_CONCEPTS; i++) { + raptor_uri* concept_uri = rss_model->concepts[i]; + if(concept_uri) { + raptor_free_uri(concept_uri); + rss_model->concepts[i] = NULL; + } + } +} + + +raptor_rss_item* +raptor_new_rss_item(raptor_world* world) +{ + raptor_rss_item* item; + + item = RAPTOR_CALLOC(raptor_rss_item*, 1, sizeof(*item)); + if(!item) + return NULL; + + item->world = world; + item->triples = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, (raptor_data_print_handler)raptor_statement_print); + if(!item->triples) { + RAPTOR_FREE(raptor_rss_item, item); + return NULL; + } + return item; +} + + +int +raptor_rss_model_add_item(raptor_rss_model* rss_model) +{ + raptor_rss_item* item; + + item = raptor_new_rss_item(rss_model->world); + if(!item) + return 1; + + /* new list */ + if(!rss_model->items) + rss_model->items = item; + + /* join last item to this one */ + if(rss_model->last) + rss_model->last->next = item; + + /* this is now the last item */ + rss_model->last = item; + rss_model->items_count++; + + RAPTOR_DEBUG2("Added item %d\n", rss_model->items_count); + + return 0; +} + + +raptor_rss_item* +raptor_rss_model_add_common(raptor_rss_model* rss_model, + raptor_rss_type type) +{ + raptor_rss_item* item; + + item = raptor_new_rss_item(rss_model->world); + if(!item) + return NULL; + + if(rss_model->common[type] == NULL) { + RAPTOR_DEBUG3("Adding common type %u - %s\n", type, + raptor_rss_items_info[type].name); + rss_model->common[type] = 
item; + } else { + raptor_rss_item* next; + RAPTOR_DEBUG3("Appending common type %u - %s\n", type, + raptor_rss_items_info[type].name); + for(next = rss_model->common[type]; next->next; next = next->next) + ; + next->next = item; + } + return item; +} + + +raptor_rss_item* +raptor_rss_model_get_common(raptor_rss_model* rss_model, raptor_rss_type type) +{ + raptor_rss_item* item; + for(item = rss_model->common[type]; + item && item->next; + item = item->next) ; + return item; +} + + +void +raptor_free_rss_item(raptor_rss_item* item) +{ + int i; + for(i = 0; i< RAPTOR_RSS_FIELDS_SIZE; i++) { + if(item->fields[i]) + raptor_rss_field_free(item->fields[i]); + } + if(item->blocks) + raptor_free_rss_block(item->blocks); + if(item->uri) + raptor_free_uri(item->uri); + if(item->term) + raptor_free_term(item->term); + if(item->triples) + raptor_free_sequence(item->triples); + + RAPTOR_FREE(raptor_rss_item, item); +} + + +void +raptor_rss_item_add_block(raptor_rss_item* item, + raptor_rss_block *block) +{ + if(!item->blocks) { + RAPTOR_DEBUG1("Adding first block\n"); + item->blocks = block; + } else { + raptor_rss_block *cur; + + RAPTOR_DEBUG1("Adding subsequent block\n"); + for(cur = item->blocks; cur->next; cur = cur->next) + ; + cur->next = block; + } +} + + +void +raptor_rss_item_add_field(raptor_rss_item* item, int type, + raptor_rss_field* field) +{ + if(!item->fields[type]) { + RAPTOR_DEBUG3("Adding first type %d field %s\n", type, raptor_rss_fields_info[type].name); + item->fields_count++; + item->fields[type] = field; + } else { + raptor_rss_field* cur; + + RAPTOR_DEBUG1("Adding subsequent field\n"); + for(cur = item->fields[type]; cur->next; cur = cur->next) ; + cur->next = field; + } +} + + +int +raptor_rss_item_equals_statement_subject(const raptor_rss_item *item, + const raptor_statement *statement) +{ + return raptor_term_equals(statement->subject, item->term); +} + + +int +raptor_rss_item_set_uri(raptor_rss_item *item, raptor_uri* uri) +{ + RAPTOR_DEBUG3("Set 
node %p to URI <%s>\n", RAPTOR_VOIDP(item), + raptor_uri_as_string(uri)); + + item->uri = raptor_uri_copy(uri); + if(!item->uri) + return 1; + + item->term = raptor_new_term_from_uri(item->world, item->uri); + return 0; +} + + +/* + * raptor_new_rss_block: + * @world: world + * @type: RSS block type + * @block_term: Block subject term (shared) + * + * INTERNAL - Create a new RSS Block such as <author> etc + * + * Return value: new RSS block or NULL on failure + */ +raptor_rss_block* +raptor_new_rss_block(raptor_world* world, raptor_rss_type type, + raptor_term* block_term) +{ + raptor_rss_block *block; + block = RAPTOR_CALLOC(raptor_rss_block*, 1, sizeof(*block)); + + if(block) { + block->rss_type = type; + block->node_type = world->rss_types_info_uris[type]; + block->identifier = raptor_term_copy(block_term); + } + + return block; +} + + +void +raptor_free_rss_block(raptor_rss_block *block) +{ + int i; + + for(i = 0; i < RSS_BLOCK_MAX_URLS; i++) { + if(block->urls[i]) + raptor_free_uri(block->urls[i]); + } + + for(i = 0; i < RSS_BLOCK_MAX_STRINGS; i++) { + if(block->strings[i]) + RAPTOR_FREE(char*, block->strings[i]); + } + + if(block->next) + raptor_free_rss_block(block->next); + + if(block->identifier) + raptor_free_term(block->identifier); + + RAPTOR_FREE(raptor_rss_block, block); +} + + +raptor_rss_field* +raptor_rss_new_field(raptor_world* world) +{ + raptor_rss_field* field = RAPTOR_CALLOC(raptor_rss_field*, 1, sizeof(*field)); + if(field) + field->world = world; + return field; +} + + +void +raptor_rss_field_free(raptor_rss_field* field) +{ + if(field->value) + RAPTOR_FREE(char*, field->value); + if(field->uri) + raptor_free_uri(field->uri); + if(field->next) + raptor_rss_field_free(field->next); + RAPTOR_FREE(raptor_rss_field, field); +} + + +#define RAPTOR_ISO_DATE_FORMAT "%Y-%m-%dT%H:%M:%SZ" + +int +raptor_rss_format_iso_date(char* buffer, size_t len, time_t unix_time) +{ + struct tm* structured_time; + + if(len < RAPTOR_ISO_DATE_LEN) + return 1; + + 
structured_time = gmtime(&unix_time); + strftime(buffer, len+1, RAPTOR_ISO_DATE_FORMAT, structured_time); + + return 0; +} + + +int +raptor_rss_set_date_field(raptor_rss_field* field, time_t unix_time) +{ + size_t len = RAPTOR_ISO_DATE_LEN; + + if(field->value) + RAPTOR_FREE(char*, field->value); + field->value = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!field->value) + return 1; + + if(raptor_rss_format_iso_date((char*)field->value, len, unix_time)) { + RAPTOR_FREE(char*, field->value); + return 1; + } + + return 0; +} + + +static int +raptor_rss_field_conversion_date_uplift(raptor_rss_field* from_field, + raptor_rss_field* to_field) +{ +#ifdef RAPTOR_PARSEDATE_FUNCTION + time_t unix_time; + char *date_string = (char*)from_field->value; + + if(!date_string) + return 1; + + unix_time = RAPTOR_PARSEDATE_FUNCTION(date_string, NULL); + if(unix_time < 0) + return 1; + + return raptor_rss_set_date_field(to_field, unix_time); +#else + return 1; +#endif +} diff --git a/src/raptor_sax2.c b/src/raptor_sax2.c new file mode 100644 index 0000000..450f7ac --- /dev/null +++ b/src/raptor_sax2.c @@ -0,0 +1,1026 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_sax2.c - Raptor SAX2 API + * + * Copyright (C) 2000-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2000-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. 
+ * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* Define this for far too much output */ +#undef RAPTOR_DEBUG_CDATA + + +int +raptor_sax2_init(raptor_world* world) +{ + return 0; +} + + +void +raptor_sax2_finish(raptor_world* world) +{ +} + + +/** + * raptor_new_sax2: + * @world: raptor world + * @locator: raptor locator to use for errors + * @user_data: pointer context information to pass to SAX handlers + * + * Constructor - Create a new SAX2 with error handlers + * + * Return value: new #raptor_sax2 object or NULL on failure + */ +raptor_sax2* +raptor_new_sax2(raptor_world *world, raptor_locator *locator, + void* user_data) +{ + raptor_sax2* sax2; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!locator) + return NULL; + + raptor_world_open(world); + + sax2 = RAPTOR_CALLOC(raptor_sax2*, 1, sizeof(*sax2)); + if(!sax2) + return NULL; + +#ifdef RAPTOR_XML_LIBXML + sax2->magic = RAPTOR_LIBXML_MAGIC; +#endif + + sax2->world = world; + sax2->locator = locator; + sax2->user_data = user_data; + + sax2->enabled = 1; + + raptor_object_options_init(&sax2->options, RAPTOR_OPTION_AREA_SAX2); + + return sax2; +} + + +/** + * raptor_free_sax2: + * @sax2: SAX2 object + * + * Destructor - destroy a SAX2 object + */ +void +raptor_free_sax2(raptor_sax2 *sax2) +{ + raptor_xml_element *xml_element; + + if(!sax2) + return; + +#ifdef RAPTOR_XML_LIBXML + if(sax2->xc) { + raptor_libxml_free(sax2->xc); + sax2->xc = NULL; + } +#endif + + while( (xml_element = 
raptor_xml_element_pop(sax2)) ) + raptor_free_xml_element(xml_element); + + raptor_namespaces_clear(&sax2->namespaces); + + if(sax2->base_uri) + raptor_free_uri(sax2->base_uri); + + raptor_object_options_clear(&sax2->options); + + RAPTOR_FREE(raptor_sax2, sax2); +} + + +/** + * raptor_sax2_set_start_element_handler: + * @sax2: SAX2 object + * @handler: start element handler + * + * Set SAX2 start element handler. + */ +void +raptor_sax2_set_start_element_handler(raptor_sax2* sax2, + raptor_sax2_start_element_handler handler) +{ + sax2->start_element_handler = handler; +} + + +/** + * raptor_sax2_set_end_element_handler: + * @sax2: SAX2 object + * @handler: end element handler + * + * Set SAX2 end element handler. + */ +void +raptor_sax2_set_end_element_handler(raptor_sax2* sax2, + raptor_sax2_end_element_handler handler) +{ + sax2->end_element_handler = handler; +} + + +/** + * raptor_sax2_set_characters_handler: + * @sax2: SAX2 object + * @handler: characters handler + * + * Set SAX2 characters handler. + */ +void +raptor_sax2_set_characters_handler(raptor_sax2* sax2, + raptor_sax2_characters_handler handler) +{ + sax2->characters_handler = handler; +} + + +/** + * raptor_sax2_set_cdata_handler: + * @sax2: SAX2 object + * @handler: CDATA handler + * + * Set SAX2 CDATA handler. + */ +void +raptor_sax2_set_cdata_handler(raptor_sax2* sax2, + raptor_sax2_cdata_handler handler) +{ + sax2->cdata_handler = handler; +} + + +/** + * raptor_sax2_set_comment_handler: + * @sax2: SAX2 object + * @handler: comment handler + * + * Set SAX2 XML comment handler. + */ +void +raptor_sax2_set_comment_handler(raptor_sax2* sax2, + raptor_sax2_comment_handler handler) +{ + sax2->comment_handler = handler; +} + + +/** + * raptor_sax2_set_unparsed_entity_decl_handler: + * @sax2: SAX2 object + * @handler: unparsed entity declaration handler + * + * Set SAX2 XML unparsed entity declaration handler. 
+ */ +void +raptor_sax2_set_unparsed_entity_decl_handler(raptor_sax2* sax2, + raptor_sax2_unparsed_entity_decl_handler handler) +{ + sax2->unparsed_entity_decl_handler = handler; +} + + +/** + * raptor_sax2_set_external_entity_ref_handler: + * @sax2: SAX2 object + * @handler: entity reference handler + * + * Set SAX2 XML entity reference handler. + */ +void +raptor_sax2_set_external_entity_ref_handler(raptor_sax2* sax2, + raptor_sax2_external_entity_ref_handler handler) +{ + sax2->external_entity_ref_handler = handler; +} + + +/** + * raptor_sax2_set_namespace_handler: + * @sax2: #raptor_sax2 object + * @handler: new namespace callback function + * + * Set the XML namespace handler function. + * + * When a prefix/namespace is seen in an XML parser, call the given + * @handler with the prefix string and the #raptor_uri namespace URI. + * Either can be NULL for the default prefix or default namespace. + * + * The handler function does not deal with duplicates so any + * namespace may be declared multiple times when a namespace is seen + * in different parts of a document. + * + */ +void +raptor_sax2_set_namespace_handler(raptor_sax2* sax2, + raptor_namespace_handler handler) +{ + sax2->namespace_handler = handler; +} + + +raptor_xml_element* +raptor_xml_element_pop(raptor_sax2 *sax2) +{ + raptor_xml_element *element = sax2->current_element; + + if(!element) + return NULL; + + sax2->current_element = element->parent; + if(sax2->root_element == element) /* just deleted root */ + sax2->root_element = NULL; + + return element; +} + + +void +raptor_xml_element_push(raptor_sax2 *sax2, raptor_xml_element* element) +{ + element->parent = sax2->current_element; + sax2->current_element = element; + if(!sax2->root_element) + sax2->root_element = element; +} + + +/** + * raptor_xml_element_is_empty: + * @xml_element: XML Element + * + * Check if an XML Element is empty. + * + * Return value: non-0 if the element is empty. 
+ */ +int +raptor_xml_element_is_empty(raptor_xml_element* xml_element) +{ + return !xml_element->content_cdata_seen && + !xml_element->content_element_seen; +} + + +/** + * raptor_sax2_inscope_xml_language: + * @sax2: SAX2 object + * + * Get the in-scope XML language + * + * The result is a language string which may be "" if xml:lang="" is + * given. NULL is returned only if there is no xml:lang in any outer + * scope. + * + * Return value: shared pointer to the XML language or NULL if none is in scope. + */ +const unsigned char* +raptor_sax2_inscope_xml_language(raptor_sax2 *sax2) +{ + raptor_xml_element* xml_element; + + for(xml_element = sax2->current_element; + xml_element; + xml_element = xml_element->parent) { + if(xml_element->xml_language) + return xml_element->xml_language; + } + + return NULL; +} + + +/** + * raptor_sax2_inscope_base_uri: + * @sax2: SAX2 object + * + * Get the in-scope base URI + * + * Return value: the in-scope base URI shared object or NULL if none is in scope. + */ +raptor_uri* +raptor_sax2_inscope_base_uri(raptor_sax2 *sax2) +{ + raptor_xml_element *xml_element; + + for(xml_element = sax2->current_element; + xml_element; + xml_element = xml_element->parent) + if(xml_element->base_uri) + return xml_element->base_uri; + + return sax2->base_uri; +} + + +/** + * raptor_sax2_set_uri_filter: + * @sax2: SAX2 object + * @filter: URI filter function + * @user_data: User data to pass to filter function + * + * Set URI filter function for SAX2 internal retrievals. 
+ **/ +void +raptor_sax2_set_uri_filter(raptor_sax2* sax2, + raptor_uri_filter_func filter, + void *user_data) +{ + sax2->uri_filter = filter; + sax2->uri_filter_user_data = user_data; +} + + +int +raptor_sax2_get_depth(raptor_sax2 *sax2) +{ + return sax2->depth; +} + +void +raptor_sax2_inc_depth(raptor_sax2 *sax2) +{ + sax2->depth++; +} + +void +raptor_sax2_dec_depth(raptor_sax2 *sax2) +{ + sax2->depth--; +} + + +static void raptor_sax2_simple_error(void* user_data, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +/* + * raptor_sax2_simple_error - Error from a sax2 - Internal + * + * Matches the raptor_simple_message_handler API but calls + * the sax2 error_handler + */ +static void +raptor_sax2_simple_error(void* user_data, const char *message, ...) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + va_list arguments; + + va_start(arguments, message); + + if(sax2) { + raptor_log_error_varargs(sax2->world, + RAPTOR_LOG_LEVEL_ERROR, + sax2->locator, + message, arguments); + } + + va_end(arguments); +} + + + +/** + * raptor_sax2_parse_start: + * @sax2: sax2 object + * @base_uri: base URI + * + * Start an XML SAX2 parse. 
/**
 * raptor_sax2_parse_chunk:
 * @sax2: sax2 object
 * @buffer: input buffer
 * @len: input buffer length
 * @is_end: non-0 if end of data
 *
 * Parse a chunk of XML data generating SAX2 events
 *
 * When built with libxml, the first call with data creates the push
 * parser context from that data and returns 0 straight away unless
 * @is_end is set, in which case the parse is finished with a
 * zero-length terminating xmlParseChunk() call.  A first call with no
 * data at all is logged as an error.  Later calls feed the data to
 * xmlParseChunk(); a result of XML_WAR_UNDECLARED_ENTITY is not treated
 * as a failure there.
 *
 * Without RAPTOR_XML_LIBXML the function does nothing and returns 1.
 *
 * Return value: non-0 on failure
 */
int
raptor_sax2_parse_chunk(raptor_sax2* sax2, const unsigned char *buffer,
                        size_t len, int is_end)
{
#ifdef RAPTOR_XML_LIBXML
  /* parser context */
  xmlParserCtxtPtr xc = sax2->xc;
  int rc;

  /* No context yet: this is the first chunk of a parse */
  if(!xc) {
    int libxml_options = 0;

    if(!len) {
      /* no data given at all */
      raptor_sax2_update_document_locator(sax2, sax2->locator);
      raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_ERROR, sax2->locator,
                       "XML Parsing failed - no element found");
      return 1;
    }

    /* The initial data is passed to the context constructor itself */
    xc = xmlCreatePushParserCtxt(&sax2->sax, sax2, /* user data */
                                 (char*)buffer, RAPTOR_BAD_CAST(int, len),
                                 NULL);
    if(!xc)
      goto handle_error;

#ifdef RAPTOR_LIBXML_XML_PARSE_NONET
    if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_NET))
      libxml_options |= XML_PARSE_NONET;
#endif
#ifdef HAVE_XMLCTXTUSEOPTIONS
    xmlCtxtUseOptions(xc, libxml_options);
#endif

    xc->userData = sax2; /* user data */
    xc->vctxt.userData = sax2; /* user data */
    xc->vctxt.error = (xmlValidityErrorFunc)raptor_libxml_validation_error;
    xc->vctxt.warning = (xmlValidityWarningFunc)raptor_libxml_validation_warning;
    xc->replaceEntities = 1;

    sax2->xc = xc;

    /* The data went in via the constructor above; if this was also the
     * last chunk, fall through with len 0 to terminate the parse */
    if(is_end)
      len = 0;
    else
      return 0;
  }

  /* Zero length: just tell libxml the document has ended */
  if(!len) {
    rc = xmlParseChunk(xc, (char*)buffer, 0, 1);
    return rc;
  }


  /* This works around some libxml versions that fail to work
   * if the buffer size is larger than the entire file
   * and thus the entire parsing is done in one operation.
   *
   * The code below:
   *   2.4.19 (oldest tested) to 2.4.24 - required
   *   2.4.25                           - works with or without it
   *   2.4.26 or later                  - fails with this code
   */

#if LIBXML_VERSION < 20425
  if(sax2->first_read && is_end) {
    /* parse all but the last character */
    rc = xmlParseChunk(xc, (char*)buffer, len-1, 0);
    if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
      goto handle_error;
    /* last character */
    rc = xmlParseChunk(xc, (char*)buffer + (len-1), 1, 0);
    if(rc && rc != XML_WAR_UNDECLARED_ENTITY)
      goto handle_error;
    /* end */
    xmlParseChunk(xc, (char*)buffer, 0, 1);
    return 0;
  }
#endif

#if LIBXML_VERSION < 20425
  sax2->first_read = 0;
#endif

  rc = xmlParseChunk(xc, (char*)buffer, RAPTOR_BAD_CAST(int, len), is_end);
  if(rc && rc != XML_WAR_UNDECLARED_ENTITY) /* libxml: non 0 is failure */
    goto handle_error;
  /* on the final chunk an undeclared-entity result is reported as success */
  if(is_end)
    return 0;

  return rc;

  handle_error:
#endif

  /* reached on any libxml failure, and always when built without libxml */
  return 1;
}
+ * + * The @string values used are copied. + * + * The allowed options are available via + * raptor_world_get_option_description(). + * + * Return value: non 0 on failure or if the option is unknown + */ +int +raptor_sax2_set_option(raptor_sax2 *sax2, raptor_option option, + char* string, int integer) +{ + return raptor_object_options_set_option(&sax2->options, option, + string, integer); +} + + +void +raptor_sax2_update_document_locator(raptor_sax2* sax2, + raptor_locator* locator) +{ +#ifdef RAPTOR_XML_LIBXML + raptor_libxml_update_document_locator(sax2, locator); +#endif +} + + +/* start of an element */ +void +raptor_sax2_start_element(void* user_data, const unsigned char *name, + const unsigned char **atts) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + raptor_qname* el_name; + unsigned char **xml_atts_copy = NULL; + size_t xml_atts_size = 0; + int all_atts_count = 0; + int ns_attributes_count = 0; + raptor_qname** named_attrs = NULL; + raptor_xml_element* xml_element = NULL; + unsigned char *xml_language = NULL; + raptor_uri *xml_base = NULL; + + if(sax2->failed || !sax2->enabled) + return; + +#ifdef RAPTOR_XML_LIBXML + if(atts) { + int i; + + /* Do XML attribute value normalization */ + for(i = 0; atts[i]; i += 2) { + unsigned char *value = (unsigned char*)atts[i+1]; + unsigned char *src = value; + unsigned char *dst = xmlStrdup(value); + + if(!dst) { + raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL, + sax2->locator, "Out of memory"); + return; + } + + atts[i+1] = dst; + + while(*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) + src++; + while(*src) { + if(*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) { + while(*src == 0x20 || *src == 0x0d || *src == 0x0a || *src == 0x09) + src++; + if(*src) + *dst++ = 0x20; + } else { + *dst++ = *src++; + } + } + *dst = '\0'; + xmlFree(value); + } + } +#endif + + raptor_sax2_inc_depth(sax2); + + if(atts) { + int i; + + /* Save passed in XML attributes pointers so we can + * NULL the 
pointers when they get handled below (various atts[i]=NULL) + */ + for(i = 0; atts[i]; i++) ; + xml_atts_size = sizeof(unsigned char*) * i; + if(xml_atts_size) { + xml_atts_copy = RAPTOR_MALLOC(unsigned char**, xml_atts_size); + if(!xml_atts_copy) + goto fail; + memcpy(xml_atts_copy, atts, xml_atts_size); + } + + /* XML attributes processing: + * xmlns* - XML namespaces (Namespaces in XML REC) + * Deleted and used to synthesise namespaces declarations + * xml:lang - XML language (XML REC) + * Deleted and optionally normalised to lowercase + * xml:base - XML Base (XML Base REC) + * Deleted and used to set the in-scope base URI for this XML element + */ + for(i = 0; atts[i]; i+= 2) { + all_atts_count++; + + if(strncmp((char*)atts[i], "xml", 3)) { + /* count and skip non xml* attributes */ + ns_attributes_count++; + continue; + } + + /* synthesise the XML namespace events */ + if(!memcmp((const char*)atts[i], "xmlns", 5)) { + const unsigned char *prefix = atts[i][5] ? &atts[i][6] : NULL; + const unsigned char *namespace_name = atts[i+1]; + + raptor_namespace* nspace; + nspace = raptor_new_namespace(&sax2->namespaces, + prefix, namespace_name, + raptor_sax2_get_depth(sax2)); + + if(nspace) { + raptor_namespaces_start_namespace(&sax2->namespaces, nspace); + + if(sax2->namespace_handler) + (*sax2->namespace_handler)(sax2->user_data, nspace); + } + } else if(!strcmp((char*)atts[i], "xml:lang")) { + size_t lang_len = strlen((char*)atts[i+1]); + xml_language = RAPTOR_MALLOC(unsigned char*, lang_len + 1); + if(!xml_language) { + raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL, + sax2->locator, "Out of memory"); + goto fail; + } + + /* optionally normalize language to lowercase */ + if(RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NORMALIZE_LANGUAGE)) { + unsigned char *from = (unsigned char*)atts[i+1]; + unsigned char *to = xml_language; + + while(*from) { + if(isupper(*from)) + *to++ = RAPTOR_GOOD_CAST(unsigned char, tolower(*from++)); + else + *to++ = *from++; + } + 
*to = '\0'; + } else + memcpy(xml_language, atts[i+1], lang_len + 1); /* Copy NUL */ + } else if(!strcmp((char*)atts[i], "xml:base")) { + raptor_uri* base_uri; + raptor_uri* xuri; + base_uri = raptor_sax2_inscope_base_uri(sax2); + xuri = raptor_new_uri_relative_to_base(sax2->world, base_uri, atts[i+1]); + xml_base = raptor_new_uri_for_xmlbase(xuri); + raptor_free_uri(xuri); + } + + /* delete all xml attributes whether processed above or not */ + atts[i] = NULL; + } + } + + + /* Create new element structure */ + el_name = raptor_new_qname(&sax2->namespaces, name, NULL); + if(!el_name) + goto fail; + +#ifdef __clang_analyzer__ + /* clang --analyze does not know about ownership of next call */ + if(xml_language) { + free(xml_language); xml_language = NULL; + } + if(xml_base) { + raptor_free_uri(xml_base); xml_base = NULL; + } +#endif + xml_element = raptor_new_xml_element(el_name, xml_language, xml_base); + if(!xml_element) { + raptor_free_qname(el_name); + goto fail; + } + /* xml_language,xml_base now owned by xml_element */ + xml_language = NULL; + xml_base = NULL; + + /* Turn string attributes into namespaced-attributes */ + if(ns_attributes_count) { + int i; + int offset = 0; + + /* Allocate new array to hold namespaced-attributes */ + named_attrs = RAPTOR_CALLOC(raptor_qname**, ns_attributes_count, + sizeof(raptor_qname*)); + if(!named_attrs) { + raptor_log_error(sax2->world, RAPTOR_LOG_LEVEL_FATAL, + sax2->locator, "Out of memory"); + goto fail; + } + + for(i = 0; i < all_atts_count; i++) { + raptor_qname* attr; + + /* Skip previously processed attributes */ + if(!atts[i<<1]) + continue; + + /* namespace-name[i] stored in named_attrs[i] */ + attr = raptor_new_qname(&sax2->namespaces, atts[i<<1], atts[(i<<1)+1]); + if(!attr) { /* failed - tidy up and return */ + int j; + + for(j = 0; j < i; j++) + RAPTOR_FREE(raptor_qname, named_attrs[j]); + RAPTOR_FREE(raptor_qname_array, named_attrs); + goto fail; + } + + named_attrs[offset++] = attr; + } + } /* end if 
ns_attributes_count */ + + + if(named_attrs) + raptor_xml_element_set_attributes(xml_element, + named_attrs, ns_attributes_count); + + raptor_xml_element_push(sax2, xml_element); + + if(sax2->start_element_handler) + sax2->start_element_handler(sax2->user_data, xml_element); + + if(xml_atts_copy) { + /* Restore passed in XML attributes, free the copy */ + memcpy((void*)atts, xml_atts_copy, xml_atts_size); + RAPTOR_FREE(cstringpointer, xml_atts_copy); + } + + return; + + fail: + if(xml_atts_copy) + RAPTOR_FREE(cstringpointer, xml_atts_copy); + if(xml_base) + raptor_free_uri(xml_base); + if(xml_language) + RAPTOR_FREE(char*, xml_language); + if(xml_element) + raptor_free_xml_element(xml_element); +} + + +/* end of an element */ +void +raptor_sax2_end_element(void* user_data, const unsigned char *name) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + raptor_xml_element* xml_element; + + if(sax2->failed || !sax2->enabled) + return; + + xml_element = sax2->current_element; + if(xml_element) { +#ifdef RAPTOR_DEBUG_VERBOSE + fprintf(stderr, "\nraptor_rdfxml_end_element_handler: End ns-element: "); + raptor_qname_print(stderr, xml_element->name); + fputc('\n', stderr); +#endif + + if(sax2->end_element_handler) + sax2->end_element_handler(sax2->user_data, xml_element); + } + + raptor_namespaces_end_for_depth(&sax2->namespaces, + raptor_sax2_get_depth(sax2)); + xml_element = raptor_xml_element_pop(sax2); + if(xml_element) + raptor_free_xml_element(xml_element); + + raptor_sax2_dec_depth(sax2); +} + + + + +/* characters */ +void +raptor_sax2_characters(void* user_data, const unsigned char *s, int len) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + + if(sax2->failed || !sax2->enabled) + return; + + if(sax2->characters_handler) + sax2->characters_handler(sax2->user_data, sax2->current_element, s, len); +} + + +/* like <![CDATA[...]> */ +void +raptor_sax2_cdata(void* user_data, const unsigned char *s, int len) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + + 
if(sax2->failed || !sax2->enabled) + return; + + if(sax2->cdata_handler) + sax2->cdata_handler(sax2->user_data, sax2->current_element, s, len); +} + + +/* comment */ +void +raptor_sax2_comment(void* user_data, const unsigned char *s) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + + if(sax2->failed || !sax2->enabled) + return; + + if(sax2->comment_handler) + sax2->comment_handler(sax2->user_data, sax2->current_element, s); +} + + +/* unparsed (NDATA) entity */ +void +raptor_sax2_unparsed_entity_decl(void* user_data, + const unsigned char* entityName, + const unsigned char* base, + const unsigned char* systemId, + const unsigned char* publicId, + const unsigned char* notationName) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + + if(sax2->failed || !sax2->enabled) + return; + + if(sax2->unparsed_entity_decl_handler) + sax2->unparsed_entity_decl_handler(sax2->user_data, + entityName, base, systemId, + publicId, notationName); +} + + +/* external entity reference */ +int +raptor_sax2_external_entity_ref(void* user_data, + const unsigned char* context, + const unsigned char* base, + const unsigned char* systemId, + const unsigned char* publicId) +{ + raptor_sax2* sax2 = (raptor_sax2*)user_data; + + if(sax2->failed || !sax2->enabled) + return 0; + + if(sax2->external_entity_ref_handler) + return sax2->external_entity_ref_handler(sax2->user_data, + context, base, systemId, publicId); + + raptor_sax2_simple_error((void*)sax2, + "Failed to handle external entity reference with base %s systemId %s publicId %s", + (base ? (const char*)base : "(None)"), + systemId, + (publicId ? 
(const char*)publicId: "(None)")); + + /* Failed to handle external entity reference */ + return 0; +} + + +/** + * raptor_sax2_check_load_uri_string: + * @sax2: SAX2 object + * @uri_string: URI or file URI or file name string + * + * INTERNAL - Check URI loading policy + * + * Return value: > 0 if it is OK to load the URI, 0 if not, < 0 on failure +*/ +int +raptor_sax2_check_load_uri_string(raptor_sax2* sax2, + const unsigned char* uri_string) +{ + raptor_uri* abs_uri; + const unsigned char* abs_uri_string; + int abs_uri_is_file; + int load_uri = 0; + + abs_uri = raptor_new_uri_from_uri_or_file_string(sax2->world, sax2->base_uri, + uri_string); + if(!abs_uri) + return -1; + + abs_uri_string = raptor_uri_as_string(abs_uri); + + abs_uri_is_file = raptor_uri_uri_string_is_file_uri(abs_uri_string); + if(abs_uri_is_file) + load_uri = !RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_FILE); + else + load_uri = !RAPTOR_OPTIONS_GET_NUMERIC(sax2, RAPTOR_OPTION_NO_NET); + + if(sax2->uri_filter) { + int rc = sax2->uri_filter(sax2->uri_filter_user_data, abs_uri); + if(rc) + load_uri = 0; + } + + RAPTOR_DEBUG4("URI '%s' Is a file? %s Load URI? %s\n", abs_uri_string, + (abs_uri_is_file > 0) ? "YES" : "NO", + (load_uri > 0) ? "YES" : "NO"); + + raptor_free_uri(abs_uri); + + return load_uri; +} diff --git a/src/raptor_sequence.c b/src/raptor_sequence.c new file mode 100644 index 0000000..c64ad1d --- /dev/null +++ b/src/raptor_sequence.c @@ -0,0 +1,895 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_sequence.c - Raptor sequence support + * + * Copyright (C) 2003-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. 
GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + + +#include "raptor2.h" +#include "raptor_internal.h" + + +/* POLICY - minimum size */ +#define RAPTOR_SEQUENCE_MIN_CAPACITY 8 + + +#ifndef STANDALONE + +/* + * Sequence of maximum capacity C containing N data items + * + * array: + * 0 <-- N consecutive items --> C - 1 + * ----------------------------------------------------------- + * | | | data1 | ..... data N | ... 
| | + * ----------------------------------------------------------- + * ------ O -----> offset of first data item + * + * start = O + * size = N + * capacity = C + * + */ +struct raptor_sequence_s { + /* how many items are in the sequence 0..capacity */ + int size; + + /* length of the 'sequence' array below */ + int capacity; + + /* offset of the first data item in the sequence: 0..capacity-1 */ + int start; + + /* array of size 'capacity' pointing to the data */ + void **sequence; + + + /* handler to call to free a data item (or NULL) */ + raptor_data_free_handler free_handler; + + /* handler to call to print a data item (or NULL) */ + raptor_data_print_handler print_handler; + + + /* context pointer for @context_free_handler and @context_print_handler */ + void *handler_context; + + /* handler to call to free a data item (or NULL) also passing in + * as first arg the @handler_context */ + raptor_data_context_free_handler context_free_handler; + + /* handler to call to print a data item (or NULL) also passing in + * as first arg the @handler_context + */ + raptor_data_context_print_handler context_print_handler; +}; + + +static int raptor_sequence_ensure(raptor_sequence *seq, int capacity, int grow_at_front); + + +/** + * raptor_new_sequence: + * @free_handler: handler to free a sequence item + * @print_handler: handler to print a sequence item to a FILE* + * + * Constructor - create a new sequence with the given handlers. + * + * This creates a sequence over objects that need only the item data + * pointers in order to print or free the objects. 
+ * + * For example sequences of strings could use handlers (free, NULL) + * and sequences of #raptor_uri could use (raptor_free_uri, + * raptor_print_uri) + * + * Return value: a new #raptor_sequence or NULL on failure + **/ +raptor_sequence* +raptor_new_sequence(raptor_data_free_handler free_handler, + raptor_data_print_handler print_handler) +{ + raptor_sequence* seq = RAPTOR_CALLOC(raptor_sequence*, 1, sizeof(*seq)); + if(!seq) + return NULL; + + seq->free_handler = free_handler; + seq->print_handler = print_handler; + + return seq; +} + + +/** + * raptor_new_sequence_with_context: + * @free_handler: handler to free a sequence item + * @print_handler: handler to print a sequence item to a FILE* + * @handler_context: context information to pass to free/print handlers + * + * Constructor - create a new sequence with the given handlers and handler context. + * + * This creates a sequence over objects that need context + item data + * pointers in order to print or free the objects. + * + * Return value: a new #raptor_sequence or NULL on failure + **/ +raptor_sequence* +raptor_new_sequence_with_context(raptor_data_context_free_handler free_handler, + raptor_data_context_print_handler print_handler, + void *handler_context) +{ + raptor_sequence* seq = RAPTOR_CALLOC(raptor_sequence*, 1, sizeof(*seq)); + if(!seq) + return NULL; + + seq->context_free_handler = free_handler; + seq->context_print_handler = print_handler; + seq->handler_context = handler_context; + + return seq; +} + + +/** + * raptor_free_sequence: + * @seq: sequence to destroy + * + * Destructor - free a #raptor_sequence + **/ +void +raptor_free_sequence(raptor_sequence* seq) +{ + int i; + int j; + + if(!seq) + return; + + if(seq->free_handler) { + for(i = seq->start, j = seq->start + seq->size; i < j; i++) + if(seq->sequence[i]) + seq->free_handler(seq->sequence[i]); + } else if(seq->context_free_handler) { + for(i = seq->start, j = seq->start + seq->size; i < j; i++) + if(seq->sequence[i]) + 
seq->context_free_handler(seq->handler_context, seq->sequence[i]); + } + + if(seq->sequence) + RAPTOR_FREE(ptrarray, seq->sequence); + + RAPTOR_FREE(raptor_sequence, seq); +} + + +static int +raptor_sequence_ensure(raptor_sequence *seq, int capacity, int grow_at_front) +{ + void **new_sequence; + int offset; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, 1); + + if(capacity && seq->capacity >= capacity) + return 0; + + /* POLICY - minimum size */ + if(capacity < RAPTOR_SEQUENCE_MIN_CAPACITY) + capacity = RAPTOR_SEQUENCE_MIN_CAPACITY; + + new_sequence = RAPTOR_CALLOC(void**, capacity, sizeof(void*)); + if(!new_sequence) + return 1; + + offset = (grow_at_front ? (capacity - seq->capacity) : 0) + seq->start; + if(seq->size) { + memcpy(&new_sequence[offset], &seq->sequence[seq->start], + sizeof(void*) * seq->size); + RAPTOR_FREE(ptrarray, seq->sequence); + } + seq->start = offset; + + seq->sequence = new_sequence; + seq->capacity = capacity; + + return 0; +} + + +/** + * raptor_sequence_size: + * @seq: sequence object + * + * Get the number of items in a sequence. + * + * Return value: the sequence size (>=0) + **/ +int +raptor_sequence_size(raptor_sequence* seq) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, -1); + + return seq->size; +} + + +/* Store methods */ + +/** + * raptor_sequence_set_at: + * @seq: sequence object + * @idx: index into sequence to operate at + * @data: new data item. + * + * Replace/set an item in a sequence. + * + * The item at the offset @idx in the sequence is replaced with the + * new item @data (which may be NULL). Any existing item is freed + * with the sequence's free_handler. If necessary the sequence + * is extended (with NULLs) to handle a larger offset. + * + * The sequence takes ownership of the new data item. On failure, the + * item is freed immediately. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_sequence_set_at(raptor_sequence* seq, int idx, void *data) +{ + int need_capacity; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, 1); + + /* Cannot provide a negative index */ + if(idx < 0) { + if(data) { + if(seq->free_handler) + seq->free_handler(data); + else if(seq->context_free_handler) + seq->context_free_handler(seq->handler_context, data); + } + return 1; + } + + need_capacity = seq->start + idx + 1; + if(need_capacity > seq->capacity) { + if(seq->capacity * 2 > need_capacity) + need_capacity = seq->capacity * 2; + + if(raptor_sequence_ensure(seq, need_capacity, 0)) { + if(data) { + if(seq->free_handler) + seq->free_handler(data); + else if(seq->context_free_handler) + seq->context_free_handler(seq->handler_context, data); + } + return 1; + } + } + + if(idx < seq->size) { + /* if there is old data, delete it if there is a free handler */ + if(seq->sequence[seq->start + idx]) { + if(seq->free_handler) + seq->free_handler(seq->sequence[seq->start + idx]); + else if(seq->context_free_handler) + seq->context_free_handler(seq->handler_context, + seq->sequence[seq->start + idx]); + } + /* size remains the same */ + } else { + /* if there is no old data, size is increasing */ + /* make sure there are seq->size items starting from seq->start */ + seq->size = idx + 1; + } + + seq->sequence[seq->start + idx] = data; + + return 0; +} + + + +/** + * raptor_sequence_push: + * @seq: sequence to add to + * @data: item to add + * + * Add an item to the end of the sequence. + * + * The sequence takes ownership of the pushed item and frees it with the + * free_handler. On failure, the item is freed immediately. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_sequence_push(raptor_sequence* seq, void *data) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, 1); + + if(seq->start + seq->size == seq->capacity) { + if(raptor_sequence_ensure(seq, seq->capacity * 2, 0)) { + if(data) { + if(seq->free_handler) + seq->free_handler(data); + else if(seq->context_free_handler) + seq->context_free_handler(seq->handler_context, data); + } + return 1; + } + } + + seq->sequence[seq->start + seq->size] = data; + seq->size++; + + return 0; +} + + +/** + * raptor_sequence_shift: + * @seq: sequence to add to + * @data: item to add + * + * Add an item to the start of the sequence. + * + * The sequence takes ownership of the shifted item and frees it with the + * free_handler. On failure, the item is freed immediately. + * + * Return value: non-0 on failure + **/ +int +raptor_sequence_shift(raptor_sequence* seq, void *data) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, 1); + + if(!seq->start) { + if(raptor_sequence_ensure(seq, seq->capacity * 2, 1)) { + if(data) { + if(seq->free_handler) + seq->free_handler(data); + else if(seq->context_free_handler) + seq->context_free_handler(seq->handler_context, data); + } + return 1; + } + } + + seq->sequence[--seq->start] = data; + seq->size++; + + return 0; +} + + +/** + * raptor_sequence_get_at: + * @seq: sequence to use + * @idx: index of item to get + * + * Retrieve an item at offset @index in the sequence. + * + * This is efficient to perform. #raptor_sequence is optimised + * to append/remove from the end of the sequence. + * + * After this call the item is still owned by the sequence. + * + * Return value: the object or NULL if @index is out of range (0... 
sequence size - 1) + **/ +void* +raptor_sequence_get_at(raptor_sequence* seq, int idx) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, NULL); + + if(idx < 0 || idx > seq->size - 1) + return NULL; + + return seq->sequence[seq->start + idx]; +} + + +/** + * raptor_sequence_delete_at: + * @seq: sequence object + * @idx: index into sequence to operate at + * + * Remove an item from a position a sequence, returning it + * + * The item at the offset @idx in the sequence is replaced with a + * NULL pointer and any existing item is returned. The caller + * owns the resulting item. + * + * Return value: NULL on failure + **/ +void* +raptor_sequence_delete_at(raptor_sequence* seq, int idx) +{ + void* data; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, NULL); + + if(idx < 0 || idx > seq->size - 1) + return NULL; + + data = seq->sequence[seq->start + idx]; + seq->sequence[seq->start + idx] = NULL; + + return data; +} + + + +/** + * raptor_sequence_pop: + * @seq: sequence to use + * + * Retrieve the item at the end of the sequence. + * + * Ownership of the item is transferred to the caller, + * i.e. caller is responsible of freeing the item. + * + * Return value: the object or NULL if the sequence is empty + **/ +void* +raptor_sequence_pop(raptor_sequence* seq) +{ + void *data; + int i; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, NULL); + + if(!seq->size) + return NULL; + + seq->size--; + i = seq->start + seq->size; + data = seq->sequence[i]; + seq->sequence[i] = NULL; + + return data; +} + + +/** + * raptor_sequence_unshift: + * @seq: sequence to use + * + * Retrieve the item at the start of the sequence. + * + * Ownership of the item is transferred to the caller, + * i.e. caller is responsible of freeing the item. 
+ * + * Return value: the object or NULL if the sequence is empty + **/ +void* +raptor_sequence_unshift(raptor_sequence* seq) +{ + void *data; + int i; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, NULL); + + if(!seq->size) + return NULL; + + i = seq->start++; + data = seq->sequence[i]; + seq->size--; + seq->sequence[i] = NULL; + + return data; +} + + +/** + * raptor_sequence_sort: + * @seq: sequence to sort + * @compare: comparison function with args (a, b) + * + * Sort a sequence inline + * + * The comparison function @compare is compatible with that used for + * qsort() and provides the addresses of pointers to the data that + * must be dereferenced to get to the stored sequence data. + * + **/ +RAPTOR_EXTERN_C +void +raptor_sequence_sort(raptor_sequence* seq, raptor_data_compare_handler compare) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(seq, raptor_sequence); + + if(seq->size > 1) + qsort(&seq->sequence[seq->start], seq->size, sizeof(void*), compare); +} + + +/** + * raptor_sequence_sort_r: + * @seq: sequence to sort + * @compare: comparison function with args (a, b, user data) + * @user_data: User data argument for @compare + * + * Sort a sequence inline with user data + * + * The comparison function @compare_r is compatible with that used + * for raptor_sort_r() and provides the addresses of pointers to the + * data that must be dereferenced to get to the stored sequence data. + * + **/ +RAPTOR_EXTERN_C +void +raptor_sequence_sort_r(raptor_sequence* seq, + raptor_data_compare_arg_handler compare, + void* user_data) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(seq, raptor_sequence); + + if(seq->size > 1) + raptor_sort_r(&seq->sequence[seq->start], seq->size, sizeof(void*), + compare, user_data); +} + + +/** + * raptor_sequence_print: + * @seq: sequence to sort + * @fh: file handle + * + * Print the sequence contents using the print_handler to print the data items. 
+ * + * Return value: non-0 on failure + */ +int +raptor_sequence_print(raptor_sequence* seq, FILE* fh) +{ + int rc = 0; + int i; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, 1); + + fputc('[', fh); + for(i = 0; i < seq->size; i++) { + if(i) + fputs(", ", fh); + if(seq->sequence[seq->start + i]) { + if(seq->print_handler) + seq->print_handler(seq->sequence[seq->start + i], fh); + else if(seq->context_print_handler) + seq->context_print_handler(seq->handler_context, + seq->sequence[seq->start + i], fh); + } else + fputs("(empty)", fh); + } + fputc(']', fh); + + return rc; +} + + +/** + * raptor_sequence_join: + * @dest: #raptor_sequence destination sequence + * @src: #raptor_sequence source sequence + * + * Join two sequences moving all items from one sequence to the end of another. + * + * After this operation, sequence src will be empty (zero size) but + * will have the same item capacity as before. + * + * Return value: non-0 on failure + */ +int +raptor_sequence_join(raptor_sequence* dest, raptor_sequence *src) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(dest, raptor_sequence, 1); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(src, raptor_sequence, 1); + + if(raptor_sequence_ensure(dest, dest->size + src->size, 0)) + return 1; + + memcpy(&dest->sequence[dest->start + dest->size], &src->sequence[src->start], + sizeof(void*) * src->size); + dest->size += src->size; + + src->size = 0; + + return 0; +} + + +/** + * raptor_sequence_swap: + * @seq: sequence + * @i: first data index + * @j: second data index + * + * Swap a pair of elements in a sequence + * + * Return value: non-0 if arguments are out of range + */ +int +raptor_sequence_swap(raptor_sequence* seq, int i, int j) +{ + if(i < 0 || i >= seq->size || j < 0 || j >= seq->size) + return 1; + + if(i != j) { + void* tmp = seq->sequence[i]; + seq->sequence[i] = seq->sequence[j]; + seq->sequence[j] = tmp; + } + + return 0; +} + + +/** + * raptor_sequence_reverse: + * @seq: sequence + * 
@start_index: starting index + * @length: number of elements to reverse + * + * Reverse a range of elements + * + * Return value: non-0 if arguments are out of range + */ +int +raptor_sequence_reverse(raptor_sequence* seq, int start_index, int length) +{ + int end_index = start_index + length - 1; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(seq, raptor_sequence, 1); + + if(end_index >= seq->size || start_index < 1 || length <= 1) + return 1; + + while( (start_index != end_index) && (start_index != end_index + 1) ) { + raptor_sequence_swap(seq, start_index, end_index); + start_index++; end_index--; + } + + return 0; +} + + +/** + * raptor_sequence_next_permutation: + * @seq: int seq + * @compare: comparison function + * + * Get the next permutation of a sequence in lexicographic order + * + * Assumes the initial order of the items is lexicographically + * increasing. This function alters the order of the items until the + * last permuatation is done at which point the contents is reset to + * the intial order. + * + * Algorithm used is described in http://en.wikipedia.org/wiki/Permutation + * + * The comparison function @compare is compatible with that used for + * qsort() and provides the addresses of pointers to the data that + * must be dereferenced to get to the stored sequence data. + * + * Return value: non-0 at the last permutation + */ +RAPTOR_EXTERN_C +int +raptor_sequence_next_permutation(raptor_sequence *seq, + raptor_data_compare_handler compare) +{ + int k; + int l; + void* temp; + + if(seq->size < 2) + return 1; + + /* 1. Find the largest index k such that a[k] < a[k + 1]. If no such + * index exists, the permutation is the last permutation. + */ + k = seq->size - 2; + while(k >= 0 && compare(seq->sequence[k], seq->sequence[k + 1]) >= 0) + k--; + + if(k == -1) { + /* done - reset to starting order */ + raptor_sequence_reverse(seq, 0, seq->size); + return 1; + } + + /* 2. Find the largest index l such that a[k] < a[l]. 
Since k + 1 + * is such an index, l is well defined and satisfies k < l. + */ + l = seq->size - 1; + while( compare(seq->sequence[k], seq->sequence[l]) >= 0) + l--; + + /* 3. Swap a[k] with a[l]. */ +#if 1 + temp = seq->sequence[k]; + seq->sequence[k] = seq->sequence[l]; + seq->sequence[l] = temp; +#else + raptor_sequence_swap(seq, k, l); +#endif + + /* 4. Reverse the sequence from a[k + 1] up to and including the + * final element a[n]. + */ + raptor_sequence_reverse(seq, k + 1, seq->size - (k + 1)); + + return 0; +} + + +#endif + + + +#ifdef STANDALONE +#include <stdio.h> + +int main(int argc, char *argv[]); + +static int +raptor_compare_strings(const void *a, const void *b) +{ + return strcmp(*(char**)a, *(char**)b); +} + +static int +raptor_sequence_print_string(void *data, FILE *fh) +{ + fputs((char*)data, fh); + return 0; +} + +#define assert_match_string(function, expr, string) do { char *result = expr; if(strcmp(result, string)) { fprintf(stderr, "%s:" #function " failed - returned %s, expected %s\n", program, result, string); exit(1); } } while(0) +#define assert_match_int(function, expr, value) do { int result = expr; if(result != value) { fprintf(stderr, "%s:" #function " failed - returned %d, expected %d\n", program, result, value); exit(1); } } while(0) + +int +main(int argc, char *argv[]) +{ + const char *program = raptor_basename(argv[0]); + raptor_sequence* seq1 = raptor_new_sequence(NULL, raptor_sequence_print_string); + raptor_sequence* seq2 = raptor_new_sequence(NULL, raptor_sequence_print_string); + char *s; + int i; + + if(raptor_sequence_pop(seq1) || raptor_sequence_unshift(seq1)) { + fprintf(stderr, "%s: should not be able to pop/unshift from an empty sequence\n", program); + exit(1); + } + + raptor_sequence_set_at(seq1, 0, (void*)"first"); + + raptor_sequence_push(seq1, (void*)"third"); + + raptor_sequence_shift(seq1, (void*)"second"); + + s = (char*)raptor_sequence_get_at(seq1, 0); + assert_match_string(raptor_sequence_get_at, s, "second"); 
+ + s = (char*)raptor_sequence_get_at(seq1, 1); + assert_match_string(raptor_sequence_get_at, s, "first"); + + s = (char*)raptor_sequence_get_at(seq1, 2); + assert_match_string(raptor_sequence_get_at, s, "third"); + + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 3); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: sequence after additions: ", program); + raptor_sequence_print(seq1, stderr); + fputc('\n', stderr); +#endif + + /* now made alphabetical i.e. first, second, third */ + raptor_sequence_sort(seq1, raptor_compare_strings); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: sequence after sort: ", program); + raptor_sequence_print(seq1, stderr); + fputc('\n', stderr); +#endif + + s = (char*)raptor_sequence_pop(seq1); + assert_match_string(raptor_sequence_get_at, s, "third"); + + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 2); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: sequence after pop: ", program); + raptor_sequence_print(seq1, stderr); + fputc('\n', stderr); +#endif + + s = (char*)raptor_sequence_unshift(seq1); + assert_match_string(raptor_sequence_get_at, s, "first"); + + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 1); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: sequence after unshift: ", program); + raptor_sequence_print(seq1, stderr); + fputc('\n', stderr); +#endif + + s = (char*)raptor_sequence_get_at(seq1, 0); + assert_match_string(raptor_sequence_get_at, s, "second"); + + raptor_sequence_push(seq2, (void*)"first.2"); + if(raptor_sequence_join(seq2, seq1)) { + fprintf(stderr, "%s: raptor_sequence_join failed\n", program); + exit(1); + } + + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 0); + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq2), 2); + + raptor_free_sequence(seq1); + raptor_free_sequence(seq2); + + /* test sequence growing */ + + seq1 = 
raptor_new_sequence(NULL, raptor_sequence_print_string); + for(i = 0; i < 100; i++) + if(raptor_sequence_shift(seq1, (void*)"foo")) { + fprintf(stderr, "%s: raptor_sequence_shift failed\n", program); + exit(1); + } + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 100); + for(i = 0; i < 100; i++) + raptor_sequence_unshift(seq1); + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 0); + raptor_free_sequence(seq1); + + seq1 = raptor_new_sequence(NULL, raptor_sequence_print_string); + for(i = 0; i < 100; i++) + if(raptor_sequence_push(seq1, (void*)"foo")) { + fprintf(stderr, "%s: raptor_sequence_push failed\n", program); + exit(1); + } + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 100); + for(i = 0; i < 100; i++) + raptor_sequence_pop(seq1); + assert_match_int(raptor_sequence_size, raptor_sequence_size(seq1), 0); + raptor_free_sequence(seq1); + + return (0); +} +#endif diff --git a/src/raptor_serialize.c b/src/raptor_serialize.c new file mode 100644 index 0000000..bf9f81e --- /dev/null +++ b/src/raptor_serialize.c @@ -0,0 +1,791 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize.c - Raptor Serializer API + * + * Copyright (C) 2004-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. 
+ * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* prototypes for helper functions */ +static raptor_serializer_factory* raptor_get_serializer_factory(raptor_world* world, const char *name); + + +/* helper methods */ + +static void +raptor_free_serializer_factory(raptor_serializer_factory* factory) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(factory, raptor_serializer_factory); + + if(factory->finish_factory) + factory->finish_factory(factory); + + RAPTOR_FREE(raptor_serializer_factory, factory); +} + + +/* class methods */ + +int +raptor_serializers_init(raptor_world* world) +{ + int rc = 0; + + world->serializers = raptor_new_sequence((raptor_data_free_handler)raptor_free_serializer_factory, NULL); + if(!world->serializers) + return 1; + +#ifdef RAPTOR_SERIALIZER_NTRIPLES + rc += raptor_init_serializer_ntriples(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_TURTLE + rc += raptor_init_serializer_turtle(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_MKR + rc += raptor_init_serializer_mkr(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_RDFXML_ABBREV + rc += raptor_init_serializer_rdfxmla(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_RDFXML + rc += raptor_init_serializer_rdfxml(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_RSS_1_0 + rc += raptor_init_serializer_rss10(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_ATOM + rc += raptor_init_serializer_atom(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_DOT + rc += raptor_init_serializer_dot(world) != 0; 
+#endif + +#ifdef RAPTOR_SERIALIZER_JSON + rc += raptor_init_serializer_json(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_HTML + rc += raptor_init_serializer_html(world) != 0; +#endif + +#ifdef RAPTOR_SERIALIZER_NQUADS + rc += raptor_init_serializer_nquads(world) != 0; +#endif + + return rc; +} + + +/* + * raptor_serializers_finish - delete all the registered serializers + */ +void +raptor_serializers_finish(raptor_world* world) +{ + if(world->serializers) { + raptor_free_sequence(world->serializers); + world->serializers = NULL; + } +} + + +/* + * raptor_serializer_register_factory: + * @world: raptor_world object + * @name: the short syntax name + * @label: readable label for syntax + * @mime_type: MIME type of the syntax generated by the serializer (or NULL) + * @uri_string: URI string of the syntax (or NULL) + * @factory: pointer to function to call to register the factory + * + * INTERNAL - Register a syntax that can be generated by a serializer factory + * + * Return value: non-0 on failure + **/ +RAPTOR_EXTERN_C +raptor_serializer_factory* +raptor_serializer_register_factory(raptor_world* world, + int (*factory) (raptor_serializer_factory*)) +{ + raptor_serializer_factory *serializer; + + serializer = RAPTOR_CALLOC(raptor_serializer_factory*, 1, sizeof(*serializer)); + if(!serializer) + return NULL; + + serializer->world = world; + + serializer->desc.mime_types = NULL; + + if(raptor_sequence_push(world->serializers, serializer)) + return NULL; /* on error, serializer is already freed by the sequence */ + + /* Call the serializer registration function on the new object */ + if(factory(serializer)) + return NULL; /* serializer is owned and freed by the serializers sequence */ + + if(raptor_syntax_description_validate(&serializer->desc)) { + raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Serializer description failed to validate\n"); + goto tidy; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Registered serializer %s\n", 
serializer->desc.names[0]); +#endif + + return serializer; + + /* Clean up on failure */ + tidy: + raptor_free_serializer_factory(serializer); + return NULL; +} + + +/** + * raptor_get_serializer_factory: + * @world: raptor_world object + * @name: the factory name or NULL for the default factory + * + * Get a serializer factory by name. + * + * Return value: the factory object or NULL if there is no such factory + **/ +static raptor_serializer_factory* +raptor_get_serializer_factory(raptor_world* world, const char *name) +{ + raptor_serializer_factory *factory = NULL; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL); + + raptor_world_open(world); + + /* return 1st serializer if no particular one wanted - why? */ + if(!name) { + factory = (raptor_serializer_factory *)raptor_sequence_get_at(world->serializers, 0); + if(!factory) { + RAPTOR_DEBUG1("No (default) serializers registered\n"); + return NULL; + } + } else { + int i; + + for(i = 0; + (factory = (raptor_serializer_factory*)raptor_sequence_get_at(world->serializers, i)); + i++) { + int namei; + const char* fname; + + for(namei = 0; (fname = factory->desc.names[namei]); namei++) { + if(!strcmp(fname, name)) + break; + } + if(fname) + break; + } + } + + return factory; +} + + +/** + * raptor_world_get_serializers_count: + * @world: world object + * + * Get number of serializers + * + * Return value: number of serializers or <0 on failure + **/ +int +raptor_world_get_serializers_count(raptor_world* world) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); + + raptor_world_open(world); + + return raptor_sequence_size(world->serializers); +} + + +/** + * raptor_world_get_serializer_description: + * @world: world object + * @counter: index into the list of serializers + * + * Get serializer descriptive syntax information + * + * Return value: description or NULL if counter is out of range + **/ +const raptor_syntax_description* 
+raptor_world_get_serializer_description(raptor_world* world, + unsigned int counter) +{ + raptor_serializer_factory *factory; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, NULL); + + raptor_world_open(world); + + factory = (raptor_serializer_factory*)raptor_sequence_get_at(world->serializers, + counter); + + if(!factory) + return NULL; + + return &factory->desc; +} + + +/** + * raptor_world_is_serializer_name: + * @world: raptor_world object + * @name: the syntax name + * + * Check name of a serializer. + * + * Return value: non 0 if name is a known syntax name + */ +int +raptor_world_is_serializer_name(raptor_world* world, const char *name) +{ + if(!name) + return 0; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, 0); + + raptor_world_open(world); + + return (raptor_get_serializer_factory(world, name) != NULL); +} + + +/** + * raptor_new_serializer: + * @world: raptor_world object + * @name: the serializer name or NULL for default syntax + * + * Constructor - create a new raptor_serializer object. 
+ * + * Return value: a new #raptor_serializer object or NULL on failure + */
raptor_serializer*
raptor_new_serializer(raptor_world* world, const char *name)
{
  raptor_serializer_factory* factory;
  raptor_serializer* ser;

  RAPTOR_CHECK_CONSTRUCTOR_WORLD(world);

  raptor_world_open(world);

  factory = raptor_get_serializer_factory(world, name);
  if(!factory)
    return NULL;

  ser = RAPTOR_CALLOC(raptor_serializer*, 1, sizeof(*ser));
  if(!ser)
    return NULL;

  ser->world = world;

  /* per-syntax private state, sized by the chosen factory */
  ser->context = RAPTOR_CALLOC(void*, 1, factory->context_length);
  if(!ser->context)
    goto failed;

  ser->factory = factory;

  raptor_object_options_init(&ser->options, RAPTOR_OPTION_AREA_SERIALIZER);

  if(factory->init(ser, name))
    goto failed;

  return ser;

  /* shared failure exit: destroy the partially built object */
  failed:
  raptor_free_serializer(ser);
  return NULL;
}


/**
 * raptor_serializer_start_to_iostream:
 * @rdf_serializer:  the #raptor_serializer
 * @uri: base URI or NULL if no base URI is required
 * @iostream: #raptor_iostream to write serialization to
 *
 * Start serialization to an iostream with given base URI
 *
 * The passed in @iostream does not become owned by the serializer
 * and can be used by the caller after serializing is done.  It
 * must be destroyed by the caller.
 *
 * Return value: non-0 on failure.
+ **/ +int +raptor_serializer_start_to_iostream(raptor_serializer *rdf_serializer, + raptor_uri *uri, raptor_iostream *iostream) +{ + if(rdf_serializer->base_uri) + raptor_free_uri(rdf_serializer->base_uri); + + if(!iostream) + return 1; + + if(uri) + uri = raptor_uri_copy(uri); + + rdf_serializer->base_uri = uri; + rdf_serializer->locator.uri = uri; + rdf_serializer->locator.line = rdf_serializer->locator.column = 0; + + rdf_serializer->iostream = iostream; + + rdf_serializer->free_iostream_on_end = 0; + + if(rdf_serializer->factory->serialize_start) + return rdf_serializer->factory->serialize_start(rdf_serializer); + return 0; +} + + +/** + * raptor_serializer_start_to_filename: + * @rdf_serializer: the #raptor_serializer + * @filename: filename to serialize to + * + * Start serializing to a filename. + * + * Return value: non-0 on failure. + **/ +int +raptor_serializer_start_to_filename(raptor_serializer *rdf_serializer, + const char *filename) +{ + unsigned char *uri_string = raptor_uri_filename_to_uri_string(filename); + if(!uri_string) + return 1; + + if(rdf_serializer->base_uri) + raptor_free_uri(rdf_serializer->base_uri); + + rdf_serializer->base_uri = raptor_new_uri(rdf_serializer->world, uri_string); + rdf_serializer->locator.uri = rdf_serializer->base_uri; + rdf_serializer->locator.line = rdf_serializer->locator.column = 0; + + RAPTOR_FREE(char*, uri_string); + + rdf_serializer->iostream = raptor_new_iostream_to_filename(rdf_serializer->world, + filename); + if(!rdf_serializer->iostream) + return 1; + + rdf_serializer->free_iostream_on_end = 1; + + if(rdf_serializer->factory->serialize_start) + return rdf_serializer->factory->serialize_start(rdf_serializer); + return 0; +} + + + +/** + * raptor_serializer_start_to_string: + * @rdf_serializer: the #raptor_serializer + * @uri: base URI or NULL if no base URI is required + * @string_p: pointer to location to hold string + * @length_p: pointer to location to hold length of string (or NULL) + * + * Start 
serializing to a string. + * + * Return value: non-0 on failure. + **/
int
raptor_serializer_start_to_string(raptor_serializer *rdf_serializer,
                                  raptor_uri *uri,
                                  void **string_p, size_t *length_p)
{
  raptor_uri *base;

  if(rdf_serializer->base_uri)
    raptor_free_uri(rdf_serializer->base_uri);

  /* keep a private copy of the caller's base URI, if one was given */
  base = uri ? raptor_uri_copy(uri) : NULL;

  rdf_serializer->base_uri = base;
  rdf_serializer->locator.uri = base;
  rdf_serializer->locator.column = 0;
  rdf_serializer->locator.line = 0;

  rdf_serializer->iostream = raptor_new_iostream_to_string(rdf_serializer->world,
                                                           string_p, length_p,
                                                           NULL);
  if(!rdf_serializer->iostream)
    return 1;

  /* this iostream was created here, so it is freed when serializing ends */
  rdf_serializer->free_iostream_on_end = 1;

  if(!rdf_serializer->factory->serialize_start)
    return 0;

  return rdf_serializer->factory->serialize_start(rdf_serializer);
}


/**
 * raptor_serializer_start_to_file_handle:
 * @rdf_serializer:  the #raptor_serializer
 * @uri: base URI or NULL if no base URI is required
 * @fh:  FILE* to serialize to
 *
 * Start serializing to a FILE*.
 *
 * NOTE: This does not fclose the handle when it is finished.
 *
 * Return value: non-0 on failure.
+ **/ +int +raptor_serializer_start_to_file_handle(raptor_serializer *rdf_serializer, + raptor_uri *uri, FILE *fh) +{ + if(rdf_serializer->base_uri) + raptor_free_uri(rdf_serializer->base_uri); + + if(uri) + rdf_serializer->base_uri = raptor_uri_copy(uri); + else + rdf_serializer->base_uri = NULL; + rdf_serializer->locator.uri = rdf_serializer->base_uri; + rdf_serializer->locator.line = rdf_serializer->locator.column = 0; + + rdf_serializer->iostream = raptor_new_iostream_to_file_handle(rdf_serializer->world, fh); + if(!rdf_serializer->iostream) + return 1; + + rdf_serializer->free_iostream_on_end = 1; + + if(rdf_serializer->factory->serialize_start) + return rdf_serializer->factory->serialize_start(rdf_serializer); + return 0; +} + + +/** + * raptor_serializer_set_namespace: + * @rdf_serializer: the #raptor_serializer + * @uri: #raptor_uri of namespace or NULL + * @prefix: prefix to use or NULL + * + * set a namespace uri/prefix mapping for serializing. + * + * return value: non-0 on failure. + **/ +int +raptor_serializer_set_namespace(raptor_serializer* rdf_serializer, + raptor_uri *uri, const unsigned char *prefix) +{ + if(prefix && !*prefix) + prefix = NULL; + + if(rdf_serializer->factory->declare_namespace) + return rdf_serializer->factory->declare_namespace(rdf_serializer, + uri, prefix); + + return 1; +} + + +/** + * raptor_serializer_set_namespace_from_namespace: + * @rdf_serializer: the #raptor_serializer + * @nspace: #raptor_namespace to set + * + * Set a namespace uri/prefix mapping for serializing from an existing namespace. + * + * Return value: non-0 on failure. 
+ **/ +int +raptor_serializer_set_namespace_from_namespace(raptor_serializer* rdf_serializer, + raptor_namespace *nspace) +{ + if(rdf_serializer->factory->declare_namespace_from_namespace) + return rdf_serializer->factory->declare_namespace_from_namespace(rdf_serializer, + nspace); + else if(rdf_serializer->factory->declare_namespace) + return rdf_serializer->factory->declare_namespace(rdf_serializer, + raptor_namespace_get_uri(nspace), + raptor_namespace_get_prefix(nspace)); + + return 1; +} + + +/** + * raptor_serializer_serialize_statement: + * @rdf_serializer: the #raptor_serializer + * @statement: #raptor_statement to serialize to a syntax + * + * Serialize a statement. + * + * Return value: non-0 on failure. + **/ +int +raptor_serializer_serialize_statement(raptor_serializer* rdf_serializer, + raptor_statement *statement) +{ + if(!rdf_serializer->iostream) + return 1; + + return rdf_serializer->factory->serialize_statement(rdf_serializer, + statement); +} + + +/** + * raptor_serializer_serialize_end: + * @rdf_serializer: the #raptor_serializer + * + * End a serialization. + * + * Return value: non-0 on failure. + **/ +int +raptor_serializer_serialize_end(raptor_serializer *rdf_serializer) +{ + int rc; + + if(!rdf_serializer->iostream) + return 1; + + if(rdf_serializer->factory->serialize_end) + rc = rdf_serializer->factory->serialize_end(rdf_serializer); + else + rc = 0; + + if(rdf_serializer->iostream) { + if(rdf_serializer->free_iostream_on_end) + raptor_free_iostream(rdf_serializer->iostream); + rdf_serializer->iostream = NULL; + } + return rc; +} + + + +/** + * raptor_free_serializer: + * @rdf_serializer: #raptor_serializer object + * + * Destructor - destroy a raptor_serializer object. 
+ * + **/ +void +raptor_free_serializer(raptor_serializer* rdf_serializer) +{ + if(!rdf_serializer) + return; + + if(rdf_serializer->factory) + rdf_serializer->factory->terminate(rdf_serializer); + + if(rdf_serializer->context) + RAPTOR_FREE(raptor_serializer_context, rdf_serializer->context); + + if(rdf_serializer->base_uri) + raptor_free_uri(rdf_serializer->base_uri); + + raptor_object_options_clear(&rdf_serializer->options); + + RAPTOR_FREE(raptor_serializer, rdf_serializer); +} + + +/** + * raptor_serializer_get_iostream: + * @serializer: #raptor_serializer object + * + * Get the current serializer iostream. + * + * Return value: the serializer's current iostream or NULL if + **/ +raptor_iostream* +raptor_serializer_get_iostream(raptor_serializer *serializer) +{ + return serializer->iostream; +} + + +/** + * raptor_serializer_set_option: + * @serializer: #raptor_serializer serializer object + * @option: option to set from enumerated #raptor_option values + * @string: string option value (or NULL) + * @integer: integer option value + * + * Set serializer option. + * + * If @string is not NULL and the option type is numeric, the string + * value is converted to an integer and used in preference to @integer. + * + * If @string is NULL and the option type is not numeric, an error is + * returned. + * + * The @string values used are copied. + * + * The allowed options are available via + * raptor_world_get_option_description(). 
+ * + * Return value: non 0 on failure or if the option is unknown + **/ +int +raptor_serializer_set_option(raptor_serializer *serializer, + raptor_option option, + const char* string, int integer) +{ + return raptor_object_options_set_option(&serializer->options, option, + string, integer); +} + + +/** + * raptor_serializer_get_option: + * @serializer: #raptor_serializer serializer object + * @option: option to get value + * @string_p: pointer to where to store string value + * @integer_p: pointer to where to store integer value + * + * Get serializer option. + * + * Any string value returned in *@string_p is shared and must + * be copied by the caller. + * + * The allowed options are available via + * raptor_world_get_option_description(). + * + * Return value: option value or < 0 for an illegal option + **/ +int +raptor_serializer_get_option(raptor_serializer *serializer, + raptor_option option, + char** string_p, int* integer_p) +{ + return raptor_object_options_get_option(&serializer->options, option, + string_p, integer_p); +} + + +/** + * raptor_serializer_get_locator: + * @rdf_serializer: raptor serializer + * + * Get the serializer raptor locator object. + * + * Return value: raptor locator + **/ +raptor_locator* +raptor_serializer_get_locator(raptor_serializer *rdf_serializer) +{ + return &rdf_serializer->locator; +} + + +/** + * raptor_serializer_get_world: + * @rdf_serializer: raptor serializer + * + * Get the #raptor_world object associated with a serializer. + * + * Return value: raptor_world* pointer + **/ +raptor_world * +raptor_serializer_get_world(raptor_serializer* rdf_serializer) +{ + return rdf_serializer->world; +} + + +/** + * raptor_serializer_get_description: + * @rdf_serializer: #raptor_serializer serializer object + * + * Get description of the syntaxes of the serializer. + * + * The returned description is static and lives as long as the raptor + * library (raptor world). 
+ * + * Return value: description of syntax + **/ +const raptor_syntax_description* +raptor_serializer_get_description(raptor_serializer *rdf_serializer) +{ + return &rdf_serializer->factory->desc; +} + + +/** + * raptor_serializer_flush: + * @rdf_serializer: raptor serializer + * + * Flush the current serializer output and free any pending state + * + * In serializers that can generate blocks of content, this causes + * the writing of any current pending block. For example in Turtle + * this may write all pending triples. + * + * Return value: non-0 on failure + **/ +int +raptor_serializer_flush(raptor_serializer *rdf_serializer) +{ + int rc; + + if(rdf_serializer->factory->serialize_flush) + rc = rdf_serializer->factory->serialize_flush(rdf_serializer); + else + rc = 0; + + return rc; +} diff --git a/src/raptor_serialize_dot.c b/src/raptor_serialize_dot.c new file mode 100644 index 0000000..208d141 --- /dev/null +++ b/src/raptor_serialize_dot.c @@ -0,0 +1,584 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_dot.c - Serialize RDF graph to GraphViz DOT format + * + * Copyright (C) 2004-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +/* + * Raptor dot serializer object + */ +typedef struct { + raptor_namespace_stack *nstack; + raptor_sequence *namespaces; + + raptor_sequence *resources; + raptor_sequence *literals; + raptor_sequence *bnodes; +} raptor_dot_context; + + +/* add a namespace */ +static int +raptor_dot_serializer_declare_namespace_from_namespace(raptor_serializer* serializer, + raptor_namespace *nspace) +{ + raptor_dot_context * context = (raptor_dot_context *)serializer->context; + int i; + + for(i = 0 ; i < raptor_sequence_size(context->namespaces) ; i++ ) { + raptor_namespace * ns; + ns = (raptor_namespace *)raptor_sequence_get_at(context->namespaces, i); + + /* If prefix is already declared, ignore it */ + if((!ns->prefix && !nspace->prefix) || + (ns->prefix && nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) || + (ns->uri && nspace->uri && + raptor_uri_equals(ns->uri, nspace->uri)) ) + return 1; + } + + nspace = raptor_new_namespace_from_uri(context->nstack, nspace->prefix, + nspace->uri, 0); + + if(!nspace) + return 1; + + raptor_sequence_push(context->namespaces, nspace); + + return 0; +} + + +/* add a namespace */ +static int +raptor_dot_serializer_declare_namespace(raptor_serializer* serializer, + raptor_uri* uri, + const unsigned char *prefix) +{ + raptor_dot_context * context = (raptor_dot_context *)serializer->context; + raptor_namespace *ns; + int rc; + + ns = raptor_new_namespace_from_uri(context->nstack, prefix, uri, 0); + rc = raptor_dot_serializer_declare_namespace_from_namespace(serializer, ns); + + raptor_free_namespace(ns); + + return rc; +} + + +/* create a new serializer */ +static int 
+raptor_dot_serializer_init(raptor_serializer *serializer, const char *name) +{ + raptor_dot_context * context = (raptor_dot_context *)serializer->context; + + /* Setup namespace handling */ + context->nstack = raptor_new_namespaces(serializer->world, 1); + context->namespaces = raptor_new_sequence((raptor_data_free_handler)raptor_free_namespace, NULL); + + /* We keep a list of nodes to avoid duplication (which isn't + * critical in graphviz, but why bloat the file?) + */ + context->resources = + raptor_new_sequence((raptor_data_free_handler)raptor_free_term, NULL); + context->literals = + raptor_new_sequence((raptor_data_free_handler)raptor_free_term, NULL); + context->bnodes = + raptor_new_sequence((raptor_data_free_handler)raptor_free_term, NULL); + + return 0; +} + + +/** + * raptor_dot_iostream_write_string: + * @iostr: #raptor_iostream to write to + * @string: UTF-8 string to write + * @len: length of UTF-8 string + * or \0 for no escaping. + * + * Write an UTF-8 string, escaped for graphviz. + * + * Return value: non-0 on failure. 
+ **/ +static int +raptor_dot_iostream_write_string(raptor_iostream *iostr, + const unsigned char *string) +{ + unsigned char c; + + for( ; (c = *string) ; string++ ) { + if( (c == '\\') || (c == '"') || (c == '|') || + (c == '{') || (c == '}') ) { + raptor_iostream_write_byte('\\', iostr); + raptor_iostream_write_byte(c, iostr); + } else if( c == '\n' ) { + raptor_iostream_write_byte('\\', iostr); + raptor_iostream_write_byte('n', iostr); + } else + raptor_iostream_write_byte(c, iostr); + } + + return 0; +} + + +static void +raptor_dot_serializer_write_term_type(raptor_serializer * serializer, + raptor_term_type type) +{ + switch(type) { + case RAPTOR_TERM_TYPE_LITERAL: + raptor_iostream_write_byte('L', serializer->iostream); + break; + + case RAPTOR_TERM_TYPE_BLANK: + raptor_iostream_write_byte('B', serializer->iostream); + break; + + case RAPTOR_TERM_TYPE_URI: + raptor_iostream_write_byte('R', serializer->iostream); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + raptor_iostream_write_byte('?', serializer->iostream); + break; + } +} + + +static void +raptor_dot_serializer_write_uri(raptor_serializer* serializer, + raptor_uri* uri) +{ + raptor_dot_context* context = (raptor_dot_context*)serializer->context; + unsigned char* full = raptor_uri_as_string(uri); + int i; + + for(i = 0 ; i < raptor_sequence_size(context->namespaces) ; i++ ) { + raptor_namespace* ns = + (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + const unsigned char* ns_uri_string; + size_t ns_uri_string_len; + ns_uri_string = raptor_uri_as_counted_string(ns->uri, &ns_uri_string_len); + + if(!strncmp((char*)full, (char*)ns_uri_string, ns_uri_string_len) ) { + const unsigned char* prefix = raptor_namespace_get_prefix(ns); + + if(prefix) { + raptor_iostream_string_write(prefix, serializer->iostream); + raptor_iostream_write_byte(':', serializer->iostream); + } + + raptor_iostream_string_write(full + ns_uri_string_len, + serializer->iostream); + + return; + } + } + + 
raptor_iostream_string_write(full, serializer->iostream); +} + + +static void +raptor_dot_serializer_write_term(raptor_serializer * serializer, + raptor_term* term) +{ + switch(term->type) { + case RAPTOR_TERM_TYPE_LITERAL: + raptor_dot_iostream_write_string(serializer->iostream, + term->value.literal.string); + if(term->value.literal.language) { + raptor_iostream_write_byte('|', serializer->iostream); + raptor_iostream_string_write("Language: ", serializer->iostream); + raptor_iostream_string_write(term->value.literal.language, + serializer->iostream); + } + if(term->value.literal.datatype) { + raptor_iostream_write_byte('|', serializer->iostream); + raptor_iostream_string_write("Datatype: ", serializer->iostream); + raptor_dot_serializer_write_uri(serializer, term->value.literal.datatype); + } + break; + + case RAPTOR_TERM_TYPE_BLANK: + raptor_iostream_counted_string_write("_:", 2, serializer->iostream); + raptor_iostream_string_write(term->value.blank.string, serializer->iostream); + break; + + case RAPTOR_TERM_TYPE_URI: + raptor_dot_serializer_write_uri(serializer, term->value.uri); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple has unsupported term type %u", + term->type); + } +} + + +/* Check the list to see if the node is a duplicate. If not, add it + * to the list. + */ +static void +raptor_dot_serializer_assert_node(raptor_serializer* serializer, + raptor_term* assert_node) +{ + raptor_dot_context* context = (raptor_dot_context*)serializer->context; + raptor_sequence* seq = NULL; + int i; + + /* Which list are we searching? 
*/ + switch(assert_node->type) { + case RAPTOR_TERM_TYPE_URI: + seq = context->resources; + break; + + case RAPTOR_TERM_TYPE_BLANK: + seq = context->bnodes; + break; + + case RAPTOR_TERM_TYPE_LITERAL: + seq = context->literals; + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + break; + } + + for(i = 0 ; i < raptor_sequence_size(seq) ; i++ ) { + raptor_term* node = (raptor_term*)raptor_sequence_get_at(seq, i); + + if(raptor_term_equals(node, assert_node)) + return; + } + + raptor_sequence_push(seq, raptor_term_copy(assert_node)); +} + + +/* start a serialize */ +static int +raptor_dot_serializer_start(raptor_serializer* serializer) +{ + raptor_iostream_string_write((const unsigned char*)"digraph {\n\trankdir = LR;\n\tcharset=\"utf-8\";\n\n", + serializer->iostream); + + return 0; +} + + +static int +raptor_dot_serializer_write_colors(raptor_serializer* serializer, + raptor_term_type type) +{ + char* value; + + switch(type) { + case RAPTOR_TERM_TYPE_URI: + value = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_RESOURCE_BORDER); + if(value) { + raptor_iostream_string_write((const unsigned char*)", color=", + serializer->iostream); + raptor_iostream_string_write((const unsigned char*)value, + serializer->iostream); + } + else + raptor_iostream_string_write((const unsigned char*)", color = blue", + serializer->iostream); + + value = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_RESOURCE_FILL); + if(value) { + raptor_iostream_string_write((const unsigned char*)", style = filled, fillcolor=", + serializer->iostream); + raptor_iostream_string_write((const unsigned char*)value, + serializer->iostream); + } + + break; + + case RAPTOR_TERM_TYPE_BLANK: + value = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_BNODE_BORDER); + if(value) { + raptor_iostream_string_write((const unsigned char*)", color=", + serializer->iostream); + raptor_iostream_string_write((const unsigned char*)value, + serializer->iostream); + } + else + raptor_iostream_string_write((const 
unsigned char*)", color = green", + serializer->iostream); + + value = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_BNODE_FILL); + if(value) { + raptor_iostream_string_write((const unsigned char*)", style = filled, fillcolor=", + serializer->iostream); + raptor_iostream_string_write((const unsigned char*)value, + serializer->iostream); + } + + break; + + case RAPTOR_TERM_TYPE_LITERAL: + value = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_LITERAL_BORDER); + if(value) { + raptor_iostream_string_write((const unsigned char*)", color=", + serializer->iostream); + raptor_iostream_string_write((const unsigned char*)value, + serializer->iostream); + } + + value = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_LITERAL_FILL); + if(value) { + raptor_iostream_string_write((const unsigned char*)", style = filled, fillcolor=", + serializer->iostream); + raptor_iostream_string_write((const unsigned char*)value, + serializer->iostream); + } + + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + break; + } + + return 0; +} + +

/*
 * Write @heading followed by one DOT node declaration per term in @nodes.
 *
 * Each declaration is @id_open, the term text, the label attribute,
 * @shape_attr, the colour attributes for @type and a closing " ];".
 * The label repeats the term text when @with_label is non-0 and is left
 * empty otherwise.  @nodes is only read; the caller frees it.
 */
static void
raptor_dot_serializer_write_node_list(raptor_serializer* serializer,
                                      raptor_sequence* nodes,
                                      const char* heading,
                                      const char* id_open,
                                      const char* shape_attr,
                                      raptor_term_type type,
                                      int with_label)
{
  raptor_iostream* iostr = serializer->iostream;
  int count;
  int i;

  raptor_iostream_string_write((const unsigned char*)heading, iostr);

  count = raptor_sequence_size(nodes);
  for(i = 0; i < count; i++) {
    raptor_term* node = (raptor_term*)raptor_sequence_get_at(nodes, i);

    raptor_iostream_string_write((const unsigned char*)id_open, iostr);
    raptor_dot_serializer_write_term(serializer, node);
    raptor_iostream_string_write((const unsigned char*)"\" [ label=\"",
                                 iostr);
    if(with_label)
      raptor_dot_serializer_write_term(serializer, node);
    raptor_iostream_string_write((const unsigned char*)shape_attr, iostr);
    raptor_dot_serializer_write_colors(serializer, type);
    raptor_iostream_string_write((const unsigned char*)" ];\n", iostr);
  }
}


/*
 * end a serialize
 *
 * Writes the node declarations cached while statements were serialized
 * (resources, anonymous nodes, literals), then the graph label holding
 * the base URI and the declared namespaces, and closes the digraph.
 *
 * The four cached sequences are released here and their context pointers
 * are reset to NULL so that nothing can reach the freed memory afterwards.
 * The namespaces sequence is released even when it is empty.
 *
 * Always returns 0.
 */
static int
raptor_dot_serializer_end(raptor_serializer* serializer)
{
  raptor_dot_context* context = (raptor_dot_context*)serializer->context;
  raptor_iostream* iostr = serializer->iostream;
  int i;

  /* Print our nodes. */
  raptor_dot_serializer_write_node_list(serializer, context->resources,
                                        "\n\t// Resources\n",
                                        "\t\"R",
                                        "\", shape = ellipse",
                                        RAPTOR_TERM_TYPE_URI, 1);
  raptor_free_sequence(context->resources);
  context->resources = NULL;

  /* Anonymous nodes get an empty label */
  raptor_dot_serializer_write_node_list(serializer, context->bnodes,
                                        "\n\t// Anonymous nodes\n",
                                        "\t\"B",
                                        "\", shape = circle",
                                        RAPTOR_TERM_TYPE_BLANK, 0);
  raptor_free_sequence(context->bnodes);
  context->bnodes = NULL;

  raptor_dot_serializer_write_node_list(serializer, context->literals,
                                        "\n\t// Literals\n",
                                        "\t\"L",
                                        "\", shape = record",
                                        RAPTOR_TERM_TYPE_LITERAL, 1);
  raptor_free_sequence(context->literals);
  context->literals = NULL;

  raptor_iostream_string_write((const unsigned char*)"\n\tlabel=\"\\n\\nModel:\\n",
                               iostr);
  if(serializer->base_uri)
    raptor_iostream_string_write(raptor_uri_as_string(serializer->base_uri),
                                 iostr);
  else
    raptor_iostream_string_write("(Unknown)", iostr);

  if(raptor_sequence_size(context->namespaces) > 0) {
    raptor_iostream_string_write((const unsigned char*)"\\n\\nNamespaces:\\n",
                                 iostr);

    for(i = 0 ; i < raptor_sequence_size(context->namespaces) ; i++ ) {
      raptor_namespace* ns;
      const unsigned char* prefix;

      ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i);

      /* The default namespace has no prefix: write only its URI */
      prefix = raptor_namespace_get_prefix(ns);
      if(prefix) {
        raptor_iostream_string_write(prefix, iostr);
        raptor_iostream_string_write((const unsigned char*)": ", iostr);
      }
      raptor_iostream_string_write(raptor_uri_as_string(ns->uri), iostr);
      raptor_iostream_string_write((const unsigned char*)"\\n", iostr);
    }
  }

  /* Released whether or not any namespace was declared; nothing else
   * frees this sequence.
   */
  raptor_free_sequence(context->namespaces);
  context->namespaces = NULL;

  raptor_iostream_string_write((const unsigned char*)"\";\n", iostr);

  raptor_iostream_string_write((const unsigned char*) "}\n", iostr);

  return 0;
}


/* destroy a serializer */ +static void +raptor_dot_serializer_terminate(raptor_serializer* serializer) +{ + /* raptor_dot_context* context = (raptor_dot_context*)serializer->context; */ + + /* 
Everything should have been freed in raptor_dot_serializer_end */ +} + +/* serialize a statement */ +static int +raptor_dot_serializer_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + /* Cache the nodes for later. */ + raptor_dot_serializer_assert_node(serializer, statement->subject); + raptor_dot_serializer_assert_node(serializer, statement->object); + + raptor_iostream_string_write((const unsigned char*)"\t\"", + serializer->iostream); + raptor_dot_serializer_write_term_type(serializer, statement->subject->type); + raptor_dot_serializer_write_term(serializer, statement->subject); + raptor_iostream_string_write((const unsigned char*)"\" -> \"", + serializer->iostream); + raptor_dot_serializer_write_term_type(serializer, statement->object->type); + raptor_dot_serializer_write_term(serializer, statement->object); + raptor_iostream_string_write((const unsigned char*)"\" [ label=\"", + serializer->iostream); + raptor_dot_serializer_write_term(serializer, statement->predicate); + raptor_iostream_string_write((const unsigned char*)"\" ];\n", + serializer->iostream); + + return 0; +} + + +static const char* const dot_names[2] = { "dot", NULL}; + +static const char* const dot_uri_strings[2] = { + "http://www.graphviz.org/doc/info/lang.html", + NULL +}; + +#define DOT_TYPES_COUNT 1 +static const raptor_type_q dot_types[DOT_TYPES_COUNT + 1] = { + { "text/x-graphviz", 15, 5}, + { NULL, 0, 0} +}; + +static int +raptor_dot_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = dot_names; + factory->desc.mime_types = dot_types; + + factory->desc.label = "GraphViz DOT format"; + factory->desc.uri_strings = dot_uri_strings;; + + factory->context_length = sizeof(raptor_dot_context); + + factory->init = raptor_dot_serializer_init; + factory->declare_namespace = raptor_dot_serializer_declare_namespace; + factory->declare_namespace_from_namespace = + raptor_dot_serializer_declare_namespace_from_namespace; + 
factory->serialize_start = raptor_dot_serializer_start; + factory->serialize_statement = raptor_dot_serializer_statement; + factory->serialize_end = raptor_dot_serializer_end; + factory->finish_factory = NULL; + factory->terminate = raptor_dot_serializer_terminate; + + return 0; +} + + +int +raptor_init_serializer_dot(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_dot_serializer_register_factory); +} diff --git a/src/raptor_serialize_html.c b/src/raptor_serialize_html.c new file mode 100644 index 0000000..92a0a53 --- /dev/null +++ b/src/raptor_serialize_html.c @@ -0,0 +1,273 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_html.c - HTML Table serializer + * + * Copyright (C) 2010, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * Raptor html serializer object + */ +typedef struct { + int count; +} raptor_html_context; + + + +/* create a new serializer */ +static int +raptor_html_serialize_init(raptor_serializer* serializer, const char *name) +{ + return 0; +} + + +/* destroy a serializer */ +static void +raptor_html_serialize_terminate(raptor_serializer* serializer) +{ + +} + + +/* start a serialize */ +static int +raptor_html_serialize_start(raptor_serializer* serializer) +{ + raptor_html_context * context = (raptor_html_context *)serializer->context; + raptor_iostream *iostr = serializer->iostream; + + context->count = 0; + + /* XML and HTML declarations */ + raptor_iostream_counted_string_write( + "<?xml version=\"1.0\" encoding=\"utf-8\"?>\n", 39, iostr); + raptor_iostream_counted_string_write( + "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.1//EN\"\n" + " \"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd\">\n", 106, iostr); + raptor_iostream_counted_string_write( + "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n", 44, iostr); + + raptor_iostream_counted_string_write("<head>\n", 7, iostr); + raptor_iostream_counted_string_write(" <title>Raptor Graph Serialisation</title>\n", 44, iostr); + raptor_iostream_counted_string_write("</head>\n", 8, iostr); + raptor_iostream_counted_string_write("<body>\n", 7, iostr); + + raptor_iostream_counted_string_write( + " <table id=\"triples\" border=\"1\">\n", 34, iostr); + + raptor_iostream_counted_string_write(" <tr>\n", 9, iostr); + raptor_iostream_counted_string_write(" <th>Subject</th>\n", 23, iostr); + raptor_iostream_counted_string_write(" <th>Predicate</th>\n", 25, iostr); + raptor_iostream_counted_string_write(" 
<th>Object</th>\n", 22, iostr); + raptor_iostream_counted_string_write(" </tr>\n", 10, iostr); + + return 0; +} + + +/* serialize a term */ +static int +raptor_term_html_write(const raptor_term *term, raptor_iostream* iostr) +{ + unsigned char *str; + size_t len; + + switch(term->type) { + case RAPTOR_TERM_TYPE_LITERAL: + raptor_iostream_counted_string_write("<span class=\"literal\">", 22, + iostr); + raptor_iostream_counted_string_write("<span class=\"value\"", 19, iostr); + if(term->value.literal.language) { + len = RAPTOR_LANG_LEN_TO_SIZE_T(term->value.literal.language_len); + raptor_iostream_counted_string_write(" xml:lang=\"", 11, iostr); + raptor_xml_escape_string_write(term->value.literal.language, len, '"', + iostr); + raptor_iostream_write_byte('"', iostr); + } + raptor_iostream_write_byte('>', iostr); + len = term->value.literal.string_len; + raptor_xml_escape_string_write(term->value.literal.string, len, 0, iostr); + raptor_iostream_counted_string_write("</span>", 7, iostr); + + if(term->value.literal.datatype) { + str = raptor_uri_as_counted_string(term->value.literal.datatype, &len); + raptor_iostream_counted_string_write("^^<<span class=\"datatype\">", 29, iostr); + raptor_xml_escape_string_write(str, len, 0, iostr); + raptor_iostream_counted_string_write("</span>>", 11, iostr); + } + break; + + case RAPTOR_TERM_TYPE_BLANK: + len = term->value.blank.string_len; + raptor_iostream_counted_string_write("<span class=\"blank\">", 20, iostr); + raptor_iostream_counted_string_write("_:", 2, iostr); + raptor_xml_escape_string_write(term->value.blank.string, len, 0, iostr); + break; + + case RAPTOR_TERM_TYPE_URI: + str = raptor_uri_as_counted_string(term->value.uri, &len); + raptor_iostream_counted_string_write("<span class=\"uri\">", 18, iostr); + raptor_iostream_counted_string_write("<a href=\"", 9, iostr); + raptor_xml_escape_string_write(str, len, '"', iostr); + raptor_iostream_counted_string_write("\">", 2, iostr); + raptor_xml_escape_string_write(str, 
len, 0, iostr); + raptor_iostream_counted_string_write("</a>", 4, iostr); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(term->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Triple has unsupported term type %u", + term->type); + return 1; + } + + raptor_iostream_counted_string_write("</span>", 7, iostr); + + return 0; +} + + +/* serialize a statement */ +static int +raptor_html_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + raptor_html_context * context = (raptor_html_context *)serializer->context; + raptor_iostream *iostr = serializer->iostream; + + raptor_iostream_counted_string_write(" <tr class=\"triple\">\n", 24, + iostr); + + /* Subject */ + raptor_iostream_counted_string_write(" <td>", 10, iostr); + raptor_term_html_write(statement->subject, iostr); + raptor_iostream_counted_string_write("</td>\n", 6, iostr); + + /* Predicate */ + raptor_iostream_counted_string_write(" <td>", 10, iostr); + raptor_term_html_write(statement->predicate, iostr); + raptor_iostream_counted_string_write("</td>\n", 6, iostr); + + /* Object */ + raptor_iostream_counted_string_write(" <td>", 10, iostr); + raptor_term_html_write(statement->object, iostr); + raptor_iostream_counted_string_write("</td>\n", 6, iostr); + + raptor_iostream_counted_string_write(" </tr>\n", 10, iostr); + + context->count++; + + return 0; +} + + +/* end a serialize */ +static int +raptor_html_serialize_end(raptor_serializer* serializer) +{ + raptor_html_context * context = (raptor_html_context *)serializer->context; + raptor_iostream *iostr = serializer->iostream; + + raptor_iostream_counted_string_write(" </table>\n", 11, iostr); + + raptor_iostream_counted_string_write( + " <p>Total number of triples: <span class=\"count\">", 50, iostr); + raptor_iostream_decimal_write(context->count, iostr); + raptor_iostream_counted_string_write("</span>.</p>\n", 13, iostr); + + raptor_iostream_counted_string_write("</body>\n", 8, iostr); + 
raptor_iostream_counted_string_write("</html>\n", 8, iostr); + + return 0; +} + + +/* finish the serializer factory */ +static void +raptor_html_serialize_finish_factory(raptor_serializer_factory* factory) +{ + /* NOP */ +} + + +static const char* const html_names[2] = { "html", NULL}; + +static const char* const html_uri_strings[2] = { + "http://www.w3.org/1999/xhtml", + NULL +}; + +#define HTML_TYPES_COUNT 2 +static const raptor_type_q html_types[HTML_TYPES_COUNT + 1] = { + { "application/xhtml+xml", 21, 10}, + { "text/html", 9, 10}, + { NULL, 0, 0} +}; + +static int +raptor_html_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = html_names; + factory->desc.mime_types = html_types; + + factory->desc.label = "HTML Table"; + factory->desc.uri_strings = html_uri_strings; + + factory->context_length = sizeof(raptor_html_context); + + factory->init = raptor_html_serialize_init; + factory->terminate = raptor_html_serialize_terminate; + factory->declare_namespace = NULL; + factory->declare_namespace_from_namespace = NULL; + factory->serialize_start = raptor_html_serialize_start; + factory->serialize_statement = raptor_html_serialize_statement; + factory->serialize_end = raptor_html_serialize_end; + factory->finish_factory = raptor_html_serialize_finish_factory; + + return 0; +} + + +int +raptor_init_serializer_html(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_html_serializer_register_factory); +} diff --git a/src/raptor_serialize_json.c b/src/raptor_serialize_json.c new file mode 100644 index 0000000..4d9355d --- /dev/null +++ b/src/raptor_serialize_json.c @@ -0,0 +1,483 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_json.c - JSON serializers + * + * Copyright (C) 2008-2009, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. 
GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * Raptor JSON serializer object + */ +typedef struct { + /* non-0 if json-r otherwise json-t */ + int is_resource; + + int need_subject_comma; + + /* JSON writer object */ + raptor_json_writer* json_writer; + + /* Ordered sequence of triples if is_resource */ + raptor_avltree* avltree; + + /* Last statement generated if is_resource (shared pointer) */ + raptor_statement* last_statement; + + int need_object_comma; + +} raptor_json_context; + + +static int raptor_json_serialize_init(raptor_serializer* serializer, + const char *name); +static void raptor_json_serialize_terminate(raptor_serializer* serializer); +static int raptor_json_serialize_start(raptor_serializer* serializer); +static int raptor_json_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement); +static int raptor_json_serialize_end(raptor_serializer* serializer); +static void raptor_json_serialize_finish_factory(raptor_serializer_factory* factory); + + +/* + * raptor serializer JSON implementation + */ + + +/* create a new serializer */ +static int +raptor_json_serialize_init(raptor_serializer* serializer, const char *name) +{ + raptor_json_context* context = 
(raptor_json_context*)serializer->context; + + context->is_resource=!strcmp(name,"json"); + + /* Default for JSON serializer is absolute URIs */ + /* RAPTOR_OPTIONS_SET_NUMERIC(serializer, RAPTOR_OPTION_RELATIVE_URIS, 0); */ + + return 0; +} + + +/* destroy a serializer */ +static void +raptor_json_serialize_terminate(raptor_serializer* serializer) +{ + raptor_json_context* context = (raptor_json_context*)serializer->context; + + if(context->json_writer) { + raptor_free_json_writer(context->json_writer); + context->json_writer = NULL; + } + + if(context->avltree) { + raptor_free_avltree(context->avltree); + context->avltree = NULL; + } +} + + +static int +raptor_json_serialize_start(raptor_serializer* serializer) +{ + raptor_json_context* context = (raptor_json_context*)serializer->context; + raptor_uri* base_uri; + char* value; + + base_uri = RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_RELATIVE_URIS) + ? serializer->base_uri : NULL; + + context->json_writer = raptor_new_json_writer(serializer->world, + base_uri, + serializer->iostream); + if(!context->json_writer) + return 1; + + if(context->is_resource) { + context->avltree = raptor_new_avltree((raptor_data_compare_handler)raptor_statement_compare, + (raptor_data_free_handler)raptor_free_statement, + 0); + if(!context->avltree) { + raptor_free_json_writer(context->json_writer); + context->json_writer = NULL; + return 1; + } + } + + /* start callback */ + value = RAPTOR_OPTIONS_GET_STRING(serializer, RAPTOR_OPTION_JSON_CALLBACK); + if(value) { + raptor_iostream_string_write(value, serializer->iostream); + raptor_iostream_write_byte('(', serializer->iostream); + } + + if(!context->is_resource) { + /* start outer object */ + raptor_json_writer_start_block(context->json_writer, '{'); + raptor_json_writer_newline(context->json_writer); + + /* start triples array */ + raptor_iostream_counted_string_write((const unsigned char*)"\"triples\" : ", 12, + serializer->iostream); + 
raptor_json_writer_start_block(context->json_writer, '['); + raptor_json_writer_newline(context->json_writer); + } + + return 0; +} + + +static int +raptor_json_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + raptor_json_context* context = (raptor_json_context*)serializer->context; + + if(context->is_resource) { + raptor_statement* s = raptor_statement_copy(statement); + if(!s) + return 1; + return raptor_avltree_add(context->avltree, s); + } + + if(context->need_subject_comma) { + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + } + + /* start triple */ + raptor_json_writer_start_block(context->json_writer, '{'); + raptor_json_writer_newline(context->json_writer); + + /* subject */ + raptor_iostream_string_write((const unsigned char*)"\"subject\" : ", + serializer->iostream); + raptor_json_writer_term(context->json_writer, statement->subject); + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + + /* predicate */ + raptor_iostream_string_write((const unsigned char*)"\"predicate\" : ", + serializer->iostream); + raptor_json_writer_term(context->json_writer, statement->predicate); + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + + /* object */ + raptor_iostream_string_write((const unsigned char*)"\"object\" : ", + serializer->iostream); + raptor_json_writer_term(context->json_writer, statement->object); + raptor_json_writer_newline(context->json_writer); + + /* end triple */ + raptor_json_writer_end_block(context->json_writer, '}'); + + context->need_subject_comma = 1; + return 0; +} + + +/* return 0 to abort visit */ +static int +raptor_json_serialize_avltree_visit(int depth, void* data, void *user_data) +{ + raptor_serializer* serializer = (raptor_serializer*)user_data; + raptor_json_context* context = (raptor_json_context*)serializer->context; + + 
raptor_statement* statement = (raptor_statement*)data; + raptor_statement* s1 = statement; + raptor_statement* s2 = context->last_statement; + int new_subject = 0; + int new_predicate = 0; + unsigned int flags = RAPTOR_ESCAPED_WRITE_JSON_LITERAL; + + if(s2) { + new_subject = !raptor_term_equals(s1->subject, s2->subject); + + if(new_subject) { + /* end last predicate */ + raptor_json_writer_newline(context->json_writer); + + raptor_json_writer_end_block(context->json_writer, ']'); + raptor_json_writer_newline(context->json_writer); + + /* end last statement */ + raptor_json_writer_end_block(context->json_writer, '}'); + raptor_json_writer_newline(context->json_writer); + + context->need_subject_comma = 1; + context->need_object_comma = 0; + } + } else + new_subject = 1; + + if(new_subject) { + if(context->need_subject_comma) { + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + } + + /* start triple */ + + /* subject */ + switch(s1->subject->type) { + case RAPTOR_TERM_TYPE_URI: + raptor_json_writer_key_uri_value(context->json_writer, + NULL, 0, + s1->subject->value.uri); + break; + + case RAPTOR_TERM_TYPE_BLANK: + raptor_iostream_counted_string_write("\"_:", 3, serializer->iostream); + raptor_string_escaped_write(s1->subject->value.blank.string, 0, + '"', flags, + serializer->iostream); + raptor_iostream_write_byte('"', serializer->iostream); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, + "Triple has unsupported subject term type %u", + s1->subject->type); + break; + } + + raptor_iostream_counted_string_write(" : ", 3, serializer->iostream); + raptor_json_writer_start_block(context->json_writer, '{'); + + raptor_json_writer_newline(context->json_writer); + } + + + /* predicate */ + if(context->last_statement) { + if(new_subject) + new_predicate = 1; + else { + new_predicate = 
!raptor_uri_equals(s1->predicate->value.uri, + s2->predicate->value.uri); + if(new_predicate) { + raptor_json_writer_newline(context->json_writer); + raptor_json_writer_end_block(context->json_writer, ']'); + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + } + } + } else + new_predicate = 1; + + if(new_predicate) { + /* start predicate */ + + raptor_json_writer_key_uri_value(context->json_writer, + NULL, 0, + s1->predicate->value.uri); + raptor_iostream_counted_string_write(" : ", 3, serializer->iostream); + raptor_json_writer_start_block(context->json_writer, '['); + raptor_iostream_write_byte(' ', serializer->iostream); + + context->need_object_comma = 0; + } + + if(context->need_object_comma) { + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + } + + /* object */ + raptor_json_writer_term(context->json_writer, s1->object); + if(s1->object->type != RAPTOR_TERM_TYPE_LITERAL) + raptor_json_writer_newline(context->json_writer); + + /* end triple */ + + context->need_object_comma = 1; + context->last_statement = statement; + + return 1; +} + + +static int +raptor_json_serialize_end(raptor_serializer* serializer) +{ + raptor_json_context* context = (raptor_json_context*)serializer->context; + char* value; + + raptor_json_writer_newline(context->json_writer); + + if(context->is_resource) { + /* start outer object */ + raptor_json_writer_start_block(context->json_writer, '{'); + raptor_json_writer_newline(context->json_writer); + + raptor_avltree_visit(context->avltree, + raptor_json_serialize_avltree_visit, + serializer); + + /* end last triples block */ + if(context->last_statement) { + raptor_json_writer_newline(context->json_writer); + raptor_json_writer_end_block(context->json_writer, ']'); + raptor_json_writer_newline(context->json_writer); + + raptor_json_writer_end_block(context->json_writer, '}'); + 
raptor_json_writer_newline(context->json_writer); + } + } else { + /* end triples array */ + raptor_json_writer_end_block(context->json_writer, ']'); + raptor_json_writer_newline(context->json_writer); + } + + + value = RAPTOR_OPTIONS_GET_STRING(serializer, RAPTOR_OPTION_JSON_EXTRA_DATA); + if(value) { + raptor_iostream_write_byte(',', serializer->iostream); + raptor_json_writer_newline(context->json_writer); + raptor_iostream_string_write(value, serializer->iostream); + raptor_json_writer_newline(context->json_writer); + } + + + /* end outer object */ + raptor_json_writer_end_block(context->json_writer, '}'); + raptor_json_writer_newline(context->json_writer); + + /* end callback */ + if(RAPTOR_OPTIONS_GET_STRING(serializer, RAPTOR_OPTION_JSON_CALLBACK)) + raptor_iostream_counted_string_write((const unsigned char*)");", 2, + serializer->iostream); + + return 0; +} + + +static void +raptor_json_serialize_finish_factory(raptor_serializer_factory* factory) +{ + /* NOP */ +} + + + +static const char* const json_triples_names[3] = { "json-triples", NULL}; + +#define JSON_TRIPLES_TYPES_COUNT 2 +static const raptor_type_q json_triples_types[JSON_TRIPLES_TYPES_COUNT + 1] = { + { "application/json", 16, 0}, + { "text/json", 9, 1}, + { NULL, 0, 0} +}; + +static int +raptor_json_triples_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = json_triples_names; + factory->desc.mime_types = json_triples_types; + + factory->desc.label = "RDF/JSON Triples"; + factory->desc.uri_strings = NULL; + + factory->context_length = sizeof(raptor_json_context); + + factory->init = raptor_json_serialize_init; + factory->terminate = raptor_json_serialize_terminate; + factory->declare_namespace = NULL; + factory->declare_namespace_from_namespace = NULL; + factory->serialize_start = raptor_json_serialize_start; + factory->serialize_statement = raptor_json_serialize_statement; + factory->serialize_end = raptor_json_serialize_end; + factory->finish_factory = 
raptor_json_serialize_finish_factory; + + return 0; +} + + +static const char* const json_resource_names[2] = { "json", NULL}; + +static const char* const json_resource_uri_strings[2] = { + "http://docs.api.talis.com/platform-api/output-types/rdf-json", + NULL +}; + +#define JSON_RESOURCE_TYPES_COUNT 2 +static const raptor_type_q json_resource_types[JSON_RESOURCE_TYPES_COUNT + 1] = { + { "application/json", 16, 10}, + { "text/json", 9, 1}, + { NULL, 0, 0} +}; + +static int +raptor_json_resource_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = json_resource_names; + factory->desc.mime_types = json_resource_types; + + factory->desc.label = "RDF/JSON Resource-Centric"; + factory->desc.uri_strings = json_resource_uri_strings; + + factory->context_length = sizeof(raptor_json_context); + + factory->init = raptor_json_serialize_init; + factory->terminate = raptor_json_serialize_terminate; + factory->declare_namespace = NULL; + factory->declare_namespace_from_namespace = NULL; + factory->serialize_start = raptor_json_serialize_start; + factory->serialize_statement = raptor_json_serialize_statement; + factory->serialize_end = raptor_json_serialize_end; + factory->finish_factory = raptor_json_serialize_finish_factory; + + return 0; +} + + +int +raptor_init_serializer_json(raptor_world* world) +{ + int rc; + + rc = !raptor_serializer_register_factory(world, + &raptor_json_triples_serializer_register_factory); + if(rc) + return rc; + + rc = !raptor_serializer_register_factory(world, + &raptor_json_resource_serializer_register_factory); + + return rc; +} diff --git a/src/raptor_serialize_ntriples.c b/src/raptor_serialize_ntriples.c new file mode 100644 index 0000000..6ccdee8 --- /dev/null +++ b/src/raptor_serialize_ntriples.c @@ -0,0 +1,342 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_ntriples.c - N-Triples and Nquads serializer + * + * Copyright (C) 2004-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 
2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* + * Raptor N-Triples serializer object + */ +typedef struct { + int is_nquads; +} raptor_ntriples_serializer_context; + + + +/* create a new serializer */ +static int +raptor_ntriples_serialize_init(raptor_serializer* serializer, const char *name) +{ + raptor_ntriples_serializer_context* ntriples_serializer; + + ntriples_serializer = (raptor_ntriples_serializer_context*)serializer->context; + ntriples_serializer->is_nquads = !strcmp(name, "nquads"); + + return 0; +} + + +/* destroy a serializer */ +static void +raptor_ntriples_serialize_terminate(raptor_serializer* serializer) +{ + +} + + +/* add a namespace */ +static int +raptor_ntriples_serialize_declare_namespace(raptor_serializer* serializer, + raptor_uri *uri, + const unsigned char *prefix) +{ + /* NOP */ + return 0; +} + + +#if 0 +/* start a serialize */ +static int +raptor_ntriples_serialize_start(raptor_serializer* serializer) +{ + return 0; +} +#endif + + + 
+/** + * raptor_string_ntriples_write: + * @string: UTF-8 string to write + * @len: length of UTF-8 string + * @delim: Terminating delimiter character for string (such as " or >) + * or \0 for no escaping. + * @iostr: #raptor_iostream to write to + * + * Write an UTF-8 string using N-Triples escapes to an iostream. + * + * Return value: non-0 on failure such as bad UTF-8 encoding. + **/ +int +raptor_string_ntriples_write(const unsigned char *string, + size_t len, + const char delim, + raptor_iostream *iostr) +{ + return raptor_string_escaped_write(string, len, delim, + RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL, + iostr); +} + + +/** + * raptor_bnodeid_ntriples_write: + * @bnodeid: bnode ID to write + * @len: length of bnode ID + * @iostr: #raptor_iostream to write to + * + * Write a blank node ID in a form legal for N-Triples with _: prefix + * + * Return value: non-0 on failure + **/ +int +raptor_bnodeid_ntriples_write(const unsigned char *bnodeid, + size_t len, + raptor_iostream *iostr) +{ + unsigned int i; + + raptor_iostream_counted_string_write("_:", 2, iostr); + + for(i = 0; i < len; i++) { + unsigned char c = *bnodeid++; + if(!isalpha(c) && !isdigit(c)) { + /* Replace characters not in legal N-Triples bnode set */ + c = 'z'; + } + raptor_iostream_write_byte(c, iostr); + } + + return 0; +} + + +/** + * raptor_term_ntriples_write: + * @term: term to write + * @iostr: raptor iostream + * + * Write a #raptor_term formatted in N-Triples format to a #raptor_iostream + * + * @Deprecated: Use raptor_term_escaped_write() that allows + * configuring format detail flags. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_term_ntriples_write(const raptor_term *term, raptor_iostream* iostr) +{ + return raptor_term_escaped_write(term, + RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL, + iostr); +} + + +/** + * raptor_statement_ntriples_write: + * @statement: statement to write + * @iostr: raptor iostream + * @write_graph_term: flag to write graph term if present + * + * Write a #raptor_statement formatted in N-Triples or N-Quads format + * to a #raptor_iostream + * + * Return value: non-0 on failure + **/ +int +raptor_statement_ntriples_write(const raptor_statement *statement, + raptor_iostream* iostr, + int write_graph_term) +{ + unsigned int flags = RAPTOR_ESCAPED_WRITE_NTRIPLES_LITERAL; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(statement, raptor_statement, 1); + + if(raptor_term_escaped_write(statement->subject, flags, iostr)) + return 1; + + raptor_iostream_write_byte(' ', iostr); + if(raptor_term_escaped_write(statement->predicate, flags, iostr)) + return 1; + + raptor_iostream_write_byte(' ', iostr); + if(raptor_term_escaped_write(statement->object, flags, iostr)) + return 1; + + if(statement->graph && write_graph_term) { + raptor_iostream_write_byte(' ', iostr); + if(raptor_term_escaped_write(statement->graph, flags, iostr)) + return 1; + } + + raptor_iostream_counted_string_write(" .\n", 3, iostr); + + return 0; +} + + +/* serialize a statement */ +static int +raptor_ntriples_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + raptor_ntriples_serializer_context* ntriples_serializer; + + ntriples_serializer = (raptor_ntriples_serializer_context*)serializer->context; + + raptor_statement_ntriples_write(statement, + serializer->iostream, + ntriples_serializer->is_nquads); + return 0; +} + + +#if 0 +/* end a serialize */ +static int +raptor_ntriples_serialize_end(raptor_serializer* serializer) +{ + return 0; +} +#endif + +/* finish the serializer factory */ +static void 
+raptor_ntriples_serialize_finish_factory(raptor_serializer_factory* factory) +{ + +} + + +#ifdef RAPTOR_SERIALIZER_NTRIPLES +static const char* const ntriples_names[2] = { "ntriples", NULL}; + +static const char* const ntriples_uri_strings[3] = { + "http://www.w3.org/ns/formats/N-Triples", + "http://www.w3.org/TR/rdf-testcases/#ntriples", + NULL +}; + +#define NTRIPLES_TYPES_COUNT 2 +static const raptor_type_q ntriples_types[NTRIPLES_TYPES_COUNT + 1] = { + { "application/n-triples", 21, 10}, + { "text/plain", 10, 1}, + { NULL, 0, 0} +}; + +static int +raptor_ntriples_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = ntriples_names; + factory->desc.mime_types = ntriples_types; + + factory->desc.label = "N-Triples"; + factory->desc.uri_strings = ntriples_uri_strings; + + factory->context_length = sizeof(raptor_ntriples_serializer_context); + + factory->init = raptor_ntriples_serialize_init; + factory->terminate = raptor_ntriples_serialize_terminate; + factory->declare_namespace = raptor_ntriples_serialize_declare_namespace; + factory->serialize_start = NULL; + factory->serialize_statement = raptor_ntriples_serialize_statement; + factory->serialize_end = NULL; + factory->finish_factory = raptor_ntriples_serialize_finish_factory; + + return 0; +} +#endif + + +#ifdef RAPTOR_SERIALIZER_NQUADS +static const char* const nquads_names[2] = { "nquads", NULL}; + +static const char* const nquads_uri_strings[2] = { + "http://sw.deri.org/2008/07/n-quads/#n-quads", + NULL +}; + +#define NQUADS_TYPES_COUNT 1 +static const raptor_type_q nquads_types[NQUADS_TYPES_COUNT + 1] = { + { "text/x-nquads", 13, 10}, + { NULL, 0, 0} +}; + +static int +raptor_nquads_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = nquads_names; + factory->desc.mime_types = nquads_types; + + factory->desc.label = "N-Quads"; + factory->desc.uri_strings = nquads_uri_strings; + + factory->context_length = 
sizeof(raptor_ntriples_serializer_context); + + factory->init = raptor_ntriples_serialize_init; + factory->terminate = raptor_ntriples_serialize_terminate; + factory->declare_namespace = raptor_ntriples_serialize_declare_namespace; + factory->serialize_start = NULL; + factory->serialize_statement = raptor_ntriples_serialize_statement; + factory->serialize_end = NULL; + factory->finish_factory = raptor_ntriples_serialize_finish_factory; + + return 0; +} +#endif + +#ifdef RAPTOR_SERIALIZER_NTRIPLES +int +raptor_init_serializer_ntriples(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_ntriples_serializer_register_factory); +} +#endif + +#ifdef RAPTOR_SERIALIZER_NQUADS +int +raptor_init_serializer_nquads(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_nquads_serializer_register_factory); +} +#endif diff --git a/src/raptor_serialize_rdfxml.c b/src/raptor_serialize_rdfxml.c new file mode 100644 index 0000000..3e44a2e --- /dev/null +++ b/src/raptor_serialize_rdfxml.c @@ -0,0 +1,729 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_rdfxml.c - RDF/XML serializer + * + * Copyright (C) 2004-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
/*
 * Raptor RDF/XML serializer object
 *
 * Per-serializer state.  The namespace fields are set up by
 * raptor_rdfxml_serialize_init() and released by
 * raptor_rdfxml_serialize_terminate().
 */
typedef struct {
  /* Namespace stack */
  raptor_namespace_stack *nstack;

  /* the xml: namespace - this is destroyed when nstack above is deleted */
  raptor_namespace *xml_nspace;

  /* the rdf: namespace - this is destroyed when nstack above is deleted */
  raptor_namespace *rdf_nspace;

  /* the rdf:RDF element - created when the header is written */
  raptor_xml_element* rdf_RDF_element;

  /* where the xml is being written - created by
   * raptor_rdfxml_serialize_start()
   */
  raptor_xml_writer *xml_writer;

  /* User declared namespaces; item 0 is always the rdf: namespace */
  raptor_sequence *namespaces;

  /* non zero if rdf:RDF has been written (and thus no new namespaces
   * can be declared).
   */
  int written_header;
} raptor_rdfxml_serializer_context;
item 0 in the list is rdf:RDF's namespace */ + if(raptor_sequence_push(context->namespaces, context->rdf_nspace)) { + raptor_rdfxml_serialize_terminate(serializer); + return 1; + } + + return 0; +} + + +/* destroy a serializer */ +static void +raptor_rdfxml_serialize_terminate(raptor_serializer* serializer) +{ + raptor_rdfxml_serializer_context* context = (raptor_rdfxml_serializer_context*)serializer->context; + + if(context->xml_writer) { + raptor_free_xml_writer(context->xml_writer); + context->xml_writer = NULL; + } + + if(context->rdf_RDF_element) { + raptor_free_xml_element(context->rdf_RDF_element); + context->rdf_RDF_element = NULL; + } + + if(context->rdf_nspace) { + raptor_free_namespace(context->rdf_nspace); + context->rdf_nspace = NULL; + } + + if(context->xml_nspace) { + raptor_free_namespace(context->xml_nspace); + context->xml_nspace = NULL; + } + + if(context->namespaces) { + int i; + + /* Note: item 0 in the list is rdf:RDF's namespace and freed above */ + for(i = 1; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + if(ns) + raptor_free_namespace(ns); + } + raptor_free_sequence(context->namespaces); + context->namespaces = NULL; + } + + if(context->nstack) { + raptor_free_namespaces(context->nstack); + context->nstack = NULL; + } +} + + +#define RDFXML_NAMESPACE_DEPTH 0 + +/* add a namespace */ +static int +raptor_rdfxml_serialize_declare_namespace_from_namespace(raptor_serializer* serializer, + raptor_namespace *nspace) +{ + raptor_rdfxml_serializer_context* context = (raptor_rdfxml_serializer_context*)serializer->context; + int i; + + if(context->written_header) + return 1; + + for(i = 0; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + + /* If prefix is already declared, ignore it */ + if(!ns->prefix && !nspace->prefix) + return 1; + + if(ns->prefix 
&& nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) + return 1; + + if(ns->uri && nspace->uri && + raptor_uri_equals(ns->uri, nspace->uri)) + return 1; + } + + nspace = raptor_new_namespace_from_uri(context->nstack, + nspace->prefix, nspace->uri, + RDFXML_NAMESPACE_DEPTH); + if(!nspace) + return 1; + + raptor_sequence_push(context->namespaces, nspace); + return 0; +} + + +/* add a namespace */ +static int +raptor_rdfxml_serialize_declare_namespace(raptor_serializer* serializer, + raptor_uri *uri, + const unsigned char *prefix) +{ + raptor_rdfxml_serializer_context* context = (raptor_rdfxml_serializer_context*)serializer->context; + raptor_namespace *ns; + int rc; + + ns = raptor_new_namespace_from_uri(context->nstack, prefix, uri, + RDFXML_NAMESPACE_DEPTH); + + rc = raptor_rdfxml_serialize_declare_namespace_from_namespace(serializer, + ns); + raptor_free_namespace(ns); + + return rc; +} + + +/* start a serialize */ +static int +raptor_rdfxml_serialize_start(raptor_serializer* serializer) +{ + raptor_rdfxml_serializer_context* context = (raptor_rdfxml_serializer_context*)serializer->context; + raptor_xml_writer* xml_writer; + raptor_option option; + + if(context->xml_writer) { + raptor_free_xml_writer(context->xml_writer); + context->xml_writer = NULL; + } + + xml_writer = raptor_new_xml_writer(serializer->world, context->nstack, + serializer->iostream); + if(!xml_writer) + return 1; + + option = RAPTOR_OPTION_WRITER_XML_VERSION; + raptor_xml_writer_set_option(xml_writer, option, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(serializer, option)); + option = RAPTOR_OPTION_WRITER_XML_DECLARATION; + raptor_xml_writer_set_option(xml_writer, option, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(serializer, option)); + + context->xml_writer = xml_writer; + context->written_header = 0; + + return 0; +} + + +static int +raptor_rdfxml_ensure_writen_header(raptor_serializer* serializer, + raptor_rdfxml_serializer_context* context) +{ + raptor_xml_writer* 
xml_writer; + raptor_uri *base_uri; + int i; + raptor_qname **attrs = NULL; + int attrs_count = 0; + int rc = 1; + + if(context->written_header) + return 0; + + context->written_header = 1; + + xml_writer = context->xml_writer; + + base_uri = serializer->base_uri; + if(base_uri) + base_uri = raptor_uri_copy(base_uri); + + context->rdf_RDF_element = raptor_new_xml_element_from_namespace_local_name(context->rdf_nspace, (const unsigned char*)"RDF", NULL, base_uri); + if(!context->rdf_RDF_element) + goto tidy; + + /* NOTE: Starts it item 1 as item 0 is the element's namespace (rdf) + * and does not need to be declared + */ + for(i = 1; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + if(raptor_xml_element_declare_namespace(context->rdf_RDF_element, ns)) + goto tidy; + } + + if(base_uri && + RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_WRITE_BASE_URI)) { + const unsigned char* base_uri_string; + + attrs = RAPTOR_CALLOC(raptor_qname **, 1, sizeof(raptor_qname*)); + if(!attrs) + goto tidy; + + base_uri_string = raptor_uri_as_string(base_uri); + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->xml_nspace, (const unsigned char*)"base", base_uri_string); + if(!attrs[attrs_count]) { + RAPTOR_FREE(qnamearray, attrs); + goto tidy; + } + attrs_count++; + } + + if(attrs_count) + raptor_xml_element_set_attributes(context->rdf_RDF_element, attrs, + attrs_count); + else + raptor_xml_element_set_attributes(context->rdf_RDF_element, NULL, 0); + + + raptor_xml_writer_start_element(xml_writer, context->rdf_RDF_element); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1); + + rc = 0; + + tidy: + if(base_uri) + raptor_free_uri(base_uri); + + return rc; +} + + +/* serialize a statement */ +static int +raptor_rdfxml_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + 
raptor_rdfxml_serializer_context* context = (raptor_rdfxml_serializer_context*)serializer->context; + raptor_xml_writer* xml_writer = context->xml_writer; + unsigned char* uri_string = NULL; /* predicate URI */ + unsigned char* name = NULL; /* where to split predicate name */ + unsigned char* subject_uri_string = NULL; + unsigned char* object_uri_string = NULL; + const unsigned char* nsprefix = (const unsigned char*)"ns0"; + int rc = 1; + size_t len; + raptor_xml_element* rdf_Description_element = NULL; + raptor_uri* predicate_ns_uri = NULL; + raptor_namespace* predicate_ns = NULL; + int free_predicate_ns = 0; + raptor_xml_element* predicate_element = NULL; + raptor_qname **attrs = NULL; + int attrs_count = 0; + raptor_uri* base_uri = NULL; + raptor_term_type object_type; + int allocated = 1; + int object_is_parseTypeLiteral = 0; + + if(raptor_rdfxml_ensure_writen_header(serializer, context)) + return 1; + + if(statement->predicate->type == RAPTOR_TERM_TYPE_URI) { + unsigned char *p; + size_t uri_len; + size_t name_len = 1; + unsigned char c; + + /* Do not use raptor_uri_as_counted_string() - we want a modifiable copy */ + uri_string = raptor_uri_to_counted_string(statement->predicate->value.uri, + &uri_len); + if(!uri_string) + goto oom; + + p= uri_string; + name_len = uri_len; + /* FIXME: this loop could be made smarter */ + while(name_len >0) { + if(raptor_xml_name_check(p, name_len, 10)) { + name = p; + break; + } + p++; name_len--; + } + + if(!name || (name == uri_string)) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot split predicate URI %s into an XML qname - skipping statement", uri_string); + rc = 0; /* skip but do not return an error */ + goto tidy; + } + + c = *name; *name = '\0'; + predicate_ns_uri = raptor_new_uri(serializer->world, uri_string); + *name = c; + if(!predicate_ns_uri) + goto oom; + + predicate_ns = raptor_namespaces_find_namespace_by_uri(context->nstack, + predicate_ns_uri); + if(!predicate_ns) { 
+ predicate_ns = raptor_new_namespace_from_uri(context->nstack, + nsprefix, + predicate_ns_uri, 0); + if(!predicate_ns) { + raptor_free_uri(predicate_ns_uri); + goto oom; + } + free_predicate_ns = 1; + } + raptor_free_uri(predicate_ns_uri); + } else { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a triple with subject node type %u\n", + statement->predicate->type); + goto tidy; + } + + /* base uri */ + if(serializer->base_uri) + base_uri = raptor_uri_copy(serializer->base_uri); + + + rdf_Description_element = raptor_new_xml_element_from_namespace_local_name(context->rdf_nspace, + (unsigned const char*)"Description", + NULL, base_uri); + if(!rdf_Description_element) + goto oom; + + attrs = RAPTOR_CALLOC(raptor_qname**, 3, sizeof(raptor_qname*)); + if(!attrs) + goto oom; + attrs_count = 0; + + /* subject */ + switch(statement->subject->type) { + case RAPTOR_TERM_TYPE_BLANK: + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->rdf_nspace, (const unsigned char*)"nodeID", + statement->subject->value.blank.string); + if(!attrs[attrs_count]) + goto oom; + attrs_count++; + break; + + case RAPTOR_TERM_TYPE_URI: + allocated = 1; + if(RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_RELATIVE_URIS)) { + subject_uri_string = raptor_uri_to_relative_uri_string(serializer->base_uri, + statement->subject->value.uri); + if(!subject_uri_string) + goto oom; + } else { + subject_uri_string = raptor_uri_as_string(statement->subject->value.uri); + allocated = 0; + } + + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->rdf_nspace, (const unsigned char*)"about", subject_uri_string); + if(!attrs[attrs_count]) { + if(allocated) + RAPTOR_FREE(char*, subject_uri_string); + goto oom; + } + attrs_count++; + + if(allocated) + RAPTOR_FREE(char*, subject_uri_string); + + break; + + case RAPTOR_TERM_TYPE_LITERAL: + raptor_log_error(serializer->world, 
RAPTOR_LOG_LEVEL_ERROR, NULL, "Cannot serialize a triple with a literal subject\n"); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a triple with subject node type %u", + statement->subject->type); + } + + if(attrs_count) { + raptor_xml_element_set_attributes(rdf_Description_element, attrs, attrs_count); + attrs = NULL; /* attrs ownership transferred to element */ + } + + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)" ", 2); + raptor_xml_writer_start_element(xml_writer, rdf_Description_element); + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1); + + + /* predicate */ + predicate_element = raptor_new_xml_element_from_namespace_local_name(predicate_ns, name, NULL, base_uri); + if(!predicate_element) + goto oom; + + /* object */ + attrs = RAPTOR_CALLOC(raptor_qname**, 3, sizeof(raptor_qname*)); + if(!attrs) + goto oom; + attrs_count = 0; + + object_type = statement->object->type; + switch(object_type) { + case RAPTOR_TERM_TYPE_LITERAL: + object_is_parseTypeLiteral = 0; + if(statement->object->value.literal.datatype && + raptor_uri_equals(statement->object->value.literal.datatype, + RAPTOR_RDF_XMLLiteral_URI(serializer->world))) + object_is_parseTypeLiteral = 1; + + if(statement->object->value.literal.language) { + attrs[attrs_count] = raptor_new_qname(context->nstack, + (unsigned char*)"xml:lang", + statement->object->value.literal.language); + if(!attrs[attrs_count]) + goto oom; + attrs_count++; + } + len = statement->object->value.literal.string_len; + + if(object_is_parseTypeLiteral) { + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->rdf_nspace, (const unsigned char*)"parseType", (const unsigned char*)"Literal"); + if(!attrs[attrs_count]) + goto oom; + attrs_count++; + + raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count); + attrs = NULL; /* attrs 
ownership transferred to element */ + + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)" ", 4); + raptor_xml_writer_start_element(xml_writer, predicate_element); + + /* Print without escaping XML */ + if(len) + raptor_xml_writer_raw_counted(xml_writer, + (const unsigned char*)statement->object->value.literal.string, + RAPTOR_BAD_CAST(unsigned int, len)); + } else { + if(statement->object->value.literal.datatype) { + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->rdf_nspace, (const unsigned char*)"datatype", raptor_uri_as_string(statement->object->value.literal.datatype)); + if(!attrs[attrs_count]) + goto oom; + attrs_count++; + } + raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count); + attrs = NULL; /* attrs ownership transferred to element */ + + raptor_xml_writer_cdata_counted(xml_writer, + (const unsigned char*)" ", 4); + raptor_xml_writer_start_element(xml_writer, predicate_element); + + if(len) + raptor_xml_writer_cdata_counted(xml_writer, + statement->object->value.literal.string, + RAPTOR_BAD_CAST(unsigned int, len)); + } + + raptor_xml_writer_end_element(xml_writer, predicate_element); + raptor_free_xml_element(predicate_element); + predicate_element = NULL; + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1); + + break; + + case RAPTOR_TERM_TYPE_BLANK: + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->rdf_nspace, (const unsigned char*)"nodeID", statement->object->value.blank.string); + if(!attrs[attrs_count]) + goto oom; + attrs_count++; + + raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count); + attrs = NULL; /* attrs ownership transferred to element */ + + raptor_xml_writer_cdata_counted(xml_writer, + (const unsigned char*)" ", 4); + raptor_xml_writer_empty_element(xml_writer, predicate_element); + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1); + break; + 
+ case RAPTOR_TERM_TYPE_URI: + /* must be URI */ + if(RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_RELATIVE_URIS)) { + object_uri_string = raptor_uri_to_relative_uri_string(serializer->base_uri, + statement->object->value.uri); + } else { + object_uri_string = raptor_uri_to_string(statement->object->value.uri); + } + if(!object_uri_string) + goto oom; + + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, context->rdf_nspace, (const unsigned char*)"resource", object_uri_string); + RAPTOR_FREE(char*, object_uri_string); + + if(!attrs[attrs_count]) + goto oom; + + attrs_count++; + raptor_xml_element_set_attributes(predicate_element, attrs, attrs_count); + attrs = NULL; /* attrs ownership transferred to element */ + + raptor_xml_writer_cdata_counted(xml_writer, + (const unsigned char*)" ", 4); + raptor_xml_writer_empty_element(xml_writer, predicate_element); + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a triple with object node type %u", + object_type); + } + + raptor_xml_writer_cdata_counted(xml_writer, + (const unsigned char*)" ", 2); + + rc = 0; /* success */ + goto tidy; + + oom: + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_FATAL, NULL, + "Out of memory"); + + tidy: + + if(attrs) + RAPTOR_FREE(qnamearray, attrs); + + if(predicate_element) + raptor_free_xml_element(predicate_element); + + if(rdf_Description_element) { + raptor_xml_writer_end_element(xml_writer, rdf_Description_element); + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"\n", 1); + raptor_free_xml_element(rdf_Description_element); + } + + if(base_uri) + raptor_free_uri(base_uri); + + if(free_predicate_ns) + raptor_free_namespace(predicate_ns); + + if(uri_string) + RAPTOR_FREE(char*, uri_string); + + return rc; +} + + +/* end a serialize */ +static 
int +raptor_rdfxml_serialize_end(raptor_serializer* serializer) +{ + raptor_rdfxml_serializer_context* context = (raptor_rdfxml_serializer_context*)serializer->context; + raptor_xml_writer* xml_writer = context->xml_writer; + + if(xml_writer) { + /* Make sure an empty RDF/XML document is written when 0 triples + * were seen + */ + + /* ignore ret value */ + raptor_rdfxml_ensure_writen_header(serializer, context); + + if(context->rdf_RDF_element) { + raptor_xml_writer_end_element(xml_writer, context->rdf_RDF_element); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1); + } + + raptor_xml_writer_flush(xml_writer); + } + + if(context->rdf_RDF_element) { + raptor_free_xml_element(context->rdf_RDF_element); + context->rdf_RDF_element = NULL; + } + + return 0; +} + + +/* finish the serializer factory */ +static void +raptor_rdfxml_serialize_finish_factory(raptor_serializer_factory* factory) +{ + +} + +static const char* const rdfxml_names[2] = { "rdfxml", NULL}; + +static const char* const rdfxml_uri_strings[3] = { + "http://www.w3.org/ns/formats/RDF_XML", + "http://www.w3.org/TR/rdf-syntax-grammar", + NULL +}; + +#define RDFXML_TYPES_COUNT 2 +static const raptor_type_q rdfxml_types[RDFXML_TYPES_COUNT + 1] = { + { "application/rdf+xml", 19, 10}, + { "text/rdf", 8, 6}, + { NULL, 0, 0} +}; + +static int +raptor_rdfxml_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = rdfxml_names; + factory->desc.mime_types = rdfxml_types; + + factory->desc.label = "RDF/XML"; + factory->desc.uri_strings = rdfxml_uri_strings, + + factory->context_length = sizeof(raptor_rdfxml_serializer_context); + + factory->init = raptor_rdfxml_serialize_init; + factory->terminate = raptor_rdfxml_serialize_terminate; + factory->declare_namespace = raptor_rdfxml_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_rdfxml_serialize_declare_namespace_from_namespace; + factory->serialize_start = 
raptor_rdfxml_serialize_start; + factory->serialize_statement = raptor_rdfxml_serialize_statement; + factory->serialize_end = raptor_rdfxml_serialize_end; + factory->finish_factory = raptor_rdfxml_serialize_finish_factory; + + return 0; +} + + + +int +raptor_init_serializer_rdfxml(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_rdfxml_serializer_register_factory); +} + + diff --git a/src/raptor_serialize_rdfxmla.c b/src/raptor_serialize_rdfxmla.c new file mode 100644 index 0000000..2341947 --- /dev/null +++ b/src/raptor_serialize_rdfxmla.c @@ -0,0 +1,1553 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_rdfxmla.c - RDF/XML with abbreviations serializer + * + * Copyright (C) 2004-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * Copyright (C) 2005, Steve Shepard steveshep@gmail.com + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
/*
 * Raptor rdfxml-abbrev serializer object
 *
 * Per-serializer state for the abbreviated RDF/XML serializer.
 */
typedef struct {
  raptor_namespace_stack *nstack;       /* Namespace stack */
  raptor_namespace *xml_nspace;         /* the xml: namespace */
  raptor_namespace *rdf_nspace;         /* the rdf: namespace */
  raptor_xml_element* rdf_RDF_element;  /* the rdf:RDF element */
  raptor_xml_writer *xml_writer;        /* where the xml is being written */
  raptor_sequence *namespaces;          /* User declared namespaces */
  raptor_avltree *subjects;             /* subject items */
  raptor_avltree *blanks;               /* blank subject items */
  raptor_avltree *nodes;                /* nodes */
  raptor_abbrev_node *rdf_type;         /* rdf:type uri */

  /* non-zero if is Adobe XMP abbreviated form */
  int is_xmp;

  /* non zero if rdf:RDF has been written (and thus no new namespaces
   * can be declared).
   */
  int written_header;

  /* for labeling namespaces */
  int namespace_count;

  /* xml_writer was passed in and not owned by us */
  int external_xml_writer;

  /* true if should write rdf:RDF */
  int write_rdf_RDF;

  /* starting namespace stack depth */
  int starting_depth;

  /* namespaces stack was passed in and not owned by us */
  int external_nstack;

  /* If not NULL, the URI of the single node to serialize - starting
   * from property elements */
  raptor_uri* single_node;

  /* If non-0, emit typed nodes */
  int write_typed_nodes;
} raptor_rdfxmla_context;
*statement); + +static int raptor_rdfxmla_serialize_end(raptor_serializer* serializer); +static void raptor_rdfxmla_serialize_finish_factory(raptor_serializer_factory* factory); + + +/* helper functions */ + + +/* + * raptor_rdfxmla_emit_resource_uri: + * @serializer: #raptor_serializer object + * @element: XML Element + * @uri: URI object + * @depth: depth into tree + * + * Emit a description of a resource using an XML Element + * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_resource_uri(raptor_serializer *serializer, + raptor_xml_element *element, + raptor_uri* uri, + int depth) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_xml_writer *xml_writer = context->xml_writer; + raptor_qname **attrs; + unsigned char *attr_name; + unsigned char *attr_value; + + RAPTOR_DEBUG2("Emitting resource predicate URI %s\n", + raptor_uri_as_string(uri)); + + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + if(!attrs) + return 1; + + attr_name = (unsigned char *)"resource"; + + if(RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_RELATIVE_URIS)) + /* newly allocated string */ + attr_value = raptor_uri_to_relative_uri_string(serializer->base_uri, uri); + else + attr_value = raptor_uri_as_string(uri); + + attrs[0] = raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + attr_name, + attr_value); + + if(RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_RELATIVE_URIS)) + RAPTOR_FREE(char*, attr_value); + + if(!attrs[0]) { + RAPTOR_FREE(qnamearray, attrs); + return 1; + } + + raptor_xml_element_set_attributes(element, attrs, 1); + + raptor_xml_writer_start_element(xml_writer, element); + raptor_xml_writer_end_element(context->xml_writer, element); + + RAPTOR_DEBUG2("Emitted resource predicate URI %s\n", + raptor_uri_as_string(uri)); + + return 0; +} + + +/* + * raptor_rdfxmla_emit_resource: + * @serializer: #raptor_serializer object + * @element: XML Element 
+ * @node: resource node + * @depth: depth into tree + * + * Emit a description of a resource using an XML Element + * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_resource(raptor_serializer *serializer, + raptor_xml_element *element, + raptor_abbrev_node* node, + int depth) +{ + int rc; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting resource node", node); + + if(node->term->type != RAPTOR_TERM_TYPE_URI) + return 1; + + rc = raptor_rdfxmla_emit_resource_uri(serializer, element, + node->term->value.uri, depth); + + RAPTOR_DEBUG_ABBREV_NODE("Emitted resource node", node); + + return rc; +} + + +/* + * raptor_rdfxmla_emit_literal: + * @serializer: #raptor_serializer object + * @element: XML Element + * @node: literal node + * @depth: depth into tree + * + * Emit a description of a literal using an XML Element + * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_literal(raptor_serializer *serializer, + raptor_xml_element *element, + raptor_abbrev_node* node, + int depth) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_xml_writer *xml_writer = context->xml_writer; + raptor_qname **attrs; + int attrs_count; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting literal node", node); + + if(node->term->type != RAPTOR_TERM_TYPE_LITERAL) + return 1; + + if(node->term->value.literal.language || node->term->value.literal.datatype) { + + attrs_count = 0; + attrs = RAPTOR_CALLOC(raptor_qname**, 2, sizeof(raptor_qname*)); + if(!attrs) + return 1; + + if(node->term->value.literal.language) { + attrs[attrs_count] = raptor_new_qname(context->nstack, + (unsigned char*)"xml:lang", + node->term->value.literal.language); + if(!attrs[attrs_count]) + goto attrs_oom; + attrs_count++; + } + + if(node->term->value.literal.datatype) { + unsigned char *datatype_value; + datatype_value = raptor_uri_as_string(node->term->value.literal.datatype); + attrs[attrs_count] = 
raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + (const unsigned char*)"datatype", + datatype_value); + if(!attrs[attrs_count]) + goto attrs_oom; + attrs_count++; + + /* SJS Note: raptor_default_uri_as_string simply returns a + * pointer to the string. Hope this is also true of alternate + * uri implementations. */ + /* RAPTOR_FREE(char*, datatype_value); */ + + } + + raptor_xml_element_set_attributes(element, attrs, attrs_count); + + } + + raptor_xml_writer_start_element(xml_writer, element); + raptor_xml_writer_cdata(xml_writer, node->term->value.literal.string); + raptor_xml_writer_end_element(xml_writer, element); + + RAPTOR_DEBUG_ABBREV_NODE("Emitted literal node", node); + + return 0; + + attrs_oom: + + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_FATAL, NULL, + "Out of memory"); + + /* attrs_count has not been incremented yet + * and it points to the qname the allocation of which failed */ + attrs_count--; + while(attrs_count>=0) + raptor_free_qname(attrs[attrs_count--]); + + RAPTOR_FREE(qnamearray, attrs); + + return 1; +} + + +/* + * raptor_rdfxmla_emit_blank: + * @serializer: #raptor_serializer object + * @element: XML Element + * @node: blank node + * @depth: depth into tree + * + * Emit a description of a blank node using an XML Element + * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_blank(raptor_serializer *serializer, + raptor_xml_element *element, raptor_abbrev_node* node, + int depth) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting blank node", node); + + if(node->term->type != RAPTOR_TERM_TYPE_BLANK) + return 1; + + if((node->count_as_subject == 1 && node->count_as_object == 1)) { + /* If this is only used as a 1 subject and object or never + * used as a subject or never used as an object, it never need + * be referenced with an explicit name */ + raptor_abbrev_subject* blank; + + 
raptor_xml_writer_start_element(context->xml_writer, element); + + blank = raptor_abbrev_subject_find(context->blanks, node->term); + + if(blank) { + raptor_rdfxmla_emit_subject(serializer, blank, depth + 1); + raptor_abbrev_subject_invalidate(blank); + } + + } else { + unsigned char *attr_name = (unsigned char*)"nodeID"; + unsigned char *attr_value = node->term->value.blank.string; + raptor_qname **attrs; + + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + if(!attrs) + return 1; + + attrs[0] = raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + attr_name, + attr_value); + + raptor_xml_element_set_attributes(element, attrs, 1); + raptor_xml_writer_start_element(context->xml_writer, element); + + } + + raptor_xml_writer_end_element(context->xml_writer, element); + + RAPTOR_DEBUG_ABBREV_NODE("Emitted blank node", node); + + return 0; +} + + +/* + * raptor_rdfxmla_emit_subject_list_items: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit an rdf list of items (rdf:li) about a subject node. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_subject_list_items(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + int rv = 0; + int i = 0; + raptor_uri* base_uri = NULL; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject list items", subject->node); + + while(!rv && i < raptor_sequence_size(subject->list_items)) { + + raptor_abbrev_node* object; + raptor_qname *qname; + raptor_xml_element *element; + + object = (raptor_abbrev_node*)raptor_sequence_get_at(subject->list_items, i++); + if(!object) + continue; + + qname = raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + (unsigned char *)"li", + NULL); + + if(serializer->base_uri) + base_uri = raptor_uri_copy(serializer->base_uri); + element = raptor_new_xml_element(qname, NULL, base_uri); + if(!element) { + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_FATAL, NULL, + "Out of memory"); + raptor_free_qname(qname); + rv = 1; /* error */ + break; + } + + switch (object->term->type) { + + case RAPTOR_TERM_TYPE_URI: + rv = raptor_rdfxmla_emit_resource(serializer, element, object, + depth + 1); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + rv = raptor_rdfxmla_emit_literal(serializer, element, object, + depth + 1); + break; + + case RAPTOR_TERM_TYPE_BLANK: + rv = raptor_rdfxmla_emit_blank(serializer, element, object, + depth + 1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple has unsupported term type %u", + object->term->type); + break; + + } + + raptor_free_xml_element(element); + + } + + return rv; +} + + +/* + * raptor_rdfxmla_emit_subject_properties: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit the properties about a subject node. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_subject_properties(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + int rv = 0; + raptor_avltree_iterator* iter = NULL; + raptor_term* subject_term = subject->node->term; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject properties", subject->node); + + /* Emit any rdf:_n properties collected */ + if(raptor_sequence_size(subject->list_items) > 0) { + rv = raptor_rdfxmla_emit_subject_list_items(serializer, subject, depth + 1); + if(rv) + return rv; + } + + + if(subject->node_type && !context->write_typed_nodes) { + raptor_uri *base_uri = NULL; + raptor_qname *qname = NULL; + raptor_xml_element *element = NULL; + + /* if rdf:type was associated with this subject and do not want + * a typed node, emit it as a property element + */ + qname = raptor_new_qname_from_resource(context->namespaces, + context->nstack, + &context->namespace_count, + context->rdf_type); + if(!qname) + goto oom; + + if(serializer->base_uri) + base_uri = raptor_uri_copy(serializer->base_uri); + + element = raptor_new_xml_element(qname, NULL, base_uri); + if(!element) { + if(base_uri) + raptor_free_uri(base_uri); + raptor_free_qname(qname); + goto oom; + } + + rv = raptor_rdfxmla_emit_resource_uri(serializer, element, + subject_term->value.uri, + depth + 1); + raptor_free_xml_element(element); + } + + + for(iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1); + iter && !rv; + (rv = raptor_avltree_iterator_next(iter))) { + raptor_uri *base_uri = NULL; + raptor_qname *qname; + raptor_xml_element *element; + raptor_abbrev_node** nodes; + raptor_abbrev_node* predicate; + raptor_abbrev_node* object; + + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + predicate= nodes[0]; + object= nodes[1]; + + qname = raptor_new_qname_from_resource(context->namespaces, + 
context->nstack, + &context->namespace_count, + predicate); + if(!qname) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot split URI '%s' into an XML qname", + raptor_uri_as_string(predicate->term->value.uri)); + continue; + } + + if(serializer->base_uri) + base_uri = raptor_uri_copy(serializer->base_uri); + element = raptor_new_xml_element(qname, NULL, base_uri); + if(!element) { + if(base_uri) + raptor_free_uri(base_uri); + raptor_free_qname(qname); + goto oom; + } + + switch (object->term->type) { + + case RAPTOR_TERM_TYPE_URI: + rv = raptor_rdfxmla_emit_resource(serializer, element, object, + depth + 1); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + rv = raptor_rdfxmla_emit_literal(serializer, element, object, + depth + 1); + break; + + case RAPTOR_TERM_TYPE_BLANK: + rv = raptor_rdfxmla_emit_blank(serializer, element, object, + depth + 1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple has unsupported term type %u", + object->term->type); + break; + } + + /* Return error if emitting something failed above */ + if(rv) + break; + + raptor_free_xml_element(element); + + } + if(iter) + raptor_free_avltree_iterator(iter); + + return rv; + + oom: + if(iter) + raptor_free_avltree_iterator(iter); + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_FATAL, NULL, + "Out of memory"); + return 1; +} + + +/* + * raptor_rdfxmla_emit_subject: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit a subject node + * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit_subject(raptor_serializer *serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_qname *qname = NULL; + raptor_xml_element *element = NULL; + raptor_qname **attrs; + unsigned char *attr_name; + unsigned 
char *attr_value; + raptor_uri *base_uri = NULL; + int subject_is_single_node; + raptor_term *subject_term = subject->node->term; + + if(!raptor_abbrev_subject_valid(subject)) + return 0; + + subject_is_single_node = (context->single_node && + subject_term->type == RAPTOR_TERM_TYPE_URI && + raptor_uri_equals(subject_term->value.uri, + context->single_node)); + + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject node", subject->node); + + if(!depth && + subject_term->type == RAPTOR_TERM_TYPE_BLANK && + subject->node->count_as_subject == 1 && + subject->node->count_as_object == 1) { + RAPTOR_DEBUG_ABBREV_NODE("Skipping subject node", subject->node); + return 0; + } + + + if(subject->node_type && context->write_typed_nodes) { + /* if rdf:type was associated with this subject */ + qname = raptor_new_qname_from_resource(context->namespaces, + context->nstack, + &context->namespace_count, + subject->node_type); + + if(!qname) { + raptor_log_error_formatted(serializer->world, + RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot split URI '%s' into an XML qname", + raptor_uri_as_string(subject->node_type->term->value.uri)); + return 1; + } + + } else { + qname = raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + (unsigned const char*)"Description", + NULL); + if(!qname) + goto oom; + } + + if(serializer->base_uri) + base_uri = raptor_uri_copy(serializer->base_uri); + element = raptor_new_xml_element(qname, NULL, base_uri); + if(!element) { + if(base_uri) + raptor_free_uri(base_uri); + raptor_free_qname(qname); + goto oom; + } + + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + if(!attrs) + goto oom; + + attr_name = NULL; + attr_value = NULL; + + /* emit the subject node */ + if(subject_term->type == RAPTOR_TERM_TYPE_URI) { + attr_name = (unsigned char*)"about"; + if(context->is_xmp) { + /* XML rdf:about value is always "" */ + attr_value = RAPTOR_CALLOC(unsigned char*, 1, sizeof(unsigned char)); + } else 
if(RAPTOR_OPTIONS_GET_NUMERIC(serializer, + RAPTOR_OPTION_RELATIVE_URIS)) + attr_value = raptor_uri_to_relative_uri_string(serializer->base_uri, + subject_term->value.uri); + else + attr_value = raptor_uri_to_string(subject_term->value.uri); + + } else if(subject_term->type == RAPTOR_TERM_TYPE_BLANK) { + if(subject->node->count_as_subject && + subject->node->count_as_object && + !(subject->node->count_as_subject == 1 && + subject->node->count_as_object == 1)) { + /* No need for nodeID if this node is never used as a subject + * or object OR if it is used exactly once as subject and object. + */ + attr_name = (unsigned char*)"nodeID"; + attr_value = subject_term->value.blank.string; + } + } + + if(attr_name) { + attrs[0] = raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + attr_name, + attr_value); + + if(subject_term->type != RAPTOR_TERM_TYPE_BLANK) + RAPTOR_FREE(char*, attr_value); + + if(!attrs[0]) { + RAPTOR_FREE(qnamearray, attrs); + goto oom; + } + + /* Note: if we were willing to track the in-scope rdf:lang, we + * could do the "2.5 Property Attributes" abbreviation here */ + raptor_xml_element_set_attributes(element, attrs, 1); + } else { + RAPTOR_FREE(qnamearray, attrs); + } + + if(!subject_is_single_node) { + raptor_xml_writer_start_element(context->xml_writer, element); + raptor_rdfxmla_emit_subject_properties(serializer, subject, depth + 1); + raptor_xml_writer_end_element(context->xml_writer, element); + } else + raptor_rdfxmla_emit_subject_properties(serializer, subject, depth); + + raptor_free_xml_element(element); + + return 0; + + oom: + if(element) + raptor_free_xml_element(element); + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Out of memory"); + return 1; +} + + +/* + * raptor_rdfxmla_emit - + * @serializer: #raptor_serializer object + * + * Emit RDF/XML for all stored triples. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_rdfxmla_emit(raptor_serializer *serializer) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_abbrev_subject* subject; + raptor_abbrev_subject* blank; + raptor_avltree_iterator* iter = NULL; + + iter = raptor_new_avltree_iterator(context->subjects, NULL, NULL, 1); + while(iter) { + subject = (raptor_abbrev_subject*)raptor_avltree_iterator_get(iter); + if(subject) { + raptor_rdfxmla_emit_subject(serializer, subject, context->starting_depth); + } + if(raptor_avltree_iterator_next(iter)) + break; + } + if(iter) + raptor_free_avltree_iterator(iter); + + if(!context->single_node) { + /* Emit any remaining blank nodes */ + iter = raptor_new_avltree_iterator(context->blanks, NULL, NULL, 1); + while(iter) { + blank = (raptor_abbrev_subject*)raptor_avltree_iterator_get(iter); + if(blank) { + raptor_rdfxmla_emit_subject(serializer, blank, context->starting_depth); + } + if(raptor_avltree_iterator_next(iter)) + break; + } + if(iter) + raptor_free_avltree_iterator(iter); + } + + return 0; +} + + +/* + * raptor serializer rdfxml-abbrev implementation + */ + + +static void +raptor_rdfxmla_serialize_init_nstack(raptor_serializer* serializer, + raptor_namespace_stack *nstack) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + + context->xml_nspace = raptor_new_namespace(context->nstack, + (const unsigned char*)"xml", + raptor_xml_namespace_uri, + context->starting_depth); + + context->rdf_nspace = raptor_new_namespace(context->nstack, + (const unsigned char*)"rdf", + raptor_rdf_namespace_uri, + context->starting_depth); +} + + + +/* create a new serializer */ +static int +raptor_rdfxmla_serialize_init(raptor_serializer* serializer, const char *name) +{ + raptor_rdfxmla_context* context; + raptor_term* type_term; + + context = (raptor_rdfxmla_context*)serializer->context; + + context->nstack = raptor_new_namespaces(serializer->world, 1); + 
if(!context->nstack) + return 1; + + raptor_rdfxmla_serialize_init_nstack(serializer, context->nstack); + + context->namespaces = raptor_new_sequence(NULL, NULL); + + context->subjects = + raptor_new_avltree((raptor_data_compare_handler)raptor_abbrev_subject_compare, + (raptor_data_free_handler)raptor_free_abbrev_subject, 0); + + context->blanks = + raptor_new_avltree((raptor_data_compare_handler)raptor_abbrev_subject_compare, + (raptor_data_free_handler)raptor_free_abbrev_subject, 0); + + context->nodes = + raptor_new_avltree((raptor_data_compare_handler)raptor_abbrev_node_compare, + (raptor_data_free_handler)raptor_free_abbrev_node, 0); + + type_term = RAPTOR_RDF_type_term(serializer->world); + context->rdf_type = raptor_new_abbrev_node(serializer->world, type_term); + + if(!context->xml_nspace || !context->rdf_nspace || !context->namespaces || + !context->subjects || !context->blanks || !context->nodes || + !context->rdf_type) { + raptor_rdfxmla_serialize_terminate(serializer); + return 1; + } + + context->is_xmp=!strncmp(name, "rdfxml-xmp", 10); + if(context->is_xmp) + RAPTOR_OPTIONS_SET_NUMERIC(serializer, + RAPTOR_OPTION_WRITER_XML_DECLARATION, 0); + + /* Note: item 0 in the list is rdf:RDF's namespace */ + if(raptor_sequence_push(context->namespaces, context->rdf_nspace)) { + raptor_rdfxmla_serialize_terminate(serializer); + return 1; + } + + context->write_rdf_RDF = 1; + context->starting_depth = 0; + context->single_node = NULL; + context->write_typed_nodes = 1; + + return 0; +} + + +/* destroy a serializer */ +static void +raptor_rdfxmla_serialize_terminate(raptor_serializer* serializer) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + + if(context->xml_writer) { + if(!context->external_xml_writer) + raptor_free_xml_writer(context->xml_writer); + context->xml_writer = NULL; + context->external_xml_writer = 0; + } + + if(context->rdf_RDF_element) { + raptor_free_xml_element(context->rdf_RDF_element); + 
context->rdf_RDF_element = NULL; + } + + if(context->rdf_nspace) { + raptor_free_namespace(context->rdf_nspace); + context->rdf_nspace = NULL; + } + + if(context->xml_nspace) { + raptor_free_namespace(context->xml_nspace); + context->xml_nspace = NULL; + } + + if(context->namespaces) { + int i; + + /* Note: item 0 in the list is rdf:RDF's namespace and freed above */ + for(i = 1; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + if(ns) + raptor_free_namespace(ns); + } + raptor_free_sequence(context->namespaces); + context->namespaces = NULL; + } + + if(context->subjects) { + raptor_free_avltree(context->subjects); + context->subjects = NULL; + } + + if(context->blanks) { + raptor_free_avltree(context->blanks); + context->blanks = NULL; + } + + if(context->nodes) { + raptor_free_avltree(context->nodes); + context->nodes = NULL; + } + + /* always free raptor_namespace* before stack */ + if(context->nstack) { + if(!context->external_nstack) + raptor_free_namespaces(context->nstack); + context->nstack = NULL; + } + + if(context->rdf_type) { + raptor_free_abbrev_node(context->rdf_type); + context->rdf_type = NULL; + } +} + + +#define RDFXMLA_NAMESPACE_DEPTH 0 + +/* add a namespace */ +static int +raptor_rdfxmla_serialize_declare_namespace_from_namespace(raptor_serializer* serializer, + raptor_namespace *nspace) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + int i; + + if(context->written_header) + return 1; + + for(i = 0; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + + /* If prefix is already declared, ignore it */ + if(!ns->prefix && !nspace->prefix) + return 1; + + if(ns->prefix && nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) + return 1; + + if(ns->uri && nspace->uri && + 
raptor_uri_equals(ns->uri, nspace->uri)) + return 1; + } + + nspace = raptor_new_namespace_from_uri(context->nstack, + nspace->prefix, nspace->uri, + context->starting_depth + + RDFXMLA_NAMESPACE_DEPTH); + if(!nspace) + return 1; + + raptor_sequence_push(context->namespaces, nspace); + return 0; +} + + +/* add a namespace */ +static int +raptor_rdfxmla_serialize_declare_namespace(raptor_serializer* serializer, + raptor_uri *uri, + const unsigned char *prefix) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_namespace *ns; + int rc; + + ns = raptor_new_namespace_from_uri(context->nstack, prefix, uri, + context->starting_depth + + RDFXMLA_NAMESPACE_DEPTH); + + rc = raptor_rdfxmla_serialize_declare_namespace_from_namespace(serializer, + ns); + raptor_free_namespace(ns); + + return rc; +} + + +/* + * raptor_rdfxmla_serialize_set_write_rdf_RDF: + * @serializer: serializer object + * @value: value + * + * INTERNAL - Set flag to write rdf:RDF root element + * + * Return value: non-0 on failure + */ +int +raptor_rdfxmla_serialize_set_write_rdf_RDF(raptor_serializer* serializer, + int value) +{ + raptor_rdfxmla_context* context; + + if(strcmp(serializer->factory->desc.names[0], "rdfxml-abbrev")) + return 1; + + context = (raptor_rdfxmla_context*)serializer->context; + + context->write_rdf_RDF = value; + + return 0; +} + + +/* + * raptor_rdfxmla_serialize_set_xml_writer: + * @serializer: serializer object + * @xml_writer: XML writer + * @nstack: namespace stack + * + * INTERNAL - Set an existing created XML writer to write the serializing to + * + * Return value: non-0 on failure + */ +int +raptor_rdfxmla_serialize_set_xml_writer(raptor_serializer* serializer, + raptor_xml_writer* xml_writer, + raptor_namespace_stack *nstack) +{ + raptor_rdfxmla_context* context; + + if(strcmp(serializer->factory->desc.names[0], "rdfxml-abbrev")) + return 1; + + context = (raptor_rdfxmla_context*)serializer->context; + + context->xml_writer = 
xml_writer; + context->starting_depth = xml_writer ? (raptor_xml_writer_get_depth(xml_writer) + 1) : -1; + context->external_xml_writer = (xml_writer != NULL); + + if(context->xml_nspace) + raptor_free_namespace(context->xml_nspace); + if(context->rdf_nspace) + raptor_free_namespace(context->rdf_nspace); + /* always free raptor_namespace* before stack */ + if(context->nstack) + raptor_free_namespaces(context->nstack); + + context->nstack = nstack; + context->external_nstack = 1; + raptor_rdfxmla_serialize_init_nstack(serializer, context->nstack); + + return 0; +} + + +/* + * raptor_rdfxmla_serialize_set_single_node: + * @serializer: + * @uri: + * + * INTERNAL - Set a single node to serialize the contents + * + * The outer node element with this URI is not serialized, the inner + * property elements are written. @uri is copied + * + * Return value: non-0 on failure + */ +int +raptor_rdfxmla_serialize_set_single_node(raptor_serializer* serializer, + raptor_uri* uri) +{ + raptor_rdfxmla_context* context; + + if(strcmp(serializer->factory->desc.names[0], "rdfxml-abbrev")) + return 1; + + context = (raptor_rdfxmla_context*)serializer->context; + + if(context->single_node) + raptor_free_uri(context->single_node); + + context->single_node = raptor_uri_copy(uri); + + return 0; +} + + +/* + * raptor_rdfxmla_serialize_set_write_typed_nodes: + * @serializer: + * @value: + * + * INTERNAL - Set flag to write typed node elements + * + * Return value: non-0 on failure + */ +int +raptor_rdfxmla_serialize_set_write_typed_nodes(raptor_serializer* serializer, + int value) +{ + raptor_rdfxmla_context* context; + + if(strcmp(serializer->factory->desc.names[0], "rdfxml-abbrev")) + return 1; + + context = (raptor_rdfxmla_context*)serializer->context; + + context->write_typed_nodes = value; + + return 0; +} + + +/* start a serialize */ +static int +raptor_rdfxmla_serialize_start(raptor_serializer* serializer) +{ + raptor_rdfxmla_context* context = 
(raptor_rdfxmla_context*)serializer->context; + + if(!context->external_xml_writer) { + raptor_xml_writer* xml_writer; + raptor_option option; + + if(context->xml_writer) + raptor_free_xml_writer(context->xml_writer); + + xml_writer = raptor_new_xml_writer(serializer->world, context->nstack, + serializer->iostream); + if(!xml_writer) + return 1; + + raptor_xml_writer_set_option(xml_writer, + RAPTOR_OPTION_WRITER_AUTO_INDENT, NULL,1); + raptor_xml_writer_set_option(xml_writer, + RAPTOR_OPTION_WRITER_AUTO_EMPTY, NULL, 1); + raptor_xml_writer_set_option(xml_writer, + RAPTOR_OPTION_WRITER_INDENT_WIDTH, NULL, 2); + option = RAPTOR_OPTION_WRITER_XML_VERSION; + raptor_xml_writer_set_option(xml_writer, option, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(serializer, option)); + option = RAPTOR_OPTION_WRITER_XML_DECLARATION; + raptor_xml_writer_set_option(xml_writer, option, NULL, + RAPTOR_OPTIONS_GET_NUMERIC(serializer, option)); + + context->xml_writer = xml_writer; + } + + return 0; +} + + +static int +raptor_rdfxmla_ensure_writen_header(raptor_serializer* serializer, + raptor_rdfxmla_context* context) +{ + raptor_xml_writer* xml_writer; + raptor_qname *qname; + raptor_uri *base_uri; + int i; + raptor_qname **attrs = NULL; + int attrs_count = 0; + + if(context->written_header) + return 0; /* already succeeded */ + + if(!context->write_rdf_RDF) { + context->written_header = 1; + return 0; + } + + xml_writer = context->xml_writer; + if(context->is_xmp) + raptor_xml_writer_raw(xml_writer, + (const unsigned char*)"<?xpacket begin='' id='W5M0MpCehiHzreSzNTczkc9d'?>\n<x:xmpmeta xmlns:x='adobe:ns:meta/'>\n"); + + qname = raptor_new_qname_from_namespace_local_name(serializer->world, + context->rdf_nspace, + (const unsigned char*)"RDF", + NULL); + if(!qname) + goto oom; + base_uri = serializer->base_uri; + if(base_uri) + base_uri = raptor_uri_copy(base_uri); + context->rdf_RDF_element = raptor_new_xml_element(qname, NULL, base_uri); + if(!context->rdf_RDF_element) { + if(base_uri) + 
raptor_free_uri(base_uri); + raptor_free_qname(qname); + goto oom; + } + + /* NOTE: Starts at item 1 as item 0 is the element's namespace (rdf) + * and does not need to be declared + */ + for(i = 1; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + raptor_xml_element_declare_namespace(context->rdf_RDF_element, ns); + } + + if(base_uri && + RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_WRITE_BASE_URI)) { + const unsigned char* base_uri_string; + + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + if(!attrs) + goto oom; + + base_uri_string = raptor_uri_as_string(base_uri); + attrs[attrs_count] = raptor_new_qname_from_namespace_local_name(serializer->world, + context->xml_nspace, + (const unsigned char*)"base", + base_uri_string); + if(!attrs[attrs_count]) { + RAPTOR_FREE(qnamearray, attrs); + goto oom; + } + attrs_count++; + } + + if(attrs_count) + raptor_xml_element_set_attributes(context->rdf_RDF_element, attrs, + attrs_count); + else + raptor_xml_element_set_attributes(context->rdf_RDF_element, NULL, 0); + + + + raptor_xml_writer_start_element(xml_writer, context->rdf_RDF_element); + + context->written_header = 1; + + return 0; + + oom: + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Out of memory"); + return 1; +} + + +/* serialize a statement */ +static int +raptor_rdfxmla_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_abbrev_subject* subject = NULL; + raptor_abbrev_node* predicate = NULL; + raptor_abbrev_node* object = NULL; + int rv = 0; + raptor_term_type object_type; + + if(!(statement->subject->type == RAPTOR_TERM_TYPE_URI || + statement->subject->type == RAPTOR_TERM_TYPE_BLANK)) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a 
triple with subject node type %u", + statement->subject->type); + return 1; + } + + subject = raptor_abbrev_subject_lookup(context->nodes, context->subjects, + context->blanks, + statement->subject); + if(!subject) + return 1; + + object_type = statement->object->type; + + if(!(object_type == RAPTOR_TERM_TYPE_URI || + object_type == RAPTOR_TERM_TYPE_BLANK || + object_type == RAPTOR_TERM_TYPE_LITERAL)) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a triple with object node type %u", + object_type); + return 1; + } + + object = raptor_abbrev_node_lookup(context->nodes, + statement->object); + if(!object) + return 1; + + + if(statement->predicate->type == RAPTOR_TERM_TYPE_URI) { + predicate = raptor_abbrev_node_lookup(context->nodes, statement->predicate); + if(!predicate) + return 1; + + if(!subject->node_type && + raptor_abbrev_node_equals(predicate, context->rdf_type) && + statement->object->type == RAPTOR_TERM_TYPE_URI) { + + /* Store the first one as the type for abbreviation 2.14 + * purposes. Note that it is perfectly legal to have + * multiple type definitions. 
All definitions after the + * first go in the property list */ + subject->node_type = raptor_abbrev_node_lookup(context->nodes, + statement->object); + if(!subject->node_type) + return 1; + subject->node_type->ref_count++; + return 0; + + } else { + int add_property = 1; + + if(context->is_xmp && predicate->ref_count > 1) { + raptor_avltree_iterator* iter = NULL; + for((iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1)); + iter && !rv; + (rv = raptor_avltree_iterator_next(iter))) { + raptor_abbrev_node** nodes; + raptor_abbrev_node* node; + + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + node= nodes[0]; + + if(node == predicate) { + add_property = 0; + if(object->term->type == RAPTOR_TERM_TYPE_BLANK) { + /* look for any generated blank node associated with this + * statement and free it + */ + raptor_abbrev_subject *blank = + raptor_abbrev_subject_find(context->blanks, + statement->object); + if(subject) raptor_avltree_delete(context->blanks, blank); + } + break; + } + } + if(iter) + raptor_free_avltree_iterator(iter); + } + + if(add_property) { + rv = raptor_abbrev_subject_add_property(subject, predicate, object); + if(rv < 0) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Unable to add properties to subject %p", + RAPTOR_VOIDP(subject)); + return rv; + } + } + } + + } else { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a triple with predicate node type %u", + statement->predicate->type); + return 1; + } + + if(object_type == RAPTOR_TERM_TYPE_URI || + object_type == RAPTOR_TERM_TYPE_BLANK) + object->count_as_object++; + + + return 0; + +} + + +/* end a serialize */ +static int +raptor_rdfxmla_serialize_end(raptor_serializer* serializer) +{ + + raptor_rdfxmla_context* context = (raptor_rdfxmla_context*)serializer->context; + raptor_xml_writer* xml_writer = context->xml_writer; + + if(xml_writer) { + 
if(!raptor_rdfxmla_ensure_writen_header(serializer, context)) { + + raptor_rdfxmla_emit(serializer); + + if(context->write_rdf_RDF) { + /* ensure_writen_header() returned success, can assume context->rdf_RDF_element is non-NULL */ + raptor_xml_writer_end_element(xml_writer, context->rdf_RDF_element); + + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1); + } + } + } + + if(context->rdf_RDF_element) { + raptor_free_xml_element(context->rdf_RDF_element); + context->rdf_RDF_element = NULL; + } + + if(context->is_xmp && xml_writer) + raptor_xml_writer_raw(xml_writer, + (const unsigned char*)"</x:xmpmeta>\n<?xpacket end='r'?>\n"); + + if(xml_writer) + raptor_xml_writer_flush(xml_writer); + + if(context->single_node) + raptor_free_uri(context->single_node); + + context->written_header = 0; + + return 0; +} + + +/* finish the serializer factory */ +static void +raptor_rdfxmla_serialize_finish_factory(raptor_serializer_factory* factory) +{ + /* NOP */ +} + + +static const char* const rdfxml_xmp_names[2] = { "rdfxml-xmp", NULL}; + +static const char* const rdfxml_xmp_uri_strings[2] = { + "http://www.w3.org/TR/rdf-syntax-grammar", + NULL +}; + +#define RDFXML_XMP_TYPES_COUNT 1 +static const raptor_type_q rdfxml_xmp_types[RDFXML_XMP_TYPES_COUNT + 1] = { + { "application/rdf+xml", 19, 0}, + { NULL, 0, 0} +}; + +static int +raptor_rdfxml_xmp_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = rdfxml_xmp_names; + factory->desc.mime_types = rdfxml_xmp_types; + + factory->desc.label = "RDF/XML (XMP Profile)"; + factory->desc.uri_strings = rdfxml_xmp_uri_strings; + + factory->context_length = sizeof(raptor_rdfxmla_context); + + factory->init = raptor_rdfxmla_serialize_init; + factory->terminate = raptor_rdfxmla_serialize_terminate; + factory->declare_namespace = raptor_rdfxmla_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_rdfxmla_serialize_declare_namespace_from_namespace; + 
factory->serialize_start = raptor_rdfxmla_serialize_start; + factory->serialize_statement = raptor_rdfxmla_serialize_statement; + factory->serialize_end = raptor_rdfxmla_serialize_end; + factory->finish_factory = raptor_rdfxmla_serialize_finish_factory; + + return 0; +} + + +static const char* const rdfxmla_names[2] = { "rdfxml-abbrev", NULL}; + +static const char* const rdfxml_uri_strings[3] = { + "http://www.w3.org/ns/formats/RDF_XML", + "http://www.w3.org/TR/rdf-syntax-grammar", + NULL +}; + +#define RDFXMLA_TYPES_COUNT 1 +static const raptor_type_q rdfxmla_types[RDFXMLA_TYPES_COUNT + 1] = { + { "application/rdf+xml", 19, 0}, + { NULL, 0, 0} +}; + +static int +raptor_rdfxmla_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = rdfxmla_names; + factory->desc.mime_types = rdfxmla_types; + + factory->desc.label = "RDF/XML (Abbreviated)"; + factory->desc.uri_strings = rdfxml_uri_strings; + + factory->context_length = sizeof(raptor_rdfxmla_context); + + factory->init = raptor_rdfxmla_serialize_init; + factory->terminate = raptor_rdfxmla_serialize_terminate; + factory->declare_namespace = raptor_rdfxmla_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_rdfxmla_serialize_declare_namespace_from_namespace; + factory->serialize_start = raptor_rdfxmla_serialize_start; + factory->serialize_statement = raptor_rdfxmla_serialize_statement; + factory->serialize_end = raptor_rdfxmla_serialize_end; + factory->finish_factory = raptor_rdfxmla_serialize_finish_factory; + + return 0; +} + + +int +raptor_init_serializer_rdfxmla(raptor_world* world) +{ + int rc; + + rc = !raptor_serializer_register_factory(world, + &raptor_rdfxml_xmp_serializer_register_factory); + if(rc) + return rc; + + rc = !raptor_serializer_register_factory(world, + &raptor_rdfxmla_serializer_register_factory); + + return rc; +} + diff --git a/src/raptor_serialize_rss.c b/src/raptor_serialize_rss.c new file mode 100644 index 0000000..90f1f23 --- 
/dev/null +++ b/src/raptor_serialize_rss.c @@ -0,0 +1,2425 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_rss.c - Raptor RSS 1.0 and Atom 1.0 serializers + * + * Copyright (C) 2003-2015, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" +#include "raptor_rss.h" + + +typedef struct { + raptor_world* world; + raptor_term* term; + /* shared pointer */ + raptor_rss_item* item; +} raptor_rss_group_map; + + + +/* + * Raptor 'RSS 1.0' serializer object + */ +typedef struct { + raptor_world* world; + + /* static rss model */ + raptor_rss_model model; + + /* Triples with no assigned type node */ + raptor_sequence *triples; + + /* Sequence of raptor_rss_item* : rdf:Seq items rdf:_ < n> at offset n */ + raptor_sequence *items; + + /* Sequence of raptor_rss_item* (?x rdf:type rss:Enclosure) */ + raptor_sequence *enclosures; + + /* Term of rdf:Seq node */ + raptor_term *seq_term; + + /* Namespace stack for 
serializing */ + raptor_namespace_stack *nstack; + + /* the default namespace (rdf: or atom:) - + * this is destroyed when nstack above is deleted + */ + raptor_namespace* default_nspace; + + /* rdf namespace in @nspaces array */ + raptor_namespace* rdf_nspace; + + /* the xml: namespace */ + raptor_namespace *xml_nspace; + + /* the root element (rdf:RDF or atom:feed) */ + raptor_xml_element* root_element; + + /* where the xml is being written */ + raptor_xml_writer *xml_writer; + + /* non-0 if this is an atom 1.0 serializer */ + int is_atom; + + /* 0 = none + * 1 = existing rss:item item containg rdf/xml encoding of any extra + * triples about URI (rss-1.0 serializer only) + * 2 = at:md element containing rdf/xml property elements encoding + * of any extra triples about URI (atom serializer only) + */ + int rss_triples_mode; + + /* namespaces declared here */ + raptor_namespace* nspaces[RAPTOR_RSS_NAMESPACES_SIZE]; + + /* Map of group URI (key, owned) : rss item object (value, shared) */ + raptor_avltree *group_map; + + /* User declared namespaces */ + raptor_sequence *user_namespaces; + + /* URI of XML Literal datatype */ + raptor_uri* xml_literal_dt; + + int free_default_nspace; +} raptor_rss10_serializer_context; + + +static void +raptor_free_group_map(raptor_rss_group_map* gm) +{ + if(gm->term) + raptor_free_term(gm->term); + + RAPTOR_FREE(raptor_rss_group_map, gm); +} + + +static int +raptor_rss_group_map_compare(raptor_rss_group_map* gm1, + raptor_rss_group_map* gm2) +{ + return raptor_term_compare(gm1->term, gm2->term); +} + + +static raptor_rss_item* +raptor_rss10_get_group_item(raptor_rss10_serializer_context *rss_serializer, + raptor_term* term) +{ + raptor_rss_group_map search_gm; + raptor_rss_group_map* gm; + + search_gm.world = rss_serializer->world; + search_gm.term = term; + gm = (raptor_rss_group_map*)raptor_avltree_search(rss_serializer->group_map, + (void*)&search_gm); + + return gm ? 
gm->item : NULL; +} + + +static int +raptor_rss10_set_item_group(raptor_rss10_serializer_context *rss_serializer, + raptor_term* term, raptor_rss_item *item) +{ + raptor_rss_group_map* gm; + + if(raptor_rss10_get_group_item(rss_serializer, term)) + return 0; + + gm = RAPTOR_CALLOC(raptor_rss_group_map*, 1, sizeof(*gm)); + gm->world = rss_serializer->world; + gm->term = raptor_term_copy(term); + gm->item = item; + + raptor_avltree_add(rss_serializer->group_map, gm); + return 0; +} + + +/** + * raptor_rss10_serialize_init: + * @serializer: serializer object + * @name: serializer name + * + * INTERNAL (raptor_serializer_factory API) - create a new serializer + * + * Return value: non-0 on failure + */ +static int +raptor_rss10_serialize_init(raptor_serializer* serializer, const char *name) +{ + raptor_rss10_serializer_context *rss_serializer; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + rss_serializer->world = serializer->world; + + raptor_rss_common_init(serializer->world); + raptor_rss_model_init(serializer->world, &rss_serializer->model); + + rss_serializer->triples = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, (raptor_data_print_handler)raptor_statement_print); + + rss_serializer->items = raptor_new_sequence((raptor_data_free_handler)raptor_free_rss_item, (raptor_data_print_handler)NULL); + + rss_serializer->enclosures = raptor_new_sequence((raptor_data_free_handler)raptor_free_rss_item, (raptor_data_print_handler)NULL); + + rss_serializer->group_map = raptor_new_avltree((raptor_data_compare_handler)raptor_rss_group_map_compare, + (raptor_data_free_handler)raptor_free_group_map, 0); + + rss_serializer->user_namespaces = raptor_new_sequence((raptor_data_free_handler)raptor_free_namespace, NULL); + + rss_serializer->is_atom = !(strcmp(name,"atom")); + + rss_serializer->nstack = raptor_new_namespaces(serializer->world, 1); + + rss_serializer->xml_literal_dt = raptor_new_uri(serializer->world, + 
raptor_xml_literal_datatype_uri_string); + + return 0; +} + + +/** + * raptor_rss10_serialize_terminate: + * @serializer: serializer object + * + * INTERNAL (raptor_serializer_factory API) - destroy a serializer + */ +static void +raptor_rss10_serialize_terminate(raptor_serializer* serializer) +{ + raptor_world* world = serializer->world; + raptor_rss10_serializer_context *rss_serializer; + int i; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + + raptor_rss_model_clear(&rss_serializer->model); + raptor_rss_common_terminate(world); + + if(rss_serializer->triples) + raptor_free_sequence(rss_serializer->triples); + + if(rss_serializer->items) + raptor_free_sequence(rss_serializer->items); + + if(rss_serializer->enclosures) + raptor_free_sequence(rss_serializer->enclosures); + + if(rss_serializer->seq_term) + raptor_free_term(rss_serializer->seq_term); + + if(rss_serializer->xml_writer) + raptor_free_xml_writer(rss_serializer->xml_writer); + + for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE; i++) { + if(rss_serializer->nspaces[i]) + raptor_free_namespace(rss_serializer->nspaces[i]); + } + + if(rss_serializer->free_default_nspace && rss_serializer->default_nspace) + raptor_free_namespace(rss_serializer->default_nspace); + + if(rss_serializer->xml_nspace) + raptor_free_namespace(rss_serializer->xml_nspace); + + if(rss_serializer->user_namespaces) + raptor_free_sequence(rss_serializer->user_namespaces); + + /* all raptor_namespace* objects must be freed BEFORE the stack + * they are attached to here: */ + if(rss_serializer->nstack) + raptor_free_namespaces(rss_serializer->nstack); + + if(rss_serializer->group_map) + raptor_free_avltree(rss_serializer->group_map); + + if(world->rss_fields_info_qnames) { + for(i = 0; i < RAPTOR_RSS_FIELDS_SIZE; i++) { + if(world->rss_fields_info_qnames[i]) + raptor_free_qname(world->rss_fields_info_qnames[i]); + } + RAPTOR_FREE(raptor_qname* array, world->rss_fields_info_qnames); + world->rss_fields_info_qnames = 
NULL; + } + + if(world->rss_types_info_qnames) { + for(i = 0; i < RAPTOR_RSS_COMMON_SIZE; i++) { + if(world->rss_types_info_qnames[i]) + raptor_free_qname(world->rss_types_info_qnames[i]); + } + RAPTOR_FREE(raptor_wname* array, world->rss_types_info_qnames); + world->rss_types_info_qnames = NULL; + } + + if(rss_serializer->xml_literal_dt) + raptor_free_uri(rss_serializer->xml_literal_dt); +} + + +/** + * raptor_rss10_move_statements: + * @rss_serializer: serializer object + * @type: item type + * @item: item object + * + * INTERNAL - Move statements from the stored triples into item @item + * that match @item's URI as subject. + * + * Return value: count of number of triples moved + */ +static int +raptor_rss10_move_statements(raptor_rss10_serializer_context *rss_serializer, + raptor_rss_type type, + raptor_rss_item *item) +{ + int t; + int count = 0; + int is_atom = rss_serializer->is_atom; + int size = raptor_sequence_size(rss_serializer->triples); + + for(t = 0; t < size; t++) { + raptor_statement* s; + int f; + + s = (raptor_statement*)raptor_sequence_get_at(rss_serializer->triples, t); + if(!s) + continue; + + if(s->subject->type != RAPTOR_TERM_TYPE_URI || + !raptor_uri_equals(s->subject->value.uri, item->uri)) + continue; + + /* now we know this triple is associated with the item URI + * and can count the relevant triples */ + count++; + + /* add triples with anonymous object to the general triples sequence + * for this item, and to the group map (blank node closure) + */ + if(s->object->type == RAPTOR_TERM_TYPE_BLANK) { + raptor_rss10_set_item_group(rss_serializer, s->object, item); + + RAPTOR_DEBUG4("Moved anonymous value property URI <%s> for typed node %u - %s\n", + raptor_uri_as_string(s->predicate->value.uri), + type, raptor_rss_items_info[type].name); + s = (raptor_statement*)raptor_sequence_delete_at(rss_serializer->triples, + t); + raptor_sequence_push(item->triples, s); + continue; + } + + + /* otherwise process object value types resource or 
literal */ + for(f = 0; f < RAPTOR_RSS_FIELDS_SIZE; f++) { + if(!rss_serializer->world->rss_fields_info_uris[f]) + continue; + + if(s->predicate->type == RAPTOR_TERM_TYPE_URI && + s->object->type != RAPTOR_TERM_TYPE_BLANK && + raptor_uri_equals(s->predicate->value.uri, + rss_serializer->world->rss_fields_info_uris[f])) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + + /* found field this triple to go in 'item' so move the + * object value over + */ + if(s->object->type == RAPTOR_TERM_TYPE_URI) { + field->uri = s->object->value.uri; + s->object->value.uri = NULL; + } else { + field->value = s->object->value.literal.string; + if(s->object->value.literal.datatype && + raptor_uri_equals(s->object->value.literal.datatype, + rss_serializer->xml_literal_dt)) + field->is_xml = 1; + + if(f == RAPTOR_RSS_FIELD_CONTENT_ENCODED) + field->is_xml = 1; + + if(f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<') + field->is_xml = 1; + + s->object->value.literal.string = NULL; + } + + if(is_atom) { + int i; + + /* Rewrite item fields rss->atom */ + for(i = 0; + raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; + i++) { + int from_f = raptor_atom_to_rss[i].to; + int to_f = raptor_atom_to_rss[i].from; + + /* Do not rewrite to atom0.3 terms */ + if(raptor_rss_fields_info[to_f].nspace == ATOM0_3_NS) + continue; + + if(f == from_f && + !(item->fields[to_f] && item->fields[to_f]->value)) { + f = to_f; + if(to_f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<') + field->is_xml = 1; + field->is_mapped = 1; + RAPTOR_DEBUG5("Moved field %d - %s to field %d - %s\n", + from_f, raptor_rss_fields_info[from_f].name, + to_f, raptor_rss_fields_info[to_f].name); + break; + } + } + } /* end is atom field to map */ + + RAPTOR_DEBUG1("Adding field\n"); + raptor_rss_item_add_field(item, f, field); + raptor_sequence_set_at(rss_serializer->triples, t, NULL); + break; + } + } /* end for field loop */ + + /* loop ended early so triple was assocated with a 
field - continue */ + if(f < RAPTOR_RSS_FIELDS_SIZE) + continue; + + + /* otherwise triple was not found as a field so store in triples + * sequence + */ + RAPTOR_DEBUG4("UNKNOWN property URI <%s> for typed node %u - %s\n", + raptor_uri_as_string(s->predicate->value.uri), + type, raptor_rss_items_info[type].name); + s = (raptor_statement*)raptor_sequence_delete_at(rss_serializer->triples, + t); + raptor_sequence_push(item->triples, s); + + } /* end for all triples */ + +#ifdef RAPTOR_DEBUG + if(count > 0) + RAPTOR_DEBUG5("Moved %d triples to typed node %u - %s with uri <%s>\n", + count, type, raptor_rss_items_info[type].name, + raptor_uri_as_string(item->uri)); +#endif + + return count; +} + + +/** + * raptor_rss10_move_anonymous_statements: + * @rss_serializer: serializer object + * + * INTERNAL - Move statements with a blank node subject to the appropriate item + * + */ +static int +raptor_rss10_move_anonymous_statements(raptor_rss10_serializer_context *rss_serializer) +{ + int t; + int handled = 1; + int round = 0; +#ifdef RAPTOR_DEBUG + int moved_count = 0; +#endif + + for(round = 0; handled; round++) { + int size = raptor_sequence_size(rss_serializer->triples); + + handled = 0; + for(t = 0; t < size; t++) { + raptor_statement* s; + raptor_rss_item* item; + + s = (raptor_statement*)raptor_sequence_get_at(rss_serializer->triples, t); + if(!s) + continue; + + if(s->subject->type != RAPTOR_TERM_TYPE_BLANK) + continue; + + item = raptor_rss10_get_group_item(rss_serializer, s->subject); + + if(item) { + /* triple matched an existing item */ + s = (raptor_statement*)raptor_sequence_delete_at(rss_serializer->triples, + t); + raptor_sequence_push(item->triples, s); +#ifdef RAPTOR_DEBUG + moved_count++; +#endif + + if(s->object->type == RAPTOR_TERM_TYPE_BLANK) + raptor_rss10_set_item_group(rss_serializer, s->object, item); + + + handled = 1; + } + } /* end for all triples */ + +#ifdef RAPTOR_DEBUG + if(moved_count > 0) + RAPTOR_DEBUG3("Round %d: Moved %d triples\n", 
round, moved_count); +#endif + } + + return 0; +} + + +/** + * raptor_rss10_move_leftover_statements: + * @rss_serializer: serializer object + * + * INTERNAL - Move any statements in the serializer pool to items or channel + * + */ +static int +raptor_rss10_move_leftover_statements(raptor_rss10_serializer_context *rss_serializer) +{ + raptor_rss_model* rss_model; + int i; + int type; + raptor_rss_item* item; + int size; + + rss_model = &rss_serializer->model; + + type = RAPTOR_RSS_ITEM; + size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + raptor_rss10_move_statements(rss_serializer, (raptor_rss_type)type, item); + } + + type = RAPTOR_RSS_CHANNEL; + if(rss_model->common[type]) { + item = rss_model->common[type]; + raptor_rss10_move_statements(rss_serializer, (raptor_rss_type)type, item); + } + + return 0; +} + + +/** + * raptor_rss10_remove_mapped_item_fields: + * @rss_serializer: serializer object + * @item: rss item + * @type: item type + * + * INTERNAL - Remove mapped fields for an item + * + */ +static int +raptor_rss10_remove_mapped_item_fields(raptor_rss10_serializer_context *rss_serializer, + raptor_rss_item* item, int type) +{ + int f; + + if(!item->fields_count) + return 0; + + for(f = 0; f < RAPTOR_RSS_FIELDS_SIZE; f++) { + raptor_rss_field* field; + int saw_mapped = 0; + int saw_non_mapped = 0; + + for(field = item->fields[f]; field; field = field->next) { + if(field->is_mapped) + saw_mapped++; + else + saw_non_mapped++; + } + + if(saw_mapped && saw_non_mapped) { + raptor_rss_field* last_field = NULL; + RAPTOR_DEBUG6("Item %p Field %d - %s: %d mapped %d non-mapped\n", + RAPTOR_VOIDP(item), + f, raptor_rss_fields_info[f].name, + saw_mapped, saw_non_mapped); + + field = item->fields[f]; + while(field) { + raptor_rss_field* next = field->next; + field->next = NULL; + if(field->is_mapped) + raptor_rss_field_free(field); + else { + if(!last_field) + 
item->fields[f] = field; + else + last_field->next = field; + last_field = field; + } + field = next; + } + } + + } + + return 0; +} + + +/** + * raptor_rss10_remove_mapped_fields: + * @rss_serializer: serializer object + * + * INTERNAL - Move statements with a blank node subject to the appropriate item + * + */ +static int +raptor_rss10_remove_mapped_fields(raptor_rss10_serializer_context *rss_serializer) +{ + raptor_rss_model* rss_model; + int is_atom; + int i; + + rss_model = &rss_serializer->model; + is_atom = rss_serializer->is_atom; + + if(!is_atom) + return 0; + + if(rss_model->items_count) { + int size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + raptor_rss_item* item; + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + raptor_rss10_remove_mapped_item_fields(rss_serializer, item, + RAPTOR_RSS_ITEM); + } + } + + for(i = RAPTOR_RSS_CHANNEL; i < RAPTOR_RSS_COMMON_SIZE; i++) { + raptor_rss_item* item; + for(item = rss_model->common[i]; item; item = item->next) { + raptor_rss10_remove_mapped_item_fields(rss_serializer, item, i); + } + } + + return 0; +} + + +/** + * raptor_rss10_store_statement: + * @rss_serializer: serializer object + * @s: statement (shared - do not become owner of this) + * + * INTERNAL - decide where to store a statement in an item or keep pending + * + * Return value: non-0 if handled (stored) + */ +static int +raptor_rss10_store_statement(raptor_rss10_serializer_context *rss_serializer, + raptor_statement *s) +{ + raptor_rss_item *item = NULL; + int handled = 0; + int is_atom = rss_serializer->is_atom; + + item = raptor_rss10_get_group_item(rss_serializer, s->subject); + + if(item && + s->predicate->type == RAPTOR_TERM_TYPE_URI && + (s->object->type == RAPTOR_TERM_TYPE_URI || + s->object->type == RAPTOR_TERM_TYPE_LITERAL)) { + int f; + raptor_uri* predicate_uri = s->predicate->value.uri; + + /* scan triples (? 
<predicate-uri> <uri or literal>) */ + + for(f = 0; f < RAPTOR_RSS_FIELDS_SIZE; f++) { + raptor_rss_field* field; + if(!rss_serializer->world->rss_fields_info_uris[f]) + continue; + + if(raptor_uri_equals(predicate_uri, + rss_serializer->world->rss_fields_info_uris[f])) { + /* found field this triple to go in 'item' so move the + * object value over + */ + field = raptor_rss_new_field(rss_serializer->world); + + if(s->object->type == RAPTOR_TERM_TYPE_URI) { + field->uri = s->object->value.uri; + s->object->value.uri = NULL; + } else { + /* must be literal - checked above */ + field->value = s->object->value.literal.string; + + if(s->object->value.literal.datatype && + raptor_uri_equals(s->object->value.literal.datatype, + rss_serializer->xml_literal_dt)) + field->is_xml = 1; + + if(f == RAPTOR_RSS_FIELD_CONTENT_ENCODED) + field->is_xml = 1; + + if(f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<') + field->is_xml = 1; + s->object->value.literal.string = NULL; + } + + if(is_atom) { + int i; + + /* Rewrite item fields rss->atom */ + for(i = 0; + raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; i++) { + int from_f = raptor_atom_to_rss[i].to; + int to_f = raptor_atom_to_rss[i].from; + + /* Do not rewrite to atom0.3 terms */ + if(raptor_rss_fields_info[to_f].nspace == ATOM0_3_NS) + continue; + + if(f == from_f && + !(item->fields[to_f] && item->fields[to_f]->value)) { + f = to_f; + + if(to_f == RAPTOR_RSS_FIELD_ATOM_SUMMARY && *field->value == '<') + field->is_xml = 1; + + field->is_mapped = 1; + RAPTOR_DEBUG5("Moved field %d - %s to field %d - %s\n", + from_f, raptor_rss_fields_info[from_f].name, + to_f, raptor_rss_fields_info[to_f].name); + break; + } + } + } + + RAPTOR_DEBUG1("Adding field\n"); + raptor_rss_item_add_field(item, f, field); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Stored statement under typed node %p\n", + RAPTOR_VOIDP(item)); +#endif + + handled = 1; + break; + } + } + } + + if(!handled) { + raptor_statement *t; 
+ + /* Need to handle this later so copy it */ + t = raptor_statement_copy(s); + if(t) { + raptor_sequence_push(rss_serializer->triples, t); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr,"Stored statement: "); + raptor_statement_print_as_ntriples(s, stderr); + fprintf(stderr,"\n"); +#endif + handled = 1; + } + } + + return handled; +} + + +static int +raptor_rss10_serialize_start(raptor_serializer* serializer) +{ + raptor_rss10_serializer_context *rss_serializer; + const char* rss_triples; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + + rss_triples = (const char*)RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_RSS_TRIPLES); + if(rss_triples) { + if(!strcmp(rss_triples, "none")) + rss_serializer->rss_triples_mode = 0; + else if(!strcmp(rss_triples, "rdf-xml")) + rss_serializer->rss_triples_mode = 1; + else if(!strcmp(rss_triples, "atom-triples")) + rss_serializer->rss_triples_mode = 2; + else + rss_serializer->rss_triples_mode = 0; + } + + return 0; +} + + +/** + * raptor_rss10_serialize_statement: + * @serializer: serializer object + * @statement: statement (shared - am not owner of this) + * + * INTERNAL (raptor_serializer_factory API) - Serialize a statement + * + * Return value: non-0 on failure + */ +static int +raptor_rss10_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + raptor_rss10_serializer_context *rss_serializer; + raptor_rss_model *rss_model; + int handled = 0; + int i; + raptor_rss_type type; + raptor_rss_item *item = NULL; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + rss_model = &rss_serializer->model; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + if(1) { + RAPTOR_DEBUG1("Processing statement\n "); + raptor_statement_print_as_ntriples(statement, stderr); + fputc('\n', stderr); + } +#endif + + if(raptor_uri_equals(statement->predicate->value.uri, + RAPTOR_RSS_RSS_items_URI(rss_model))) { + /* ignore any triple (? 
rss:items ?) - is infered */ + return 0; + } + + if(!raptor_uri_equals(statement->predicate->value.uri, + RAPTOR_RDF_type_URI(serializer->world))) + goto savetriple; + + + type = RAPTOR_RSS_NONE; + + if(statement->object->type == RAPTOR_TERM_TYPE_URI) { + raptor_uri* object_uri = statement->object->value.uri; + + /* look for triple: (? rdf:type ?class-uri) to find containers and blocks */ + + /* Look for triple (? rdf:type rdf:Seq) */ + if(raptor_uri_equals(object_uri, RAPTOR_RDF_Seq_URI(serializer->world))) { + + rss_serializer->seq_term = raptor_term_copy(statement->subject); + + handled = 1; + goto savetriple; + } + + /* look for triple: (? rdf:type ?class-uri) to find containers and blocks */ + for(i = 0; i < RAPTOR_RSS_COMMON_SIZE; i++) { + raptor_uri *item_uri = serializer->world->rss_types_info_uris[i]; + + if(item_uri && raptor_uri_equals(object_uri, item_uri)) { + type = (raptor_rss_type)i; + +#ifdef RAPTOR_DEBUG + if(1) { + unsigned char* ts; + ts = raptor_term_to_string(statement->subject); + RAPTOR_DEBUG4("Found typed node %u - %s with term %s\n", type, + raptor_rss_items_info[type].name, ts); + RAPTOR_FREE(char*, ts); + } +#endif + break; + } + } + } + + if(type == RAPTOR_RSS_NONE) { +#ifdef RAPTOR_DEBUG + if(1) { + unsigned char* ts; + ts = raptor_term_to_string(statement->object); + RAPTOR_DEBUG2("UNKNOWN typed node with type term %s\n", ts); + RAPTOR_FREE(char*, ts); + } +#endif + goto savetriple; + } + + + if(type == RAPTOR_RSS_ITEM) { + int size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + + if(raptor_rss_item_equals_statement_subject(item, statement)) + break; + + } + if(i < size) { + RAPTOR_DEBUG2("Found RSS item at entry %d in sequence of items\n", i); + } else { +#ifdef RAPTOR_DEBUG + if(1) { + unsigned char* ts; + ts = raptor_term_to_string(statement->subject); + + RAPTOR_DEBUG2("RSS item term %s is not in sequence of items\n", ts); 
+ RAPTOR_FREE(char*, ts); + } +#endif + item = NULL; + } + } else if(type == RAPTOR_RSS_ENCLOSURE) { + int size = raptor_sequence_size(rss_serializer->enclosures); + for(i = 0; i < size; i++) { + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i); + + if(raptor_rss_item_equals_statement_subject(item, statement)) + break; + } + if(i < size) { + RAPTOR_DEBUG2("Found enclosure at entry %d in sequence of enclosures\n", i); + } else { +#ifdef RAPTOR_DEBUG + if(1) { + unsigned char* ts; + ts = raptor_term_to_string(statement->subject); + RAPTOR_DEBUG2("Add new enclosure to sequence with term %s\n", ts); + RAPTOR_FREE(char*, ts); + } +#endif + + item = raptor_new_rss_item(rss_serializer->world); + raptor_sequence_push(rss_serializer->enclosures, item); + } + } else { + item = raptor_rss_model_add_common(rss_model, type); + } + + + if(item && statement->subject->type == RAPTOR_TERM_TYPE_URI) { + raptor_rss_item_set_uri(item, statement->subject->value.uri); + + /* Move any existing statements to the newly discovered item */ + raptor_rss10_move_statements(rss_serializer, type, item); + + raptor_rss10_set_item_group(rss_serializer, item->term, item); + + handled = 1; + } + + + savetriple: + if(!handled) { + handled = raptor_rss10_store_statement(rss_serializer, statement); + + /* failed to store */ + if(!handled) + return 1; + } + + return 0; +} + + +static void +raptor_rss10_build_items(raptor_rss10_serializer_context *rss_serializer) +{ + raptor_rss_model* rss_model = &rss_serializer->model; + int i; + int size; + + if(!rss_serializer->seq_term) + return; + + size = raptor_sequence_size(rss_serializer->triples); + for(i = 0; i < size; i++) { + int ordinal = -1; + raptor_statement* s; + + s = (raptor_statement*)raptor_sequence_get_at(rss_serializer->triples, i); + if(!s) + continue; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG1("Processing statement\n "); + raptor_statement_print_as_ntriples(s, stderr); + fputc('\n', stderr); 
+#endif + + /* skip triples that are not ? ? <uri> */ + if(s->object->type != RAPTOR_TERM_TYPE_URI) { + RAPTOR_DEBUG1("Not ? ? <uri> - continuing\n"); + continue; + } + + + if(raptor_term_equals(s->subject, rss_serializer->seq_term)) { + const unsigned char* uri_str; + /* found <seq URI> <some predicate> <some URI> triple */ + + /* predicate is a resource */ + uri_str = raptor_uri_as_string(s->predicate->value.uri); + + if(!strncmp((const char*)uri_str, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) + ordinal= raptor_check_ordinal(uri_str + 44); + +#ifdef RAPTOR_DEBUG + if(1) { + unsigned char* ts; + ts = raptor_term_to_string(s->object); + RAPTOR_DEBUG3("Found RSS 1.0 item %d with term %s\n", ordinal, ts); + RAPTOR_FREE(char*, ts); + } +#endif + + if(ordinal >= 0) { + raptor_rss_item *item; + + item = raptor_new_rss_item(rss_serializer->world); + + raptor_rss_item_set_uri(item, s->object->value.uri); + + raptor_sequence_set_at(rss_serializer->items, ordinal - 1, item); + + raptor_sequence_set_at(rss_serializer->triples, i, NULL); + + /* Move any existing statements to the newly discovered item */ + raptor_rss10_move_statements(rss_serializer, RAPTOR_RSS_ITEM, item); + + raptor_rss10_set_item_group(rss_serializer, item->term, item); + } + } + } + + rss_model->items_count = raptor_sequence_size(rss_serializer->items); +} + + +static void +raptor_rss10_build_xml_names(raptor_serializer *serializer, int is_entry) +{ + raptor_world* world = serializer->world; + raptor_rss10_serializer_context *rss_serializer; + raptor_rss_model* rss_model; + raptor_uri *base_uri = serializer->base_uri; + raptor_xml_element *element; + raptor_qname *qname; + int root_ns_id; + const unsigned char *root_local_name; + int i; + int is_atom; + const raptor_rss_item_info *item_node_type; + int item_node_typei; + int default_ns_id; + int size; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + rss_model = &rss_serializer->model; + is_atom = 
rss_serializer->is_atom; + + if(is_atom) { + default_ns_id = ATOM1_0_NS; + root_ns_id = ATOM1_0_NS; + root_local_name = (is_entry ? (const unsigned char*)"entry" : + (const unsigned char*)"feed"); + item_node_typei = RAPTOR_ATOM_ENTRY; + } else { + default_ns_id = RSS1_0_NS; + root_ns_id = RDF_NS; + root_local_name = (const unsigned char*)"RDF"; + item_node_typei = RAPTOR_RSS_ITEM; + } + item_node_type = &raptor_rss_items_info[item_node_typei]; + + rss_serializer->xml_nspace = raptor_new_namespace(rss_serializer->nstack, + (const unsigned char*)"xml", + (const unsigned char*)raptor_xml_namespace_uri, + 0); + + + /* Now we have a namespace stack, declare the namespaces */ + for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE; i++) { + raptor_uri* uri = serializer->world->rss_namespaces_info_uris[i]; + const unsigned char *prefix; + + prefix = (const unsigned char*)raptor_rss_namespaces_info[i].prefix; + if(!prefix) + continue; + + if(i == default_ns_id) { + if(!RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_PREFIX_ELEMENTS)) + prefix = NULL; + } + + if(uri) { + raptor_namespace* nspace; + nspace = raptor_new_namespace_from_uri(rss_serializer->nstack, prefix, + uri, 0); + rss_serializer->nspaces[i] = nspace; + + if(i == default_ns_id) { + rss_serializer->default_nspace = nspace; + rss_serializer->free_default_nspace = 0; + } + } + + if(i == RDF_NS) + rss_serializer->rdf_nspace = rss_serializer->nspaces[i]; + } + + + qname = raptor_new_qname_from_namespace_local_name(serializer->world, + rss_serializer->nspaces[root_ns_id], + root_local_name, + NULL); + if(base_uri) + base_uri = raptor_uri_copy(base_uri); + + element = raptor_new_xml_element(qname, NULL, base_uri); + rss_serializer->root_element = element; + + + /* Declare the namespaces on the root element */ + raptor_xml_element_declare_namespace(element, rss_serializer->default_nspace); + + for(i = 0; i < RAPTOR_RSS_NAMESPACES_SIZE; i++) { + const unsigned char *prefix; + + prefix = (const unsigned 
char*)raptor_rss_namespaces_info[i].prefix; + if(!prefix && i != default_ns_id) + continue; + + if(rss_serializer->nspaces[i]) + raptor_xml_element_declare_namespace(element, rss_serializer->nspaces[i]); + } + + size = raptor_sequence_size(rss_serializer->user_namespaces); + for(i = 0; i < size; i++) { + raptor_namespace* nspace; + nspace = (raptor_namespace*)raptor_sequence_get_at(rss_serializer->user_namespaces, i); + + /* Ignore user setting default namespace prefix */ + if(!nspace->prefix) + continue; + + raptor_xml_element_declare_namespace(element, nspace); + } + + + world->rss_fields_info_qnames = RAPTOR_CALLOC(raptor_qname**, + RAPTOR_RSS_FIELDS_SIZE, + sizeof(raptor_qname*)); + if(!world->rss_fields_info_qnames) + return; + + for(i = 0; i < RAPTOR_RSS_FIELDS_SIZE; i++) { + int n = raptor_rss_fields_info[i].nspace; + raptor_namespace* nspace = rss_serializer->nspaces[n]; + const unsigned char* lname; + lname = (const unsigned char*)raptor_rss_fields_info[i].name; + + world->rss_fields_info_qnames[i] = + raptor_new_qname_from_namespace_local_name(serializer->world, + nspace, lname, NULL); + if(!world->rss_fields_info_qnames[i]) + return; + } + + world->rss_types_info_qnames = RAPTOR_CALLOC(raptor_qname**, + RAPTOR_RSS_COMMON_SIZE, + sizeof(raptor_qname*)); + if(!world->rss_types_info_qnames) + return; + for(i = 0; i < RAPTOR_RSS_COMMON_SIZE; i++) { + int n = raptor_rss_items_info[i].nspace; + raptor_namespace* nspace = rss_serializer->nspaces[n]; + + if(nspace) { + const unsigned char* lname = + (const unsigned char*)raptor_rss_items_info[i].name; + + world->rss_types_info_qnames[i] = + raptor_new_qname_from_namespace_local_name(serializer->world, + nspace, lname, NULL); + if(!world->rss_types_info_qnames[i]) + return; + } + } + + for(i = 0; i < RAPTOR_RSS_COMMON_SIZE; i++) { + raptor_rss_item* item; + for(item = rss_model->common[i]; item; item = item->next) { + int typei = i; + + if(!item->fields_count) + continue; + + if(is_atom) { + if(typei == 
RAPTOR_RSS_CHANNEL) + typei = RAPTOR_ATOM_FEED; + else if(typei == RAPTOR_RSS_ITEM) + typei = RAPTOR_ATOM_ENTRY; + } + item->node_type = &raptor_rss_items_info[typei]; + item->node_typei = typei; + } + } + + size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + raptor_rss_item* item; + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + item->node_type = item_node_type; + item->node_typei = item_node_typei; + } + + size = raptor_sequence_size(rss_serializer->enclosures); + for(i = 0; i < size; i++) { + raptor_rss_item* item; + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i); + item->node_type = &raptor_rss_items_info[RAPTOR_RSS_ENCLOSURE]; + item->node_typei = RAPTOR_RSS_ENCLOSURE; + } + +} + + +static void +raptor_rss10_emit_atom_triples_map(raptor_serializer *serializer, int is_feed, + const unsigned char* map_element_name) +{ + raptor_rss10_serializer_context *rss_serializer; + raptor_xml_writer* xml_writer; + raptor_uri *base_uri = serializer->base_uri; + raptor_uri* base_uri_copy = NULL; + raptor_namespace* at_nspace; + raptor_xml_element* at_map_root_element; + raptor_qname *at_map_root_qname; + int i; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + xml_writer = rss_serializer->xml_writer; + at_nspace = rss_serializer->nspaces[ATOMTRIPLES_NS]; + + at_map_root_qname = raptor_new_qname_from_namespace_local_name(serializer->world, at_nspace, + (const unsigned char*)map_element_name, NULL); + base_uri_copy = base_uri ? 
raptor_uri_copy(base_uri) : NULL; + at_map_root_element = raptor_new_xml_element(at_map_root_qname, NULL, + base_uri_copy); + + raptor_xml_writer_start_element(xml_writer, at_map_root_element); + + /* Walk list of fields mapped atom to rss */ + for(i = 0; raptor_atom_to_rss[i].from != RAPTOR_RSS_FIELD_UNKNOWN; i++) { + int from_f = raptor_atom_to_rss[i].from; + int to_f = raptor_atom_to_rss[i].to; + const raptor_rss_field_info* from_field_info = &raptor_rss_fields_info[from_f]; + const raptor_rss_field_info* to_field_info = &raptor_rss_fields_info[to_f]; + raptor_xml_element* at_map_element; + raptor_qname *at_map_qname; + raptor_qname** at_map_attrs; + const char* predicate_prefix; + unsigned char* ruri_string; + + /* Do not rewrite to atom0.3 terms */ + if(to_field_info->nspace == ATOM0_3_NS) + continue; + + /* atom:feed only contains some fields that are mapped */ + if(is_feed && !(from_f == RAPTOR_RSS_FIELD_ATOM_ID || + from_f == RAPTOR_RSS_FIELD_ATOM_UPDATED || + from_f == RAPTOR_RSS_FIELD_ATOM_RIGHTS || + from_f == RAPTOR_RSS_FIELD_ATOM_TITLE)) + continue; + + predicate_prefix = raptor_rss_namespaces_info[from_field_info->nspace].prefix; + if(!predicate_prefix) + continue; + + /* <at:map property="{property URI}">{atom element}</at:map> */ + at_map_qname = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + at_nspace, + map_element_name, + NULL); + base_uri_copy = base_uri ? 
raptor_uri_copy(base_uri) : NULL; + at_map_element = raptor_new_xml_element(at_map_qname, NULL, base_uri_copy); + + + at_map_attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + ruri_string = raptor_uri_to_relative_uri_string(base_uri, + serializer->world->rss_fields_info_uris[to_f]); + at_map_attrs[0] = raptor_new_qname(rss_serializer->nstack, + (const unsigned char*)"property", + ruri_string); + raptor_free_memory(ruri_string); + raptor_xml_element_set_attributes(at_map_element, at_map_attrs, 1); + + raptor_xml_writer_start_element(xml_writer, at_map_element); + raptor_xml_writer_cdata(xml_writer, (const unsigned char*)predicate_prefix); + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)":", 1); + raptor_xml_writer_cdata(xml_writer, + (const unsigned char*)from_field_info->name); + raptor_xml_writer_end_element(xml_writer, at_map_element); + + raptor_free_xml_element(at_map_element); + } + + raptor_xml_writer_end_element(xml_writer, at_map_root_element); + + raptor_free_xml_element(at_map_root_element); +} + + + +/* atom-specific feed XML elements */ +static void +raptor_rss10_emit_atom_feed(raptor_serializer *serializer, + raptor_rss_item *item) +{ + raptor_rss10_serializer_context *rss_serializer; + raptor_xml_writer* xml_writer; + raptor_uri *base_uri = serializer->base_uri; + raptor_uri* base_uri_copy = NULL; + raptor_xml_element* atom_link_element; + raptor_qname *atom_link_qname; + raptor_qname** atom_link_attrs; + raptor_namespace* atom_nspace; + unsigned char* ruri_string; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + atom_nspace = rss_serializer->nspaces[ATOM1_0_NS]; + xml_writer = rss_serializer->xml_writer; + + atom_link_qname = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + atom_nspace, + (const unsigned char*)"link", + NULL); + base_uri_copy = base_uri ? 
raptor_uri_copy(base_uri) : NULL; + atom_link_element = raptor_new_xml_element(atom_link_qname, NULL, + base_uri_copy); + + atom_link_attrs = RAPTOR_CALLOC(raptor_qname**, 2, sizeof(raptor_qname*)); + ruri_string = raptor_uri_to_relative_uri_string(base_uri, item->uri); + + atom_link_attrs[0] = raptor_new_qname(rss_serializer->nstack, + (const unsigned char*)"href", + ruri_string); + raptor_free_memory(ruri_string); + atom_link_attrs[1] = raptor_new_qname(rss_serializer->nstack, + (const unsigned char*)"rel", + (const unsigned char*)"self"); + raptor_xml_element_set_attributes(atom_link_element, atom_link_attrs, 2); + + raptor_xml_writer_empty_element(xml_writer, atom_link_element); + + raptor_free_xml_element(atom_link_element); + + if(rss_serializer->rss_triples_mode == 2) { + raptor_rss10_emit_atom_triples_map(serializer, 1, + (const unsigned char*)"feedmap"); + raptor_rss10_emit_atom_triples_map(serializer, 0, + (const unsigned char*)"entrymap"); + } +} + + +/* emit the RSS 1.0-specific rdf:Seq and rss:item XML elements */ +static void +raptor_rss10_emit_rss_items(raptor_serializer *serializer) +{ + raptor_rss10_serializer_context *rss_serializer; + raptor_xml_writer* xml_writer; + raptor_uri *base_uri = serializer->base_uri; + raptor_uri* base_uri_copy = NULL; + raptor_xml_element* rss_items_predicate; + int i; + raptor_qname *rdf_Seq_qname; + raptor_xml_element *rdf_Seq_element; + int size; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + + if(!raptor_sequence_size(rss_serializer->items)) + return; + + xml_writer = rss_serializer->xml_writer; + + rdf_Seq_qname = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"Seq", + NULL); + + base_uri_copy = base_uri ? raptor_uri_copy(base_uri) : NULL; + rdf_Seq_element = raptor_new_xml_element(rdf_Seq_qname, NULL, base_uri_copy); + + /* make the <rss:items><rdf:Seq><rdf:li /> .... 
</rdf:Seq></rss:items> */ + + base_uri_copy = base_uri ? raptor_uri_copy(base_uri) : NULL; + rss_items_predicate = raptor_new_xml_element(raptor_qname_copy(serializer->world->rss_fields_info_qnames[RAPTOR_RSS_FIELD_ITEMS]), NULL, base_uri_copy); + + raptor_xml_writer_start_element(xml_writer, rss_items_predicate); + + raptor_xml_writer_start_element(xml_writer, rdf_Seq_element); + + size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + raptor_rss_item* item_item; + raptor_qname *rdf_li_qname; + raptor_xml_element *rdf_li_element; + raptor_qname **attrs; + unsigned char* ruri_string; + + item_item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + rdf_li_qname = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"li", + NULL); + base_uri_copy = base_uri ? raptor_uri_copy(base_uri) : NULL; + rdf_li_element = raptor_new_xml_element(rdf_li_qname, NULL, base_uri_copy); + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + ruri_string = raptor_uri_to_relative_uri_string(base_uri, item_item->uri); + attrs[0] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"resource", + ruri_string); + raptor_free_memory(ruri_string); + raptor_xml_element_set_attributes(rdf_li_element, attrs, 1); + + raptor_xml_writer_empty_element(xml_writer, rdf_li_element); + + raptor_xml_writer_newline(xml_writer); + + raptor_free_xml_element(rdf_li_element); + } + + raptor_xml_writer_end_element(xml_writer, rdf_Seq_element); + + raptor_free_xml_element(rdf_Seq_element); + + raptor_xml_writer_end_element(xml_writer, rss_items_predicate); + + raptor_free_xml_element(rss_items_predicate); +} + + +/* emit a block of RDF/XML depending on the rssTriples option mode */ +static void +raptor_rss10_emit_rdfxml_item_triples(raptor_serializer *serializer, + raptor_rss_item *item) +{ + 
raptor_rss10_serializer_context *rss_serializer; + raptor_xml_writer* xml_writer; + raptor_xml_element* root_element = NULL; + raptor_serializer* ser = NULL; + raptor_uri* base_uri = NULL; + int t_max_count = raptor_sequence_size(item->triples); + int t_count; + int t; + int is_atom; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + + if(rss_serializer->rss_triples_mode == 0 || !item->triples) + return; + + xml_writer = rss_serializer->xml_writer; + is_atom = rss_serializer->is_atom; + + /* can only use atom-triples with atom serializer */ + if(rss_serializer->rss_triples_mode == 2 && !is_atom) + return; + + /* can only use rdf-xml with rss-1.0 serializer */ + if(rss_serializer->rss_triples_mode == 1 && is_atom) + return; + + t_count = 0; + for(t = 0; t < t_max_count; t++) { + if(raptor_sequence_get_at(item->triples, t)) + t_count++; + } + if(!t_count) + return; + + RAPTOR_DEBUG2("Serializing %d triples\n", t_count); + + if(is_atom) { + raptor_namespace* at_nspace = rss_serializer->nspaces[ATOMTRIPLES_NS]; + raptor_qname* root_qname; + + /* atom:md with no attribute */ + root_qname = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + at_nspace, + (const unsigned char*)"md", + NULL); + if(!root_qname) + goto oom; + + base_uri = serializer->base_uri; + if(base_uri) + base_uri = raptor_uri_copy(base_uri); + + /* after this root_element owns root_qname and (this copy of) base_uri */ + root_element = raptor_new_xml_element(root_qname, NULL, base_uri); + if(!root_element) { + if(base_uri) + raptor_free_uri(base_uri); + raptor_free_qname(root_qname); + } else + raptor_xml_writer_start_element(xml_writer, root_element); + } + + ser = raptor_new_serializer(rss_serializer->world, "rdfxml-abbrev"); + if(!ser) + goto oom; + + raptor_rdfxmla_serialize_set_xml_writer(ser, xml_writer, + rss_serializer->nstack); + raptor_rdfxmla_serialize_set_write_rdf_RDF(ser, 0); + raptor_rdfxmla_serialize_set_single_node(ser, item->uri); + 
if(rss_serializer->rss_triples_mode == 2) { + /* raptor_rdfxmla_serialize_set_write_typed_nodes(ser, 0); */ + } + + /* after this call, ser does + * NOT own serializer->iostream and will not destroy it + * when raptor_free_serializer(ser) is called. + */ + raptor_serializer_start_to_iostream(ser, base_uri, serializer->iostream); + + for(t = 0; t < t_max_count; t++) { + raptor_statement* s; + s = (raptor_statement*)raptor_sequence_get_at(item->triples, t); + if(s) + raptor_serializer_serialize_statement(ser, s); + } + + raptor_serializer_serialize_end(ser); + + if(is_atom) + raptor_xml_writer_end_element(xml_writer, root_element); + + oom: + if(ser) + raptor_free_serializer(ser); + + if(root_element) + raptor_free_xml_element(root_element); +} + + +/** + * raptor_rss10_ensure_atom_field_zero_one: + * @item: RSS item object + * @f: ATOM field type + * + * INTERNAL - Check that the given item @field appears 0 or 1 times + */ +static void +raptor_rss10_ensure_atom_field_zero_one(raptor_rss_item* item, + raptor_rss_fields_type f) +{ + raptor_rss_field* field = item->fields[f]; + if(!field) + return; + + if(field->next) { + /* more than 1 value so delete rest of values */ + raptor_rss_field* next = field->next; + field->next = NULL; + + do { + field = next; + + next = field->next; + field->next = NULL; + raptor_rss_field_free(field); + } while(next); + } + +} + + +/** + * raptor_rss10_ensure_atom_feed_valid: + * @rss_serializer: serializer object + * + * INTERNAL - Ensure the atom items have all the fields they need: + * <id> & <title> & <updated> + * plus: + * <link rel='alternate' ...> OR <content>.. 
+ * + */ +static int +raptor_rss10_ensure_atom_feed_valid(raptor_rss10_serializer_context *rss_serializer) +{ + int is_atom; + int i; + raptor_rss_item* item; + raptor_rss_model* rss_model; + time_t now = 0; + int size; + +#ifdef HAVE_GETTIMEOFDAY + struct timeval tv; + if(!gettimeofday(&tv, NULL)) + now = tv.tv_sec; +#else + now = time(NULL); +#endif + + is_atom = rss_serializer->is_atom; + rss_model = &rss_serializer->model; + + if(!is_atom) + return 0; + + item = rss_model->common[RAPTOR_RSS_CHANNEL]; + if(item) { + int f; + + /* atom:id is required */ + f = RAPTOR_RSS_FIELD_ATOM_ID; + if(!item->fields[f]) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + field->uri = raptor_uri_copy(item->uri); + raptor_rss_item_add_field(item, f, field); + } + + /* atom:updated is required */ + f = RAPTOR_RSS_FIELD_ATOM_UPDATED; + if(!item->fields[f]) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + raptor_rss_set_date_field(field, now); + raptor_rss_item_add_field(item, f, field); + } + + /* atom:content is forbidden in feed */ + f = RAPTOR_RSS_FIELD_ATOM_CONTENT; + if(item->fields[f]) { + raptor_rss_field_free(item->fields[f]); + item->fields[f] = NULL; + } + + /* atom:summary is forbidden in feed */ + f = RAPTOR_RSS_FIELD_ATOM_SUMMARY; + if(item->fields[f]) { + raptor_rss_field_free(item->fields[f]); + item->fields[f] = NULL; + } + + /* These fields can appear 0 or 1 times on a feed */ + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_ICON); + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_LOGO); + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_RIGHTS); + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_SUBTITLE); + } + + + size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + + /* atom:id - defaults to item URI */ + 
if(!item->fields[RAPTOR_RSS_FIELD_ATOM_ID]) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + field->uri = raptor_uri_copy(item->uri); + raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_ID, field); + } + + /* atom:title - defaults to "untitled" */ + if(!item->fields[RAPTOR_RSS_FIELD_ATOM_TITLE]) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + field->value = RAPTOR_MALLOC(unsigned char*, 9); + memcpy(field->value, "untitled", 9); + raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_TITLE, field); + } + + /* atom:updated - defaults to now time */ + if(!item->fields[RAPTOR_RSS_FIELD_ATOM_UPDATED]) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + raptor_rss_set_date_field(field, now); + raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_UPDATED, field); + } + + /* enforce there is either an atom:content OR atom:link (rel = alternate) + * by adding a link to {item URI} if missing + */ + if(!item->fields[RAPTOR_RSS_FIELD_ATOM_CONTENT] && + !item->fields[RAPTOR_RSS_FIELD_ATOM_LINK]) { + raptor_rss_field* field = raptor_rss_new_field(rss_serializer->world); + field->uri = raptor_uri_copy(item->uri); + raptor_rss_item_add_field(item, RAPTOR_RSS_FIELD_ATOM_LINK, field); + } + + /* These fields can appear 0 or 1 times on an entry */ + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_PUBLISHED); + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_RIGHTS); + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_SOURCE); + raptor_rss10_ensure_atom_field_zero_one(item, + RAPTOR_RSS_FIELD_ATOM_SUMMARY); + } + + return 0; +} + + +static void +raptor_rss10_emit_item(raptor_serializer* serializer, + raptor_rss_item *item, int item_type, + int emit_container) +{ + raptor_rss10_serializer_context *rss_serializer; + raptor_xml_writer* xml_writer; + raptor_rss_model* rss_model; + raptor_uri *base_uri = serializer->base_uri; + 
raptor_xml_element *element = NULL; + raptor_qname **attrs = NULL; + raptor_uri* base_uri_copy = NULL; + int fi; + int is_atom; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + +#ifdef RAPTOR_DEBUG + if(!item) { + RAPTOR_FATAL3("Tried to emit NULL item of type %d - %s\n", item_type, + raptor_rss_items_info[item_type].name); + } +#endif + + xml_writer = rss_serializer->xml_writer; + is_atom = rss_serializer->is_atom; + rss_model = &rss_serializer->model; + + if(!item->fields_count) { + int i; + int size = raptor_sequence_size(rss_serializer->enclosures); + + for(i = 0; i < size; i++) { + raptor_rss_item *enclosure_item; + enclosure_item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i); + /* If the item and enclosure item have the same URI, move the + * enclosure fields to the item. Assumed that they got conflated + * previously such as when the enclosure url = the guid + */ + if(enclosure_item->uri && + raptor_uri_equals(item->uri, enclosure_item->uri)) { + int j; + for(j = 0; j < RAPTOR_RSS_FIELDS_SIZE; j++) { + if(j != RAPTOR_RSS_RDF_ENCLOSURE_TYPE && + j != RAPTOR_RSS_RDF_ENCLOSURE_LENGTH && + j != RAPTOR_RSS_RDF_ENCLOSURE_URL) { + item->fields[j] = enclosure_item->fields[j]; + enclosure_item->fields[j] = NULL; + item->fields_count++; + enclosure_item->fields_count--; + } + } + break; + } + } + } + + if(!item->fields_count) + return; + + if(emit_container) { + raptor_qname* qname_copy; + + qname_copy = raptor_qname_copy(serializer->world->rss_types_info_qnames[item->node_typei]); + base_uri_copy = base_uri ? 
raptor_uri_copy(base_uri) : NULL; + element = raptor_new_xml_element(qname_copy, NULL, base_uri_copy); + + if(!is_atom && item->uri) { + unsigned char* ruri_string; + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + ruri_string = raptor_uri_to_relative_uri_string(base_uri, item->uri); + attrs[0] = raptor_new_qname_from_namespace_local_name(serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"about", + ruri_string); + raptor_free_memory(ruri_string); + raptor_xml_element_set_attributes(element, attrs, 1); + } + + raptor_xml_writer_start_element(xml_writer, element); + } + + + for(fi = 0; fi < RAPTOR_RSS_FIELDS_SIZE; fi++) { + raptor_rss_fields_type f = (raptor_rss_fields_type)fi; + raptor_rss_field* field; + + if(f == RAPTOR_RSS_FIELD_ITEMS) + /* emitting the RSS items rdf:Seq block is done after this loop */ + continue; + + if(!serializer->world->rss_fields_info_uris[f]) + continue; + + if(f == RAPTOR_RSS_FIELD_ATOM_AUTHOR) { + int typei; + + if(!is_atom) + continue; + + if(item_type != RAPTOR_RSS_CHANNEL) + continue; + + typei = RAPTOR_ATOM_AUTHOR; + if(!rss_model->common[typei]) { + raptor_rss_item* author_item; + + /* No atom author was present so make a new atom:author item + * then either promote the string to an atom:name field OR + * use "unknown" + */ + author_item = raptor_rss_model_add_common(rss_model, + (raptor_rss_type)typei); + + author_item->node_type = &raptor_rss_items_info[typei]; + author_item->node_typei = typei; + + /* FIXME - uses _:author as bnode name - should make a new + * genid for each author node. This is OK because there + * is a check above that there is only 1 author per FEED. 
+ */ + author_item->term = raptor_new_term_from_blank(serializer->world, + (unsigned char*)"author"); + + + /* Move atom:name author field, or create a dummy one */ + f = RAPTOR_RSS_FIELD_ATOM_NAME; + if(item->fields[f]) { + field = item->fields[f]; + item->fields[f] = NULL; + } else { + field = raptor_rss_new_field(serializer->world); + field->value = RAPTOR_MALLOC(unsigned char*, 8); + memcpy(field->value, "unknown", 8); + } + raptor_rss_item_add_field(author_item, RAPTOR_RSS_FIELD_ATOM_NAME, + field); + + /* Move atom author fields if found: atom:uri and atom:email + * are only used inside Person constructs + */ + f = RAPTOR_RSS_FIELD_ATOM_URI; + if(item->fields[f]) { + field = item->fields[f]; + raptor_rss_item_add_field(author_item, f, field); + item->fields[f] = NULL; + } + f = RAPTOR_RSS_FIELD_ATOM_EMAIL; + if(item->fields[f]) { + field = item->fields[f]; + raptor_rss_item_add_field(author_item, f, field); + item->fields[f] = NULL; + } + } + + RAPTOR_DEBUG3("Emitting type %i - %s\n", typei, + raptor_rss_items_info[typei].name); + raptor_rss10_emit_item(serializer, rss_model->common[typei], typei, 1); + continue; + } + + + for(field = item->fields[f]; field; field = field->next) { + raptor_xml_element* predicate; + + /* Use atom:summary in preference */ + if(is_atom && f == RAPTOR_RSS_FIELD_DESCRIPTION) + continue; + + base_uri_copy = base_uri ? 
raptor_uri_copy(base_uri) : NULL; + predicate = raptor_new_xml_element(raptor_qname_copy(serializer->world->rss_fields_info_qnames[f]), NULL, base_uri_copy); + + if(is_atom && field->uri) { + unsigned char* ruri_string; + size_t len; + raptor_uri* my_base_uri = base_uri; + + if(f == RAPTOR_RSS_FIELD_ATOM_ID) + my_base_uri = NULL; + + ruri_string = raptor_uri_to_relative_counted_uri_string(my_base_uri, + field->uri, + &len); + + if(f == RAPTOR_RSS_FIELD_ATOM_LINK && + !item->fields[RAPTOR_RSS_FIELD_ATOM_CONTENT]) { + /* atom:link to URI and there is no atom:content */ + raptor_qname **predicate_attrs = NULL; + predicate_attrs = RAPTOR_CALLOC(raptor_qname**, 2, + sizeof(raptor_qname*)); + predicate_attrs[0] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + NULL, + (const unsigned char*)"href", + ruri_string); + predicate_attrs[1] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + NULL, + (const unsigned char*)"rel", + (const unsigned char*)"alternate"); + field->value = NULL; + raptor_xml_element_set_attributes(predicate, predicate_attrs, 2); + raptor_xml_writer_empty_element(xml_writer, predicate); + } else if(f == RAPTOR_RSS_FIELD_ATOM_CONTENT) { + /* <atom:content src="{uri value}" type="{type}" /> */ + raptor_qname **predicate_attrs = NULL; + const unsigned char* content_type; + raptor_rss_field* content_type_field; + + /* get the type */ + content_type_field = item->fields[RAPTOR_RSS_FIELD_AT_CONTENT_TYPE]; + if(content_type_field && content_type_field->value) + content_type = content_type_field->value; + else + content_type = (const unsigned char*)"text/html"; + + predicate_attrs = RAPTOR_CALLOC(raptor_qname**, 2, + sizeof(raptor_qname*)); + predicate_attrs[0] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + NULL, + (const unsigned char*)"src", + ruri_string); + predicate_attrs[1] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + NULL, + (const unsigned char*)"type", + (const 
unsigned char*)content_type); + /* free at:contentType field - no need to emit it */ + if(content_type_field) { + raptor_rss_field_free(content_type_field); + item->fields[RAPTOR_RSS_FIELD_AT_CONTENT_TYPE] = NULL; + } + + field->value = NULL; + raptor_xml_element_set_attributes(predicate, predicate_attrs, 2); + raptor_xml_writer_empty_element(xml_writer, predicate); + } else { + raptor_xml_writer_start_element(xml_writer, predicate); + raptor_xml_writer_cdata_counted(xml_writer, ruri_string, + (unsigned int)len); + raptor_xml_writer_end_element(xml_writer, predicate); + } + raptor_free_memory(ruri_string); + + } else if(field->uri) { + raptor_uri* enclosure_uri = field->uri; + raptor_rss_item *enclosure_item = NULL; + int i; + + if(f == RAPTOR_RSS_FIELD_ENCLOSURE && item_type == RAPTOR_RSS_ITEM) { + int size = raptor_sequence_size(rss_serializer->enclosures); + for(i = 0; i < size; i++) { + enclosure_item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->enclosures, i); + if(enclosure_item->uri && raptor_uri_equals(enclosure_uri, + enclosure_item->uri)) + break; + } + if(enclosure_item) { + int attr_count = 0; + unsigned char* ruri_string; + + attrs = RAPTOR_CALLOC(raptor_qname**, 3, sizeof(raptor_qname*)); + ruri_string = raptor_uri_to_relative_uri_string(base_uri, field->uri); + attrs[attr_count] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"resource", + ruri_string); + raptor_free_memory(ruri_string); + attr_count++; + + if(enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_TYPE] && enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_TYPE]->value) { + attrs[attr_count] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->nspaces[RSS2_0_ENC_NS], + (const unsigned char*)raptor_rss_fields_info[RAPTOR_RSS_RDF_ENCLOSURE_TYPE].name, + (const unsigned char*)enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_TYPE]->value); + attr_count++; + } + + 
if(enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH] && enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH]->value) { + attrs[attr_count] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->nspaces[RSS2_0_ENC_NS], + (const unsigned char*)raptor_rss_fields_info[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH].name, + (const unsigned char*)enclosure_item->fields[RAPTOR_RSS_RDF_ENCLOSURE_LENGTH]->value); + attr_count++; + } + raptor_xml_element_set_attributes(predicate, attrs, attr_count); + } else { + RAPTOR_DEBUG2("Enclosure item with URI %s could not be found in list of enclosures\n", raptor_uri_as_string(enclosure_uri)); + } + } else { + unsigned char* ruri_string; + + /* not an rss:item with an rss:enclosure field */ + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + ruri_string = raptor_uri_to_relative_uri_string(base_uri, field->uri); + attrs[0] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"resource", + ruri_string); + raptor_free_memory(ruri_string); + raptor_xml_element_set_attributes(predicate, attrs, 1); + } + raptor_xml_writer_empty_element(xml_writer, predicate); + } else if(field->value) { + /* not a URI, must be a literal */ + int is_xhtml_content = field->is_xml; + int prefer_cdata = (!is_atom && f == RAPTOR_RSS_FIELD_CONTENT_ENCODED); + + if(is_xhtml_content && !prefer_cdata) { + raptor_qname **predicate_attrs = NULL; + predicate_attrs = RAPTOR_CALLOC(raptor_qname**, 1, + sizeof(raptor_qname*)); + if(is_atom) + predicate_attrs[0] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + NULL, + (const unsigned char*)"type", + (const unsigned char*)"xhtml"); + else + predicate_attrs[0] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->rdf_nspace, + (const unsigned char*)"parseType", + (const unsigned char*)"Literal"); + raptor_xml_element_set_attributes(predicate, predicate_attrs, 1); 
+ } + + raptor_xml_writer_start_element(xml_writer, predicate); + + if(is_xhtml_content) { + if(prefer_cdata) + raptor_xml_writer_raw_counted(xml_writer, + (const unsigned char*)"<![CDATA[", 9); + raptor_xml_writer_raw(xml_writer, (const unsigned char*)field->value); + if(prefer_cdata) + raptor_xml_writer_raw_counted(xml_writer, + (const unsigned char*)"]]>", 3); + } else + raptor_xml_writer_cdata(xml_writer, (const unsigned char*)field->value); + raptor_xml_writer_end_element(xml_writer, predicate); + } else { + RAPTOR_DEBUG3("Field %u - %s had no URI or literal value\n", + f, raptor_rss_fields_info[f].name); + } + raptor_free_xml_element(predicate); + } + } + + + if(item_type == RAPTOR_RSS_CHANNEL) { + if(is_atom) + raptor_rss10_emit_atom_feed(serializer, item); + + if(!is_atom) + raptor_rss10_emit_rss_items(serializer); + } + + /* Add an RDF/XML block with remaining triples if Atom */ + if(item->triples && raptor_sequence_size(item->triples)) + raptor_rss10_emit_rdfxml_item_triples(serializer, item); + + if(emit_container) { + raptor_xml_writer_end_element(xml_writer, element); + raptor_free_xml_element(element); + } + +} + + +/** + * raptor_rss10_serialize_end: + * @serializer: serializer object + * + * INTERNAL (raptor_serializer_factory API) - End a serializing + * + * Return value: non-0 on failure + */ +static int +raptor_rss10_serialize_end(raptor_serializer* serializer) +{ + raptor_rss10_serializer_context *rss_serializer; + raptor_rss_model* rss_model; + int i; + raptor_xml_writer* xml_writer; +#ifdef RAPTOR_DEBUG + int triple_count = 0; +#endif + int is_atom; + raptor_qname **attrs = NULL; + int attrs_count = 0; + raptor_uri* entry_uri = NULL; + raptor_rss_item* entry_item = NULL; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + rss_model = &rss_serializer->model; + is_atom = rss_serializer->is_atom; + + raptor_rss10_build_items(rss_serializer); + + raptor_rss10_move_leftover_statements(rss_serializer); + + 
raptor_rss10_move_anonymous_statements(rss_serializer); + + if(is_atom) { + char* entry_uri_string; + + raptor_rss10_ensure_atom_feed_valid(rss_serializer); + + raptor_rss10_remove_mapped_fields(rss_serializer); + + entry_uri_string = RAPTOR_OPTIONS_GET_STRING(serializer, + RAPTOR_OPTION_ATOM_ENTRY_URI); + if(entry_uri_string) { + int size = raptor_sequence_size(rss_serializer->items); + entry_uri = raptor_new_uri(rss_serializer->world, + (const unsigned char*)entry_uri_string); + for(i = 0; i < size; i++) { + raptor_rss_item* item; + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + if(raptor_uri_equals(item->uri, entry_uri)) { + entry_item = item; + break; + } + } + if(!entry_item) { + RAPTOR_DEBUG2("Entry URI %s was not found in list of items\n", + raptor_uri_as_string(entry_uri)); + raptor_free_uri(entry_uri); + entry_uri = NULL; + } + } + + } + +#ifdef RAPTOR_DEBUG + if(1) { + int size = raptor_sequence_size(rss_serializer->triples); + for(i = 0; i < size; i++) { + raptor_statement* t; + t = (raptor_statement*)raptor_sequence_get_at(rss_serializer->triples, i); + if(t) { + fprintf(stderr, " %d: ", i); + raptor_statement_print(t, stderr); + fputc('\n', stderr); + triple_count++; + } + } + RAPTOR_DEBUG2("Starting with %d stored triples\n", triple_count); + } +#endif + + if(!rss_model->common[RAPTOR_RSS_CHANNEL]) { + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "No RSS channel found"); + if(entry_uri) + raptor_free_uri(entry_uri); + return 1; + } + + + if(rss_serializer->xml_writer) + raptor_free_xml_writer(rss_serializer->xml_writer); + + xml_writer = raptor_new_xml_writer(rss_serializer->world, + rss_serializer->nstack, + serializer->iostream); + rss_serializer->xml_writer = xml_writer; + raptor_xml_writer_set_option(xml_writer, + RAPTOR_OPTION_WRITER_AUTO_INDENT, NULL, 1); + raptor_xml_writer_set_option(xml_writer, + RAPTOR_OPTION_WRITER_AUTO_EMPTY, NULL, 1); + + raptor_rss10_build_xml_names(serializer, 
(is_atom && entry_uri)); + + if(serializer->base_uri && + RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_WRITE_BASE_URI)) { + const unsigned char* base_uri_string; + + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + + base_uri_string = raptor_uri_as_string(serializer->base_uri); + attrs[attrs_count++] = raptor_new_qname_from_namespace_local_name(rss_serializer->world, + rss_serializer->xml_nspace, + (const unsigned char*)"base", + base_uri_string); + } + + if(attrs_count) + raptor_xml_element_set_attributes(rss_serializer->root_element, attrs, + attrs_count); + else + raptor_xml_element_set_attributes(rss_serializer->root_element, NULL, 0); + + raptor_xml_writer_start_element(xml_writer, rss_serializer->root_element); + + + if(entry_item) { + RAPTOR_DEBUG1("Emitting entry\n"); + raptor_rss10_emit_item(serializer, entry_item, RAPTOR_RSS_ITEM, 0); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1); + } else { + i = RAPTOR_RSS_CHANNEL; + RAPTOR_DEBUG3("Emitting type %i - %s\n", i, raptor_rss_items_info[i].name); + raptor_rss10_emit_item(serializer, rss_model->common[i], i, !is_atom); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1); + + if(rss_model->items_count) { + int size = raptor_sequence_size(rss_serializer->items); + for(i = 0; i < size; i++) { + raptor_rss_item* item; + item = (raptor_rss_item*)raptor_sequence_get_at(rss_serializer->items, i); + raptor_rss10_emit_item(serializer, item, RAPTOR_RSS_ITEM, 1); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"\n", 1); + } + + } + + for(i = RAPTOR_RSS_CHANNEL + 1; i < RAPTOR_RSS_COMMON_SIZE; i++) { + raptor_rss_item* item; + + if(is_atom) { + /* atom 1.0 only serializes rss:item (channel is done above) */ + if(i != RAPTOR_RSS_ITEM) + continue; + } else { + /* rss 1.0 ignores atom:author for now - FIXME */ + if(i == RAPTOR_ATOM_AUTHOR) + continue; + } + + for(item = rss_model->common[i]; item; item = item->next) { + 
RAPTOR_DEBUG3("Emitting type %i - %s\n", i, + raptor_rss_items_info[i].name); + raptor_rss10_emit_item(serializer, item, i, 1); + } + } + } + + + if(entry_uri) + raptor_free_uri(entry_uri); + + raptor_xml_writer_end_element(xml_writer, rss_serializer->root_element); + + raptor_free_xml_element(rss_serializer->root_element); + + raptor_xml_writer_newline(xml_writer); + + raptor_xml_writer_flush(xml_writer); + + return 0; +} + + +/* add a namespace */ +static int +raptor_rss10_serialize_declare_namespace_from_namespace(raptor_serializer* serializer, + raptor_namespace *nspace) +{ + raptor_rss10_serializer_context* rss_serializer; + int i; + int size; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + + size = raptor_sequence_size(rss_serializer->user_namespaces); + for(i = 0; i < size; i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(rss_serializer->user_namespaces, i); + + /* If prefix is already declared, ignore it */ + if(!ns->prefix && !nspace->prefix) + return 1; + + if(ns->prefix && nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) + return 1; + + if(ns->uri && nspace->uri && + raptor_uri_equals(ns->uri, nspace->uri)) + return 1; + } + + nspace = raptor_new_namespace_from_uri(rss_serializer->nstack, + nspace->prefix, nspace->uri, + 0); + if(!nspace) + return 1; + + raptor_sequence_push(rss_serializer->user_namespaces, nspace); + return 0; +} + + +/* add a namespace */ +static int +raptor_rss10_serialize_declare_namespace(raptor_serializer* serializer, + raptor_uri *uri, + const unsigned char *prefix) +{ + raptor_rss10_serializer_context* rss_serializer; + raptor_namespace *ns; + int rc; + + rss_serializer = (raptor_rss10_serializer_context*)serializer->context; + + ns = raptor_new_namespace_from_uri(rss_serializer->nstack, prefix, uri, 0); + rc = raptor_rss10_serialize_declare_namespace_from_namespace(serializer, ns); + raptor_free_namespace(ns); + + return rc; +} + + + 
+/** + * raptor_rss10_serialize_finish_factory: + * @factory: serializer factory + * + * INTERNAL (raptor_serializer_factory API) - finish the serializer factory + */ +static void +raptor_rss10_serialize_finish_factory(raptor_serializer_factory* factory) +{ + +} + + +static const char* rss10_names[3] = { "rss-1.0", NULL}; + +static const char* const rss10_uri_strings[2] = { +"http://purl.org/rss/1.0/spec", + NULL +}; + +#define RSS10_TYPES_COUNT 5 +static const raptor_type_q rss10_types[RSS10_TYPES_COUNT + 1] = { + { "application/rss+xml", 19, 10}, + { "application/rss", 15, 3}, + { "text/rss", 8, 3}, + { "application/xml", 15, 3}, + { "text/xml", 8, 3}, + { NULL, 0, 0} +}; + +static int +raptor_rss10_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = rss10_names; + factory->desc.mime_types = rss10_types; + + factory->desc.label = "RSS 1.0"; + factory->desc.uri_strings = rss10_uri_strings; + + factory->context_length = sizeof(raptor_rss10_serializer_context); + + factory->init = raptor_rss10_serialize_init; + factory->terminate = raptor_rss10_serialize_terminate; + factory->declare_namespace = raptor_rss10_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_rss10_serialize_declare_namespace_from_namespace; + factory->serialize_start = raptor_rss10_serialize_start; + factory->serialize_statement = raptor_rss10_serialize_statement; + factory->serialize_end = raptor_rss10_serialize_end; + factory->finish_factory = raptor_rss10_serialize_finish_factory; + + return 0; +} + + + +static const char* atom_names[3] = { "atom", NULL}; + +static const char* const atom_uri_strings[2] = { + "http://www.ietf.org/rfc/rfc4287.txt", + NULL +}; + +#define ATOM_TYPES_COUNT 1 +static const raptor_type_q atom_types[ATOM_TYPES_COUNT + 1] = { + { "application/atom+xml", 20, 10}, + { NULL, 0, 0} +}; + +static int +raptor_atom_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = 
atom_names; + factory->desc.mime_types = atom_types; + + factory->desc.label = "Atom 1.0"; + factory->desc.uri_strings = atom_uri_strings; + + factory->context_length = sizeof(raptor_rss10_serializer_context); + + factory->init = raptor_rss10_serialize_init; + factory->terminate = raptor_rss10_serialize_terminate; + factory->declare_namespace = raptor_rss10_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_rss10_serialize_declare_namespace_from_namespace; + factory->serialize_start = raptor_rss10_serialize_start; + factory->serialize_statement = raptor_rss10_serialize_statement; + factory->serialize_end = raptor_rss10_serialize_end; + factory->finish_factory = raptor_rss10_serialize_finish_factory; + + return 0; +} + + + +int +raptor_init_serializer_rss10(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_rss10_serializer_register_factory); +} + +int +raptor_init_serializer_atom(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_atom_serializer_register_factory); +} + diff --git a/src/raptor_serialize_turtle.c b/src/raptor_serialize_turtle.c new file mode 100644 index 0000000..79fdbbd --- /dev/null +++ b/src/raptor_serialize_turtle.c @@ -0,0 +1,1819 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_serialize_turtle.c - Turtle serializer + * + * Copyright (C) 2006,2008 Dave Robillard + * Copyright (C) 2004-2013 David Beckett http://www.dajobe.org/ + * Copyright (C) 2004-2005 University of Bristol, UK http://www.bristol.ac.uk/ + * Copyright (C) 2005 Steve Shepard steveshep@gmail.com + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#define MAX_ASCII_INT_SIZE 13 + + +/* + * Raptor turtle serializer object + */ +typedef struct { + raptor_namespace_stack *nstack; /* Namespace stack */ + raptor_namespace *rdf_nspace; /* the rdf: namespace */ + raptor_turtle_writer *turtle_writer; /* where the xml is being written */ + raptor_sequence *namespaces; /* User declared namespaces */ + raptor_avltree *subjects; /* subject items */ + raptor_avltree *blanks; /* blank subject items */ + raptor_avltree *nodes; /* nodes */ + raptor_abbrev_node *rdf_type; /* rdf:type uri */ + + /* URI of rdf:XMLLiteral */ + raptor_uri* rdf_xml_literal_uri; + + /* URI of rdf:first */ + raptor_uri* rdf_first_uri; + + /* URI of rdf:rest */ + raptor_uri* rdf_rest_uri; + + /* URI of rdf:nil */ + raptor_uri* rdf_nil_uri; + + /* URI of rs:ResultSet */ + raptor_uri* rs_ResultSet_uri; + + /* URI of rs:resultVariable */ + raptor_uri* rs_resultVariable_uri; + + /* Non 0 for rs:ResultSet */ + int resultset; + + /* Non 0 for mKR serializer */ + int emit_mkr; + /* Flags for turtle writer */ + int turtle_writer_flags; + + /* Non 0 if "begin relation result ;" has been written */ + int written_begin; + + /* non zero if header is finished being written + * (and thus no new namespaces can be declared). 
+ */ + int written_header; + + /* for labeling namespaces */ + int namespace_count; + + /* state for raptor_mkr_emit_subject_resultset() */ + int mkr_rs_size; + int mkr_rs_arity; + int mkr_rs_ntuple; + int mkr_rs_nvalue; + int mkr_rs_processing_value; +} raptor_turtle_context; + + +/* prototypes for functions */ + +static int raptor_turtle_emit_resource(raptor_serializer *serializer, + raptor_abbrev_node* node, + int depth); + +static int raptor_turtle_emit_literal(raptor_serializer *serializer, + raptor_abbrev_node* node, + int depth); +static int raptor_turtle_emit_blank(raptor_serializer *serializer, + raptor_abbrev_node* node, + int depth); +static int raptor_turtle_emit_subject_list_items(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth); +static int raptor_turtle_emit_subject_collection_items(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth); +static int raptor_turtle_emit_subject_properties(raptor_serializer *serializer, + raptor_abbrev_subject* subject, + int depth); +static int raptor_mkr_emit_subject_resultset(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth); +static int raptor_turtle_emit_subject(raptor_serializer *serializer, + raptor_abbrev_subject* subject, + int depth); +static int raptor_turtle_emit(raptor_serializer *serializer); + +static int raptor_turtle_serialize_init(raptor_serializer* serializer, + const char *name); +static void raptor_turtle_serialize_terminate(raptor_serializer* serializer); +static int raptor_turtle_serialize_declare_namespace(raptor_serializer* serializer, + raptor_uri *uri, + const unsigned char *prefix); +static int raptor_turtle_serialize_start(raptor_serializer* serializer); +static int raptor_turtle_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement); + +static int raptor_turtle_serialize_end(raptor_serializer* serializer); +static int raptor_turtle_serialize_flush(raptor_serializer* serializer); 
+static void raptor_turtle_serialize_finish_factory(raptor_serializer_factory* factory); + + +int +raptor_turtle_is_legal_turtle_qname(raptor_qname* qname) +{ + const char* prefix_name; + const char* local_name; + + if(!qname) + return 0; + + prefix_name = qname->nspace ? (const char*)qname->nspace->prefix : NULL; + if(prefix_name) { + /* prefixName: must have leading [A-Z][a-z][0-9] (nameStartChar - '_') */ + /* prefixName: no . anywhere */ + if(!(isalpha((int)*prefix_name) || isdigit((int)*prefix_name)) || + strchr(prefix_name, '.')) + return 0; + } + + local_name = (const char*)qname->local_name; + if(local_name) { + /* nameStartChar: must have leading [A-Z][a-z][0-9]_ */ + /* nameChar: no . anywhere */ + if(!(isalpha((int)*local_name) || isdigit((int)*local_name) || *local_name == '_') || + strchr(local_name, '.')) + return 0; + } + + return 1; +} + +/* + * raptor_turtle_emit_resource: + * @serializer: #raptor_serializer object + * @node: resource node + * @depth: depth into tree + * + * Emit a description of a resource using an XML Element + * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_resource(raptor_serializer *serializer, + raptor_abbrev_node* node, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + int emit_mkr = context->emit_mkr; + raptor_turtle_writer *turtle_writer = context->turtle_writer; + + raptor_qname* qname = NULL; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting resource node", node); + + if(node->term->type != RAPTOR_TERM_TYPE_URI) + return 1; + + if(raptor_uri_equals(node->term->value.uri, context->rdf_nil_uri)) { + if(emit_mkr) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ", 1); + else + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"( )", 3); + return 0; + } + + qname = raptor_new_qname_from_namespace_uri(context->nstack, + node->term->value.uri, 10); + + /* XML Names allow leading '_' and '.' 
anywhere but Turtle does not */ + if(qname && !raptor_turtle_is_legal_turtle_qname(qname)) { + raptor_free_qname(qname); + qname = NULL; + } + + if(raptor_uri_equals(node->term->value.uri, context->rdf_nil_uri)) { + if(emit_mkr) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ", 1); + else + raptor_turtle_writer_raw_counted(turtle_writer ,(const unsigned char*)"( )", 3); + return 0; + } + + if(qname) { + raptor_turtle_writer_qname(turtle_writer, qname); + raptor_free_qname(qname); + } else { + raptor_turtle_writer_reference(turtle_writer, node->term->value.uri); + } + + RAPTOR_DEBUG_ABBREV_NODE("Emitted", node); + + return 0; +} + + +/* + * raptor_turtle_emit_literal: + * @serializer: #raptor_serializer object + * @node: literal node + * @depth: depth into tree + * + * Emit a description of a literal (object). + * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_literal(raptor_serializer *serializer, + raptor_abbrev_node* node, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_turtle_writer *turtle_writer = context->turtle_writer; + int rc = 0; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting literal node", node); + + if(node->term->type != RAPTOR_TERM_TYPE_LITERAL) + return 1; + + rc = raptor_turtle_writer_literal(turtle_writer, context->nstack, + node->term->value.literal.string, + node->term->value.literal.language, + node->term->value.literal.datatype); + + RAPTOR_DEBUG_ABBREV_NODE("Emitted literal node", node); + + return rc; +} + + +/* + * raptor_turtle_emit_blank: + * @serializer: #raptor_serializer object + * @node: blank node + * @depth: depth into tree + * + * Emit a description of a blank node + * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_blank(raptor_serializer *serializer, + raptor_abbrev_node* node, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_turtle_writer* 
turtle_writer = context->turtle_writer; + int emit_mkr = context->emit_mkr; + int rc = 0; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting blank node", node); + + if(node->term->type != RAPTOR_TERM_TYPE_BLANK) + return 1; + + if((node->count_as_subject == 1 && node->count_as_object == 1)) { + /* If this is only used as a 1 subject and object or never + * used as a subject or never used as an object, it never need + * be referenced with an explicit name */ + raptor_abbrev_subject* blank; + + blank = raptor_abbrev_subject_find(context->blanks, node->term); + if(blank) { + rc = raptor_turtle_emit_subject(serializer, blank, depth+1); + raptor_abbrev_subject_invalidate(blank); + } + + } else { + /* Blank node that needs an explicit name */ + raptor_turtle_writer_bnodeid(context->turtle_writer, + node->term->value.blank.string, + node->term->value.blank.string_len); + if(emit_mkr && !context->resultset) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" has", 4); + } + + RAPTOR_DEBUG_ABBREV_NODE("Emitted blank node", node); + + return rc; +} + + +/* + * raptor_turtle_emit_subject_list_items: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit an rdf list of items (rdf:li) about a subject node. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_subject_list_items(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth) +{ + int rv = 0; + int i = 0; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject list items", subject->node); + + while(!rv && i < raptor_sequence_size(subject->list_items)) { + raptor_abbrev_node* object; + + object = (raptor_abbrev_node*)raptor_sequence_get_at(subject->list_items, + i++); + if(!object) + continue; + + switch(object->term->type) { + case RAPTOR_TERM_TYPE_URI: + rv = raptor_turtle_emit_resource(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + rv = raptor_turtle_emit_literal(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_BLANK: + rv = raptor_turtle_emit_blank(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple has unsupported term type %u", + object->term->type); + break; + + } + + } + + return rv; +} + + +/* + * raptor_turtle_emit_subject_collection_items: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit an abbreviated rdf collection of items (rdf:first, rdf:rest) about a subject node. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_subject_collection_items(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + int emit_mkr = context->emit_mkr; + int rv = 0; + raptor_avltree_iterator* iter = NULL; + int i; + int is_new_subject = 0; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject collection items", subject->node); + + /* if just saw a new subject (is_new_subject is true) then there is no need + * to advance the iterator - it was just reset + */ + for(i = 0, (iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1)); + iter && !rv; + i++, (rv = is_new_subject ? 0 : raptor_avltree_iterator_next(iter))) { + raptor_abbrev_node** nodes; + raptor_abbrev_node* predicate; + raptor_abbrev_node* object; + + is_new_subject = 0; + + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + predicate = nodes[0]; + object = nodes[1]; + + if(!raptor_uri_equals(predicate->term->value.uri, + context->rdf_first_uri)) { + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Malformed collection - first predicate is not rdf:first"); + raptor_free_avltree_iterator(iter); + return 1; + } + + if(!object) + continue; + + if(i > 0) { + if(emit_mkr) + raptor_turtle_writer_raw_counted(context->turtle_writer, + (const unsigned char*)", ", 1); + else + raptor_turtle_writer_newline(context->turtle_writer); + } + + switch(object->term->type) { + case RAPTOR_TERM_TYPE_URI: + rv = raptor_turtle_emit_resource(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + rv = raptor_turtle_emit_literal(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_BLANK: + rv = raptor_turtle_emit_blank(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple 
has unsupported term type %u", + object->term->type); + break; + } + + /* Return error if emitting something failed above */ + if(rv) { + raptor_free_avltree_iterator(iter); + return rv; + } + + /* last item */ + rv = raptor_avltree_iterator_next(iter); + if(rv) + break; + + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + predicate = nodes[0]; + object = nodes[1]; + + if(!raptor_uri_equals(predicate->term->value.uri, context->rdf_rest_uri)) { + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Malformed collection - second predicate is not rdf:rest"); + raptor_free_avltree_iterator(iter); + return 1; + } + + if(object->term->type == RAPTOR_TERM_TYPE_BLANK) { + subject = raptor_abbrev_subject_find(context->blanks, object->term); + + if(!subject) { + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Malformed collection - could not find subject for rdf:rest"); + raptor_free_avltree_iterator(iter); + return 1; + } + + /* got a <(old)subject> rdf:rest <(new)subject> triple so know + * subject has changed and should reset the properties iterator + */ + if(iter) + raptor_free_avltree_iterator(iter); + iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1); + is_new_subject = 1; + + } else { + if(object->term->type != RAPTOR_TERM_TYPE_URI || + !raptor_uri_equals(object->term->value.uri, context->rdf_nil_uri)) { + raptor_log_error(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Malformed collection - last rdf:rest resource is not rdf:nil"); + raptor_free_avltree_iterator(iter); + return 1; + } + break; + } + } + if(iter) + raptor_free_avltree_iterator(iter); + + return rv; +} + + + +/* + * raptor_turtle_emit_subject_properties: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit the properties about a subject node. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_subject_properties(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_turtle_writer *turtle_writer = context->turtle_writer; + int emit_mkr = context->emit_mkr; + int numobj = 2; /* "[" "]" around all object lists (any size) */ + raptor_abbrev_node* last_predicate = NULL; + int rv = 0; + raptor_avltree_iterator* iter = NULL; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject properties", subject->node); + + /* Emit any rdf:_n properties collected */ + if(raptor_sequence_size(subject->list_items) > 0) + rv = raptor_turtle_emit_subject_list_items(serializer, subject, depth+1); + + for((iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1)); + iter && !rv; + (rv = raptor_avltree_iterator_next(iter))) { + raptor_abbrev_node** nodes; + raptor_abbrev_node* predicate; + raptor_abbrev_node* object; + raptor_qname *qname; + + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + predicate = nodes[0]; + object = nodes[1]; + numobj = 2; /* = raptor_sequence_size(xxx) if available */ + + if(!last_predicate || + !raptor_abbrev_node_equals(predicate, last_predicate)) { + /* no object list abbreviation possible, terminate last object */ + if(last_predicate) { + if(emit_mkr && !context->resultset) { + if(numobj > 1) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"]", 1); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)", ", 2); + } else { + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ;", 2); + } + raptor_turtle_writer_newline(turtle_writer); + } + + qname = raptor_new_qname_from_namespace_uri(context->nstack, + predicate->term->value.uri, + 10); + + if(raptor_abbrev_node_equals(predicate, context->rdf_type)) { + if(emit_mkr) + 
raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"rdf:type", 8); + else + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"a", 1); + } else if(qname) { + raptor_turtle_writer_qname(turtle_writer, qname); + } else { + raptor_turtle_writer_reference(turtle_writer, predicate->term->value.uri); + } + if(emit_mkr) { + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" = ", 3); + if(numobj > 1) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"[", 1); + } else { + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ", 1); + } + + + if(qname) + raptor_free_qname(qname); + } else { /* not last object for this predicate */ + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)", ", 2); + } + + + switch(object->term->type) { + case RAPTOR_TERM_TYPE_URI: + rv = raptor_turtle_emit_resource(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + rv = raptor_turtle_emit_literal(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_BLANK: + rv = raptor_turtle_emit_blank(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple has unsupported term type %u", + object->term->type); + break; + } + + /* Return error if emitting something failed above */ + if(rv) + return rv; + + last_predicate = predicate; + } + + if(iter) + raptor_free_avltree_iterator(iter); + + return rv; +} + + + +/* + * raptor_mkr_emit_subject_resultset: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit rs:ResultSet as CSV relation. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_mkr_emit_subject_resultset(raptor_serializer* serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_turtle_writer *turtle_writer = context->turtle_writer; + raptor_abbrev_node* last_predicate = NULL; + int rv = 0; + raptor_avltree_iterator* iter = NULL; + int skip_object; + + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject resultset", subject->node); + + /* Emit any rdf:_n properties collected */ + if(raptor_sequence_size(subject->list_items) > 0) + rv = raptor_turtle_emit_subject_list_items(serializer, subject, depth+1); + + + for((iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1)); + iter && !rv; + (rv = raptor_avltree_iterator_next(iter))) { + raptor_abbrev_node** nodes; + raptor_abbrev_node* predicate; + raptor_abbrev_node* object; + raptor_qname *qname; + + nodes = (raptor_abbrev_node**)raptor_avltree_iterator_get(iter); + if(!nodes) + break; + + predicate = nodes[0]; + object = nodes[1]; + + if(!last_predicate || + !raptor_abbrev_node_equals(predicate, last_predicate)) { + /* first predicate or same predicate as last time */ + + /* no object list abbreviation possible, terminate last object */ + if(last_predicate) { + if(!context->mkr_rs_arity) { + /* last variable in first row */ + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ;", 2); + raptor_turtle_writer_newline(turtle_writer); + context->mkr_rs_ntuple++; /* start count after variables */ + } else if(!context->mkr_rs_nvalue) { + /* size not emitted */ + } else if(context->mkr_rs_processing_value && + (context->mkr_rs_nvalue == context->mkr_rs_arity)) { + /* previous value was last value of row */ + context->mkr_rs_processing_value = 0; + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ;", 2); + raptor_turtle_writer_newline(turtle_writer); + context->mkr_rs_nvalue = 0; + 
context->mkr_rs_ntuple++; + if(context->mkr_rs_ntuple > context->mkr_rs_size) { + /* previous row was last row of table */ + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"end relation result ;", 21); + raptor_turtle_writer_newline(turtle_writer); + break; + } + } + } + + qname = raptor_new_qname_from_namespace_uri(context->nstack, + predicate->term->value.uri, + 10); + if(raptor_abbrev_node_equals(predicate, context->rdf_type)) { + skip_object = 1; /* all values have been written */ + } else if(qname) { + /* check predicate name */ + if(!strcmp((const char*)qname->local_name, (const char*)"resultVariable")) { + /* emit mKR relation header */ + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"result is relation with format = csv ;", 38); + raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"begin relation result ;", 23); + raptor_turtle_writer_decrease_indent(turtle_writer); + raptor_turtle_writer_newline(turtle_writer); + skip_object = 0; + + } else if(!strcmp((const char*)qname->local_name, (const char*)"size")) { + context->mkr_rs_arity = context->mkr_rs_nvalue; + context->mkr_rs_nvalue = 0; + skip_object = 0; + } else if(!strcmp((const char*)qname->local_name, (const char*)"solution")) { + skip_object = 0; /* get values */ + } else if(!strcmp((const char*)qname->local_name, (const char*)"binding")) { + skip_object = 0; /* get values */ + } else if(!strcmp((const char*)qname->local_name, (const char*)"variable")) { + skip_object = 1; + } else if(!strcmp((const char*)qname->local_name, (const char*)"value")) { + context->mkr_rs_processing_value = 1; + context->mkr_rs_nvalue++; + skip_object = 0; + } else { + skip_object = 1; + } + + } else { + /* not qname */ + raptor_turtle_writer_reference(turtle_writer, predicate->term->value.uri); + skip_object = 0; + } /* end predicate */ + + if(qname) + raptor_free_qname(qname); + } else { /* predicate was skipped 
*/ + if(!context->mkr_rs_arity) + /* not last variable */ + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)", ", 2); + } + + if(!skip_object) { + /* do not skip object */ + switch(object->term->type) { + case RAPTOR_TERM_TYPE_URI: + rv = raptor_turtle_emit_resource(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + if(!context->mkr_rs_arity) { + /* variables */ + context->mkr_rs_nvalue++; + raptor_turtle_writer_csv_string(turtle_writer, object->term->value.literal.string); + } else if(!context->mkr_rs_nvalue) { + /* size */ + context->mkr_rs_size = atoi((const char*)object->term->value.literal.string); + } else { + /* values */ + raptor_turtle_writer_csv_string(turtle_writer, object->term->value.literal.string); + if(context->mkr_rs_nvalue < context->mkr_rs_arity) + /* not last value */ + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)", ", 2); + } + break; + + case RAPTOR_TERM_TYPE_BLANK: + rv = raptor_turtle_emit_blank(serializer, object, depth+1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "Triple has unsupported term type %u", + object->term->type); + break; + } + } /* end object */ + + /* Return error if emitting something failed above */ + if(rv) + return rv; + + last_predicate = predicate; + } /* end iteration i */ + + if(iter) + raptor_free_avltree_iterator(iter); + + return rv; +} + +/* + * raptor_turtle_emit_subject: + * @serializer: #raptor_serializer object + * @subject: subject node + * @depth: depth into tree + * + * Emit a subject node + * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit_subject(raptor_serializer *serializer, + raptor_abbrev_subject* subject, + int depth) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_turtle_writer* turtle_writer = context->turtle_writer; + int emit_mkr = context->emit_mkr; + int numobj = 
2; + int blank = 1; + int collection = 0; + int rc = 0; + + if(!raptor_abbrev_subject_valid(subject)) return 0; + + RAPTOR_DEBUG_ABBREV_NODE("Emitting subject node", subject->node); + + if(!depth && + subject->node->term->type == RAPTOR_TERM_TYPE_BLANK && + subject->node->count_as_subject == 1 && + subject->node->count_as_object == 1) { + RAPTOR_DEBUG_ABBREV_NODE("Skipping subject node - subj & obj count 1", subject->node); + return 0; + } + + if(raptor_avltree_size(subject->properties) == 0) { + RAPTOR_DEBUG_ABBREV_NODE("Skipping subject node - no props", subject->node); + return 0; + } + + /* check if we can do collection abbreviation */ + if(raptor_avltree_size(subject->properties) >= 2) { + raptor_avltree_iterator* iter = NULL; + raptor_abbrev_node* pred1; + raptor_abbrev_node* pred2; + + iter = raptor_new_avltree_iterator(subject->properties, NULL, NULL, 1); + if(!iter) + return 1; + pred1 = ((raptor_abbrev_node**)raptor_avltree_iterator_get(iter))[0]; + if(raptor_avltree_iterator_next(iter)) { + raptor_free_avltree_iterator(iter); + return 1; + } + pred2 = ((raptor_abbrev_node**)raptor_avltree_iterator_get(iter))[0]; + raptor_free_avltree_iterator(iter); + + if(pred1->term->type == RAPTOR_TERM_TYPE_URI && + pred2->term->type == RAPTOR_TERM_TYPE_URI && + ( + (raptor_uri_equals(pred1->term->value.uri, context->rdf_first_uri) && + raptor_uri_equals(pred2->term->value.uri, context->rdf_rest_uri)) + || + (raptor_uri_equals(pred2->term->value.uri, context->rdf_first_uri) && + raptor_uri_equals(pred1->term->value.uri, context->rdf_rest_uri)) + ) + ) { + collection = 1; + /* check for rs:ResultSet */ + } else if(pred1->term->type == RAPTOR_TERM_TYPE_URI && + raptor_uri_equals(pred1->term->value.uri, context->rs_resultVariable_uri)) { + context->resultset = 1; + } + } + + /* emit the subject node */ + if(subject->node->term->type == RAPTOR_TERM_TYPE_URI) { + if(emit_mkr) { + if(context->resultset && !context->written_begin) { + 
raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"result is ", 10); + rc = raptor_turtle_emit_resource(serializer, subject->node, depth+1); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ;", 2); + raptor_turtle_writer_decrease_indent(turtle_writer); + raptor_turtle_writer_newline(turtle_writer); + } else { + rc = raptor_turtle_emit_resource(serializer, subject->node, depth+1); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" has", 4); + } + } else { + rc = raptor_turtle_emit_resource(serializer, subject->node, depth+1); + } + if(rc) + return rc; + blank = 0; + collection = 0; + + } else if(subject->node->term->type == RAPTOR_TERM_TYPE_BLANK) { + if((subject->node->count_as_subject == 1 && + subject->node->count_as_object == 0) && depth > 1) { + blank = 1; + } else if(subject->node->count_as_object == 0) { + if(emit_mkr) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"{}", 2); + else + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"[]", 2); + blank = 0; + } else if(!collection && subject->node->count_as_object > 1) { + /* Referred to (used as an object), so needs a nodeID */ + if(emit_mkr) { + if(!context->resultset) { + raptor_turtle_writer_bnodeid(turtle_writer, + subject->node->term->value.blank.string, + subject->node->term->value.blank.string_len); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" has", 4); + } + } else { + raptor_turtle_writer_bnodeid(turtle_writer, + subject->node->term->value.blank.string, + subject->node->term->value.blank.string_len); + } + } + } + + if(collection) { + if(!emit_mkr) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"(", 1); + raptor_turtle_writer_increase_indent(turtle_writer); + + rc = raptor_turtle_emit_subject_collection_items(serializer, subject, depth+1); + + raptor_turtle_writer_decrease_indent(turtle_writer); + + if(!emit_mkr) { + 
raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)")", 1); + } + + } else { + if(emit_mkr) { + if(context->resultset) { + /* mKR relation with format = csv */ + if(blank && depth > 1) { + /* skip */ + } + + raptor_mkr_emit_subject_resultset(serializer, subject, depth+1); + + raptor_turtle_writer_decrease_indent(turtle_writer); + + if(blank && depth > 1) { + /* skip */ + } + } else { + /* mKR not relation */ + if(blank && depth > 1) { + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"{ ", 2); + raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_bnodeid(turtle_writer, + subject->node->term->value.blank.string, + subject->node->term->value.blank.string_len); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" has", 4); + } + raptor_turtle_writer_increase_indent(turtle_writer); + raptor_turtle_writer_newline(turtle_writer); + + raptor_turtle_emit_subject_properties(serializer, subject, depth+1); + + raptor_turtle_writer_decrease_indent(turtle_writer); + + if(blank && depth > 1) { + if(numobj > 1) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"]", 1); + raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)";}", 2); + } + } + } else { + /* Turtle */ + if(blank && depth > 1) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"[", 1); + + raptor_turtle_writer_increase_indent(turtle_writer); + raptor_turtle_writer_newline(turtle_writer); + + raptor_turtle_emit_subject_properties(serializer, subject, depth+1); + + raptor_turtle_writer_decrease_indent(turtle_writer); + + if(blank && depth > 1) { + raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"]", 1); + } + } + } + + if(depth == 0) { + /* NOTE: the space before the . 
here MUST be there or statements + * that end in a numeric literal will be interpreted incorrectly + * (the "." will be parsed as part of the literal and statement + * left unterminated) + */ + if(emit_mkr) { + if(!context->resultset) { + if(numobj > 1) + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)"]", 1); + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ;", 2); + raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_newline(turtle_writer); + } + context->resultset = 0; + context->written_begin = 0; + } else { + raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" .", 2); + raptor_turtle_writer_newline(turtle_writer); + raptor_turtle_writer_newline(turtle_writer); + } + } + + return rc; +} + + +/* + * raptor_turtle_emit: + * @serializer: #raptor_serializer object + * + * Emit Turtle for all stored triples. + * + * Return value: non-0 on failure + **/ +static int +raptor_turtle_emit(raptor_serializer *serializer) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_abbrev_subject* subject; + raptor_abbrev_subject* blank; + int rc; + raptor_avltree_iterator* iter = NULL; + + iter = raptor_new_avltree_iterator(context->subjects, NULL, NULL, 1); + while(iter) { + subject = (raptor_abbrev_subject *)raptor_avltree_iterator_get(iter); + if(subject) { + rc = raptor_turtle_emit_subject(serializer, subject, 0); + if(rc) { + raptor_free_avltree_iterator(iter); + return rc; + } + } + if(raptor_avltree_iterator_next(iter)) break; + } + if(iter) raptor_free_avltree_iterator(iter); + + /* Emit any remaining blank nodes. 
*/ + iter = raptor_new_avltree_iterator(context->blanks, NULL, NULL, 1); + while(iter) { + blank = (raptor_abbrev_subject *)raptor_avltree_iterator_get(iter); + if(blank) { + rc = raptor_turtle_emit_subject(serializer, blank, 0); + if(rc) { + raptor_free_avltree_iterator(iter); + return rc; + } + } + if(raptor_avltree_iterator_next(iter)) break; + } + if(iter) raptor_free_avltree_iterator(iter); + + return 0; +} + + +/* + * raptor serializer Turtle implementation + */ + + +/* create a new serializer */ +static int +raptor_turtle_serialize_init(raptor_serializer* serializer, const char *name) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_uri *rdf_type_uri; + + context->turtle_writer_flags = 0; + if(!strcmp(name,(const char*)"mkr")) { + context->emit_mkr = 1; + context->turtle_writer_flags |= TURTLE_WRITER_FLAG_MKR; + } else + context->emit_mkr = 0; + context->resultset = 0; + context->written_begin = 0; + + context->nstack = raptor_new_namespaces(serializer->world, 1); + if(!context->nstack) + return 1; + context->rdf_nspace = raptor_new_namespace(context->nstack, + (const unsigned char*)"rdf", + (const unsigned char*)raptor_rdf_namespace_uri, + 0); + + context->namespaces = raptor_new_sequence(NULL, NULL); + + context->subjects = + raptor_new_avltree((raptor_data_compare_handler)raptor_abbrev_subject_compare, + (raptor_data_free_handler)raptor_free_abbrev_subject, 0); + + context->blanks = + raptor_new_avltree((raptor_data_compare_handler)raptor_abbrev_subject_compare, + (raptor_data_free_handler)raptor_free_abbrev_subject, 0); + + context->nodes = + raptor_new_avltree((raptor_data_compare_handler)raptor_abbrev_node_compare, + (raptor_data_free_handler)raptor_free_abbrev_node, 0); + + rdf_type_uri = raptor_new_uri_for_rdf_concept(serializer->world, + (const unsigned char*)"type"); + if(rdf_type_uri) { + raptor_term* uri_term; + uri_term = raptor_new_term_from_uri(serializer->world, + rdf_type_uri); + 
raptor_free_uri(rdf_type_uri); + context->rdf_type = raptor_new_abbrev_node(serializer->world, uri_term); + raptor_free_term(uri_term); + } else + context->rdf_type = NULL; + + context->rdf_xml_literal_uri = raptor_new_uri(serializer->world, raptor_xml_literal_datatype_uri_string); + context->rdf_first_uri = raptor_new_uri(serializer->world, (const unsigned char*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#first"); + context->rdf_rest_uri = raptor_new_uri(serializer->world, (const unsigned char*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#rest"); + context->rdf_nil_uri = raptor_new_uri(serializer->world, (const unsigned char*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"); + + context->rs_ResultSet_uri = raptor_new_uri(serializer->world, (const unsigned char*)"http://jena.hpl.hp.com/2003/03/result-set#ResultSet"); + context->rs_resultVariable_uri = raptor_new_uri(serializer->world, (const unsigned char*)"http://jena.hpl.hp.com/2003/03/result-set#resultVariable"); + + if(!context->rdf_nspace || !context->namespaces || + !context->subjects || !context->blanks || !context->nodes || + !context->rdf_xml_literal_uri || !context->rdf_first_uri || + !context->rdf_rest_uri || !context->rdf_nil_uri || !context->rdf_type || + !context->rs_ResultSet_uri || !context->rs_resultVariable_uri) + { + raptor_turtle_serialize_terminate(serializer); + return 1; + } + + /* Note: item 0 in the list is rdf:RDF's namespace */ + if(raptor_sequence_push(context->namespaces, context->rdf_nspace)) { + raptor_turtle_serialize_terminate(serializer); + return 1; + } + + return 0; +} + + +/* destroy a serializer */ +static void +raptor_turtle_serialize_terminate(raptor_serializer* serializer) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + + if(context->turtle_writer) { + raptor_free_turtle_writer(context->turtle_writer); + context->turtle_writer = NULL; + } + + if(context->rdf_nspace) { + raptor_free_namespace(context->rdf_nspace); + context->rdf_nspace = 
NULL; + } + + if(context->namespaces) { + int i; + + /* Note: item 0 in the list is rdf:RDF's namespace and freed above */ + for(i = 1; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns =(raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + if(ns) + raptor_free_namespace(ns); + } + raptor_free_sequence(context->namespaces); + context->namespaces = NULL; + } + + if(context->subjects) { + raptor_free_avltree(context->subjects); + context->subjects = NULL; + } + + if(context->blanks) { + raptor_free_avltree(context->blanks); + context->blanks = NULL; + } + + if(context->nodes) { + raptor_free_avltree(context->nodes); + context->nodes = NULL; + } + + if(context->nstack) { + raptor_free_namespaces(context->nstack); + context->nstack = NULL; + } + + if(context->rdf_type) { + raptor_free_abbrev_node(context->rdf_type); + context->rdf_type = NULL; + } + + if(context->rdf_xml_literal_uri) { + raptor_free_uri(context->rdf_xml_literal_uri); + context->rdf_xml_literal_uri = NULL; + } + + if(context->rdf_first_uri) { + raptor_free_uri(context->rdf_first_uri); + context->rdf_first_uri = NULL; + } + + if(context->rdf_rest_uri) { + raptor_free_uri(context->rdf_rest_uri); + context->rdf_rest_uri = NULL; + } + + if(context->rdf_nil_uri) { + raptor_free_uri(context->rdf_nil_uri); + context->rdf_nil_uri = NULL; + } + + if(context->rs_ResultSet_uri) { + raptor_free_uri(context->rs_ResultSet_uri); + context->rs_ResultSet_uri = NULL; + } + + if(context->rs_resultVariable_uri) { + raptor_free_uri(context->rs_resultVariable_uri); + context->rs_resultVariable_uri = NULL; + } +} + + +#define TURTLE_NAMESPACE_DEPTH 0 + +/* add a namespace */ +static int +raptor_turtle_serialize_declare_namespace_from_namespace(raptor_serializer* serializer, + raptor_namespace *nspace) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + int i; + + if(context->written_header) + return 1; + + for(i = 0; i< 
raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + + /* If prefix is already declared, ignore it */ + if(!ns->prefix && !nspace->prefix) + return 1; + + if(ns->prefix && nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) + return 1; + + if(ns->uri && nspace->uri && + raptor_uri_equals(ns->uri, nspace->uri)) + return 1; + } + + nspace = raptor_new_namespace_from_uri(context->nstack, + nspace->prefix, nspace->uri, + TURTLE_NAMESPACE_DEPTH); + if(!nspace) + return 1; + + raptor_sequence_push(context->namespaces, nspace); + return 0; +} + + +/* add a namespace */ +static int +raptor_turtle_serialize_declare_namespace(raptor_serializer* serializer, + raptor_uri *uri, + const unsigned char *prefix) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_namespace *ns; + int rc; + + ns = raptor_new_namespace_from_uri(context->nstack, prefix, uri, + TURTLE_NAMESPACE_DEPTH); + + rc = raptor_turtle_serialize_declare_namespace_from_namespace(serializer, ns); + raptor_free_namespace(ns); + + return rc; +} + + +/* start a serialize */ +static int +raptor_turtle_serialize_start(raptor_serializer* serializer) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_turtle_writer* turtle_writer; + int flag; + + if(context->turtle_writer) + raptor_free_turtle_writer(context->turtle_writer); + + flag = RAPTOR_OPTIONS_GET_NUMERIC(serializer, RAPTOR_OPTION_WRITE_BASE_URI); + turtle_writer = raptor_new_turtle_writer(serializer->world, + serializer->base_uri, + flag, + context->nstack, + serializer->iostream, + context->turtle_writer_flags); + if(!turtle_writer) + return 1; + + raptor_turtle_writer_set_option(turtle_writer, + RAPTOR_OPTION_WRITER_AUTO_INDENT, 1); + raptor_turtle_writer_set_option(turtle_writer, + RAPTOR_OPTION_WRITER_INDENT_WIDTH, 2); + + context->turtle_writer = 
turtle_writer; + + return 0; +} + +static void +raptor_turtle_ensure_writen_header(raptor_serializer* serializer, + raptor_turtle_context* context) +{ + int i; + raptor_turtle_writer* turtle_writer = context->turtle_writer; + + if(context->written_header) + return; + + if(!context->turtle_writer) + return; + + for(i = 0; i< raptor_sequence_size(context->namespaces); i++) { + raptor_namespace* ns; + ns = (raptor_namespace*)raptor_sequence_get_at(context->namespaces, i); + raptor_turtle_writer_namespace_prefix(turtle_writer, ns); + raptor_namespace_stack_start_namespace(context->nstack, ns, 0); + } + + raptor_turtle_writer_newline(context->turtle_writer); + + context->written_header = 1; +} + +/* serialize a statement */ +static int +raptor_turtle_serialize_statement(raptor_serializer* serializer, + raptor_statement *statement) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + raptor_abbrev_subject* subject = NULL; + raptor_abbrev_node* predicate = NULL; + raptor_abbrev_node* object = NULL; + int rv; + raptor_term_type object_type; + + if(!(statement->subject->type == RAPTOR_TERM_TYPE_URI || + statement->subject->type == RAPTOR_TERM_TYPE_BLANK)) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Do not know how to serialize node type %u", + statement->subject->type); + return 1; + } + + subject = raptor_abbrev_subject_lookup(context->nodes, context->subjects, + context->blanks, + statement->subject); + if(!subject) { + return 1; + } + + object_type = statement->object->type; + + if(!(object_type == RAPTOR_TERM_TYPE_URI || + object_type == RAPTOR_TERM_TYPE_BLANK || + object_type == RAPTOR_TERM_TYPE_LITERAL)) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot serialize a triple with object node type %u", + object_type); + return 1; + } + + object = raptor_abbrev_node_lookup(context->nodes, statement->object); + if(!object) + return 1; + + + 
if(statement->predicate->type == RAPTOR_TERM_TYPE_URI) { + predicate = raptor_abbrev_node_lookup(context->nodes, statement->predicate); + if(!predicate) + return 1; + + rv = raptor_abbrev_subject_add_property(subject, predicate, object); + if(rv < 0) { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Unable to add properties to subject %p", + RAPTOR_VOIDP(subject)); + return rv; + } + + } else { + raptor_log_error_formatted(serializer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Do not know how to serialize node type %u", + statement->predicate->type); + return 1; + } + + if(object_type == RAPTOR_TERM_TYPE_URI || + object_type == RAPTOR_TERM_TYPE_BLANK) + object->count_as_object++; + + return 0; +} + + +/* end a serialize */ +static int +raptor_turtle_serialize_end(raptor_serializer* serializer) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + + raptor_turtle_ensure_writen_header(serializer, context); + + raptor_turtle_emit(serializer); + + /* reset serializer for reuse */ + context->written_header = 0; + + return 0; +} + +/* flush turtle */ +static int +raptor_turtle_serialize_flush(raptor_serializer* serializer) +{ + raptor_turtle_context* context = (raptor_turtle_context*)serializer->context; + + raptor_turtle_ensure_writen_header(serializer, context); + + raptor_turtle_emit(serializer); + + if(context->subjects) { + raptor_avltree_trim(context->subjects); + } + + if(context->blanks) { + raptor_avltree_trim(context->blanks); + } + + if(context->nodes) { + raptor_avltree_trim(context->nodes); + } + + return 0; +} + + +/* finish the serializer factory */ +static void +raptor_turtle_serialize_finish_factory(raptor_serializer_factory* factory) +{ + /* NOP */ +} + + +static const char* const turtle_names[2] = { "turtle", NULL}; +static const char* const mkr_names[2] = { "mkr", NULL}; + +static const char* const turtle_uri_strings[3] = { + "http://www.w3.org/ns/formats/Turtle", + 
"http://www.dajobe.org/2004/01/turtle/", + NULL +}; + +#define TURTLE_TYPES_COUNT 6 +static const raptor_type_q turtle_types[TURTLE_TYPES_COUNT + 1] = { + { "text/turtle", 11, 10}, + { "application/turtle", 18, 10}, + { "application/x-turtle", 20, 8}, + { "text/n3", 7, 3}, + { "text/rdf+n3", 11, 3}, + { "application/rdf+n3", 18, 3}, + { NULL, 0, 0} +}; +#define MKR_TYPES_COUNT 6 +static const raptor_type_q mkr_types[TURTLE_TYPES_COUNT + 1] = { + { "text/mkr", 8, 10}, + { "application/mkr", 15, 10}, + { "application/x-mkr", 17, 8}, + { "text/n3", 7, 3}, + { "text/rdf+n3", 11, 3}, + { "application/rdf+n3", 18, 3}, + { NULL, 0, 0} +}; + +static int +raptor_turtle_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = turtle_names; + factory->desc.mime_types = turtle_types; + + factory->desc.label = "Turtle Terse RDF Triple Language"; + factory->desc.uri_strings = turtle_uri_strings; + + factory->context_length = sizeof(raptor_turtle_context); + + factory->init = raptor_turtle_serialize_init; + factory->terminate = raptor_turtle_serialize_terminate; + factory->declare_namespace = raptor_turtle_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_turtle_serialize_declare_namespace_from_namespace; + factory->serialize_start = raptor_turtle_serialize_start; + factory->serialize_statement = raptor_turtle_serialize_statement; + factory->serialize_end = raptor_turtle_serialize_end; + factory->serialize_flush = raptor_turtle_serialize_flush; + factory->finish_factory = raptor_turtle_serialize_finish_factory; + + return 0; +} + +static int +raptor_mkr_serializer_register_factory(raptor_serializer_factory *factory) +{ + factory->desc.names = mkr_names; + factory->desc.mime_types = mkr_types; + + factory->desc.label = "mKR my Knowledge Representation Language"; + factory->desc.uri_strings = turtle_uri_strings; + + factory->context_length = sizeof(raptor_turtle_context); + + factory->init = 
raptor_turtle_serialize_init; + factory->terminate = raptor_turtle_serialize_terminate; + factory->declare_namespace = raptor_turtle_serialize_declare_namespace; + factory->declare_namespace_from_namespace = raptor_turtle_serialize_declare_namespace_from_namespace; + factory->serialize_start = raptor_turtle_serialize_start; + factory->serialize_statement = raptor_turtle_serialize_statement; + factory->serialize_end = raptor_turtle_serialize_end; + factory->finish_factory = raptor_turtle_serialize_finish_factory; + + return 0; +} + +int +raptor_init_serializer_turtle(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_turtle_serializer_register_factory); +} + +int +raptor_init_serializer_mkr(raptor_world* world) +{ + return !raptor_serializer_register_factory(world, + &raptor_mkr_serializer_register_factory); +} + + + +/** + * raptor_uri_turtle_write: + * @world: world + * @iostr: iostream for writing + * @uri: uri + * @nstack: namespace stack + * @base_uri: base URI + * + * Write #raptor_uri to a stream in turtle syntax (using QNames). + * + * Note: This creates and destroys several internal objects for each + * call so for more efficient writing, create a turtle serializer. + * + * Return value: non-0 on failure + */ +int +raptor_uri_turtle_write(raptor_world *world, + raptor_iostream* iostr, + raptor_uri* uri, + raptor_namespace_stack *nstack, + raptor_uri *base_uri) +{ + int rc; + raptor_turtle_writer* turtle_writer; + + turtle_writer = raptor_new_turtle_writer(world, base_uri, 0, nstack, iostr, 0); + if(!turtle_writer) + return 1; + + rc = raptor_turtle_writer_uri(turtle_writer, uri); + + raptor_free_turtle_writer(turtle_writer); + + return rc; +} + + + +/** + * raptor_term_turtle_write: + * @iostr: iostream for writing + * @term: term + * @nstack: namespace stack + * @base_uri: base URI + * + * Write #raptor_term to a stream in turtle syntax (using QNames). 
+ * + * Note: This creates and destroys several internal objects for each + * call so for more efficient writing, create a turtle serializer. + * + * Return value: non-0 on failure + */ +int +raptor_term_turtle_write(raptor_iostream* iostr, + raptor_term* term, + raptor_namespace_stack *nstack, + raptor_uri *base_uri) +{ + int rc; + raptor_turtle_writer* turtle_writer; + + turtle_writer = raptor_new_turtle_writer(term->world, base_uri, 0, nstack, + iostr, 0); + if(!turtle_writer) + return 1; + + rc = raptor_turtle_writer_term(turtle_writer, term); + + raptor_free_turtle_writer(turtle_writer); + + return rc; +} + + + +/** + * raptor_uri_to_turtle_counted_string: + * @world: world + * @uri: uri + * @nstack: namespace stack + * @base_uri: base URI + * @len_p: Pointer to location to store length of new string (if not NULL) + * + * Convert #raptor_uri to a string. + * Caller has responsibility to free the string. + * + * Note: This creates and destroys several internal objects for each + * call so for more efficient writing, create a turtle serializer. + * + * Return value: the new string or NULL on failure. The length of + * the new string is returned in *@len_p if len_p is not NULL. 
+ */ +unsigned char* +raptor_uri_to_turtle_counted_string(raptor_world *world, + raptor_uri* uri, + raptor_namespace_stack *nstack, + raptor_uri *base_uri, + size_t *len_p) +{ + int rc = 1; + raptor_iostream* iostr; + unsigned char *s = NULL; + raptor_turtle_writer* turtle_writer; + + iostr = raptor_new_iostream_to_string(world, + (void**)&s, len_p, malloc); + if(!iostr) + return NULL; + + turtle_writer = raptor_new_turtle_writer(world, base_uri, 0, nstack, iostr, 0); + if(!turtle_writer) + goto tidy; + + rc = raptor_turtle_writer_uri(turtle_writer, uri); + + raptor_free_turtle_writer(turtle_writer); + + tidy: + raptor_free_iostream(iostr); + + if(rc) { + free(s); + s = NULL; + } + + return s; +} + +/** + * raptor_uri_to_turtle_string: + * @world: world + * @uri: uri + * @nstack: namespace stack + * @base_uri: base URI + * + * Convert #raptor_uri to a string. + * Caller has responsibility to free the string. + * + * Note: This creates and destroys several internal objects for each + * call so for more efficient writing, create a turtle serializer. + * + * Return value: the new string or NULL on failure. + */ +unsigned char* +raptor_uri_to_turtle_string(raptor_world *world, + raptor_uri* uri, + raptor_namespace_stack *nstack, + raptor_uri *base_uri) +{ + return raptor_uri_to_turtle_counted_string(world, uri, nstack, base_uri, NULL); +} + + + +/** + * raptor_term_to_turtle_counted_string: + * @term: term + * @nstack: namespace stack + * @base_uri: base URI + * @len_p: Pointer to location to store length of new string (if not NULL) + * + * Convert #raptor_term to a string. + * Caller has responsibility to free the string. + * + * Note: This creates and destroys several internal objects for each + * call so for more efficient writing, create a turtle serializer. + * + * See also raptor_term_to_counted_string() which writes in simpler + * N-Triples with no Turtle abbreviated forms, and is quicker. + * + * Return value: the new string or NULL on failure. 
The length of + * the new string is returned in *@len_p if len_p is not NULL. + */ +unsigned char* +raptor_term_to_turtle_counted_string(raptor_term* term, + raptor_namespace_stack *nstack, + raptor_uri *base_uri, + size_t *len_p) +{ + int rc; + raptor_iostream* iostr; + unsigned char *s; + iostr = raptor_new_iostream_to_string(term->world, + (void**)&s, len_p, malloc); + if(!iostr) + return NULL; + + rc = raptor_term_turtle_write(iostr, term, nstack, base_uri); + + raptor_free_iostream(iostr); + if(rc) { + free(s); + s = NULL; + } + + return s; +} + +/** + * raptor_term_to_turtle_string: + * @term: term + * @nstack: namespace stack + * @base_uri: base URI + * + * Convert #raptor_term to a string. + * Caller has responsibility to free the string. + * + * See also raptor_term_to_counted_string() which writes in simpler + * N-Triples with no Turtle abbreviated forms, and is quicker. + * + * Return value: the new string or NULL on failure. + */ +unsigned char* +raptor_term_to_turtle_string(raptor_term* term, + raptor_namespace_stack *nstack, + raptor_uri *base_uri) +{ + return raptor_term_to_turtle_counted_string(term, nstack, base_uri, NULL); +} + diff --git a/src/raptor_set.c b/src/raptor_set.c new file mode 100644 index 0000000..334416c --- /dev/null +++ b/src/raptor_set.c @@ -0,0 +1,329 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_set.c - Sets for checking IDs + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. 
+ * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <sys/types.h> + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> /* for abort() as used in errors */ +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +/* + * The only methods needed here are: + * Create Set + * Destroy Set + * Check a (base, ID) pair present add it if not, return if added/not + * + */ + +struct raptor_base_id_set_s +{ + raptor_world* world; + + /* The base URI of this set of IDs */ + raptor_uri *uri; + + /* neighbour ID sets */ + struct raptor_base_id_set_s* prev; + struct raptor_base_id_set_s* next; + + /* binary tree */ + raptor_avltree* tree; +}; +typedef struct raptor_base_id_set_s raptor_base_id_set; + + +struct raptor_id_set_s +{ + raptor_world* world; + + /* start of trees, 1 per base URI */ + struct raptor_base_id_set_s* first; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + int hits; + int misses; +#endif +}; + + +/* functions implementing the ID set api */ + +/** + * raptor_new_id_set: + * @world: raptor_world object + * + * INTERNAL - Constructor - create a new ID set. + * + * Return value: non 0 on failure + **/ +raptor_id_set* +raptor_new_id_set(raptor_world* world) +{ + raptor_id_set* set = RAPTOR_CALLOC(raptor_id_set*, 1, sizeof(*set)); + if(!set) + return NULL; + + set->world = world; + + return set; +} + + +/** + * raptor_free_base_id_set: + * @set: #raptor_base_id_set + * + * INTERNAL - Destructor - Free a Base ID Set. 
+ * + **/ +static void +raptor_free_base_id_set(raptor_base_id_set *base) +{ + if(base->tree) + raptor_free_avltree(base->tree); + if(base->uri) + raptor_free_uri(base->uri); + RAPTOR_FREE(raptor_base_id_set, base); +} + + +/** + * raptor_free_id_set: + * @set: #raptor_id_set + * + * INTERNAL - Destructor - Free ID Set. + * + **/ +void +raptor_free_id_set(raptor_id_set *set) +{ + raptor_base_id_set *base; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(set, raptor_id_set); + + base = set->first; + while(base) { + raptor_base_id_set *next = base->next; + raptor_free_base_id_set(base); + base = next; + } + RAPTOR_FREE(raptor_id_set, set); +} + + + +/** + * raptor_id_set_add: + * @set: #raptor_id_set + * @base_uri: base #raptor_uri of identifier + * @id: identifier name + * @id_len: length of identifier + * + * INTERNAL - Add an item to the set. + * + * Return value: <0 on failure, 0 on success, 1 if already present + **/ +int +raptor_id_set_add(raptor_id_set* set, raptor_uri *base_uri, + const unsigned char *id, size_t id_len) +{ + raptor_base_id_set *base; + char* item; + + if(!base_uri || !id || !id_len) + return -1; + + base = set->first; + while(base) { + if(raptor_uri_equals(base->uri, base_uri)) + break; + base = base->next; + } + + if(!base) { + /* a set for this base_uri not found */ + base = RAPTOR_CALLOC(raptor_base_id_set*, 1, sizeof(*base)); + if(!base) + return -1; + + base->world = set->world; + + base->uri = raptor_uri_copy(base_uri); + + base->tree = raptor_new_avltree((raptor_data_compare_handler)strcmp, + free, 0); + + /* Add to the start of the list */ + if(set->first) + set->first->prev = base; + /* base->prev = NULL; */ + base->next = set->first; + + set->first = base; + } else { + /* If not at the start of the list, move there */ + if(base != set->first) { + /* remove from the list */ + base->prev->next = base->next; + if(base->next) + base->next->prev = base->prev; + /* add at the start of the list */ + set->first->prev = base; + base->prev = NULL; + 
base->next = set->first; + } + } + + item = (char*)raptor_avltree_search(base->tree, id); + + /* if already there, error */ + if(item) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + set->misses++; +#endif + return 1; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + set->hits++; +#endif + + item = RAPTOR_MALLOC(char*, id_len + 1); + if(!item) + return 1; + + memcpy(item, id, id_len + 1); + + return raptor_avltree_add(base->tree, item); +} + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +void +raptor_id_set_stats_print(raptor_id_set* set, FILE *stream) { + fprintf(stream, "set hits: %d misses: %d\n", set->hits, set->misses); +} +#endif + +#endif + + +#ifdef STANDALONE + +/* one more prototype */ +int main(int argc, char *argv[]); + + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = raptor_basename(argv[0]); + const char *items[8] = { "ron", "amy", "jen", "bij", "jib", "daj", "jim", NULL }; + raptor_id_set *set; + raptor_uri *base_uri; + int i = 0; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + base_uri = raptor_new_uri(world, (const unsigned char*)"http://example.org/base#"); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Creating set\n", program); +#endif + + set = raptor_new_id_set(world); + if(!set) { + fprintf(stderr, "%s: Failed to create set\n", program); + exit(1); + } + + for(i = 0; items[i]; i++) { + size_t len = strlen(items[i]); + int rc; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Adding set item '%s'\n", program, items[i]); +#endif + + rc = raptor_id_set_add(set, base_uri, (const unsigned char*)items[i], len); +if(rc) { + fprintf(stderr, "%s: Adding set item %d '%s' failed, returning error %d\n", + program, i, items[i], rc); + exit(1); + } + } + + for(i = 0; items[i]; i++) { + size_t len = strlen(items[i]); + int rc; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Adding duplicate set item 
'%s'\n", program, items[i]); +#endif + + rc = raptor_id_set_add(set, base_uri, (const unsigned char*)items[i], len); + if(rc <= 0) { + fprintf(stderr, "%s: Adding duplicate set item %d '%s' succeeded, should have failed, returning error %d\n", + program, i, items[i], rc); + exit(1); + } + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + raptor_id_set_stats_print(set, stderr); +#endif + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Freeing set\n", program); +#endif + raptor_free_id_set(set); + + raptor_free_uri(base_uri); + + raptor_free_world(world); + + /* keep gcc -Wall happy */ + return(0); +} + +#endif diff --git a/src/raptor_statement.c b/src/raptor_statement.c new file mode 100644 index 0000000..52317bc --- /dev/null +++ b/src/raptor_statement.c @@ -0,0 +1,424 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_statement.c - Raptor statements + * + * Copyright (C) 2008-2010, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +/* for ptrdiff_t */ +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif +#include <stdarg.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* prototypes for helper functions */ + + +/** + * raptor_new_statement: + * @world: raptor world + * + * Constructor - create a new #raptor_statement. + * + * Return value: new raptor statement or NULL on failure + */ +raptor_statement* +raptor_new_statement(raptor_world *world) +{ + raptor_statement* statement; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + statement = RAPTOR_CALLOC(raptor_statement*, 1, sizeof(*statement)); + if(!statement) + return NULL; + + statement->world = world; + /* dynamic - usage counted */ + statement->usage = 1; + + return statement; +} + + +/** + * raptor_new_statement_from_nodes: + * @world: raptor world + * @subject: subject term (or NULL) + * @predicate: predicate term (or NULL) + * @object: object term (or NULL) + * @graph: graph name term (or NULL) + * + * Constructor - create a new #raptor_statement from a set of terms + * + * The @subject, @predicate, @object and @graph become owned by the statement. 
+ * + * Return value: new raptor statement or NULL on failure + */ +raptor_statement* +raptor_new_statement_from_nodes(raptor_world* world, raptor_term *subject, + raptor_term *predicate, raptor_term *object, + raptor_term *graph) +{ + raptor_statement* t; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + t = raptor_new_statement(world); + if(!t) { + if(subject) + raptor_free_term(subject); + if(predicate) + raptor_free_term(predicate); + if(object) + raptor_free_term(object); + if(graph) + raptor_free_term(graph); + return NULL; + } + + t->subject = subject; + t->predicate = predicate; + t->object = object; + t->graph = graph; + + return t; +} + + +/** + * raptor_statement_init: + * @statement: statement to initialize + * @world: raptor world + * + * Initialize a static #raptor_statement. + * + */ +void +raptor_statement_init(raptor_statement *statement, raptor_world *world) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(world, raptor_world); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN(statement, raptor_statement); + + /* ensure all fields are set to NULL to start with */ + memset(statement, 0, sizeof(*statement)); + + statement->world = world; + + /* static - not usage counted */ + statement->usage = -1; +} + + +/** + * raptor_statement_copy: + * @statement: statement to copy + * + * Copy a #raptor_statement. 
+ * + * Return value: a new #raptor_statement or NULL on error + */ +raptor_statement* +raptor_statement_copy(raptor_statement *statement) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(statement, raptor_statement, NULL); + + /* static - not usage counted */ + if(statement->usage < 0) { + raptor_statement* s2; + /* s2 will be a dynamic, usage->counted statement */ + s2 = raptor_new_statement(statement->world); + if(!s2) + return NULL; + + s2->world = statement->world; + if(statement->subject) + s2->subject = raptor_term_copy(statement->subject); + if(statement->predicate) + s2->predicate = raptor_term_copy(statement->predicate); + if(statement->object) + s2->object = raptor_term_copy(statement->object); + if(statement->graph) + s2->graph = raptor_term_copy(statement->graph); + + return s2; + } + + statement->usage++; + + return statement; +} + + +/** + * raptor_statement_clear: + * @statement: #raptor_statement object + * + * Empty a raptor_statement of terms. + * + **/ +void +raptor_statement_clear(raptor_statement *statement) +{ + if(!statement) + return; + + /* raptor_free_term() does a NULL check */ + + raptor_free_term(statement->subject); + statement->subject = NULL; + + raptor_free_term(statement->predicate); + statement->predicate = NULL; + + raptor_free_term(statement->object); + statement->object = NULL; + + raptor_free_term(statement->graph); + statement->graph = NULL; +} + + +/** + * raptor_free_statement: + * @statement: statement + * + * Destructor + * + */ +void +raptor_free_statement(raptor_statement *statement) +{ + /* dynamically or statically allocated? */ + int is_dynamic; + + if(!statement) + return; + + is_dynamic = (statement->usage >= 0); + + /* dynamically allocated and still in use? 
*/ + if(is_dynamic && --statement->usage) + return; + + raptor_statement_clear(statement); + + if(is_dynamic) + RAPTOR_FREE(raptor_statement, statement); +} + + +/** + * raptor_statement_print: + * @statement: #raptor_statement object to print + * @stream: FILE* stream + * + * Print a raptor_statement to a stream. + * + * Return value: non-0 on failure + **/ +int +raptor_statement_print(const raptor_statement * statement, FILE *stream) +{ + int rc = 0; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(statement, raptor_statement, 1); + + fputc('[', stream); + + if(!statement->subject) { + fputs("NULL", stream); + } else { + if(statement->subject->type == RAPTOR_TERM_TYPE_BLANK) + fputs((const char*)statement->subject->value.blank.string, stream); + else + raptor_uri_print(statement->subject->value.uri, stream); + } + + fputs(", ", stream); + + if(statement->predicate) + raptor_uri_print(statement->predicate->value.uri, stream); + else + fputs("NULL", stream); + + fputs(", ", stream); + + if(!statement->object) { + fputs("NULL", stream); + } else { + if(statement->object->type == RAPTOR_TERM_TYPE_LITERAL) { + if(statement->object->value.literal.datatype) { + raptor_uri* dt_uri = statement->object->value.literal.datatype; + fputc('<', stream); + fputs((const char*)raptor_uri_as_string(dt_uri), stream); + fputc('>', stream); + } + fputc('"', stream); + fputs((const char*)statement->object->value.literal.string, stream); + fputc('"', stream); + } else if(statement->object->type == RAPTOR_TERM_TYPE_BLANK) + fputs((const char*)statement->object->value.blank.string, stream); + else { + raptor_uri_print(statement->object->value.uri, stream); + } + } + + if(statement->graph) { + if(statement->graph->type == RAPTOR_TERM_TYPE_BLANK && + statement->graph->value.blank.string) { + fputs(", ", stream); + + fputs((const char*)statement->graph->value.blank.string, stream); + } else if(statement->graph->type == RAPTOR_TERM_TYPE_URI && + statement->graph->value.uri) { + fputs(", ", 
stream); + raptor_uri_print(statement->graph->value.uri, stream); + } + } + + fputc(']', stream); + + return rc; +} + + +/** + * raptor_statement_print_as_ntriples: + * @statement: #raptor_statement to print + * @stream: FILE* stream + * + * Print a raptor_statement in N-Triples form. + * + * Return value: non-0 on failure + **/ +int +raptor_statement_print_as_ntriples(const raptor_statement * statement, + FILE *stream) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(statement, raptor_statement, 1); + + if(raptor_term_print_as_ntriples(statement->subject, stream)) + return 1; + + fputc(' ', stream); + if(raptor_term_print_as_ntriples(statement->predicate, stream)) + return 1; + + fputc(' ', stream); + if(raptor_term_print_as_ntriples(statement->object, stream)) + return 1; + + fputs(" .", stream); + + return 0; +} + + +/** + * raptor_statement_compare: + * @s1: first statement + * @s2: second statement + * + * Compare a pair of #raptor_statement + * + * Uses raptor_term_compare() to check ordering between subjects, + * predicates and objects of statements. 
+ * + * Return value: <0 if s1 is before s2, 0 if equal, >0 if s1 is after s2 + */ +int +raptor_statement_compare(const raptor_statement *s1, + const raptor_statement *s2) +{ + int d = 0; + + if(!s1 || !s2) { + /* If one or both are NULL, return a stable comparison order */ + ptrdiff_t pd = (s2 - s1); + + /* copy the sign of the (unknown size) signed integer 'd' into an + * int result + */ + return (pd > 0) - (pd < 0); + } + + d = raptor_term_compare(s1->subject, s2->subject); + if(d) + return d; + + /* predicates are URIs */ + d = raptor_term_compare(s1->predicate, s2->predicate); + if(d) + return d; + + /* objects are URIs or blank nodes or literals */ + d = raptor_term_compare(s1->object, s2->object); + if(d) + return d; + + /* graphs are URIs or blank nodes */ + d = raptor_term_compare(s1->graph, s2->graph); + + return d; +} + + +/** + * raptor_statement_equals: + * @s1: first statement + * @s2: second statement + * + * Compare a pair of #raptor_statement for equality + * + * Return value: non-0 if statements are equal + */ +int +raptor_statement_equals(const raptor_statement* s1, const raptor_statement* s2) +{ + if(!s1 || !s2) + return 0; + + if(!raptor_term_equals(s1->subject, s2->subject)) + return 0; + + if(!raptor_term_equals(s1->predicate, s2->predicate)) + return 0; + + if(!raptor_term_equals(s1->object, s2->object)) + return 0; + + return 1; +} diff --git a/src/raptor_stringbuffer.c b/src/raptor_stringbuffer.c new file mode 100644 index 0000000..03223c3 --- /dev/null +++ b/src/raptor_stringbuffer.c @@ -0,0 +1,902 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_stringbuffer.c - Stringbuffer class for growing strings + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. 
GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <sys/types.h> + +#ifdef HAVE_STDLIB_H +#include <stdlib.h> /* for abort() as used in errors */ +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +struct raptor_stringbuffer_node_s +{ + struct raptor_stringbuffer_node_s* next; + unsigned char *string; + size_t length; +}; +typedef struct raptor_stringbuffer_node_s raptor_stringbuffer_node; + + +struct raptor_stringbuffer_s +{ + /* Pointing to the first item in the list of nodes */ + raptor_stringbuffer_node* head; + /* and the last */ + raptor_stringbuffer_node* tail; + + /* total length of the string */ + size_t length; + + /* frozen string if already calculated, or NULL if not present */ + unsigned char *string; +}; + + +/* prototypes for local functions */ +static int raptor_stringbuffer_append_string_common(raptor_stringbuffer* stringbuffer, const unsigned char *string, size_t length, int do_copy); + + +/* functions implementing the stringbuffer api */ + +/** + * raptor_new_stringbuffer: + * + * Create a new stringbuffer. 
+ * + * Return value: pointer to a raptor_stringbuffer object or NULL on failure + **/ +raptor_stringbuffer* +raptor_new_stringbuffer(void) +{ + raptor_stringbuffer* sb; + + sb = RAPTOR_CALLOC(raptor_stringbuffer*, 1, sizeof(*sb)); + return sb; +} + + +/** + * raptor_free_stringbuffer: + * @stringbuffer: stringbuffer object to destroy. + * + * Destroy a stringbuffer. + * + **/ +void +raptor_free_stringbuffer(raptor_stringbuffer *stringbuffer) +{ + if(!stringbuffer) + return; + + if(stringbuffer->head) { + raptor_stringbuffer_node *node = stringbuffer->head; + + while(node) { + raptor_stringbuffer_node *next = node->next; + + if(node->string) + RAPTOR_FREE(char*, node->string); + RAPTOR_FREE(raptor_stringbuffer_node, node); + node = next; + } + } + + if(stringbuffer->string) + RAPTOR_FREE(char*, stringbuffer->string); + + RAPTOR_FREE(raptor_stringbuffer, stringbuffer); +} + + + +/** + * raptor_stringbuffer_append_string_common: + * @stringbuffer: raptor stringbuffer + * @string: string + * @length: length of string + * @do_copy: non-0 to copy the string + * + * Add a string to the stringbuffer. + * + * INTERNAL + * + * If @string is NULL or @length is 0, no work is performed. + * + * If @do_copy is non-0, the passed-in string is copied into new memory + * otherwise the stringbuffer becomes the owner of the string pointer + * and will free it when the stringbuffer is destroyed. 
+ * + * Return value: non-0 on failure + **/ +static int +raptor_stringbuffer_append_string_common(raptor_stringbuffer* stringbuffer, + const unsigned char *string, + size_t length, + int do_copy) +{ + raptor_stringbuffer_node *node; + + if(!string || !length) + return 0; + + node = RAPTOR_MALLOC(raptor_stringbuffer_node*, sizeof(*node)); + if(!node) { + if(!do_copy) + RAPTOR_FREE(char*, string); + return 1; + } + + if(do_copy) { + /* Note this copy does not include the \0 character - not needed */ + node->string = RAPTOR_MALLOC(unsigned char*, length); + if(!node->string) { + RAPTOR_FREE(raptor_stringbuffer_node, node); + return 1; + } + memcpy(node->string, string, length); + } else + node->string = (unsigned char*)string; + node->length = length; + + + if(stringbuffer->tail) { + stringbuffer->tail->next = node; + stringbuffer->tail = node; + } else + stringbuffer->head = stringbuffer->tail = node; + node->next = NULL; + + if(stringbuffer->string) { + RAPTOR_FREE(char*, stringbuffer->string); + stringbuffer->string = NULL; + } + stringbuffer->length += length; + + return 0; +} + + + + +/** + * raptor_stringbuffer_append_counted_string: + * @stringbuffer: raptor stringbuffer + * @string: string + * @length: length of string + * @do_copy: non-0 to copy the string + * + * Add a counted string to the stringbuffer. + * + * If @string is NULL or @length is 0, no work is performed. + * + * If @do_copy is non-0, the passed-in string is copied into new memory + * otherwise the stringbuffer becomes the owner of the string pointer + * and will free it when the stringbuffer is destroyed. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_counted_string(raptor_stringbuffer* stringbuffer, + const unsigned char *string, size_t length, + int do_copy) +{ + if(!string || !length) + return 0; + + return raptor_stringbuffer_append_string_common(stringbuffer, string, length, do_copy); +} + + +/** + * raptor_stringbuffer_append_string: + * @stringbuffer: raptor stringbuffer + * @string: string + * @do_copy: non-0 to copy the string + * + * Add a string to the stringbuffer. + * + * If @string is NULL, no work is performed. + * + * If @do_copy is non-0, the passed-in string is copied into new memory + * otherwise the stringbuffer becomes the owner of the string pointer + * and will free it when the stringbuffer is destroyed. + * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_string(raptor_stringbuffer* stringbuffer, + const unsigned char *string, int do_copy) +{ + if(!string) + return 0; + + return raptor_stringbuffer_append_string_common(stringbuffer, string, strlen((const char*)string), do_copy); +} + + +/** + * raptor_stringbuffer_append_decimal: + * @stringbuffer: raptor stringbuffer + * @integer: integer to format as decimal and add + * + * Add an integer in decimal to the stringbuffer. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_decimal(raptor_stringbuffer* stringbuffer, + int integer) +{ + /* enough for 64 bit signed integer + * INT64_MAX is 9223372036854775807 (19 digits) + 1 for sign + */ + unsigned char buf[20]; + unsigned char *p; + int i = integer; + size_t length = 1; + if(integer < 0) { + length++; + i= -integer; + } + while(i /= 10) + length++; + + p = buf+length-1; + i = integer; + if(i < 0) + i= -i; + do { + *p-- = RAPTOR_GOOD_CAST(unsigned char, '0'+(i %10)); + i /= 10; + } while(i); + if(integer < 0) + *p= '-'; + + return raptor_stringbuffer_append_counted_string(stringbuffer, buf, length, 1); +} + + +/** + * raptor_stringbuffer_append_stringbuffer: + * @stringbuffer: #raptor_stringbuffer + * @append: #raptor_stringbuffer to append + * + * Add a stringbuffer to the stringbuffer. + * + * This function removes the content from the appending stringbuffer, + * making it empty and appends it to the supplied stringbuffer. + * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_stringbuffer(raptor_stringbuffer* stringbuffer, + raptor_stringbuffer* append) +{ + raptor_stringbuffer_node *node = append->head; + + if(!node) + return 0; + + /* move all append nodes to stringbuffer */ + if(stringbuffer->tail) { + stringbuffer->tail->next = node; + } else + stringbuffer->head = node; + + stringbuffer->tail = append->tail; + + /* adjust our length */ + stringbuffer->length += append->length; + if(stringbuffer->string) { + RAPTOR_FREE(char*, stringbuffer->string); + stringbuffer->string = NULL; + } + + /* zap append content */ + append->head = append->tail = NULL; + append->length = 0; + if(append->string) { + RAPTOR_FREE(char*, append->string); + append->string = NULL; + } + + return 0; +} + + + + +/** + * raptor_stringbuffer_prepend_string_common: + * @stringbuffer: raptor stringbuffer + * @string: string + * @length: length of string + * @do_copy: non-0 to copy the string + * + * Add a 
string to the start of a stringbuffer.
 *
 * INTERNAL
 *
 * If do_copy is non-0, the passed-in string is copied into new memory
 * otherwise the stringbuffer becomes the owner of the string pointer
 * and will free it when the stringbuffer is destroyed.
 *
 * Return value: non-0 on failure
 **/
static int
raptor_stringbuffer_prepend_string_common(raptor_stringbuffer* stringbuffer,
                                          const unsigned char *string, size_t length,
                                          int do_copy)
{
  raptor_stringbuffer_node *node;

  node = RAPTOR_MALLOC(raptor_stringbuffer_node*, sizeof(*node));
  if(!node) {
    /* Ownership of an uncopied string has already passed to us, so it
     * must be released here - the same as the append path does -
     * otherwise it is leaked.
     */
    if(!do_copy)
      RAPTOR_FREE(char*, string);
    return 1;
  }

  if(do_copy) {
    /* Note this copy does not include the \0 character - not needed */
    node->string = RAPTOR_MALLOC(unsigned char*, length);
    if(!node->string) {
      RAPTOR_FREE(raptor_stringbuffer_node, node);
      return 1;
    }
    memcpy(node->string, string, length);
  } else
    node->string = (unsigned char*)string;
  node->length = length;


  /* the new node becomes the head; it is also the tail of an empty list */
  node->next = stringbuffer->head;
  if(stringbuffer->head)
    stringbuffer->head = node;
  else
    stringbuffer->head = stringbuffer->tail = node;

  /* the frozen string no longer matches the content: drop it */
  if(stringbuffer->string) {
    RAPTOR_FREE(char*, stringbuffer->string);
    stringbuffer->string = NULL;
  }
  stringbuffer->length += length;

  return 0;
}




/**
 * raptor_stringbuffer_prepend_counted_string:
 * @stringbuffer: raptor stringbuffer
 * @string: string
 * @length: length of string
 * @do_copy: non-0 to copy the string

 * If do_copy is non-0, the passed-in string is copied into new memory
 * otherwise the stringbuffer becomes the owner of the string pointer
 * and will free it when the stringbuffer is destroyed.
 *
 * Add a string to the start of the stringbuffer.
+ * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_prepend_counted_string(raptor_stringbuffer* stringbuffer, + const unsigned char *string, size_t length, + int do_copy) +{ + return raptor_stringbuffer_prepend_string_common(stringbuffer, string, length, do_copy); +} + + +/** + * raptor_stringbuffer_prepend_string: + * @stringbuffer: raptor stringbuffer + * @string: string + * @do_copy: non-0 to copy the string + * + * Add a string to the start of the stringbuffer. + * + * If do_copy is non-0, the passed-in string is copied into new memory + * otherwise the stringbuffer becomes the owner of the string pointer + * and will free it when the stringbuffer is destroyed. + * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_prepend_string(raptor_stringbuffer* stringbuffer, + const unsigned char *string, int do_copy) +{ + return raptor_stringbuffer_prepend_string_common(stringbuffer, string, strlen((const char*)string), do_copy); +} + + +/** + * raptor_stringbuffer_length: + * @stringbuffer: raptor stringbuffer + * + * Return the stringbuffer length. + * + * Return value: size of stringbuffer + **/ +size_t +raptor_stringbuffer_length(raptor_stringbuffer* stringbuffer) +{ + return stringbuffer->length; +} + + + +/** + * raptor_stringbuffer_as_string: + * @stringbuffer: raptor stringbuffer + * + * Return the stringbuffer as a C string. + * + * Note: the return value is a to a shared string that the stringbuffer + * allocates and manages. + * + * Return value: NULL on failure or stringbuffer is empty, otherwise + * a pointer to a shared copy of the string. 
+ **/ +unsigned char * +raptor_stringbuffer_as_string(raptor_stringbuffer* stringbuffer) +{ + raptor_stringbuffer_node *node; + unsigned char *p; + + if(!stringbuffer->length) + return NULL; + if(stringbuffer->string) + return stringbuffer->string; + + stringbuffer->string = RAPTOR_MALLOC(unsigned char*, stringbuffer->length + 1); + if(!stringbuffer->string) + return NULL; + + node = stringbuffer->head; + p = stringbuffer->string; + while(node) { + memcpy(p, node->string, node->length); + p+= node->length; + node = node->next; + } + *p='\0'; + return stringbuffer->string; +} + + +/** + * raptor_stringbuffer_copy_to_string: + * @stringbuffer: raptor stringbuffer + * @string: output string + * @length: size of output string + * + * Copy the stringbuffer into a string. + * + * Copies the underlying string to a pre-allocated buffer. The + * output string is always '\0' terminated. + * + * Return value: non-0 on failure such as stringbuffer is empty, buffer is too small + **/ +int +raptor_stringbuffer_copy_to_string(raptor_stringbuffer* stringbuffer, + unsigned char *string, size_t length) +{ + raptor_stringbuffer_node *node; + unsigned char *p; + + if(!string || length < 1) + return 1; + + if(!stringbuffer->length) + return 0; + + p = string; + for(node = stringbuffer->head; node; node = node->next) { + if(node->length > length) { + p[-1]='\0'; + return 1; + } + memcpy(p, node->string, node->length); + p+= node->length; + length-= node->length; + } + *p='\0'; + return 0; +} + + + +/** + * raptor_stringbuffer_append_hexadecimal: + * @stringbuffer: raptor stringbuffer + * @hex: integer to format + * + * Add an integer formatted in hexdecimal (base 16) to the stringbuffer. + * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_hexadecimal(raptor_stringbuffer* stringbuffer, + int hex) +{ + unsigned char buf[2]; + + if(hex < 0 || hex > 0xF) + return 1; + + *buf = RAPTOR_GOOD_CAST(unsigned char, (hex < 10) ? 
('0' + hex) : ('A' + hex - 10)); + buf[1] = '\0'; + + return raptor_stringbuffer_append_counted_string(stringbuffer, buf, 1, 1); +} + + +/* RFC3986 Unreserved */ +#define IS_URI_UNRESERVED(c) ( (c >= 'A' && c <= 'F') || \ + (c >= 'a' && c <= 'f') || \ + (c >= '0' && c <= '9') || \ + (c == '-' || c == '.' || c == '_' || c == '~') ) +#define IS_URI_SAFE(c) (IS_URI_UNRESERVED(c)) + + +/** + * raptor_stringbuffer_append_uri_escaped_counted_string: + * @sb: raptor stringbuffer + * @string: string + * @length: length of string + * @space_is_plus: if non-0, escape spaces as '+' otherwise percent-encode them + * + * Add a URI-escaped version of @string to the stringbuffer. + * + * If @string is NULL or @length is 0, no work is performed. + * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_uri_escaped_counted_string(raptor_stringbuffer* sb, + const char* string, + size_t length, + int space_is_plus) +{ + unsigned int i; + unsigned char buf[2]; + buf[1] = '\0'; + + if(!string || !length) + return 0; + + for(i = 0; i < length; i++) { + char c = string[i]; + if(!c) + break; + + if(IS_URI_SAFE(c)) { + *buf = RAPTOR_GOOD_CAST(unsigned char, c); + + if(raptor_stringbuffer_append_counted_string(sb, buf, 1, 1)) + return 1; + } else if (c == ' ' && space_is_plus) { + *buf = '+'; + + if(raptor_stringbuffer_append_counted_string(sb, buf, 1, 1)) + return 1; + } else { + *buf = '%'; + if(raptor_stringbuffer_append_counted_string(sb, buf, 1, 1)) + return 1; + + if(raptor_stringbuffer_append_hexadecimal(sb, (c & 0xf0) >> 4)) + return 1; + + if(raptor_stringbuffer_append_hexadecimal(sb, (c & 0x0f))) + return 1; + } + } + + return 0; +} + + +#endif + + + +#ifdef STANDALONE + +/* one more prototype */ +int main(int argc, char *argv[]); + + +int +main(int argc, char *argv[]) +{ + const char *program = raptor_basename(argv[0]); +#define TEST_ITEMS_COUNT 9 + const char *items[TEST_ITEMS_COUNT] = { "the", "quick" ,"brown", "fox", "jumps", "over", "the", "lazy", "dog" }; 
+ const char *items_string = "thequickbrownfoxjumpsoverthelazydog"; + const size_t items_len = 35; + const char *test_integer_string = "abcd"; +#define TEST_INTEGERS_COUNT 7 + const int test_integers[TEST_INTEGERS_COUNT]={ 0, 1, -1, 11, 1234, 12345, -12345 }; + const char *test_integer_results[TEST_INTEGERS_COUNT]={ "abcd0", "abcd1", "abcd-1", "abcd11", "abcd1234", "abcd12345", "abcd-12345" }; + raptor_stringbuffer *sb; + unsigned char *str; + size_t len; + int i = 0; + raptor_stringbuffer *sb1, *sb2; +#define TEST_APPEND_COUNT 2 + const char *test_append_results[TEST_APPEND_COUNT]={ "thebrownjumpsthedog", "quickfoxoverlazy" }; + const char *test_append_results_total="thebrownjumpsthedogquickfoxoverlazy"; +#define COPY_STRING_BUFFER_SIZE 100 + unsigned char *copy_string; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Creating string buffer\n", program); +#endif + + /* test appending */ + + sb = raptor_new_stringbuffer(); + if(!sb) { + fprintf(stderr, "%s: Failed to create string buffer\n", program); + exit(1); + } + + for(i = 0; i < TEST_ITEMS_COUNT; i++) { + int rc; + len = strlen(items[i]); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Adding string buffer item '%s'\n", program, items[i]); +#endif + + rc = raptor_stringbuffer_append_counted_string(sb, (unsigned char*)items[i], len, 1); + if(rc) { + fprintf(stderr, "%s: Adding string buffer item %d '%s' failed, returning error %d\n", + program, i, items[i], rc); + exit(1); + } + } + + len = raptor_stringbuffer_length(sb); + if(len != items_len) { + fprintf(stderr, "%s: string buffer len is %d, expected %d\n", program, + (int)len, (int)items_len); + exit(1); + } + + str = raptor_stringbuffer_as_string(sb); + if(strcmp((const char*)str, items_string)) { + fprintf(stderr, "%s: string buffer contains '%s', expected '%s'\n", + program, str, items_string); + exit(1); + } + + raptor_free_stringbuffer(sb); + + + /* test prepending */ + +#if defined(RAPTOR_DEBUG) && 
RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Creating string buffer\n", program); +#endif + + sb = raptor_new_stringbuffer(); + if(!sb) { + fprintf(stderr, "%s: Failed to create string buffer\n", program); + exit(1); + } + + for(i = TEST_ITEMS_COUNT-1; i>=0 ; i--) { + int rc; + len = strlen(items[i]); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Prepending string buffer item '%s'\n", program, items[i]); +#endif + + rc = raptor_stringbuffer_prepend_counted_string(sb, (unsigned char*)items[i], len, 1); + if(rc) { + fprintf(stderr, "%s: Prepending string buffer item %d '%s' failed, returning error %d\n", + program, i, items[i], rc); + exit(1); + } + } + + len = raptor_stringbuffer_length(sb); + if(len != items_len) { + fprintf(stderr, "%s: string buffer len is %d, expected %d\n", program, + (int)len, (int)items_len); + exit(1); + } + + str = raptor_stringbuffer_as_string(sb); + if(strcmp((const char*)str, items_string)) { + fprintf(stderr, "%s: string buffer contains '%s', expected '%s'\n", + program, str, items_string); + exit(1); + } + + + /* test adding integers */ + + for(i = 0; i < TEST_INTEGERS_COUNT; i++) { + raptor_stringbuffer *isb = raptor_new_stringbuffer(); + if(!isb) { + fprintf(stderr, "%s: Failed to create string buffer\n", program); + exit(1); + } + + raptor_stringbuffer_append_string(isb, + (const unsigned char*)test_integer_string, 1); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Adding decimal integer %d to buffer\n", program, test_integers[i]); +#endif + + raptor_stringbuffer_append_decimal(isb, test_integers[i]); + + str = raptor_stringbuffer_as_string(isb); + if(strcmp((const char*)str, test_integer_results[i])) { + fprintf(stderr, "%s: string buffer contains '%s', expected '%s'\n", + program, str, test_integer_results[i]); + exit(1); + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Freeing string buffer\n", program); +#endif + raptor_free_stringbuffer(isb); + } + + +#if 
defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Creating two stringbuffers to join\n", program); +#endif + + sb1 = raptor_new_stringbuffer(); + if(!sb1) { + fprintf(stderr, "%s: Failed to create string buffer\n", program); + exit(1); + } + sb2 = raptor_new_stringbuffer(); + if(!sb2) { + fprintf(stderr, "%s: Failed to create string buffer\n", program); + exit(1); + } + + for(i = 0; i < TEST_ITEMS_COUNT; i++) { + raptor_stringbuffer *sbx; + int rc; + len = strlen(items[i]); + + sbx = (i % 2) ? sb2 : sb1; + rc = raptor_stringbuffer_append_counted_string(sbx, (unsigned char*)items[i], len, 1); + if(rc) { + fprintf(stderr, "%s: Adding string buffer item %d '%s' failed, returning error %d\n", + program, i, items[i], rc); + exit(1); + } + } + + if(1) { + int rc; + + rc = raptor_stringbuffer_append_counted_string(sb1, (unsigned char*)"X", 0, 1); + if(rc) { + fprintf(stderr, "%s: Adding 0-length counted string failed, returning error %d\n", + program, rc); + exit(1); + } + rc = raptor_stringbuffer_append_string(sb1, NULL, 1); + if(rc) { + fprintf(stderr, "%s: Adding NULL string failed, returning error %d\n", + program, rc); + exit(1); + } + } + + str = raptor_stringbuffer_as_string(sb1); + if(strcmp((const char*)str, test_append_results[0])) { + fprintf(stderr, "%s: string buffer sb1 contains '%s', expected '%s'\n", + program, str, test_append_results[0]); + exit(1); + } + str = raptor_stringbuffer_as_string(sb2); + if(strcmp((const char*)str, test_append_results[1])) { + fprintf(stderr, "%s: string buffer sb2 contains '%s', expected '%s'\n", + program, str, test_append_results[1]); + exit(1); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Appended two stringbuffers\n", program); +#endif + + if(raptor_stringbuffer_append_stringbuffer(sb1, sb2)) { + fprintf(stderr, "%s: Failed to append string buffer\n", program); + exit(1); + } + + str = raptor_stringbuffer_as_string(sb1); + if(strcmp((const char*)str, 
test_append_results_total)) { + fprintf(stderr, "%s: appended string buffer contains '%s', expected '%s'\n", + program, str, test_append_results_total); + exit(1); + } + + len = raptor_stringbuffer_length(sb2); + if(len) { + fprintf(stderr, "%s: appended string buffer is length %d, not empty'\n", + program, (int)len); + exit(1); + } + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Copying string buffer to string\n", program); +#endif + + copy_string = (unsigned char*)malloc(COPY_STRING_BUFFER_SIZE); + if(raptor_stringbuffer_copy_to_string(sb1, copy_string, COPY_STRING_BUFFER_SIZE)) { + fprintf(stderr, "%s: copying string buffer to string failed\n", + program); + exit(1); + } + if(strcmp((const char*)copy_string, test_append_results_total)) { + fprintf(stderr, "%s: copied string buffer contains '%s', expected '%s'\n", + program, copy_string, test_append_results_total); + exit(1); + } + free(copy_string); + + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Freeing string buffers\n", program); +#endif + raptor_free_stringbuffer(sb1); + raptor_free_stringbuffer(sb2); + raptor_free_stringbuffer(sb); + + /* keep gcc -Wall happy */ + return(0); +} + +#endif diff --git a/src/raptor_syntax_description.c b/src/raptor_syntax_description.c new file mode 100644 index 0000000..eb549b2 --- /dev/null +++ b/src/raptor_syntax_description.c @@ -0,0 +1,109 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_syntax_description.c - Raptor syntax description API + * + * Copyright (C) 2010, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static unsigned int +count_strings_array(const char* const * array) +{ + unsigned int i; + + if(!array) + return 0; + + for(i = 0; (array[i]); i++) + ; + + return i; +} + + +static unsigned int +count_mime_types_array(const raptor_type_q* array) +{ + unsigned int i; + + if(!array) + return 0; + + for(i = 0; (array[i].mime_type); i++) + ; + + return i; +} + + +/** + * raptor_syntax_description_validate: + * @desc: description + * + * Validate a syntax description has the required fields (name, labels) and update counts + * + * Returns: non-0 on failure + **/ +int +raptor_syntax_description_validate(raptor_syntax_description* desc) +{ + if(!desc || !desc->names || !desc->names[0] || !desc->label) + return 1; + +#ifdef RAPTOR_DEBUG + /* Maintainer only check of static data */ + if(desc->mime_types) { + unsigned int i; + const raptor_type_q* type_q = NULL; + + for(i = 0; + (type_q = &desc->mime_types[i]) && type_q->mime_type; + i++) { + size_t len = strlen(type_q->mime_type); + if(len != type_q->mime_type_len) { + fprintf(stderr, + "Format %s mime type %s actual len %d static len %d\n", + desc->names[0], type_q->mime_type, + (int)len, (int)type_q->mime_type_len); + } + } + } +#endif + + desc->names_count = count_strings_array(desc->names); + if(!desc->names_count) + return 1; + + desc->mime_types_count = count_mime_types_array(desc->mime_types); + desc->uri_strings_count = 
count_strings_array(desc->uri_strings); + + return 0; +} diff --git a/src/raptor_term.c b/src/raptor_term.c new file mode 100644 index 0000000..d6fa52b --- /dev/null +++ b/src/raptor_term.c @@ -0,0 +1,1039 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_term.c - Raptor terms + * + * Copyright (C) 2010, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +/** + * raptor_new_term_from_uri: + * @world: raptor world + * @uri: uri + * + * Constructor - create a new URI statement term + * + * Takes a copy (reference) of the passed in @uri + * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_uri(raptor_world* world, raptor_uri* uri) +{ + raptor_term *t; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!uri) + return NULL; + + raptor_world_open(world); + + t = RAPTOR_CALLOC(raptor_term*, 1, sizeof(*t)); + if(!t) + return NULL; + + t->usage = 1; + t->world = world; + t->type = RAPTOR_TERM_TYPE_URI; + t->value.uri = raptor_uri_copy(uri); + + return t; +} + + +/** + * raptor_new_term_from_counted_uri_string: + * @world: raptor world + * @uri_string: UTF-8 encoded URI string. + * @length: length of URI string + * + * Constructor - create a new URI statement term from a UTF-8 encoded Unicode string + * + * Note: The @uri_string need not be NULL terminated - a NULL will be + * added to the copied string used. + * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_counted_uri_string(raptor_world* world, + const unsigned char *uri_string, + size_t length) +{ + raptor_term *t; + raptor_uri* uri; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + uri = raptor_new_uri_from_counted_string(world, uri_string, length); + if(!uri) + return NULL; + + t = raptor_new_term_from_uri(world, uri); + + raptor_free_uri(uri); + + return t; +} + + +/** + * raptor_new_term_from_uri_string: + * @world: raptor world + * @uri_string: UTF-8 encoded URI string. 
+ * + * Constructor - create a new URI statement term from a UTF-8 encoded Unicode string + * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_uri_string(raptor_world* world, + const unsigned char *uri_string) +{ + raptor_term *t; + raptor_uri* uri; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + uri = raptor_new_uri(world, uri_string); + if(!uri) + return NULL; + + t = raptor_new_term_from_uri(world, uri); + + raptor_free_uri(uri); + + return t; +} + + +/** + * raptor_new_term_from_counted_literal: + * @world: raptor world + * @literal: UTF-8 encoded literal string (or NULL for empty literal) + * @literal_len: length of literal + * @datatype: literal datatype URI (or NULL) + * @language: literal language (or NULL for no language) + * @language_len: literal language length + * + * Constructor - create a new literal statement term from a counted UTF-8 encoded literal string + * + * Takes copies of the passed in @literal, @datatype, @language + * + * Only one of @language or @datatype may be given. If both are + * given, NULL is returned. If @language is the empty string, it is + * the equivalent to NULL. + * + * Note: The @literal need not be NULL terminated - a NULL will be + * added to the copied string used. 
+ * + * Return value: new term or NULL on failure + */ +raptor_term* +raptor_new_term_from_counted_literal(raptor_world* world, + const unsigned char* literal, + size_t literal_len, + raptor_uri* datatype, + const unsigned char* language, + unsigned char language_len) +{ + raptor_term *t; + unsigned char* new_literal = NULL; + unsigned char* new_language = NULL; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + if(language && !*language) + language = NULL; + + if(language && datatype) + return NULL; + + + new_literal = RAPTOR_MALLOC(unsigned char*, literal_len + 1); + if(!new_literal) + return NULL; + + if(!literal || !*literal) + literal_len = 0; + + if(literal_len) { + memcpy(new_literal, literal, literal_len); + new_literal[literal_len] = '\0'; + } else + *new_literal = '\0'; + + if(language) { + unsigned char c; + unsigned char* l; + + new_language = RAPTOR_MALLOC(unsigned char*, language_len + 1); + if(!new_language) { + RAPTOR_FREE(char*, new_literal); + return NULL; + } + + l = new_language; + while((c = *language++)) { + if(c == '_') + c = '-'; + *l++ = c; + } + *l = '\0'; + } else + language_len = 0; + + if(datatype) + datatype = raptor_uri_copy(datatype); + + + t = RAPTOR_CALLOC(raptor_term*, 1, sizeof(*t)); + if(!t) { + if(new_literal) + RAPTOR_FREE(char*, new_literal); + if(new_language) + RAPTOR_FREE(char*, new_language); + if(datatype) + raptor_free_uri(datatype); + return NULL; + } + t->usage = 1; + t->world = world; + t->type = RAPTOR_TERM_TYPE_LITERAL; + t->value.literal.string = new_literal; + t->value.literal.string_len = RAPTOR_LANG_LEN_FROM_INT(literal_len); + t->value.literal.language = new_language; + t->value.literal.language_len = language_len; + t->value.literal.datatype = datatype; + + return t; +} + + +/** + * raptor_new_term_from_literal: + * @world: raptor world + * @literal: UTF-8 encoded literal string (or NULL for empty literal) + * @datatype: literal datatype URI (or NULL) + * @language: literal language 
(or NULL) + * + * Constructor - create a new literal statement term + * + * Takes copies of the passed in @literal, @datatype, @language + * + * Only one of @language or @datatype may be given. If both are + * given, NULL is returned. If @language is the empty string, it is + * the equivalent to NULL. + * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_literal(raptor_world* world, + const unsigned char* literal, + raptor_uri* datatype, + const unsigned char* language) +{ + size_t literal_len = 0; + size_t language_len = 0; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + if(literal) + literal_len = strlen(RAPTOR_GOOD_CAST(const char*, literal)); + + if(language) + language_len = strlen(RAPTOR_GOOD_CAST(const char*, language)); + + return raptor_new_term_from_counted_literal(world, literal, literal_len, + datatype, language, + RAPTOR_BAD_CAST(unsigned char, language_len)); +} + + +/** + * raptor_new_term_from_counted_blank: + * @world: raptor world + * @blank: UTF-8 encoded blank node identifier (or NULL) + * @length: length of identifier (or 0) + * + * Constructor - create a new blank node statement term from a counted UTF-8 encoded blank node ID + * + * Takes a copy of the passed in @blank + * + * If @blank is NULL, creates a new internal identifier and uses it. + * This will use the handler set with + * raptor_world_set_generate_bnodeid_parameters() + * + * Note: The @blank need not be NULL terminated - a NULL will be + * added to the copied string used. 
+ * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_counted_blank(raptor_world* world, + const unsigned char* blank, size_t length) +{ + raptor_term *t; + unsigned char* new_id; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + if (blank) { + new_id = RAPTOR_MALLOC(unsigned char*, length + 1); + if(!new_id) + return NULL; + memcpy(new_id, blank, length); + new_id[length] = '\0'; + } else { + new_id = raptor_world_generate_bnodeid(world); + length = strlen((const char*)new_id); + } + + t = RAPTOR_CALLOC(raptor_term*, 1, sizeof(*t)); + if(!t) { + RAPTOR_FREE(char*, new_id); + return NULL; + } + + t->usage = 1; + t->world = world; + t->type = RAPTOR_TERM_TYPE_BLANK; + t->value.blank.string = new_id; + t->value.blank.string_len = RAPTOR_BAD_CAST(int, length); + + return t; +} + + +/** + * raptor_new_term_from_blank: + * @world: raptor world + * @blank: UTF-8 encoded blank node identifier (or NULL) + * + * Constructor - create a new blank node statement term from a UTF-8 encoded blank node ID + * + * Takes a copy of the passed in @blank + * + * If @blank is NULL or an empty string, creates a new internal + * identifier and uses it. 
This will use the handler set with + * raptor_world_set_generate_bnodeid_parameters() + * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_blank(raptor_world* world, const unsigned char* blank) +{ + size_t length = 0; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + if(blank) { + if(*blank) + length = strlen((const char*)blank); + else + blank = NULL; + } + + return raptor_new_term_from_counted_blank(world, blank, length); +} + + +/** + * raptor_new_term_from_counted_string: + * @world: raptor world + * @string: N-Triples format string (UTF-8) + * @length: length of @string (or 0) + * + * Constructor - create a new term from a Turtle / N-Triples format string in UTF-8 + * + * See also raptor_term_to_counted_string() and raptor_term_to_string() + * + * Return value: new term or NULL on failure +*/ +raptor_term* +raptor_new_term_from_counted_string(raptor_world* world, + unsigned char* string, size_t length) +{ + raptor_term* term = NULL; + size_t bytes_read; + raptor_locator locator; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!string) + return NULL; + + if(!length) + length = strlen(RAPTOR_GOOD_CAST(const char*, string)); + + raptor_world_open(world); + + memset(&locator, '\0', sizeof(locator)); + locator.line = -1; + + bytes_read = raptor_ntriples_parse_term(world, &locator, + string, &length, &term, 1); + + if(!bytes_read || length != 0) { + if(term) + raptor_free_term(term); + term = NULL; + } + + return term; +} + + +/** + * raptor_term_copy: + * @term: raptor term + * + * Copy constructor - get a copy of a statement term + * + * Return value: new term object or NULL on failure + */ +raptor_term* +raptor_term_copy(raptor_term* term) +{ + if(!term) + return NULL; + + term->usage++; + return term; +} + + +/** + * raptor_free_term: + * @term: #raptor_term object + * + * Destructor - destroy a raptor_term object. 
+ * + **/ +void +raptor_free_term(raptor_term *term) +{ + if(!term) + return; + + if(--term->usage) + return; + + switch(term->type) { + case RAPTOR_TERM_TYPE_URI: + if(term->value.uri) { + raptor_free_uri(term->value.uri); + term->value.uri = NULL; + } + break; + + case RAPTOR_TERM_TYPE_BLANK: + if(term->value.blank.string) { + RAPTOR_FREE(char*, term->value.blank.string); + term->value.blank.string = NULL; + } + break; + + case RAPTOR_TERM_TYPE_LITERAL: + if(term->value.literal.string) { + RAPTOR_FREE(char*, term->value.literal.string); + term->value.literal.string = NULL; + } + + if(term->value.literal.datatype) { + raptor_free_uri(term->value.literal.datatype); + term->value.literal.datatype = NULL; + } + + if(term->value.literal.language) { + RAPTOR_FREE(char*, term->value.literal.language); + term->value.literal.language = NULL; + } + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + break; + } + + RAPTOR_FREE(term, term); +} + + +/** + * raptor_term_to_counted_string: + * @term: #raptor_term + * @len_p: Pointer to location to store length of new string (if not NULL) + * + * Turns a raptor term into a N-Triples format counted string. + * + * Turns the given @term into an N-Triples escaped string using all the + * escapes as defined in http://www.w3.org/TR/rdf-testcases/#ntriples + * + * This function uses raptor_term_ntriples_write() to write to an + * #raptor_iostream which is the prefered way to write formatted + * output. + * + * See also raptor_new_term_from_counted_string() to reverse this. + * + * See also raptor_term_to_turtle_string() to write as Turtle which + * will include Turtle syntax such as 'true' for booleans and """quoting""" + * + * Return value: the new string or NULL on failure. The length of + * the new string is returned in *@len_p if len_p is not NULL. 
+ **/ +unsigned char* +raptor_term_to_counted_string(raptor_term *term, size_t* len_p) +{ + raptor_iostream *iostr; + void *string = NULL; + int rc; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(term, raptor_term, NULL); + + iostr = raptor_new_iostream_to_string(term->world, + &string, len_p, NULL); + if(!iostr) + return NULL; + + rc = raptor_term_escaped_write(term, 0, iostr); + raptor_free_iostream(iostr); + + if(rc) { + if(string) { + RAPTOR_FREE(char*, string); + string = NULL; + } + } + + return (unsigned char *)string; +} + + +/** + * raptor_term_to_string: + * @term: #raptor_term + * + * Turns a raptor term into a N-Triples format string. + * + * Turns the given @term into an N-Triples escaped string using all the + * escapes as defined in http://www.w3.org/TR/rdf-testcases/#ntriples + * + * See also raptor_new_term_from_counted_string() to reverse this. + * + * See also raptor_term_to_turtle_string() to write as Turtle which + * will include Turtle syntax such as 'true' for booleans and """quoting""" + * + * Return value: the new string or NULL on failure. 
+ **/ +unsigned char* +raptor_term_to_string(raptor_term *term) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(term, raptor_term, NULL); + + return raptor_term_to_counted_string(term, NULL); +} + + +/* + * raptor_term_print_as_ntriples: + * @term: #raptor_term + * @stream: FILE stream + * + * INTERNAL - Print a term as N-Triples + */ +int +raptor_term_print_as_ntriples(const raptor_term *term, FILE* stream) +{ + int rc = 0; + raptor_iostream* iostr; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(term, raptor_term, 1); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(stream, FILE*, 1); + + iostr = raptor_new_iostream_to_file_handle(term->world, stream); + if(!iostr) + return 1; + + rc = raptor_term_escaped_write(term, 0, iostr); + + raptor_free_iostream(iostr); + + return rc; +} + + +/** + * raptor_term_equals: + * @t1: first term + * @t2: second term + * + * Compare a pair of #raptor_term for equality + * + * Return value: non-0 if the terms are equal + */ +int +raptor_term_equals(raptor_term* t1, raptor_term* t2) +{ + int d = 0; + + if(!t1 || !t2) + return 0; + + if(t1->type != t2->type) + return 0; + + if(t1 == t2) + return 1; + + switch(t1->type) { + case RAPTOR_TERM_TYPE_URI: + d = raptor_uri_equals(t1->value.uri, t2->value.uri); + break; + + case RAPTOR_TERM_TYPE_BLANK: + if(t1->value.blank.string_len != t2->value.blank.string_len) + /* different lengths */ + break; + + d = !strcmp((const char*)t1->value.blank.string, + (const char*)t2->value.blank.string); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + if(t1->value.literal.string_len != t2->value.literal.string_len) + /* different lengths */ + break; + + d = !strcmp((const char*)t1->value.literal.string, + (const char*)t2->value.literal.string); + if(!d) + break; + + if(t1->value.literal.language && t2->value.literal.language) { + /* both have a language */ + d = !strcmp((const char*)t1->value.literal.language, + (const char*)t2->value.literal.language); + if(!d) + break; + } else if(t1->value.literal.language || 
t2->value.literal.language) { + /* only one has a language - different */ + d = 0; + break; + } + + if(t1->value.literal.datatype && t2->value.literal.datatype) { + /* both have a datatype */ + d = raptor_uri_equals(t1->value.literal.datatype, + t2->value.literal.datatype); + } else if(t1->value.literal.datatype || t2->value.literal.datatype) { + /* only one has a datatype - different */ + d = 0; + } + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + break; + } + + return d; +} + + +/** + * raptor_term_compare: + * @t1: first term + * @t2: second term + * + * Compare a pair of #raptor_term + * + * If types are different, the #raptor_term_type order is used. + * + * Resource and datatype URIs are compared with raptor_uri_compare(), + * blank nodes and literals with strcmp(). If one literal has no + * language, it is earlier than one with a language. If one literal + * has no datatype, it is earlier than one with a datatype. + * + * Return value: <0 if t1 is before t2, 0 if equal, >0 if t1 is after t2 + */ +int +raptor_term_compare(const raptor_term *t1, const raptor_term *t2) +{ + int d = 0; + + /* check for NULL terms */ + if(!t1 || !t2) { + if(!t1 && !t2) + return 0; /* both NULL */ + + /* place NULLs before any other term */ + return t1 ? 
1 : -1; + } + + if(t1->type != t2->type) + return (t1->type - t2->type); + + switch(t1->type) { + case RAPTOR_TERM_TYPE_URI: + d = raptor_uri_compare(t1->value.uri, t2->value.uri); + break; + + case RAPTOR_TERM_TYPE_BLANK: + d = strcmp((const char*)t1->value.blank.string, + (const char*)t2->value.blank.string); + break; + + case RAPTOR_TERM_TYPE_LITERAL: + d = strcmp((const char*)t1->value.literal.string, + (const char*)t2->value.literal.string); + if(d) + break; + + if(t1->value.literal.language && t2->value.literal.language) { + /* both have a language */ + d = strcmp((const char*)t1->value.literal.language, + (const char*)t2->value.literal.language); + } else if(t1->value.literal.language || t2->value.literal.language) + /* only one has a language; the language-less one is earlier */ + d = (!t1->value.literal.language ? -1 : 1); + if(d) + break; + + if(t1->value.literal.datatype && t2->value.literal.datatype) { + /* both have a datatype */ + d = raptor_uri_compare(t1->value.literal.datatype, + t2->value.literal.datatype); + } else if(t1->value.literal.datatype || t2->value.literal.datatype) + /* only one has a datatype; the datatype-less one is earlier */ + d = (!t1->value.literal.datatype ? 
-1 : 1); + break; + + case RAPTOR_TERM_TYPE_UNKNOWN: + default: + break; + } + + return d; +} +#endif + + + +#ifdef STANDALONE + +/* one more prototype */ +int main(int argc, char *argv[]); + +static const unsigned char *uri_string1 = (const unsigned char *)"http://http://www.dajobe.org/"; +static unsigned int uri_string1_len = 29; /* strlen(uri_string1) */ +static raptor_term_type uri_string1_type = RAPTOR_TERM_TYPE_URI; +static const unsigned char *uri_string2 = (const unsigned char *)"http://www.example.org/"; +static unsigned int uri_string2_len = 23; /* strlen(uri_string2) */ +static raptor_term_type uri_string2_type = RAPTOR_TERM_TYPE_URI; +static const unsigned char *literal_string1 = (const unsigned char *)"Dave Beckett"; +static unsigned int literal_string1_len = 12; /* strlen(literal_string1) */ +static raptor_term_type literal_string1_type = RAPTOR_TERM_TYPE_LITERAL; +static const unsigned char *bnodeid1 = (const unsigned char *)"abc123"; +static unsigned int bnodeid1_len = 6; /* strlen(bnode_id1) */ +static raptor_term_type bnodeid1_type = RAPTOR_TERM_TYPE_BLANK; +static const unsigned char* language1 = (const unsigned char*)"en"; + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = raptor_basename(argv[0]); + int rc = 0; + raptor_term* term1 = NULL; /* URI string 1 */ + raptor_term* term2 = NULL; /* literal string1 */ + raptor_term* term3 = NULL; /* blank node 1 */ + raptor_term* term4 = NULL; /* URI string 2 */ + raptor_term* term5 = NULL; /* URI string 1 again */ + raptor_uri* uri1; + unsigned char* uri_str; + size_t uri_len; + + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + + /* check a term for NULL URI fails */ + term1 = raptor_new_term_from_uri(world, NULL); + if(term1) { + fprintf(stderr, "%s: raptor_new_uri(NULL) returned object rather than failing\n", program); + rc = 1; + goto tidy; + } + + /* check a term for non-NULL URI succeeds */ + uri1 = raptor_new_uri(world, 
uri_string1); + if(!uri1) { + fprintf(stderr, "%s: raptor_new_uri(%s) failed\n", program, uri_string1); + rc = 1; + goto tidy; + } + term1 = raptor_new_term_from_uri(world, uri1); + if(!term1) { + fprintf(stderr, "%s: raptor_new_term_from_uri_string(URI %s) failed\n", + program, uri_string1); + rc = 1; + goto tidy; + } + raptor_free_uri(uri1); uri1 = NULL; + if(term1->type != uri_string1_type) { + fprintf(stderr, "%s: raptor term 1 is of type %d expected %d\n", + program, term1->type, uri_string1_type); + rc = 1; + goto tidy; + } + + + /* returns a pointer to shared string */ + uri_str = raptor_uri_as_counted_string(term1->value.uri, &uri_len); + if(!uri_str) { + fprintf(stderr, "%s: raptor_uri_as_counted_string term 1 failed\n", + program); + rc = 1; + goto tidy; + } + + if(uri_len != uri_string1_len) { + fprintf(stderr, "%s: raptor term 1 URI is of length %d expected %d\n", + program, (int)uri_len, (int)uri_string1_len); + rc = 1; + goto tidy; + } + + + /* check an empty literal is created from a NULL literal pointer succeeds */ + term2 = raptor_new_term_from_counted_literal(world, NULL, 0, NULL, NULL, 0); + if(!term2) { + fprintf(stderr, "%s: raptor_new_term_from_counted_literal() with all NULLs failed\n", program); + rc = 1; + goto tidy; + } + raptor_free_term(term2); + + + /* check an empty literal from an empty language literal pointer succeeds */ + term2 = raptor_new_term_from_counted_literal(world, NULL, 0, NULL, + (const unsigned char*)"", 0); + if(!term2) { + fprintf(stderr, "%s: raptor_new_term_from_counted_literal() with empty language failed\n", program); + rc = 1; + goto tidy; + } + raptor_free_term(term2); + + /* check a literal with language and datatype fails */ + uri1 = raptor_new_uri(world, uri_string1); + if(!uri1) { + fprintf(stderr, "%s: raptor_new_uri(%s) failed\n", program, uri_string1); + rc = 1; + goto tidy; + } + term2 = raptor_new_term_from_counted_literal(world, literal_string1, + literal_string1_len, + uri1, language1, 0); + 
raptor_free_uri(uri1); uri1 = NULL; + if(term2) { + fprintf(stderr, "%s: raptor_new_term_from_counted_literal() with language and datatype returned object rather than failing\n", program); + rc = 1; + goto tidy; + } + + /* check a literal with no language and no datatype succeeds */ + term2 = raptor_new_term_from_counted_literal(world, literal_string1, + literal_string1_len, NULL, NULL, 0); + if(!term2) { + fprintf(stderr, "%s: raptor_new_term_from_counted_literal(%s) failed\n", + program, literal_string1); + rc = 1; + goto tidy; + } + if(term2->type != literal_string1_type) { + fprintf(stderr, "%s: raptor term 2 is of type %d expected %d\n", + program, term2->type, literal_string1_type); + rc = 1; + goto tidy; + } + + + /* check a blank node term with NULL id generates a new identifier */ + term3 = raptor_new_term_from_counted_blank(world, NULL, 0); + if(!term3) { + fprintf(stderr, "%s: raptor_new_term_from_counted_blank(NULL) failed\n", + program); + rc = 1; + goto tidy; + } + if(term3->type != bnodeid1_type) { + fprintf(stderr, "%s: raptor term 3 is of type %d expected %d\n", + program, term3->type, bnodeid1_type); + rc = 1; + goto tidy; + } + raptor_free_term(term3); + + /* check a blank node term with an identifier succeeds */ + term3 = raptor_new_term_from_counted_blank(world, bnodeid1, bnodeid1_len); + if(!term3) { + fprintf(stderr, "%s: raptor_new_term_from_counted_blank(%s) failed\n", + program, bnodeid1); + rc = 1; + goto tidy; + } + if(term3->type != bnodeid1_type) { + fprintf(stderr, "%s: raptor term 3 is of type %d expected %d\n", + program, term3->type, bnodeid1_type); + rc = 1; + goto tidy; + } + + + /* check a different URI term succeeds */ + term4 = raptor_new_term_from_counted_uri_string(world, uri_string2, + uri_string2_len); + if(!term4) { + fprintf(stderr, + "%s: raptor_new_term_from_counted_uri_string(URI %s) failed\n", + program, uri_string2); + rc = 1; + goto tidy; + } + if(term4->type != uri_string2_type) { + fprintf(stderr, "%s: raptor 
term 4 is of type %d expected %d\n", + program, term4->type, uri_string2_type); + rc = 1; + goto tidy; + } + /* returns a pointer to shared string */ + uri_str = raptor_uri_as_counted_string(term4->value.uri, &uri_len); + if(!uri_str) { + fprintf(stderr, "%s: raptor_uri_as_counted_string term 4 failed\n", + program); + rc = 1; + goto tidy; + } + + if(uri_len != uri_string2_len) { + fprintf(stderr, "%s: raptor term 4 URI is of length %d expected %d\n", + program, (int)uri_len, (int)uri_string2_len); + rc = 1; + goto tidy; + } + + + /* check the same URI term as term1 succeeds */ + term5 = raptor_new_term_from_uri_string(world, uri_string1); + if(!term5) { + fprintf(stderr, "%s: raptor_new_term_from_uri_string(URI %s) failed\n", + program, uri_string1); + rc = 1; + goto tidy; + } + + + if(raptor_term_equals(term1, term2)) { + fprintf(stderr, "%s: raptor_term_equals (URI %s, literal %s) returned equal, expected not-equal\n", + program, uri_string1, literal_string1); + rc = 1; + goto tidy; + } + + if(raptor_term_equals(term1, term3)) { + fprintf(stderr, "%s: raptor_term_equals (URI %s, bnode %s) returned equal, expected not-equal\n", + program, uri_string1, bnodeid1); + rc = 1; + goto tidy; + } + + if(raptor_term_equals(term1, term4)) { + fprintf(stderr, "%s: raptor_term_equals (URI %s, URI %s) returned equal, expected not-equal\n", + program, uri_string1, uri_string2); + rc = 1; + goto tidy; + } + + if(!raptor_term_equals(term1, term5)) { + fprintf(stderr, "%s: raptor_term_equals (URI %s, URI %s) returned not-equal, expected equal\n", + program, uri_string1, uri_string1); + rc = 1; + goto tidy; + } + + if(term1->value.uri != term5->value.uri) { + fprintf(stderr, "%s: term1 and term5 URI objects returned not-equal pointers, expected equal\n", + program); + /* This is not necessarily a failure if the raptor_uri module has had + * the URI interning disabled with + * raptor_world_set_flag(world, RAPTOR_WORLD_FLAG_URI_INTERNING, 0) + * however this test suite does not do 
that, so it is a failure here. + */ + rc = 1; + goto tidy; + } + + + tidy: + if(term1) + raptor_free_term(term1); + if(term2) + raptor_free_term(term2); + if(term3) + raptor_free_term(term3); + if(term4) + raptor_free_term(term4); + if(term5) + raptor_free_term(term5); + + raptor_free_world(world); + + return rc; +} + +#endif /* STANDALONE */ diff --git a/src/raptor_turtle_writer.c b/src/raptor_turtle_writer.c new file mode 100644 index 0000000..eb62a6e --- /dev/null +++ b/src/raptor_turtle_writer.c @@ -0,0 +1,1043 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_turtle_writer.c - Raptor Turtle Writer + * + * Copyright (C) 2006, Dave Robillard + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif +#include <math.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#ifndef STANDALONE + + +#define TURTLE_WRITER_AUTO_INDENT(turtle_writer) ((turtle_writer->flags & TURTLE_WRITER_FLAG_AUTO_INDENT) != 0) + +struct raptor_turtle_writer_s { + raptor_world* world; + + int depth; + + raptor_uri* base_uri; + + int my_nstack; + raptor_namespace_stack *nstack; + int nstack_depth; + + /* outputting to this iostream */ + raptor_iostream *iostr; + + /* Turtle Writer flags - bits defined in enum raptor_turtle_writer_flags */ + int flags; + + /* indentation per level if formatting */ + int indent; +}; + + +/* 16 spaces */ +#define SPACES_BUFFER_SIZE sizeof(spaces_buffer) +static const unsigned char spaces_buffer[] = { + ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ' +}; + + +void +raptor_turtle_writer_increase_indent(raptor_turtle_writer *turtle_writer) +{ + turtle_writer->depth += turtle_writer->indent; +} + + +void +raptor_turtle_writer_decrease_indent(raptor_turtle_writer *turtle_writer) +{ + turtle_writer->depth -= turtle_writer->indent; +} + + +void +raptor_turtle_writer_newline(raptor_turtle_writer *turtle_writer) +{ + int num_spaces; + + raptor_iostream_write_byte('\n', turtle_writer->iostr); + + if(!TURTLE_WRITER_AUTO_INDENT(turtle_writer)) + return; + + num_spaces = turtle_writer->depth * turtle_writer->indent; + + while(num_spaces > 0) { + int count; + count = (num_spaces > RAPTOR_GOOD_CAST(int, SPACES_BUFFER_SIZE)) ? 
+ RAPTOR_GOOD_CAST(int, SPACES_BUFFER_SIZE) : num_spaces; + + raptor_iostream_counted_string_write(spaces_buffer, count, turtle_writer->iostr); + + num_spaces -= count; + } + + return; +} + + +void +raptor_turtle_writer_csv_string(raptor_turtle_writer *turtle_writer, + const unsigned char *string) +{ + raptor_iostream *iostr = turtle_writer->iostr; + size_t len = strlen((const char*)string); + const char delim = '\x22'; + int quoting_needed = 0; + size_t i; + + for(i = 0; i < len; i++) { + char c = string[i]; + /* Quoting needed for delim (double quote), comma, linefeed or return */ + if(c == delim || c == ',' || c == '\r' || c == '\n') { + quoting_needed++; + break; + } + } + if(!quoting_needed) { + raptor_iostream_counted_string_write(string, len, iostr); + return; + } + + raptor_iostream_write_byte(delim, iostr); + for(i = 0; i < len; i++) { + char c = string[i]; + if(c == delim) + raptor_iostream_write_byte(delim, iostr); + raptor_iostream_write_byte(c, iostr); + } + raptor_iostream_write_byte(delim, iostr); + + return; +} + +/** + * raptor_new_turtle_writer: + * @world: raptor_world object + * @base_uri: Base URI for the writer (or NULL) + * @write_base_uri: non-0 to write '@base' directive to output + * @nstack: Namespace stack for the writer to start with (or NULL) + * @iostr: I/O stream to write to + * @flags: bitflags from #raptor_turtle_writer_flags + * + * Constructor - Create a new Turtle Writer writing Turtle to a raptor_iostream + * + * Return value: a new #raptor_turtle_writer object or NULL on failure + **/ +raptor_turtle_writer* +raptor_new_turtle_writer(raptor_world* world, + raptor_uri* base_uri, int write_base_uri, + raptor_namespace_stack *nstack, + raptor_iostream* iostr, + int flags) +{ + raptor_turtle_writer* turtle_writer; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!nstack || !iostr) + return NULL; + + raptor_world_open(world); + + turtle_writer = RAPTOR_CALLOC(raptor_turtle_writer*, 1, + sizeof(*turtle_writer)); + + 
if(!turtle_writer) + return NULL; + + turtle_writer->world = world; + + turtle_writer->nstack_depth = 0; + + turtle_writer->nstack = nstack; + if(!turtle_writer->nstack) { + turtle_writer->nstack = raptor_new_namespaces(world, 1); + turtle_writer->my_nstack = 1; + } + + turtle_writer->iostr = iostr; + + turtle_writer->flags = flags; + turtle_writer->indent = 2; + + turtle_writer->base_uri = NULL; + /* Ensure any initial base URI is not written relative */ + if(base_uri && write_base_uri) + raptor_turtle_writer_base(turtle_writer, base_uri); + turtle_writer->base_uri = base_uri; + + return turtle_writer; +} + + +/** + * raptor_free_turtle_writer: + * @turtle_writer: Turtle writer object + * + * Destructor - Free Turtle Writer + * + **/ +void +raptor_free_turtle_writer(raptor_turtle_writer* turtle_writer) +{ + if(!turtle_writer) + return; + + if(turtle_writer->nstack && turtle_writer->my_nstack) + raptor_free_namespaces(turtle_writer->nstack); + + RAPTOR_FREE(raptor_turtle_writer, turtle_writer); +} + + +static int +raptor_turtle_writer_contains_newline(const unsigned char *s, size_t len) +{ + size_t i = 0; + + for( ; i < len; i++) + if(s[i] == '\n') + return 1; + + return 0; +} + + +/** + * raptor_turtle_writer_raw: + * @turtle_writer: Turtle writer object + * @s: raw string to write + * + * Write a raw string to the Turtle writer verbatim. + * + **/ +void +raptor_turtle_writer_raw(raptor_turtle_writer* turtle_writer, + const unsigned char *s) +{ + raptor_iostream_string_write(s, turtle_writer->iostr); +} + + +/** + * raptor_turtle_writer_raw_counted: + * @turtle_writer: Turtle writer object + * @s: raw string to write + * @len: length of string + * + * Write a counted string to the Turtle writer verbatim. 
+ * + **/ +void +raptor_turtle_writer_raw_counted(raptor_turtle_writer* turtle_writer, + const unsigned char *s, unsigned int len) +{ + raptor_iostream_counted_string_write(s, len, turtle_writer->iostr); +} + + +/** + * raptor_turtle_writer_namespace_prefix: + * @turtle_writer: Turtle writer object + * @ns: Namespace to write prefix declaration for + * + * Write a namespace prefix declaration (@prefix) + * + * Must only be used at the beginning of a document. + */ +void +raptor_turtle_writer_namespace_prefix(raptor_turtle_writer* turtle_writer, + raptor_namespace* ns) +{ + int emit_mkr = (turtle_writer->flags & TURTLE_WRITER_FLAG_MKR); + + raptor_iostream_string_write("@prefix ", turtle_writer->iostr); + if(ns->prefix) + raptor_iostream_string_write(raptor_namespace_get_prefix(ns), + turtle_writer->iostr); + raptor_iostream_counted_string_write(": ", 2, turtle_writer->iostr); + raptor_turtle_writer_reference(turtle_writer, raptor_namespace_get_uri(ns)); + if(emit_mkr) + raptor_iostream_counted_string_write(" ;\n", 3, turtle_writer->iostr); + else + raptor_iostream_counted_string_write(" .\n", 3, turtle_writer->iostr); +} + + +/** + * raptor_turtle_writer_base: + * @turtle_writer: Turtle writer object + * @base_uri: New base URI or NULL + * + * Write a base URI directive (@base) to set the in-scope base URI + */ +void +raptor_turtle_writer_base(raptor_turtle_writer* turtle_writer, + raptor_uri* base_uri) +{ + int emit_mkr = (turtle_writer->flags & TURTLE_WRITER_FLAG_MKR); + + if(base_uri) { + raptor_iostream_counted_string_write("@base ", 6, turtle_writer->iostr); + raptor_turtle_writer_reference(turtle_writer, base_uri); + if(emit_mkr) + raptor_iostream_counted_string_write(" ;\n", 3, turtle_writer->iostr); + else + raptor_iostream_counted_string_write(" .\n", 3, turtle_writer->iostr); + } +} + + +/** + * raptor_turtle_writer_reference: + * @turtle_writer: Turtle writer object + * @uri: URI to write + * + * Write a Turtle-encoded URI to the Turtle writer. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_turtle_writer_reference(raptor_turtle_writer* turtle_writer, + raptor_uri* uri) +{ + return raptor_uri_escaped_write(uri, turtle_writer->base_uri, + RAPTOR_ESCAPED_WRITE_TURTLE_URI, + turtle_writer->iostr); +} + + +/** + * raptor_turtle_writer_qname: + * @turtle_writer: Turtle writer object + * @qname: qname to write + * + * Write a QName to the Turtle writer. + * + **/ +void +raptor_turtle_writer_qname(raptor_turtle_writer* turtle_writer, + raptor_qname* qname) +{ + raptor_iostream* iostr = turtle_writer->iostr; + + if(qname->nspace && qname->nspace->prefix_length > 0) + raptor_iostream_counted_string_write(qname->nspace->prefix, + qname->nspace->prefix_length, + iostr); + raptor_iostream_write_byte(':', iostr); + + raptor_iostream_counted_string_write(qname->local_name, + qname->local_name_length, + iostr); + return; +} + + +/** + * raptor_turtle_writer_quoted_counted_string: + * @turtle_writer: Turtle writer object + * @s: string to write + * @len: string length + * + * Write a Turtle escaped-string inside double quotes to the writer. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_turtle_writer_quoted_counted_string(raptor_turtle_writer* turtle_writer, + const unsigned char *s, size_t len) +{ + const unsigned char *quotes = (const unsigned char *)"\"\"\"\""; + const unsigned char *q = quotes + 2; + size_t q_len = 1; + int flags = RAPTOR_ESCAPED_WRITE_TURTLE_LITERAL; + int rc = 0; + + if(!s) + return 1; + + /* Turtle """longstring""" (2) or "string" (1) */ + if(raptor_turtle_writer_contains_newline(s, len)) { + /* long string */ + flags = RAPTOR_ESCAPED_WRITE_TURTLE_LONG_LITERAL; + q = quotes; + q_len = 3; + } + + raptor_iostream_counted_string_write(q, q_len, turtle_writer->iostr); + rc = raptor_string_escaped_write(s, len, '"', + flags, turtle_writer->iostr); + raptor_iostream_counted_string_write(q, q_len, turtle_writer->iostr); + + return rc; +} + + +/* + * raptor_turtle_writer_literal: + * @turtle_writer: Turtle writer object + * @nstack: Namespace stack for making a QName for datatype URI + * @s: literal string to write (SHARED) + * @lang: language tag (may be NULL) + * @datatype: datatype URI (may be NULL) + * + * INTERNAL - Write a literal (possibly with lang and datatype) to the Turtle writer. + * + * Return value: non-0 on failure + **/ +int +raptor_turtle_writer_literal(raptor_turtle_writer* turtle_writer, + raptor_namespace_stack *nstack, + const unsigned char* s, const unsigned char* lang, + raptor_uri* datatype) +{ + /* DBL_MAX = 309 decimal digits */ + #define INT_MAX_LEN 309 + + /* DBL_EPSILON = 52 digits */ + #define FRAC_MAX_LEN 52 + + char* endptr = (char *)s; + int written = 0; + + /* typed literal special cases */ + if(datatype) { + /* integer */ + if(raptor_uri_equals(datatype, turtle_writer->world->xsd_integer_uri)) { + /* FIXME. 
Work around that gcc < 4.5 cannot disable warn_unused_result */ + long gcc_is_stupid = strtol((const char*)s, &endptr, 10); + if(endptr != (char*)s && !*endptr) { + raptor_iostream_string_write(s, turtle_writer->iostr); + /* More gcc madness to 'use' the variable I didn't want */ + written = 1 + 0 * (int)gcc_is_stupid; + } else { + raptor_log_error(turtle_writer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Illegal value for xsd:integer literal."); + } + + /* double, decimal */ + } else if(raptor_uri_equals(datatype, turtle_writer->world->xsd_double_uri) || + raptor_uri_equals(datatype, turtle_writer->world->xsd_decimal_uri)) { + /* FIXME. Work around that gcc < 4.5 cannot disable warn_unused_result */ + double gcc_is_doubly_stupid = strtod((const char*)s, &endptr); + if(endptr != (char*)s && !*endptr) { + raptor_iostream_string_write(s, turtle_writer->iostr); + /* More gcc madness to 'use' the variable I didn't want */ + written = 1 + 0 * (int)gcc_is_doubly_stupid; + } else { + raptor_log_error(turtle_writer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Illegal value for xsd:double or xsd:decimal literal."); + } + + /* boolean */ + } else if(raptor_uri_equals(datatype, turtle_writer->world->xsd_boolean_uri)) { + if(!strcmp((const char*)s, "0") || !strcmp((const char*)s, "false")) { + raptor_iostream_string_write("false", turtle_writer->iostr); + written = 1; + } else if(!strcmp((const char*)s, "1") || !strcmp((const char*)s, "true")) { + raptor_iostream_string_write("true", turtle_writer->iostr); + written = 1; + } else { + raptor_log_error(turtle_writer->world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Illegal value for xsd:boolean literal."); + } + } + } + + if(written) + return 0; + + if(raptor_turtle_writer_quoted_counted_string(turtle_writer, s, + strlen((const char*)s))) + return 1; + + /* typed literal, not a special case */ + if(datatype) { + raptor_qname* qname; + + raptor_iostream_string_write("^^", turtle_writer->iostr); + qname = 
raptor_new_qname_from_namespace_uri(nstack, datatype, 10); + if(qname) { + raptor_turtle_writer_qname(turtle_writer, qname); + raptor_free_qname(qname); + } else + raptor_turtle_writer_reference(turtle_writer, datatype); + } else if(lang) { + /* literal with language tag */ + raptor_iostream_write_byte('@', turtle_writer->iostr); + raptor_iostream_string_write(lang, turtle_writer->iostr); + } + + return 0; +} + + +/** + * raptor_turtle_writer_comment: + * @turtle_writer: Turtle writer object + * @s: comment string to write + * + * Write a Turtle comment to the Turtle writer. + * + **/ +void +raptor_turtle_writer_comment(raptor_turtle_writer* turtle_writer, + const unsigned char *string) +{ + unsigned char c; + size_t len = strlen((const char*)string); + + raptor_iostream_counted_string_write((const unsigned char*)"# ", 2, + turtle_writer->iostr); + + for(; (c=*string); string++, len--) { + if(c == '\n') { + raptor_turtle_writer_newline(turtle_writer); + raptor_iostream_counted_string_write((const unsigned char*)"# ", 2, + turtle_writer->iostr); + } else if(c != '\r') { + /* skip carriage returns (windows... *sigh*) */ + raptor_iostream_write_byte(c, turtle_writer->iostr); + } + } + + raptor_turtle_writer_newline(turtle_writer); +} + + +/** + * raptor_turtle_writer_set_option: + * @turtle_writer: #raptor_turtle_writer turtle_writer object + * @option: option to set from enumerated #raptor_option values + * @value: integer option value (0 or larger) + * + * Set turtle_writer options with integer values. 
+ * + * The allowed options are available via + * raptor_world_get_option_description() + * + * Return value: non 0 on failure or if the option is unknown + **/ +int +raptor_turtle_writer_set_option(raptor_turtle_writer *turtle_writer, + raptor_option option, int value) +{ + if(value < 0 || + !raptor_option_is_valid_for_area(option, RAPTOR_OPTION_AREA_TURTLE_WRITER)) + return 1; + + switch(option) { + case RAPTOR_OPTION_WRITER_AUTO_INDENT: + if(value) + turtle_writer->flags |= TURTLE_WRITER_FLAG_AUTO_INDENT; + else + turtle_writer->flags &= ~TURTLE_WRITER_FLAG_AUTO_INDENT; + break; + + case RAPTOR_OPTION_WRITER_INDENT_WIDTH: + turtle_writer->indent = value; + break; + + case RAPTOR_OPTION_WRITER_AUTO_EMPTY: + case RAPTOR_OPTION_WRITER_XML_VERSION: + case RAPTOR_OPTION_WRITER_XML_DECLARATION: + break; + + /* parser options */ + case RAPTOR_OPTION_SCANNING: + case RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES: + case RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES: + case RAPTOR_OPTION_ALLOW_BAGID: + case RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST: + case RAPTOR_OPTION_NORMALIZE_LANGUAGE: + case RAPTOR_OPTION_NON_NFC_FATAL: + case RAPTOR_OPTION_WARN_OTHER_PARSETYPES: + case RAPTOR_OPTION_CHECK_RDF_ID: + case RAPTOR_OPTION_HTML_TAG_SOUP: + case RAPTOR_OPTION_MICROFORMATS: + case RAPTOR_OPTION_HTML_LINK: + case RAPTOR_OPTION_WWW_TIMEOUT: + case RAPTOR_OPTION_STRICT: + + /* Shared */ + case RAPTOR_OPTION_NO_NET: + case RAPTOR_OPTION_NO_FILE: + case RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES: + + /* XML writer options */ + case RAPTOR_OPTION_RELATIVE_URIS: + + /* DOT serializer options */ + case RAPTOR_OPTION_RESOURCE_BORDER: + case RAPTOR_OPTION_LITERAL_BORDER: + case RAPTOR_OPTION_BNODE_BORDER: + case RAPTOR_OPTION_RESOURCE_FILL: + case RAPTOR_OPTION_LITERAL_FILL: + case RAPTOR_OPTION_BNODE_FILL: + + /* JSON serializer options */ + case RAPTOR_OPTION_JSON_CALLBACK: + case RAPTOR_OPTION_JSON_EXTRA_DATA: + case RAPTOR_OPTION_RSS_TRIPLES: + case RAPTOR_OPTION_ATOM_ENTRY_URI: + case 
RAPTOR_OPTION_PREFIX_ELEMENTS: + + /* Turtle serializer option */ + case RAPTOR_OPTION_WRITE_BASE_URI: + + /* WWW option */ + case RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL: + case RAPTOR_OPTION_WWW_HTTP_USER_AGENT: + case RAPTOR_OPTION_WWW_CERT_FILENAME: + case RAPTOR_OPTION_WWW_CERT_TYPE: + case RAPTOR_OPTION_WWW_CERT_PASSPHRASE: + case RAPTOR_OPTION_WWW_SSL_VERIFY_PEER: + case RAPTOR_OPTION_WWW_SSL_VERIFY_HOST: + + default: + return -1; + } + + return 0; +} + + +/** + * raptor_turtle_writer_set_option_string: + * @turtle_writer: #raptor_turtle_writer turtle_writer object + * @option: option to set from enumerated #raptor_option values + * @value: option value + * + * Set turtle_writer options with string values. + * + * The allowed options are available via + * raptor_world_get_option_description(). + * If the option type is integer, the value is interpreted as an + * integer. + * + * Return value: non 0 on failure or if the option is unknown + **/ +int +raptor_turtle_writer_set_option_string(raptor_turtle_writer *turtle_writer, + raptor_option option, + const unsigned char *value) +{ + if(!value || + !raptor_option_is_valid_for_area(option, RAPTOR_OPTION_AREA_TURTLE_WRITER)) + return 1; + + if(raptor_option_value_is_numeric(option)) + return raptor_turtle_writer_set_option(turtle_writer, option, + atoi((const char*)value)); + + return 1; +} + + +/** + * raptor_turtle_writer_get_option: + * @turtle_writer: #raptor_turtle_writer serializer object + * @option: option to get value + * + * Get various turtle_writer options. + * + * The allowed options are available via raptor_options_enumerate(). 
 *
 * Note: no option value is negative
 *
 * Return value: option value or < 0 for an illegal option
 **/
int
raptor_turtle_writer_get_option(raptor_turtle_writer *turtle_writer,
                                raptor_option option)
{
  /* stays -1 for every option that is not handled explicitly below */
  int result = -1;

  switch(option) {
    case RAPTOR_OPTION_WRITER_AUTO_INDENT:
      result = TURTLE_WRITER_AUTO_INDENT(turtle_writer);
      break;

    case RAPTOR_OPTION_WRITER_INDENT_WIDTH:
      result = turtle_writer->indent;
      break;

    /* writer options */
    case RAPTOR_OPTION_WRITER_AUTO_EMPTY:
    case RAPTOR_OPTION_WRITER_XML_VERSION:
    case RAPTOR_OPTION_WRITER_XML_DECLARATION:

    /* parser options */
    case RAPTOR_OPTION_SCANNING:
    case RAPTOR_OPTION_ALLOW_NON_NS_ATTRIBUTES:
    case RAPTOR_OPTION_ALLOW_OTHER_PARSETYPES:
    case RAPTOR_OPTION_ALLOW_BAGID:
    case RAPTOR_OPTION_ALLOW_RDF_TYPE_RDF_LIST:
    case RAPTOR_OPTION_NORMALIZE_LANGUAGE:
    case RAPTOR_OPTION_NON_NFC_FATAL:
    case RAPTOR_OPTION_WARN_OTHER_PARSETYPES:
    case RAPTOR_OPTION_CHECK_RDF_ID:
    case RAPTOR_OPTION_HTML_TAG_SOUP:
    case RAPTOR_OPTION_MICROFORMATS:
    case RAPTOR_OPTION_HTML_LINK:
    case RAPTOR_OPTION_WWW_TIMEOUT:
    case RAPTOR_OPTION_STRICT:

    /* Shared */
    case RAPTOR_OPTION_NO_NET:
    case RAPTOR_OPTION_NO_FILE:
    case RAPTOR_OPTION_LOAD_EXTERNAL_ENTITIES:

    /* XML writer options */
    case RAPTOR_OPTION_RELATIVE_URIS:

    /* DOT serializer options */
    case RAPTOR_OPTION_RESOURCE_BORDER:
    case RAPTOR_OPTION_LITERAL_BORDER:
    case RAPTOR_OPTION_BNODE_BORDER:
    case RAPTOR_OPTION_RESOURCE_FILL:
    case RAPTOR_OPTION_LITERAL_FILL:
    case RAPTOR_OPTION_BNODE_FILL:

    /* JSON serializer options */
    case RAPTOR_OPTION_JSON_CALLBACK:
    case RAPTOR_OPTION_JSON_EXTRA_DATA:
    case RAPTOR_OPTION_RSS_TRIPLES:
    case RAPTOR_OPTION_ATOM_ENTRY_URI:
    case RAPTOR_OPTION_PREFIX_ELEMENTS:

    /* Turtle serializer option */
    case RAPTOR_OPTION_WRITE_BASE_URI:

    /* WWW option */
    case RAPTOR_OPTION_WWW_HTTP_CACHE_CONTROL:
    case RAPTOR_OPTION_WWW_HTTP_USER_AGENT:
    case RAPTOR_OPTION_WWW_CERT_FILENAME:
    case RAPTOR_OPTION_WWW_CERT_TYPE:
    case RAPTOR_OPTION_WWW_CERT_PASSPHRASE:
    case RAPTOR_OPTION_WWW_SSL_VERIFY_PEER:
    case RAPTOR_OPTION_WWW_SSL_VERIFY_HOST:

    default:
      break;
  }

  return result;
}


/**
 * raptor_turtle_writer_get_option_string:
 * @turtle_writer: #raptor_turtle_writer serializer object
 * @option: option to get value
 *
 * Get turtle_writer options with string values.
 *
 * The allowed options are available via raptor_options_enumerate().
 *
 * This implementation ignores both arguments and always returns NULL.
 *
 * Return value: option value or NULL for an illegal option or no value
 **/
const unsigned char *
raptor_turtle_writer_get_option_string(raptor_turtle_writer *turtle_writer,
                                       raptor_option option)
{
  return NULL;
}


/**
 * raptor_turtle_writer_bnodeid:
 * @turtle_writer: Turtle writer object
 * @bnodeid: blank node ID to write
 * @len: length of @bnodeid
 *
 * Write a blank node ID with leading _: to the Turtle writer.
 *
 * The result of raptor_bnodeid_ntriples_write() is not reported.
 *
 **/
void
raptor_turtle_writer_bnodeid(raptor_turtle_writer* turtle_writer,
                             const unsigned char *bnodeid, size_t len)
{
  raptor_bnodeid_ntriples_write(bnodeid, len,
                                turtle_writer->iostr);
}


/**
 * raptor_turtle_writer_uri:
 * @turtle_writer: Turtle writer object
 * @uri: uri
 *
 * Write a #raptor_uri to a turtle writer in qname or URI form
 *
 * A QName is used when one can be made from the writer's namespace
 * stack and it passes raptor_turtle_is_legal_turtle_qname(); otherwise
 * the URI is written with raptor_turtle_writer_reference().
 *
 * Return value: non-0 on failure
 */
int
raptor_turtle_writer_uri(raptor_turtle_writer* turtle_writer,
                         raptor_uri* uri)
{
  raptor_qname* qname;
  int rc = 0;

  if(!uri)
    return 1;

  /* NOTE(review): the meaning of the last argument (10) is defined by
   * raptor_new_qname_from_namespace_uri(), not visible here - confirm
   */
  qname = raptor_new_qname_from_namespace_uri(turtle_writer->nstack, uri, 10);

  /* XML Names allow leading '_' and '.' anywhere but Turtle does not */
  if(qname && !raptor_turtle_is_legal_turtle_qname(qname)) {
    raptor_free_qname(qname);
    qname = NULL;
  }

  if(qname) {
    raptor_turtle_writer_qname(turtle_writer, qname);
    raptor_free_qname(qname);
  } else {
    rc = raptor_turtle_writer_reference(turtle_writer, uri);
  }

  return rc;
}


/**
 * raptor_turtle_writer_term:
 * @turtle_writer: Turtle writer object
 * @term: term
 *
 * Write a #raptor_term to a turtle writer
 *
 * Return value: non-0 on failure (1 for a NULL @term, 2 for a term
 * type that is not URI, literal or blank node)
 */
int
raptor_turtle_writer_term(raptor_turtle_writer* turtle_writer,
                          raptor_term* term)
{
  int rc = 0;

  if(!term)
    return 1;

  if(term->type == RAPTOR_TERM_TYPE_URI) {
    rc = raptor_turtle_writer_uri(turtle_writer, term->value.uri);
  } else if(term->type == RAPTOR_TERM_TYPE_LITERAL) {
    /* datatype QNames are looked up in the writer's own namespace stack */
    rc = raptor_turtle_writer_literal(turtle_writer,
                                      turtle_writer->nstack,
                                      term->value.literal.string,
                                      term->value.literal.language,
                                      term->value.literal.datatype);
  } else if(term->type == RAPTOR_TERM_TYPE_BLANK) {
    rc = raptor_bnodeid_ntriples_write(term->value.blank.string,
                                       term->value.blank.string_len,
                                       turtle_writer->iostr);
  } else {
    rc = 2;
  }

  return rc;
}




#endif



#ifdef STANDALONE

/* one more prototype */
int main(int argc, char *argv[]);


const unsigned char *base_uri_string = (const unsigned char*)"http://example.org/base#";

const unsigned char* longstr = (const unsigned char*)"it's quoted\nand has newlines, \"s <> and\n\ttabbing";

/* number of bytes the test document below is expected to produce */
#define OUT_BYTES_COUNT 149

/*
 * Self-test: write a small Turtle document to an in-memory iostream
 * and check that exactly OUT_BYTES_COUNT bytes were written and that
 * the resulting string has the same length.
 *
 * Returns 0 on success, 1 on a mismatch; exits with 1 if setup fails.
 */
int
main(int argc, char *argv[])
{
  raptor_world *world;
  const char *program = raptor_basename(argv[0]);
  raptor_iostream *iostr;
  raptor_namespace_stack *nstack;
  raptor_namespace* ex_ns;
  raptor_turtle_writer* turtle_writer;
  raptor_uri* base_uri;
  raptor_qname* el_name;
  unsigned long count;

  /* for raptor_new_iostream_to_string */
  void *string = NULL;
  size_t string_len = 0;

  world = raptor_new_world();
  if(!world || raptor_world_open(world))
    exit(1);

  iostr = raptor_new_iostream_to_string(world, &string, &string_len, NULL);
  if(!iostr) {
    fprintf(stderr, "%s: Failed to create iostream to string\n", program);
    exit(1);
  }

  nstack = raptor_new_namespaces(world, 1);

  base_uri = raptor_new_uri(world, base_uri_string);

  /* write_base_uri is 1, so the constructor writes the @base directive */
  turtle_writer = raptor_new_turtle_writer(world, base_uri, 1, nstack, iostr, 0);
  if(!turtle_writer) {
    fprintf(stderr, "%s: Failed to create turtle_writer to iostream\n", program);
    exit(1);
  }

  raptor_turtle_writer_set_option(turtle_writer,
                                  RAPTOR_OPTION_WRITER_AUTO_INDENT, 1);

  ex_ns = raptor_new_namespace(nstack,
                               (const unsigned char*)"ex",
                               (const unsigned char*)"http://example.org/ns#",
                               0);


  raptor_turtle_writer_namespace_prefix(turtle_writer, ex_ns);

  raptor_turtle_writer_reference(turtle_writer, base_uri);

  raptor_turtle_writer_increase_indent(turtle_writer);
  raptor_turtle_writer_newline(turtle_writer);

  raptor_turtle_writer_raw(turtle_writer, (const unsigned char*)"ex:foo ");

  /* longstr contains linefeeds, so the long-string quoting is used */
  raptor_turtle_writer_quoted_counted_string(turtle_writer, longstr,
                                             strlen((const char*)longstr));
  raptor_turtle_writer_raw_counted(turtle_writer,
                                   (const unsigned char*)" ;", 2);
  raptor_turtle_writer_newline(turtle_writer);

  el_name = raptor_new_qname_from_namespace_local_name(world,
                                                       ex_ns,
                                                       (const unsigned char*)"bar",
                                                       NULL);

  raptor_turtle_writer_qname(turtle_writer, el_name);
  raptor_free_qname(el_name);

  raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)" ", 1);

  raptor_turtle_writer_literal(turtle_writer, nstack,
                               (const unsigned char*)"10.0", NULL,
                               world->xsd_decimal_uri);

  raptor_turtle_writer_newline(turtle_writer);

  raptor_turtle_writer_decrease_indent(turtle_writer);

  raptor_turtle_writer_raw_counted(turtle_writer, (const unsigned char*)".", 1);
  raptor_turtle_writer_newline(turtle_writer);


  raptor_free_turtle_writer(turtle_writer);

  raptor_free_namespace(ex_ns);

  raptor_free_namespaces(nstack);

  raptor_free_uri(base_uri);


  /* read the byte count before the iostream is freed */
  count = raptor_iostream_tell(iostr);

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  fprintf(stderr, "%s: Freeing iostream\n", program);
#endif
  raptor_free_iostream(iostr);

  if(count != OUT_BYTES_COUNT) {
    fprintf(stderr, "%s: I/O stream wrote %d bytes, expected %d\n", program,
            (int)count, (int)OUT_BYTES_COUNT);
    fputs("[[", stderr);
    (void)fwrite(string, 1, string_len, stderr);
    fputs("]]\n", stderr);
    return 1;
  }

  if(!string) {
    fprintf(stderr, "%s: I/O stream failed to create a string\n", program);
    return 1;
  }
  string_len = strlen((const char*)string);
  if(string_len != count) {
    fprintf(stderr, "%s: I/O stream created a string length %d, expected %d\n", program, (int)string_len, (int)count);
    return 1;
  }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  fprintf(stderr, "%s: Made Turtle string of %d bytes\n", program, (int)string_len);
  fputs("[[", stderr);
  (void)fwrite(string, 1, string_len, stderr);
  fputs("]]\n", stderr);
#endif

  raptor_free_memory(string);

  raptor_free_world(world);

  /* keep gcc -Wall happy */
  return(0);
}

#endif
diff --git a/src/raptor_unicode.c b/src/raptor_unicode.c
new file mode 100644
index 0000000..c50d253
--- /dev/null
+++ b/src/raptor_unicode.c
@@ -0,0 +1,940 @@
/* -*- Mode: c; c-basic-offset: 2 -*-
 *
 * raptor_unicode.c - Raptor Unicode and UTF-8 support
 *
 * Copyright (C) 2002-2010, David Beckett http://www.dajobe.org/
 * Copyright (C) 2002-2004, University of Bristol, UK http://www.bristol.ac.uk/
 *
 * This package is Free Software and part of Redland http://librdf.org/
 *
 * It is licensed under the following three licenses as alternatives:
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3.
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* Unicode defines only the range U+0000 to U+10FFFF */ +const raptor_unichar raptor_unicode_max_codepoint = 0x10FFFF; + + +/** + * raptor_unicode_utf8_string_put_char: + * @c: Unicode character + * @output: UTF-8 string buffer or NULL + * @length: length of output buffer + * + * Encode a Unicode character to a UTF-8 string + * + * If @output is NULL, then will calculate the length rather than + * perform the encoding. This can be used by the called to allocate + * space and then re-call this function with the new buffer. 
+ * + * Return value: number of bytes encoded to output buffer or <0 on failure + **/ +int +raptor_unicode_utf8_string_put_char(raptor_unichar c, + unsigned char *output, size_t length) +{ + size_t size = 0; + + /* check for illegal code positions: + * [ U+D800 to U+DFFF (UTF-16 surrogates) - now allowed ] + * U+FFFE and U+FFFF + */ + if(c == 0xFFFE || c == 0xFFFF) + return -1; + + if (c < 0x00000080) + size = 1; + else if(c < 0x00000800) + size = 2; + else if(c < 0x00010000) + size = 3; + else if(c < 0x00200000) + size = 4; + else if(c < 0x04000000) + size = 5; + else if(c < 0x80000000) + size = 6; + else + return -1; + + /* when no buffer given, return size */ + if(!output) + return RAPTOR_GOOD_CAST(int, size); /* ok since size is in range 1..6 */ + + if(size > length) + return -1; + + switch(size) { + case 6: + output[5] = RAPTOR_GOOD_CAST(unsigned char, 0x80 | (unsigned char)(c & 0x3F)); + c= c >> 6; + /* set bit 2 (bits 7,6,5,4,3,2 less 7,6,5,4,3 set below) on last byte */ + c |= 0x4000000; /* 0x10000 = 0x04 << 24 */ + /* FALLTHROUGH */ + case 5: + output[4] = RAPTOR_GOOD_CAST(unsigned char, 0x80 | (unsigned char)(c & 0x3F)); + c= c >> 6; + /* set bit 3 (bits 7,6,5,4,3 less 7,6,5,4 set below) on last byte */ + c |= 0x200000; /* 0x10000 = 0x08 << 18 */ + /* FALLTHROUGH */ + case 4: + output[3] = RAPTOR_GOOD_CAST(unsigned char, 0x80 | (unsigned char)(c & 0x3F)); + c= c >> 6; + /* set bit 4 (bits 7,6,5,4 less 7,6,5 set below) on last byte */ + c |= 0x10000; /* 0x10000 = 0x10 << 12 */ + /* FALLTHROUGH */ + case 3: + output[2] = RAPTOR_GOOD_CAST(unsigned char, 0x80 | (unsigned char)(c & 0x3F)); + c= c >> 6; + /* set bit 5 (bits 7,6,5 less 7,6 set below) on last byte */ + c |= 0x800; /* 0x800 = 0x20 << 6 */ + /* FALLTHROUGH */ + case 2: + output[1] = RAPTOR_GOOD_CAST(unsigned char, 0x80 | (unsigned char)(c & 0x3F)); + c= c >> 6; + /* set bits 7,6 on last byte */ + c |= 0xc0; + /* FALLTHROUGH */ + case 1: + output[0] = (unsigned char)c; + } + + return 
RAPTOR_GOOD_CAST(int, size); /* ok since size is in range 1..6 */ +} + + +/** + * raptor_unicode_utf8_string_get_char: + * @input: UTF-8 string buffer + * @length: buffer size + * @output: Pointer to the Unicode character or NULL + * + * Decode a UTF-8 encoded string to get a Unicode character. + * + * If output is NULL, then will calculate the number of bytes that + * will be used from the input buffer and not perform the conversion. + * + * Return value: bytes used from input buffer or <0 on failure: -1 input buffer too short or length error, -2 overlong UTF-8 sequence, -3 illegal code positions, -4 code out of range U+0000 to U+10FFFF. In cases -2, -3 and -4 the coded character is stored in the output. + */ +int +raptor_unicode_utf8_string_get_char(const unsigned char *input, size_t length, + raptor_unichar *output) +{ + unsigned char in; + size_t size; + raptor_unichar c = 0; + + if(length < 1) + return -1; + + in=*input++; + if((in & 0x80) == 0) { + size = 1; + c= in & 0x7f; + } else if((in & 0xe0) == 0xc0) { + size = 2; + c= in & 0x1f; + } else if((in & 0xf0) == 0xe0) { + size = 3; + c= in & 0x0f; + } else if((in & 0xf8) == 0xf0) { + size = 4; + c = in & 0x07; + } else if((in & 0xfc) == 0xf8) { + size = 5; + c = in & 0x03; + } else if((in & 0xfe) == 0xfc) { + size = 6; + c = in & 0x01; + } else + return -1; + + + if(!output) + return RAPTOR_GOOD_CAST(int, size); /* ok since size is in range 1..6 */ + + if(length < size) + return -1; + + switch(size) { + case 6: + in=*input++ & 0x3f; + c= c << 6; + c |= in; + /* FALLTHROUGH */ + case 5: + in=*input++ & 0x3f; + c= c << 6; + c |= in; + /* FALLTHROUGH */ + case 4: + in=*input++ & 0x3f; + c= c << 6; + c |= in; + /* FALLTHROUGH */ + case 3: + in=*input++ & 0x3f; + c= c << 6; + c |= in; + /* FALLTHROUGH */ + case 2: + /* '*input' used here since we never need to use new value of input [CLANG] */ + in = *input & 0x3f; + c= c << 6; + c |= in; + /* FALLTHROUGH */ + default: + break; + } + + *output=c; + + /* check for 
overlong UTF-8 sequences */ + switch(size) { + case 2: + if(c < 0x00000080) + return -2; + break; + case 3: + if(c < 0x00000800) + return -2; + break; + case 4: + if(c < 0x00010000) + return -2; + break; + + default: /* 1 */ + break; + } + + + /* check for illegal code positions: + * [ U+D800 to U+DFFF (UTF-16 surrogates) - now allowed ] + * U+FFFE and U+FFFF + */ + if(c == 0xFFFE || c == 0xFFFF) + return -3; + + if(c > raptor_unicode_max_codepoint) + return -4; + + return RAPTOR_GOOD_CAST(int, size); /* ok since size is in range 1..6 */ +} + + +static int raptor_unicode_is_letter(long c); +static int raptor_unicode_is_basechar(long c); +static int raptor_unicode_is_ideographic(long c); +static int raptor_unicode_is_combiningchar(long c); +static int raptor_unicode_is_digit(long c); +static int raptor_unicode_is_extender(long c); + + +/** + * raptor_unicode_is_xml11_namestartchar: + * @c: Unicode character to check + * + * Check if Unicode character is legal to start an XML 1.1 Name + * + * See <ulink url="http://www.w3.org/TR/2004/REC-xml11-20040204/#NT-NameStartChar">Namespaces in XML 1.1 REC 2004-02-04 NameStartChar</ulink> + * updating + * <ulink url="http://www.w3.org/TR/2004/REC-xml11-20040204/">Extensible Markup Language (XML) 1.1 REC 2004-02-04</ulink> sec 2.3, [4a] + * excluding the ':' + * + * Return value: non-0 if legal + **/ +int +raptor_unicode_is_xml11_namestartchar(raptor_unichar c) +{ + return (((c >= 0x0041) && (c <= 0x005A)) || /* [A-Z] */ + (c == 0x005F) || /* '_' */ + ((c >= 0x0061) && (c <= 0x007A)) || /* [a-z] */ + ((c >= 0x00C0) && (c <= 0x00D6)) || + ((c >= 0x00D8) && (c <= 0x00F6)) || + ((c >= 0x00F8) && (c <= 0x02FF)) || + ((c >= 0x0370) && (c <= 0x037D)) || + ((c >= 0x037F) && (c <= 0x1FFF)) || + ((c >= 0x200C) && (c <= 0x200D)) || + ((c >= 0x2070) && (c <= 0x218F)) || + ((c >= 0x2C00) && (c <= 0x2FEF)) || + ((c >= 0x3001) && (c <= 0xD7FF)) || + ((c >= 0xF900) && (c <= 0xFDCF)) || + ((c >= 0xFDF0) && (c <= 0xFFFD)) || + ((c >= 0x10000) 
&& (c <= 0xEFFFF))); +} + + +/** + * raptor_unicode_is_xml10_namestartchar: + * @c: Unicode character to check + * + * Check if Unicode character is legal to start an XML 1.0 Name + * + * See <ulink url="http://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCName">Namespaces in XML REC 1999-01-14</ulink> + * updating + * <ulink url="http://www.w3.org/TR/2004/REC-xml-20040204/">Extensible Markup Language (XML) 1.0 (Third Edition) REC 2004-02-04</ulink> + * excluding the ':' + * + * Return value: non-0 if legal + **/ +int +raptor_unicode_is_xml10_namestartchar(raptor_unichar c) +{ + return (raptor_unicode_is_letter(c) || + (c == '_')); +} + + +/** + * raptor_unicode_is_namestartchar: + * @c: Unicode character to check + * + * Check if Unicode character is legal to start an XML Name + * + * Return value: non-0 if the character is legal + **/ +int +raptor_unicode_is_namestartchar(raptor_unichar c) { +#ifdef RAPTOR_XML_1_1 + return raptor_unicode_is_xml11_namestartchar(c); +#else + return raptor_unicode_is_xml10_namestartchar(c); +#endif +} + + +/** + * raptor_unicode_is_xml11_namechar: + * @c: Unicode character + * + * Check if a Unicode codepoint is a legal to continue an XML 1.1 Name + * + * See <ulink url="http://www.w3.org/TR/2004/REC-xml11-20040204/">Namespaces in XML 1.1 REC 2004-02-04</ulink> + * updating + * <ulink url="http://www.w3.org/TR/2004/REC-xml-20040204/">Extensible Markup Language (XML) 1.0 (Third Edition) REC 2004-02-04</ulink> sec 2.3, [4a] + * excluding the ':' + * + * Return value: non-0 if legal + **/ +int +raptor_unicode_is_xml11_namechar(raptor_unichar c) +{ + return (raptor_unicode_is_xml11_namestartchar(c) || + (c == 0x002D) || /* '-' */ + (c == 0x002E) || /* '.' 
*/ + (c >= 0x0030 && c <= 0x0039) || /* 0-9 */ + (c == 0x00B7) || + (c >= 0x0300 && c <=0x036F) || + (c >= 0x203F && c <=0x2040)); +} + + +/** + * raptor_unicode_is_xml10_namechar: + * @c: Unicode character + * + * Check if a Unicode codepoint is a legal to continue an XML 1.0 Name + * + * See <ulink url="http://www.w3.org/TR/1999/REC-xml-names-19990114/#NT-NCNameChar">Namespaces in XML REC 1999-01-14 NCNameChar</ulink> + * updating + * <ulink url="http://www.w3.org/TR/2004/REC-xml-20040204/">Extensible Markup Language (XML) 1.0 (Third Edition) REC 2004-02-04</ulink> + * excluding the ':' + * + * Return value: non-0 if legal + **/ +int +raptor_unicode_is_xml10_namechar(raptor_unichar c) +{ + return (raptor_unicode_is_letter(c) || + raptor_unicode_is_digit(c) || + (c == 0x002E) || /* '.' */ + (c == 0x002D) || /* '-' */ + (c == 0x005F) || /* '_' */ + raptor_unicode_is_combiningchar(c) || + raptor_unicode_is_extender(c)); +} + + +/** + * raptor_unicode_is_namechar: + * @c: Unicode character to check + * + * Check if Unicode character is legal to continue an XML Name . 
+ * + * Return value: non-0 if the character is legal + **/ +int +raptor_unicode_is_namechar(raptor_unichar c) +{ +#ifdef RAPTOR_XML_1_1 + return raptor_unicode_is_xml11_namechar(c); +#else + return raptor_unicode_is_xml10_namechar(c); +#endif +} + + +/* + * All this below was derived by machine-transforming the classes in Appendix B + * of http://www.w3.org/TR/2000/REC-xml-20001006 + */ + +static int +raptor_unicode_is_letter(long c) +{ + return(raptor_unicode_is_basechar(c) || + raptor_unicode_is_ideographic(c)); +} + + +static int +raptor_unicode_is_basechar(long c) +{ + /* http://www.w3.org/TR/2000/REC-xml-20001006#NT-BaseChar */ + return((c >= 0x0041 && c <= 0x005A ) || + (c >= 0x0061 && c <= 0x007A ) || + (c >= 0x00C0 && c <= 0x00D6 ) || + (c >= 0x00D8 && c <= 0x00F6 ) || + (c >= 0x00F8 && c <= 0x00FF ) || + (c >= 0x0100 && c <= 0x0131 ) || + (c >= 0x0134 && c <= 0x013E ) || + (c >= 0x0141 && c <= 0x0148 ) || + (c >= 0x014A && c <= 0x017E ) || + (c >= 0x0180 && c <= 0x01C3 ) || + (c >= 0x01CD && c <= 0x01F0 ) || + (c >= 0x01F4 && c <= 0x01F5 ) || + (c >= 0x01FA && c <= 0x0217 ) || + (c >= 0x0250 && c <= 0x02A8 ) || + (c >= 0x02BB && c <= 0x02C1 ) || + (c == 0x0386) || + (c >= 0x0388 && c <= 0x038A ) || + (c == 0x038C) || + (c >= 0x038E && c <= 0x03A1 ) || + (c >= 0x03A3 && c <= 0x03CE ) || + (c >= 0x03D0 && c <= 0x03D6 ) || + (c == 0x03DA) || + (c == 0x03DC) || + (c == 0x03DE) || + (c == 0x03E0) || + (c >= 0x03E2 && c <= 0x03F3 ) || + (c >= 0x0401 && c <= 0x040C ) || + (c >= 0x040E && c <= 0x044F ) || + (c >= 0x0451 && c <= 0x045C ) || + (c >= 0x045E && c <= 0x0481 ) || + (c >= 0x0490 && c <= 0x04C4 ) || + (c >= 0x04C7 && c <= 0x04C8 ) || + (c >= 0x04CB && c <= 0x04CC ) || + (c >= 0x04D0 && c <= 0x04EB ) || + (c >= 0x04EE && c <= 0x04F5 ) || + (c >= 0x04F8 && c <= 0x04F9 ) || + (c >= 0x0531 && c <= 0x0556 ) || + (c == 0x0559) || + (c >= 0x0561 && c <= 0x0586 ) || + (c >= 0x05D0 && c <= 0x05EA ) || + (c >= 0x05F0 && c <= 0x05F2 ) || + (c >= 0x0621 && c <= 
0x063A ) || + (c >= 0x0641 && c <= 0x064A ) || + (c >= 0x0671 && c <= 0x06B7 ) || + (c >= 0x06BA && c <= 0x06BE ) || + (c >= 0x06C0 && c <= 0x06CE ) || + (c >= 0x06D0 && c <= 0x06D3 ) || + (c == 0x06D5) || + (c >= 0x06E5 && c <= 0x06E6 ) || + (c >= 0x0905 && c <= 0x0939 ) || + (c == 0x093D) || + (c >= 0x0958 && c <= 0x0961 ) || + (c >= 0x0985 && c <= 0x098C ) || + (c >= 0x098F && c <= 0x0990 ) || + (c >= 0x0993 && c <= 0x09A8 ) || + (c >= 0x09AA && c <= 0x09B0 ) || + (c == 0x09B2) || + (c >= 0x09B6 && c <= 0x09B9 ) || + (c >= 0x09DC && c <= 0x09DD ) || + (c >= 0x09DF && c <= 0x09E1 ) || + (c >= 0x09F0 && c <= 0x09F1 ) || + (c >= 0x0A05 && c <= 0x0A0A ) || + (c >= 0x0A0F && c <= 0x0A10 ) || + (c >= 0x0A13 && c <= 0x0A28 ) || + (c >= 0x0A2A && c <= 0x0A30 ) || + (c >= 0x0A32 && c <= 0x0A33 ) || + (c >= 0x0A35 && c <= 0x0A36 ) || + (c >= 0x0A38 && c <= 0x0A39 ) || + (c >= 0x0A59 && c <= 0x0A5C ) || + (c == 0x0A5E) || + (c >= 0x0A72 && c <= 0x0A74 ) || + (c >= 0x0A85 && c <= 0x0A8B ) || + (c == 0x0A8D) || + (c >= 0x0A8F && c <= 0x0A91 ) || + (c >= 0x0A93 && c <= 0x0AA8 ) || + (c >= 0x0AAA && c <= 0x0AB0 ) || + (c >= 0x0AB2 && c <= 0x0AB3 ) || + (c >= 0x0AB5 && c <= 0x0AB9 ) || + (c == 0x0ABD) || + (c == 0x0AE0) || + (c >= 0x0B05 && c <= 0x0B0C ) || + (c >= 0x0B0F && c <= 0x0B10 ) || + (c >= 0x0B13 && c <= 0x0B28 ) || + (c >= 0x0B2A && c <= 0x0B30 ) || + (c >= 0x0B32 && c <= 0x0B33 ) || + (c >= 0x0B36 && c <= 0x0B39 ) || + (c == 0x0B3D) || + (c >= 0x0B5C && c <= 0x0B5D ) || + (c >= 0x0B5F && c <= 0x0B61 ) || + (c >= 0x0B85 && c <= 0x0B8A ) || + (c >= 0x0B8E && c <= 0x0B90 ) || + (c >= 0x0B92 && c <= 0x0B95 ) || + (c >= 0x0B99 && c <= 0x0B9A ) || + (c == 0x0B9C) || + (c >= 0x0B9E && c <= 0x0B9F ) || + (c >= 0x0BA3 && c <= 0x0BA4 ) || + (c >= 0x0BA8 && c <= 0x0BAA ) || + (c >= 0x0BAE && c <= 0x0BB5 ) || + (c >= 0x0BB7 && c <= 0x0BB9 ) || + (c >= 0x0C05 && c <= 0x0C0C ) || + (c >= 0x0C0E && c <= 0x0C10 ) || + (c >= 0x0C12 && c <= 0x0C28 ) || + (c >= 0x0C2A && c <= 0x0C33 ) 
|| + (c >= 0x0C35 && c <= 0x0C39 ) || + (c >= 0x0C60 && c <= 0x0C61 ) || + (c >= 0x0C85 && c <= 0x0C8C ) || + (c >= 0x0C8E && c <= 0x0C90 ) || + (c >= 0x0C92 && c <= 0x0CA8 ) || + (c >= 0x0CAA && c <= 0x0CB3 ) || + (c >= 0x0CB5 && c <= 0x0CB9 ) || + (c == 0x0CDE) || + (c >= 0x0CE0 && c <= 0x0CE1 ) || + (c >= 0x0D05 && c <= 0x0D0C ) || + (c >= 0x0D0E && c <= 0x0D10 ) || + (c >= 0x0D12 && c <= 0x0D28 ) || + (c >= 0x0D2A && c <= 0x0D39 ) || + (c >= 0x0D60 && c <= 0x0D61 ) || + (c >= 0x0E01 && c <= 0x0E2E ) || + (c == 0x0E30) || + (c >= 0x0E32 && c <= 0x0E33 ) || + (c >= 0x0E40 && c <= 0x0E45 ) || + (c >= 0x0E81 && c <= 0x0E82 ) || + (c == 0x0E84) || + (c >= 0x0E87 && c <= 0x0E88 ) || + (c == 0x0E8A) || + (c == 0x0E8D) || + (c >= 0x0E94 && c <= 0x0E97 ) || + (c >= 0x0E99 && c <= 0x0E9F ) || + (c >= 0x0EA1 && c <= 0x0EA3 ) || + (c == 0x0EA5) || + (c == 0x0EA7) || + (c >= 0x0EAA && c <= 0x0EAB ) || + (c >= 0x0EAD && c <= 0x0EAE ) || + (c == 0x0EB0) || + (c >= 0x0EB2 && c <= 0x0EB3 ) || + (c == 0x0EBD) || + (c >= 0x0EC0 && c <= 0x0EC4 ) || + (c >= 0x0F40 && c <= 0x0F47 ) || + (c >= 0x0F49 && c <= 0x0F69 ) || + (c >= 0x10A0 && c <= 0x10C5 ) || + (c >= 0x10D0 && c <= 0x10F6 ) || + (c == 0x1100) || + (c >= 0x1102 && c <= 0x1103 ) || + (c >= 0x1105 && c <= 0x1107 ) || + (c == 0x1109) || + (c >= 0x110B && c <= 0x110C ) || + (c >= 0x110E && c <= 0x1112 ) || + (c == 0x113C) || + (c == 0x113E) || + (c == 0x1140) || + (c == 0x114C) || + (c == 0x114E) || + (c == 0x1150) || + (c >= 0x1154 && c <= 0x1155 ) || + (c == 0x1159) || + (c >= 0x115F && c <= 0x1161 ) || + (c == 0x1163) || + (c == 0x1165) || + (c == 0x1167) || + (c == 0x1169) || + (c >= 0x116D && c <= 0x116E ) || + (c >= 0x1172 && c <= 0x1173 ) || + (c == 0x1175) || + (c == 0x119E) || + (c == 0x11A8) || + (c == 0x11AB) || + (c >= 0x11AE && c <= 0x11AF ) || + (c >= 0x11B7 && c <= 0x11B8 ) || + (c == 0x11BA) || + (c >= 0x11BC && c <= 0x11C2 ) || + (c == 0x11EB) || + (c == 0x11F0) || + (c == 0x11F9) || + (c >= 0x1E00 && c <= 
0x1E9B ) || + (c >= 0x1EA0 && c <= 0x1EF9 ) || + (c >= 0x1F00 && c <= 0x1F15 ) || + (c >= 0x1F18 && c <= 0x1F1D ) || + (c >= 0x1F20 && c <= 0x1F45 ) || + (c >= 0x1F48 && c <= 0x1F4D ) || + (c >= 0x1F50 && c <= 0x1F57 ) || + (c == 0x1F59) || + (c == 0x1F5B) || + (c == 0x1F5D) || + (c >= 0x1F5F && c <= 0x1F7D ) || + (c >= 0x1F80 && c <= 0x1FB4 ) || + (c >= 0x1FB6 && c <= 0x1FBC ) || + (c == 0x1FBE) || + (c >= 0x1FC2 && c <= 0x1FC4 ) || + (c >= 0x1FC6 && c <= 0x1FCC ) || + (c >= 0x1FD0 && c <= 0x1FD3 ) || + (c >= 0x1FD6 && c <= 0x1FDB ) || + (c >= 0x1FE0 && c <= 0x1FEC ) || + (c >= 0x1FF2 && c <= 0x1FF4 ) || + (c >= 0x1FF6 && c <= 0x1FFC ) || + (c == 0x2126) || + (c >= 0x212A && c <= 0x212B ) || + (c == 0x212E) || + (c >= 0x2180 && c <= 0x2182 ) || + (c >= 0x3041 && c <= 0x3094 ) || + (c >= 0x30A1 && c <= 0x30FA ) || + (c >= 0x3105 && c <= 0x312C ) || + (c >= 0xAC00 && c <= 0xD7A3 ) + ); +} + + +static int +raptor_unicode_is_ideographic(long c) +{ + /* http://www.w3.org/TR/2000/REC-xml-20001006#NT-Ideographic */ + return((c >= 0x4E00 && c <= 0x9FA5 ) || + (c == 0x3007) || + (c >= 0x3021 && c <= 0x3029 )); +} + + +static int +raptor_unicode_is_combiningchar(long c) +{ + /* http://www.w3.org/TR/2000/REC-xml-20001006#NT-CombiningChar */ + return((c >= 0x0300 && c <= 0x0345 ) || + (c >= 0x0360 && c <= 0x0361 ) || + (c >= 0x0483 && c <= 0x0486 ) || + (c >= 0x0591 && c <= 0x05A1 ) || + (c >= 0x05A3 && c <= 0x05B9 ) || + (c >= 0x05BB && c <= 0x05BD ) || + (c == 0x05BF) || + (c >= 0x05C1 && c <= 0x05C2 ) || + (c == 0x05C4) || + (c >= 0x064B && c <= 0x0652 ) || + (c == 0x0670) || + (c >= 0x06D6 && c <= 0x06DC ) || + (c >= 0x06DD && c <= 0x06DF ) || + (c >= 0x06E0 && c <= 0x06E4 ) || + (c >= 0x06E7 && c <= 0x06E8 ) || + (c >= 0x06EA && c <= 0x06ED ) || + (c >= 0x0901 && c <= 0x0903 ) || + (c == 0x093C) || + (c >= 0x093E && c <= 0x094C ) || + (c == 0x094D) || + (c >= 0x0951 && c <= 0x0954 ) || + (c >= 0x0962 && c <= 0x0963 ) || + (c >= 0x0981 && c <= 0x0983 ) || + (c == 0x09BC) 
|| + (c == 0x09BE) || + (c == 0x09BF) || + (c >= 0x09C0 && c <= 0x09C4 ) || + (c >= 0x09C7 && c <= 0x09C8 ) || + (c >= 0x09CB && c <= 0x09CD ) || + (c == 0x09D7) || + (c >= 0x09E2 && c <= 0x09E3 ) || + (c == 0x0A02) || + (c == 0x0A3C) || + (c == 0x0A3E) || + (c == 0x0A3F) || + (c >= 0x0A40 && c <= 0x0A42 ) || + (c >= 0x0A47 && c <= 0x0A48 ) || + (c >= 0x0A4B && c <= 0x0A4D ) || + (c >= 0x0A70 && c <= 0x0A71 ) || + (c >= 0x0A81 && c <= 0x0A83 ) || + (c == 0x0ABC) || + (c >= 0x0ABE && c <= 0x0AC5 ) || + (c >= 0x0AC7 && c <= 0x0AC9 ) || + (c >= 0x0ACB && c <= 0x0ACD ) || + (c >= 0x0B01 && c <= 0x0B03 ) || + (c == 0x0B3C) || + (c >= 0x0B3E && c <= 0x0B43 ) || + (c >= 0x0B47 && c <= 0x0B48 ) || + (c >= 0x0B4B && c <= 0x0B4D ) || + (c >= 0x0B56 && c <= 0x0B57 ) || + (c >= 0x0B82 && c <= 0x0B83 ) || + (c >= 0x0BBE && c <= 0x0BC2 ) || + (c >= 0x0BC6 && c <= 0x0BC8 ) || + (c >= 0x0BCA && c <= 0x0BCD ) || + (c == 0x0BD7) || + (c >= 0x0C01 && c <= 0x0C03 ) || + (c >= 0x0C3E && c <= 0x0C44 ) || + (c >= 0x0C46 && c <= 0x0C48 ) || + (c >= 0x0C4A && c <= 0x0C4D ) || + (c >= 0x0C55 && c <= 0x0C56 ) || + (c >= 0x0C82 && c <= 0x0C83 ) || + (c >= 0x0CBE && c <= 0x0CC4 ) || + (c >= 0x0CC6 && c <= 0x0CC8 ) || + (c >= 0x0CCA && c <= 0x0CCD ) || + (c >= 0x0CD5 && c <= 0x0CD6 ) || + (c >= 0x0D02 && c <= 0x0D03 ) || + (c >= 0x0D3E && c <= 0x0D43 ) || + (c >= 0x0D46 && c <= 0x0D48 ) || + (c >= 0x0D4A && c <= 0x0D4D ) || + (c == 0x0D57) || + (c == 0x0E31) || + (c >= 0x0E34 && c <= 0x0E3A ) || + (c >= 0x0E47 && c <= 0x0E4E ) || + (c == 0x0EB1) || + (c >= 0x0EB4 && c <= 0x0EB9 ) || + (c >= 0x0EBB && c <= 0x0EBC ) || + (c >= 0x0EC8 && c <= 0x0ECD ) || + (c >= 0x0F18 && c <= 0x0F19 ) || + (c == 0x0F35) || + (c == 0x0F37) || + (c == 0x0F39) || + (c == 0x0F3E) || + (c == 0x0F3F) || + (c >= 0x0F71 && c <= 0x0F84 ) || + (c >= 0x0F86 && c <= 0x0F8B ) || + (c >= 0x0F90 && c <= 0x0F95 ) || + (c == 0x0F97) || + (c >= 0x0F99 && c <= 0x0FAD ) || + (c >= 0x0FB1 && c <= 0x0FB7 ) || + (c == 0x0FB9) || + (c 
>= 0x20D0 && c <= 0x20DC ) || + (c == 0x20E1) || + (c >= 0x302A && c <= 0x302F ) || + (c == 0x3099) || + (c == 0x309A)); +} + + +static int +raptor_unicode_is_digit(long c) +{ + /* http://www.w3.org/TR/2000/REC-xml-20001006#NT-Digit */ + return((c >= 0x0030 && c <= 0x0039 ) || + (c >= 0x0660 && c <= 0x0669 ) || + (c >= 0x06F0 && c <= 0x06F9 ) || + (c >= 0x0966 && c <= 0x096F ) || + (c >= 0x09E6 && c <= 0x09EF ) || + (c >= 0x0A66 && c <= 0x0A6F ) || + (c >= 0x0AE6 && c <= 0x0AEF ) || + (c >= 0x0B66 && c <= 0x0B6F ) || + (c >= 0x0BE7 && c <= 0x0BEF ) || + (c >= 0x0C66 && c <= 0x0C6F ) || + (c >= 0x0CE6 && c <= 0x0CEF ) || + (c >= 0x0D66 && c <= 0x0D6F ) || + (c >= 0x0E50 && c <= 0x0E59 ) || + (c >= 0x0ED0 && c <= 0x0ED9 ) || + (c >= 0x0F20 && c <= 0x0F29 )); +} + + +static int +raptor_unicode_is_extender(long c) +{ + /* http://www.w3.org/TR/2000/REC-xml-20001006#NT-Extender */ + return((c == 0x00B7) || + (c == 0x02D0) || + (c == 0x02D1) || + (c == 0x0387) || + (c == 0x0640) || + (c == 0x0E46) || + (c == 0x0EC6) || + (c == 0x3005) || + (c >= 0x3031 && c <= 0x3035 ) || + (c >= 0x309D && c <= 0x309E ) || + (c >= 0x30FC && c <= 0x30FE )); +} + + +/* + * raptor_unicode_check_utf8_nfc_string: + * @input: UTF-8 string + * @length: length of string + * @error: pointer to error flag (or NULL) + * + * INTERNAL - Check if a Unicode UTF-8 encoded string is in Unicode Normal Form C. + * + * Return value: <0 on error, 0 if not NFC, >0 if is NFC + **/ +int +raptor_unicode_check_utf8_nfc_string(const unsigned char *input, size_t length) +{ + unsigned int i; + int plain = 1; + int rc; + + for(i = 0; i < length; i++) + if(input[i] > 0x7f) { + plain = 0; + break; + } + + if(plain) + return 1; + +#ifdef RAPTOR_NFC_ICU + rc = raptor_nfc_icu_check(input, length); +#else + rc = 1; +#endif + return rc; +} + + +/** + * raptor_unicode_check_utf8_string: + * @string: UTF-8 string + * @length: length of string + * + * Check a string is valid Unicode UTF-8. 
+ * + * Return value: Non 0 if the string is UTF-8 + **/ +int +raptor_unicode_check_utf8_string(const unsigned char *string, size_t length) +{ + while(length > 0) { + raptor_unichar unichar = 0; + + int unichar_len; + unichar_len = raptor_unicode_utf8_string_get_char(string, length, &unichar); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > length) + return 0; + + if(unichar > raptor_unicode_max_codepoint) + return 0; + + string += unichar_len; + length -= unichar_len; + } + return 1; +} + + +/** + * raptor_unicode_utf8_strlen: + * @string: buffer + * @length: buffer length + * + * Calculate the number of Unicode characters in the given UTF-8 encoded buffer + * + * Return value: number of characters or <0 if sequence is invalid + */ +int +raptor_unicode_utf8_strlen(const unsigned char *string, size_t length) +{ + int unicode_length = 0; + + while(length > 0) { + int unichar_len; + unichar_len = raptor_unicode_utf8_string_get_char(string, length, NULL); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > length) { + unicode_length = -1; + break; + } + + string += unichar_len; + length -= unichar_len; + + unicode_length++; + } + + return unicode_length; +} + + +/** + * raptor_unicode_utf8_substr: + * @dest: destination string buffer to write to (or NULL) + * @dest_length_p: location to store actual destination length (or NULL) + * @src: source string + * @src_length: source length in bytes + * @startingLoc: starting location offset 0 for first Unicode character + * @length: number of Unicode characters to copy at offset @startingLoc (or < 0) + * + * Get a unicode (UTF-8) substring of an existing UTF-8 string + * + * If @dest is NULL, returns the number of bytes needed to write and + * does no work. 
+ * + * Return value: number of bytes used in destination string or 0 on failure + */ +size_t +raptor_unicode_utf8_substr(unsigned char* dest, size_t* dest_length_p, + const unsigned char* src, size_t src_length, + int startingLoc, int length) +{ + size_t dest_length = 0; /* destination unicode characters count */ + size_t dest_bytes = 0; /* destination UTF-8 bytes count */ + int dest_offset = 0; /* destination string unicode characters index */ + unsigned char* p = dest; + + if(!src) + return 0; + + while(src_length > 0) { + int unichar_len; + + unichar_len = raptor_unicode_utf8_string_get_char(src, src_length, NULL); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > src_length) + break; + + if(dest_offset >= startingLoc) { + if(p) { + /* copy 1 Unicode character to dest */ + memcpy(p, src, RAPTOR_GOOD_CAST(size_t, unichar_len)); + p += unichar_len; + } + dest_bytes += unichar_len; + + dest_length++; + if(length >= 0 && dest_length == RAPTOR_GOOD_CAST(size_t, length)) + break; + } + + src += unichar_len; + src_length -= unichar_len; + + dest_offset++; + } + + if(p) + *p = '\0'; + + if(dest_length_p) + *dest_length_p = dest_length; + + return dest_bytes; +} diff --git a/src/raptor_uri.c b/src/raptor_uri.c new file mode 100644 index 0000000..53768e7 --- /dev/null +++ b/src/raptor_uri.c @@ -0,0 +1,2099 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_uri.c - Raptor URI class + * + * Copyright (C) 2002-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2002-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#if defined(STANDALONE) && defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) +/* for lstat() used in main() test which is in POSIX */ +#ifndef _POSIX_C_SOURCE +#define _POSIX_C_SOURCE 200809L +#endif +#endif + +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +/* for ptrdiff_t */ +#ifdef HAVE_STDDEF_H +#include <stddef.h> +#endif +#ifdef HAVE_UNISTD_H +#include <unistd.h> +#endif +#ifdef HAVE_LIMITS_H +#include <limits.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +/* Symbian OS uses similar path mappings as Windows but does not necessarily have the WIN32 flag defined */ +#if defined(__SYMBIAN32__) && !defined(WIN32) +#define WIN32 +#endif + + +/* raptor_uri structure: a URI string stored with its length and a usage count */ +struct raptor_uri_s { + /* raptor_world object this URI was created in */ + raptor_world *world; + /* the URI string; NUL terminated copy made by the constructor */ + unsigned char *string; + /* length of string in bytes, not counting the terminating NUL */ + unsigned int length; + /* usage count: raised by raptor_uri_copy() and by constructor lookups that find this URI, lowered by raptor_free_uri() which destroys the URI once it is no longer above 0 */ + int usage; +}; + + +#ifndef STANDALONE + +/** + * raptor_new_uri_from_counted_string: + * @world: raptor_world object + * @uri_string: URI string. + * @length: length of URI string + * + * Constructor - create a raptor URI from a UTF-8 encoded Unicode string. + * + * Note: The @uri_string need not be NUL terminated - a NUL will be + * added to the copied string used.
+ * + * Return value: a new #raptor_uri object or NULL on failure. + **/ +raptor_uri* +raptor_new_uri_from_counted_string(raptor_world* world, + const unsigned char *uri_string, + size_t length) +{ + raptor_uri* new_uri; + unsigned char *new_string; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!uri_string || !*uri_string) + return NULL; + + raptor_world_open(world); + + if(world->uris_tree) { + raptor_uri key; /* on stack - not allocated */ + + /* just to be safe */ + memset(&key, 0, sizeof(key)); + + key.string = (unsigned char*)uri_string; + key.length = (unsigned int)length; + + /* if existing URI found in tree, return it */ + new_uri = (raptor_uri*)raptor_avltree_search(world->uris_tree, &key); + if(new_uri) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("Found existing URI %s with current usage %d\n", + uri_string, new_uri->usage); +#endif + + new_uri->usage++; + + goto unlock; + } + } + + + /* otherwise create a new one */ + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG1("Creating new URI '"); + fwrite(uri_string, sizeof(char), length, RAPTOR_DEBUG_FH); + fputs("' in hash\n", RAPTOR_DEBUG_FH); +#endif + + new_uri = RAPTOR_CALLOC(raptor_uri*, 1, sizeof(*new_uri)); + if(!new_uri) + goto unlock; + + new_uri->world = world; + new_uri->length = (unsigned int)length; + + new_string = RAPTOR_MALLOC(unsigned char*, length + 1); + if(!new_string) { + RAPTOR_FREE(raptor_uri, new_uri); + new_uri=NULL; + goto unlock; + } + + memcpy((char*)new_string, (const char*)uri_string, length); + new_string[length] = '\0'; + new_uri->string = new_string; + + new_uri->usage = 1; /* for user */ + + /* store in tree */ + if(world->uris_tree) { + if(raptor_avltree_add(world->uris_tree, new_uri)) { + RAPTOR_FREE(char*, new_string); + RAPTOR_FREE(raptor_uri, new_uri); + new_uri = NULL; + } + } + + unlock: + + return new_uri; +} + + +/** + * raptor_new_uri: + * @world: raptor_world object + * @uri_string: URI string. 
+ * + * Constructor - create a raptor URI from a UTF-8 encoded Unicode string. + * + * Return value: a new #raptor_uri object or NULL on failure. + **/ +raptor_uri* +raptor_new_uri(raptor_world* world, const unsigned char *uri_string) +{ + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!uri_string) + return NULL; + + raptor_world_open(world); + + return raptor_new_uri_from_counted_string(world, uri_string, + strlen((const char*)uri_string)); +} + + +/** + * raptor_new_uri_from_uri_local_name: + * @world: raptor_world object + * @uri: existing #raptor_uri + * @local_name: local name + * + * Constructor - create a raptor URI from an existing URI and a local name. + * + * Creates a new URI from the concatenation of the @local_name to the + * @uri. This is NOT relative URI resolution, which is done by the + * raptor_new_uri_relative_to_base() constructor. + * + * Return value: a new #raptor_uri object or NULL on failure. + **/ +raptor_uri* +raptor_new_uri_from_uri_local_name(raptor_world* world, raptor_uri *uri, + const unsigned char *local_name) +{ + size_t len; + unsigned char *new_string; + raptor_uri* new_uri; + size_t local_name_length; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!uri) + return NULL; + + raptor_world_open(world); + + local_name_length = strlen((const char*)local_name); + + len = uri->length + local_name_length; + new_string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!new_string) + return NULL; + + memcpy((char*)new_string, (const char*)uri->string, uri->length); + memcpy((char*)(new_string + uri->length), (const char*)local_name, + local_name_length + 1); + + new_uri = raptor_new_uri_from_counted_string(world, new_string, len); + RAPTOR_FREE(char*, new_string); + + return new_uri; +} + + +/** + * raptor_new_uri_relative_to_base_counted: + * @world: raptor_world object + * @base_uri: existing base URI + * @uri_string: relative URI string + * @uri_len: length of URI string (or 0) + * + * Constructor - create a raptor URI from a base URI and 
a relative counted URI string. + * + * Return value: a new #raptor_uri object or NULL on failure. + **/ +raptor_uri* +raptor_new_uri_relative_to_base_counted(raptor_world* world, + raptor_uri *base_uri, + const unsigned char *uri_string, + size_t uri_len) +{ + unsigned char *buffer; + size_t buffer_length; + raptor_uri* new_uri; + size_t actual_length; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!base_uri || !uri_string) + return NULL; + + if(!uri_len) + uri_len = strlen(RAPTOR_GOOD_CAST(const char*, uri_string)); + + raptor_world_open(world); + + /* If URI string is empty, just copy base URI */ + if(!*uri_string) + return raptor_uri_copy(base_uri); + + /* +1 for adding any missing URI path '/' */ + buffer_length = base_uri->length + uri_len + 1; + buffer = RAPTOR_MALLOC(unsigned char*, buffer_length + 1); + if(!buffer) + return NULL; + + actual_length = raptor_uri_resolve_uri_reference(base_uri->string, uri_string, + buffer, buffer_length); + + new_uri = raptor_new_uri_from_counted_string(world, buffer, actual_length); + RAPTOR_FREE(char*, buffer); + return new_uri; +} + + +/** + * raptor_new_uri_relative_to_base: + * @world: raptor_world object + * @base_uri: existing base URI + * @uri_string: relative URI string + * + * Constructor - create a raptor URI from a base URI and a relative URI string. + * + * Use raptor_new_uri_relative_to_base_counted() if the URI string length is known + * + * Return value: a new #raptor_uri object or NULL on failure. + **/ +raptor_uri* +raptor_new_uri_relative_to_base(raptor_world* world, + raptor_uri *base_uri, + const unsigned char *uri_string) +{ + return raptor_new_uri_relative_to_base_counted(world, base_uri, + uri_string, 0); +} + + +/** + * raptor_new_uri_from_id: + * @world: raptor_world object + * @base_uri: existing base URI + * @id: RDF ID + * + * Constructor - create a new URI from a base URI and RDF ID. + * + * This creates a URI equivalent to concatenating @base_uri with + * ## and @id. 
+ * + * Return value: a new #raptor_uri object or NULL on failure. + **/ +raptor_uri* +raptor_new_uri_from_id(raptor_world *world, raptor_uri *base_uri, + const unsigned char *id) +{ + raptor_uri *new_uri; + unsigned char *local_name; + size_t len; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!base_uri || !id) + return NULL; + + raptor_world_open(world); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Using ID %s\n", id); +#endif + + len = strlen((char*)id); + /* "#id\0" */ + local_name = RAPTOR_MALLOC(unsigned char*, len + 1 + 1); + if(!local_name) + return NULL; + + *local_name = '#'; + memcpy(local_name + 1, id, len + 1); /* len+1 to copy NUL */ + + new_uri = raptor_new_uri_relative_to_base(world, base_uri, local_name); + RAPTOR_FREE(char*, local_name); + return new_uri; +} + + +/** + * raptor_new_uri_for_rdf_concept: + * @world: raptor_world object + * @name: RDF namespace concept + * + * Constructor - create a raptor URI for the RDF namespace concept name. + * + * Example: u=raptor_new_uri_for_rdf_concept("value") creates a new + * URI for the rdf:value term. 
+ * + * Return value: a new #raptor_uri object or NULL on failure + **/ +raptor_uri* +raptor_new_uri_for_rdf_concept(raptor_world* world, const unsigned char *name) +{ + raptor_uri *new_uri; + unsigned char *new_uri_string; + const unsigned char *base_uri_string = raptor_rdf_namespace_uri; + size_t base_uri_string_len = raptor_rdf_namespace_uri_len; + size_t new_uri_string_len; + size_t name_len; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!name) + return NULL; + + raptor_world_open(world); + + name_len = strlen((const char*)name); + new_uri_string_len = base_uri_string_len + name_len; + new_uri_string = RAPTOR_MALLOC(unsigned char*, new_uri_string_len + 1); + if(!new_uri_string) + return NULL; + + memcpy(new_uri_string, base_uri_string, base_uri_string_len); + memcpy(new_uri_string + base_uri_string_len, name, name_len + 1); /* copy NUL */ + + new_uri = raptor_new_uri_from_counted_string(world, new_uri_string, + new_uri_string_len); + RAPTOR_FREE(char*, new_uri_string); + + return new_uri; +} + + +/** + * raptor_new_uri_from_uri_or_file_string: + * @world: raptor_world object + * @base_uri: existing base URI + * @uri_or_file_string: URI string or filename + * + * Constructor - create a raptor URI from a string that is a relative or absolute URI or a filename + * + * If the @uri_or_file_string is a filename PATH that exists, the + * result will be a URI file://PATH + * + * Return value: a new #raptor_uri object or NULL on failure + **/ +raptor_uri* +raptor_new_uri_from_uri_or_file_string(raptor_world* world, + raptor_uri* base_uri, + const unsigned char* uri_or_file_string) +{ + raptor_uri* new_uri = NULL; + const unsigned char* new_uri_string; + const char* path; + + if(raptor_uri_filename_exists(uri_or_file_string) > 0) { + /* uri_or_file_string is a file name, not a file: URI */ + path = RAPTOR_GOOD_CAST(const char*, uri_or_file_string); + } else { + new_uri = raptor_new_uri_relative_to_base(world, base_uri, + uri_or_file_string); + new_uri_string = 
raptor_uri_as_string(new_uri); + path = raptor_uri_uri_string_to_counted_filename_fragment(new_uri_string, + NULL, NULL, NULL); + } + + if(path) { + if(new_uri) { + raptor_free_uri(new_uri); + new_uri = NULL; + } + + /* new_uri_string is a string like "file://" + path */ + new_uri_string = raptor_uri_filename_to_uri_string(path); + if(path != RAPTOR_GOOD_CAST(const char*, uri_or_file_string)) + RAPTOR_FREE(const char*, path); + + new_uri = raptor_new_uri(world, new_uri_string); + RAPTOR_FREE(char*, new_uri_string); + } + + return new_uri; +} + + +/** + * raptor_free_uri: + * @uri: URI to destroy + * + * Destructor - destroy a #raptor_uri object + **/ +void +raptor_free_uri(raptor_uri *uri) +{ + if(!uri) + return; + + uri->usage--; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("URI %s usage count now %d\n", uri->string, uri->usage); +#endif + + /* decrement usage, don't free if not 0 yet*/ + if(uri->usage > 0) { + return; + } + + /* this does not free the uri */ + if(uri->world->uris_tree) + raptor_avltree_delete(uri->world->uris_tree, uri); + + if(uri->string) + RAPTOR_FREE(char*, uri->string); + RAPTOR_FREE(raptor_uri, uri); +} + + +/** + * raptor_uri_equals: + * @uri1: URI 1 (may be NULL) + * @uri2: URI 2 (may be NULL) + * + * Check if two URIs are equal. + * + * A NULL URI is not equal to a non-NULL URI. 
+ * + * Return value: non-0 if the URIs are equal + **/ +int +raptor_uri_equals(raptor_uri* uri1, raptor_uri* uri2) +{ + if(uri1 && uri2) { + /* Both not-NULL - compare for equality */ + if(uri1 == uri2) + return 1; + else if (uri1->length != uri2->length) + /* Different if lengths are different */ + return 0; + else + /* Same length compare: do not need strncmp() NUL checking */ + return memcmp((const char*)uri1->string, (const char*)uri2->string, + uri1->length) == 0; + } else if(uri1 || uri2) + /* Only one is NULL - not equal */ + return 0; + else + /* both NULL - equal */ + return 1; +} + + +/** + * raptor_uri_compare: + * @uri1: URI 1 (may be NULL) + * @uri2: URI 2 (may be NULL) + * + * Compare two URIs, ala strcmp. + * + * A NULL URI is always less than (never equal to) a non-NULL URI. + * + * Return value: -1 if uri1 < uri2, 0 if equal, 1 if uri1 > uri2 + **/ +int +raptor_uri_compare(raptor_uri* uri1, raptor_uri* uri2) +{ + if(uri1 == uri2) + return 0; + + if(uri1 && uri2) { + /* compare common (shortest) prefix */ + unsigned int len = (uri1->length > uri2->length) ? + uri2->length : uri1->length; + + /* Same length compare: Do not need the strncmp() NUL checking */ + int result = memcmp((const char*)uri1->string, (const char*)uri2->string, + len); + if(!result) + /* if prefix is the same, the shorter is earlier */ + result = uri1->length - uri2->length; + return result; + } + + /* One arg is NULL - sort that first */ + return (!uri1) ? -1 : 1; +} + + +/** + * raptor_uri_copy: + * @uri: URI object + * + * Constructor - get a copy of a URI. + * + * Return value: a new #raptor_uri object or NULL on failure + **/ +raptor_uri* +raptor_uri_copy(raptor_uri *uri) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, raptor_uri, NULL); + + uri->usage++; + return uri; +} + + +/** + * raptor_uri_as_string: + * @uri: #raptor_uri object + * + * Get a string representation of a URI. + * + * Returns a shared pointer to a string representation of @uri. 
This + * string is shared and must not be freed, otherwise see use the + * raptor_uri_to_string() or raptor_uri_to_counted_string() methods. + * + * Return value: shared string representation of URI + **/ +unsigned char* +raptor_uri_as_string(raptor_uri *uri) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, raptor_uri, NULL); + + return uri->string; +} + + +/** + * raptor_uri_as_counted_string: + * @uri: URI object + * @len_p: address of length variable or NULL + * + * Get a string representation of a URI with count. + * + * Returns a shared pointer to a string representation of @uri along + * with the length of the string in @len_p, if not NULL. This + * string is shared and must not be freed, otherwise see use the + * raptor_uri_to_string() or raptor_uri_to_counted_string() methods. + * + * Return value: shared string representation of URI + **/ +unsigned char* +raptor_uri_as_counted_string(raptor_uri *uri, size_t* len_p) +{ + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(uri, raptor_uri, NULL); + + if(len_p) + *len_p = uri->length; + return uri->string; +} + + +/** + * raptor_uri_counted_filename_to_uri_string: + * @filename: The filename to convert + * @filename_len: length of @filename or 0 to count it here + * + * Converts a counted filename to a file: URI. + * + * Handles the OS-specific escaping on turning filenames into URIs + * and returns a new buffer that the caller must free(). Turns a + * space in the filename into \%20 and '%' into \%25. 
+ * + * Return value: A newly allocated string with the URI or NULL on failure + **/ +unsigned char* +raptor_uri_counted_filename_to_uri_string(const char *filename, + size_t filename_len) +{ + unsigned char *buffer = NULL; + const char *from; + char *to; +#ifndef WIN32 + char *path = NULL; +#endif + /* "file://" */ +#define RAPTOR_LEN_FILE_CSS 7 + size_t len = RAPTOR_LEN_FILE_CSS; + size_t fl; + + if(!filename) + return NULL; + + if(!filename_len) + filename_len = strlen(filename); + +#ifdef WIN32 +/* + * On WIN32, filenames turn into + * "file://" + translated filename + * where the translation is \\ turns into / and ' ' into %20, '%' into %25 + * and if the filename does not start with '\', it is relative + * in which case, a . is appended to the authority + * + * e.g + * FILENAME URI + * c:\windows\system file:///c:/windows/system + * \\server\dir\file.doc file://server/dir/file.doc + * a:foo file:///a:./foo + * C:\Documents and Settings\myapp\foo.bat + * file:///C:/Documents%20and%20Settings/myapp/foo.bat + * + * There are also UNC names \\server\share\blah + * that turn into file:///server/share/blah + * using the above algorithm. 
+ */ + if(filename[1] == ':' && filename[2] != '\\') + len += 3; /* relative filename - add / and ./ */ + else if(*filename == '\\') + len -= 2; /* two // from not needed in filename */ + else + len++; /* / at start of path */ + +#else +/* others - unix: turn spaces into %20, '%' into %25 */ + + if(*filename != '/') { + size_t path_max; + size_t path_len; + size_t new_filename_len; + +#ifdef PATH_MAX + path_max = PATH_MAX; +#else + path_max = 1024; /* an initial guess at the length */ +#endif + path = (char*)malloc(path_max); + while(1) { + /* malloc() failed or getcwd() succeeded */ + errno = 0; + if(!path || getcwd(path, path_max)) + break; + + /* failed */ + if(errno != ERANGE) + break; + + /* try again with a bigger buffer */ + path_max *= 2; + path = (char*)realloc(path, path_max); + } + if(!path) + goto path_done; + path_len = strlen(path); + + /* path + '/' + filename */ + new_filename_len = path_len + 1 + filename_len; + if(path_max < new_filename_len + 1) { + path = (char*)realloc(path, new_filename_len + 1); + if(!path) + goto path_done; + } + + path[path_len] = '/'; + memcpy(path + path_len + 1, filename, filename_len); + path[new_filename_len] = '\0'; + filename_len = new_filename_len; + filename = (const char*)path; + } +#endif + + /* add URI-escaped filename length */ + for(from = filename, fl = filename_len; fl ; from++, fl--) { + len++; +#ifdef WIN32 + if(*from == ':') { + if(from[1] != '\\') + len += 2; + } +#endif + if(*from == ' ' || *from == '%') + len += 2; /* strlen(%xx)-1 */ + } + + buffer = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!buffer) + goto path_done; + + memcpy(buffer, "file://", RAPTOR_LEN_FILE_CSS + 1); /* copy NUL */ + from = filename; + to = (char*)(buffer + RAPTOR_LEN_FILE_CSS); + fl = filename_len; +#ifdef WIN32 + if(*from == '\\' && from[1] == '\\') { + from += 2; fl -= 2; + } else + *to++ ='/'; +#endif + for(; fl; fl--) { + char c = *from++; +#ifdef WIN32 + if(c == '\\') + *to++ ='/'; + else if(c == ':') { + *to++ = c; + 
if(*from != '\\') { + *to++ ='.'; + *to++ ='/'; + } + } else +#endif + if(c == ' ' || c == '%') { + *to++ = '%'; + *to++ = '2'; + *to++ = (c == ' ') ? '0' : '5'; + } else + *to++ = c; + } + *to = '\0'; + + path_done: +#ifndef WIN32 + /* Normalize the resulting URI path after the "file://" */ + if(buffer) + raptor_uri_normalize_path(buffer + RAPTOR_LEN_FILE_CSS, + len - RAPTOR_LEN_FILE_CSS); + + if(path) + free(path); +#endif + + return buffer; +} + + +/** + * raptor_uri_filename_to_uri_string: + * @filename: The filename to convert + * + * Converts a filename to a file: URI. + * + * Handles the OS-specific escaping on turning filenames into URIs + * and returns a new buffer that the caller must free(). Turns a + * space in the filename into \%20 and '%' into \%25. + * + * Return value: A newly allocated string with the URI or NULL on failure + **/ +unsigned char * +raptor_uri_filename_to_uri_string(const char *filename) + +{ + return raptor_uri_counted_filename_to_uri_string(filename, 0); +} + + +/** + * raptor_uri_uri_string_to_counted_filename_fragment: + * @uri_string: The file: URI to convert + * @len_p: address of filename length variable or NULL + * @fragment_p: Address of pointer to store any URI fragment or NULL + * @fragment_len_p: address of length variable or NULL + * + * Convert a file: URI to a counted filename and counted fragment. + * + * Handles the OS-specific file: URIs to filename mappings. Returns + * a new buffer containing the filename that the caller must free. + * + * If @len_p is present the length of the filename is returned + * + * If @fragment_p is given, a new string containing the URI fragment + * is returned, or NULL if none is present. If @fragment_len_p is present + * the length is returned in it. 
+ * + * Return value: A newly allocated string with the filename or NULL on failure + **/ +char * +raptor_uri_uri_string_to_counted_filename_fragment(const unsigned char *uri_string, + size_t* len_p, + unsigned char **fragment_p, + size_t* fragment_len_p) +{ + char *filename; + size_t len = 0; + raptor_uri_detail *ud = NULL; + unsigned char *from; + char *to; +#ifdef WIN32 + unsigned char *p; +#endif + + if(!uri_string || !*uri_string) + return NULL; + + ud = raptor_new_uri_detail(uri_string); + if(!ud) + return NULL; + + + if(!ud->scheme || raptor_strcasecmp((const char*)ud->scheme, "file")) { + raptor_free_uri_detail(ud); + return NULL; + } + + if(ud->authority) { + if(!*ud->authority) + ud->authority = NULL; + else if(!raptor_strcasecmp((const char*)ud->authority, "localhost")) + ud->authority = NULL; + } + + /* Cannot do much if there is no path */ + if(!ud->path || (ud->path && !*ud->path)) { + raptor_free_uri_detail(ud); + return NULL; + } + + /* See raptor_uri_filename_to_uri_string for details of the mapping */ +#ifdef WIN32 + if(ud->authority) + len += ud->authority_len+3; + + p = ud->path; + /* remove leading slash from path if there is one */ + if(*p && p[0] == '/') { + p++; + len--; + } + /* handle case where path starts with drive letter */ + if(*p && (p[1] == '|' || p[1] == ':')) { + /* Either + * "a:" like in file://a|/... or file://a:/... + * or + * "a:." 
like in file://a:./foo + * giving device-relative path a:foo + */ + if(p[2] == '.') { + p[2] = *p; + p[3] = ':'; + p += 2; + len -= 2; /* remove 2 for ./ */ + } else + p[1] = ':'; + } +#endif + + + /* add URI-escaped filename length */ + for(from = ud->path; *from ; from++) { + len++; + if(*from == '%') + from += 2; + } + + + /* Something is wrong */ + if(!len) { + raptor_free_uri_detail(ud); + return NULL; + } + + filename = RAPTOR_MALLOC(char*, len + 1); + if(!filename) { + raptor_free_uri_detail(ud); + return NULL; + } + + to = filename; + +#ifdef WIN32 + if(ud->authority) { + *to++ = '\\'; + *to++ = '\\'; + from = ud->authority; + while( (*to++ = *from++) ) + ; + to--; + *to++ = '\\'; + } + + /* copy path after all /s */ + from = p; +#else + from = ud->path; +#endif + + while(*from) { + char c = *from++; +#ifdef WIN32 + if(c == '/') + *to++ = '\\'; + else +#endif + if(c == '%') { + if(*from && from[1]) { + char hexbuf[3]; + char *endptr = NULL; + hexbuf[0] = (char)*from; + hexbuf[1] = (char)from[1]; + hexbuf[2]='\0'; + c = (char)strtol((const char*)hexbuf, &endptr, 16); + if(endptr == &hexbuf[2]) + *to++ = c; + } + from += 2; + } else + *to++ = c; + } + *to = '\0'; + + if(len_p) + *len_p = len; + + if(fragment_p) { + size_t fragment_len = 0; + + if(ud->fragment) { + fragment_len = ud->fragment_len; + *fragment_p = RAPTOR_MALLOC(unsigned char*, fragment_len + 1); + if(*fragment_p) + memcpy(*fragment_p, ud->fragment, fragment_len + 1); + } else + *fragment_p = NULL; + if(fragment_len_p) + *fragment_len_p = fragment_len; + } + + raptor_free_uri_detail(ud); + + return filename; +} + + +/** + * raptor_uri_uri_string_to_filename_fragment: + * @uri_string: The file: URI to convert + * @fragment_p: Address of pointer to store any URI fragment or NULL + * + * Convert a file: URI to a filename and fragment. + * + * Handles the OS-specific file: URIs to filename mappings. Returns + * a new buffer containing the filename that the caller must free. 
+ * + * If @fragment_p is given, a new string containing the URI fragment + * is returned, or NULL if none is present + * + * See also raptor_uri_uri_string_to_counted_filename_fragment() + * + * Return value: A newly allocated string with the filename or NULL on failure + **/ +char * +raptor_uri_uri_string_to_filename_fragment(const unsigned char *uri_string, + unsigned char **fragment_p) +{ + return raptor_uri_uri_string_to_counted_filename_fragment(uri_string, NULL, + fragment_p, NULL); +} + + +/** + * raptor_uri_uri_string_to_filename: + * @uri_string: The file: URI to convert + * + * Convert a file: URI to a filename. + * + * Handles the OS-specific file: URIs to filename mappings. Returns + * a new buffer containing the filename that the caller must free. + * + * See also raptor_uri_uri_string_to_counted_filename_fragment() + * + * Return value: A newly allocated string with the filename or NULL on failure + **/ +char * +raptor_uri_uri_string_to_filename(const unsigned char *uri_string) +{ + return raptor_uri_uri_string_to_counted_filename_fragment(uri_string, NULL, + NULL, NULL); +} + + + +/** + * raptor_uri_uri_string_is_file_uri: + * @uri_string: The URI string to check + * + * Check if a URI string is a file: URI. + * + * Return value: Non zero if URI string is a file: URI + **/ +int +raptor_uri_uri_string_is_file_uri(const unsigned char* uri_string) +{ + if(!uri_string || !*uri_string) + return 1; + + return raptor_strncasecmp((const char*)uri_string, "file:", 5) == 0; +} + + +/** + * raptor_new_uri_for_xmlbase: + * @old_uri: URI to transform + * + * Constructor - create a URI suitable for use as an XML Base. + * + * Takes an existing URI and ensures it has a path (default /) and has + * no fragment or query arguments - XML base does not use these. + * + * Return value: new #raptor_uri object or NULL on failure. 
+ **/ +raptor_uri* +raptor_new_uri_for_xmlbase(raptor_uri* old_uri) +{ + unsigned char *uri_string; + unsigned char *new_uri_string; + raptor_uri* new_uri; + raptor_uri_detail *ud; + + if(!old_uri) + return NULL; + + uri_string = raptor_uri_as_string(old_uri); + + ud = raptor_new_uri_detail(uri_string); + if(!ud) + return NULL; + + if(!ud->path) { + ud->path = (unsigned char*)"/"; + ud->path_len = 1; + } + + ud->query = NULL; ud->query_len = 0; + ud->fragment = NULL; ud->fragment_len = 0; + new_uri_string = raptor_uri_detail_to_string(ud, NULL); + raptor_free_uri_detail(ud); + if(!new_uri_string) + return NULL; + + new_uri = raptor_new_uri(old_uri->world, new_uri_string); + RAPTOR_FREE(char*, new_uri_string); + + return new_uri; +} + + +/** + * raptor_new_uri_for_retrieval: + * @old_uri: URI to transform + * + * Constructor - create a URI suitable for retrieval. + * + * Takes an existing URI and ensures it has a path (default /) and has + * no fragment - URI retrieval does not use the fragment part. + * + * Return value: new #raptor_uri object or NULL on failure. 
+ **/ +raptor_uri* +raptor_new_uri_for_retrieval(raptor_uri* old_uri) +{ + unsigned char *uri_string; + unsigned char *new_uri_string; + raptor_uri* new_uri; + raptor_uri_detail *ud; + + if(!old_uri) + return NULL; + + uri_string = raptor_uri_as_string(old_uri); + + ud = raptor_new_uri_detail(uri_string); + if(!ud) + return NULL; + + if(!ud->path) { + ud->path = (unsigned char*)"/"; + ud->path_len = 1; + } + + ud->fragment = NULL; ud->fragment_len = 0; + new_uri_string = raptor_uri_detail_to_string(ud, NULL); + raptor_free_uri_detail(ud); + if(!new_uri_string) + return NULL; + + new_uri = raptor_new_uri(old_uri->world, new_uri_string); + RAPTOR_FREE(char*, new_uri_string); + + return new_uri; +} + + +int +raptor_uri_init(raptor_world* world) +{ + if(world->uri_interning && !world->uris_tree) { + world->uris_tree = raptor_new_avltree((raptor_data_compare_handler)raptor_uri_compare, + /* free */ NULL, 0); + if(!world->uris_tree) { +#ifdef RAPTOR_DEBUG + RAPTOR_FATAL1("Failed to create raptor URI avltree"); +#else + raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Failed to create raptor URI avltree"); +#endif + } + + } + + return 0; +} + + +void +raptor_uri_finish(raptor_world* world) +{ + if(world->uris_tree) { + raptor_free_avltree(world->uris_tree); + world->uris_tree = NULL; + } +} + + +/* + * raptor_uri_path_common_base_length: + * @first_path: The first path (path only, not a full URI) + * @first_path_len: Length of first_path + * @second_path: The second path (path only, not a full URI) + * @second_path_len: Length of second_path + * + * Find the common base length of two URI path components. 
+ * + * Return value: Length of the common base path + **/ + +static size_t +raptor_uri_path_common_base_length(const unsigned char *first_path, + size_t first_path_len, + const unsigned char *second_path, + size_t second_path_len) +{ + ptrdiff_t common_len = 0; + const unsigned char *cur_ptr = first_path; + const unsigned char *prev_ptr = first_path; + + /* Compare each path component of first_path and second_path until + * there is a mismatch. Then return the length from the start of + * the path to the last successful match. + */ + while((cur_ptr = (const unsigned char*)memchr(cur_ptr, '/', first_path_len))) { + cur_ptr++; + if(strncmp((const char*)first_path + common_len, + (const char*)second_path + common_len, cur_ptr - prev_ptr)) + break; + + first_path_len -= cur_ptr - prev_ptr; + prev_ptr = cur_ptr; + common_len = prev_ptr - first_path; + } + + return prev_ptr - first_path; +} + + +/* + * raptor_uri_path_make_relative_path: + * @from_path: The base path (path only, not a full URI) + * @from_path_len: Length of the base path + * @to_path: The reference path (path only, not a full URI) + * @to_path_len: Length of the reference path + * @suffix: String to be appended to the final relative path + * @suffix_len: Length of the suffix + * @result_length_p: Location to store the length of the string or NULL + * + * Make a relative URI path. + * + * Return value: A newly allocated relative path string or NULL on failure. 
+ **/ + +static unsigned char * +raptor_uri_path_make_relative_path(const unsigned char *from_path, size_t from_path_len, + const unsigned char *to_path, size_t to_path_len, + const unsigned char *suffix, size_t suffix_len, + size_t *result_length_p) +{ + size_t common_len, cur_len, final_len, to_dir_len; + int up_dirs = 0; + const unsigned char *cur_ptr, *prev_ptr; + unsigned char *final_path, *final_path_cur; + + common_len = raptor_uri_path_common_base_length(from_path, from_path_len, + to_path, to_path_len); + + if(result_length_p) + *result_length_p=0; + + /* Count how many directories we have to go up */ + cur_ptr = from_path + common_len; + prev_ptr = cur_ptr; + cur_len = from_path_len - common_len; + while((cur_ptr = (const unsigned char*)memchr(cur_ptr, '/', cur_len))) { + cur_ptr++; + up_dirs++; + cur_len -= cur_ptr - prev_ptr; + prev_ptr = cur_ptr; + } + + /* Calculate how many characters of to_path subdirs (counted from the + common base) we have to add. */ + cur_ptr = to_path + common_len; + prev_ptr = cur_ptr; + cur_len = to_path_len - common_len; + while((cur_ptr = (const unsigned char*)memchr(cur_ptr, '/', cur_len))) { + cur_ptr++; + cur_len -= cur_ptr - prev_ptr; + prev_ptr = cur_ptr; + } + to_dir_len = prev_ptr - (to_path + common_len); + + /* Create the final relative path */ + final_len = up_dirs*3 + to_dir_len + suffix_len; /* 3 for each "../" */ + final_path = RAPTOR_MALLOC(unsigned char*, final_len + 1); + if(!final_path) + return NULL; + *final_path=0; + + /* First, add the necessary "../" parts */ + final_path_cur = final_path; + while(up_dirs--) { + *final_path_cur++='.'; + *final_path_cur++='.'; + *final_path_cur++='/'; + } + + /* Then, add the path from the common base to the to_path */ + memcpy(final_path_cur, to_path + common_len, to_dir_len); + final_path_cur += to_dir_len; + + /* Finally, add the suffix */ + if(suffix && suffix_len) { + /* As a special case, if the suffix begins with a dot (".") and the final + output string so far 
is non-empty, skip the dot. */ + if(*suffix == '.' && final_path_cur != final_path) { + /* Make sure that the dot really represents a directory and it's not + just part of a file name like ".foo". In other words, the dot must + either be the only character or the next character must be the + fragment or the query character. */ + if((suffix_len == 1) || + (suffix_len > 1 && (suffix[1] == '#' || suffix[1] == '?'))) { + suffix++; + suffix_len--; + final_len--; + } + } + if(suffix_len) + memcpy(final_path_cur, suffix, suffix_len); + } + + final_path[final_len] = 0; + + if(result_length_p) + *result_length_p=final_len; + + return final_path; +} + + +/** + * raptor_uri_to_relative_counted_uri_string: + * @base_uri: The base absolute URI to resolve against (or NULL) + * @reference_uri: The reference absolute URI to use + * @length_p: Location to store the length of the relative URI string or NULL + * + * Get the counted relative URI string of a URI against a base URI. + * + * Return value: A newly allocated relative URI string or NULL on failure + **/ + +unsigned char* +raptor_uri_to_relative_counted_uri_string(raptor_uri *base_uri, + raptor_uri *reference_uri, + size_t *length_p) { + raptor_uri_detail *base_detail = NULL, *reference_detail; + const unsigned char *base, *reference_str, *base_file, *reference_file; + unsigned char *suffix, *cur_ptr; + size_t base_len, reference_len, reference_file_len, suffix_len; + unsigned char *result = NULL; + int suffix_is_result = 0; + + if(!reference_uri) + return NULL; + + if(length_p) + *length_p=0; + + reference_str = raptor_uri_as_counted_string(reference_uri, &reference_len); + reference_detail = raptor_new_uri_detail(reference_str); + if(!reference_detail) + goto err; + + if(!base_uri) + goto buildresult; + + base = raptor_uri_as_counted_string(base_uri, &base_len); + base_detail = raptor_new_uri_detail(base); + if(!base_detail) + goto err; + + /* Check if the whole URIs are equal */ + if(raptor_uri_equals(base_uri, 
reference_uri)) { + reference_len = 0; + goto buildresult; + } + + /* Check if scheme and authority of the URIs are equal */ + if(base_detail->scheme_len == reference_detail->scheme_len && + base_detail->authority_len == reference_detail->authority_len && + !strncmp((const char*)base_detail->scheme, + (const char*)reference_detail->scheme, + base_detail->scheme_len) && + (base_detail->authority_len == 0 || + !strncmp((const char*)base_detail->authority, + (const char*)reference_detail->authority, + base_detail->authority_len))) { + + if(!base_detail->path) { + if(reference_detail->path) { + /* if base has no path then the relative URI is relative + * to scheme+authority so assemble that in the suffix + * buffer (adding any query part or fragment needed) + */ + reference_file = reference_detail->path; + reference_file_len = reference_detail->path_len; + suffix_is_result = 1; + goto addqueryfragment; + } + goto buildresult; + } + + /* Find the file name components */ + base_file = (const unsigned char*)strrchr((const char*)base_detail->path, '/'); + if(!base_file) + goto buildresult; + base_file++; + + if(!reference_detail->path) + goto buildresult; + reference_file = (const unsigned char*)strrchr((const char*)reference_detail->path, '/'); + if(!reference_file) + goto buildresult; + reference_file++; + + reference_file_len = reference_detail->path_len - + (reference_file - reference_detail->path); + + if(!strcmp((const char*)base_detail->path, (const char*)reference_detail->path)) { + /* If the file names are equal, don't put them in the relative URI */ + reference_file = NULL; + reference_file_len = 0; + } else if(*base_file && !*reference_file) { + /* If the base file is non-empty, but the reference file is + * empty, use "." as the file name. 
+ */ + reference_file = (const unsigned char*)"."; + reference_file_len = 1; + } + + addqueryfragment: + /* Calculate the length of the suffix (file name + query + fragment) */ + suffix_len = reference_file_len + reference_detail->query_len + + reference_detail->fragment_len; + + if(reference_detail->query) + suffix_len++; /* add one char for the '?' */ + if(reference_detail->fragment) + suffix_len++; /* add one char for the '#' */ + + /* Assemble the suffix */ + suffix = RAPTOR_MALLOC(unsigned char*, suffix_len + 1); + if(!suffix) + goto err; + cur_ptr = suffix; + if(reference_file) { + memcpy(suffix, reference_file, reference_file_len); + cur_ptr+= reference_file_len; + } + + if(reference_detail->query) { + *cur_ptr++='?'; + memcpy(cur_ptr, reference_detail->query, reference_detail->query_len); + cur_ptr+= reference_detail->query_len; + } + + if(reference_detail->fragment) { + *cur_ptr++='#'; + memcpy(cur_ptr, reference_detail->fragment, reference_detail->fragment_len); + cur_ptr+= reference_detail->fragment_len; + } + *cur_ptr=0; + + if(suffix_is_result) { + /* If suffix is what we need, just use that as the result */ + result = suffix; + if(length_p) + *length_p=suffix_len; + } else { + /* Otherwise create the full relative path */ + result = raptor_uri_path_make_relative_path(base_detail->path, + base_detail->path_len, + reference_detail->path, + reference_detail->path_len, + suffix, + suffix_len, + length_p); + RAPTOR_FREE(char*, suffix); + } + } + + + buildresult: + /* If result is NULL at this point, it means that we were unable to find a + relative URI, so we'll return a full absolute URI instead. 
*/ + if(!result) { + result = RAPTOR_MALLOC(unsigned char*, reference_len + 1); + if(result) { + if(reference_len) + memcpy(result, reference_str, reference_len); + result[reference_len] = 0; + if(length_p) + *length_p=reference_len; + } + } + + err: + if(base_detail) + raptor_free_uri_detail(base_detail); + raptor_free_uri_detail(reference_detail); + + return result; +} + + +/** + * raptor_uri_to_relative_uri_string: + * @base_uri: The base absolute URI to resolve against + * @reference_uri: The reference absolute URI to use + * + * Get the relative URI string of a URI against a base URI. + * + * Return value: A newly allocated relative URI string or NULL on failure + **/ +unsigned char* +raptor_uri_to_relative_uri_string(raptor_uri *base_uri, + raptor_uri *reference_uri) +{ + return raptor_uri_to_relative_counted_uri_string(base_uri, reference_uri, + NULL); +} + + +/** + * raptor_uri_print: + * @uri: URI to print + * @stream: The file handle to print to + * + * Print a URI to a file handle. + * + * Return value: non-0 on failure + **/ +int +raptor_uri_print(const raptor_uri* uri, FILE *stream) +{ + size_t nwritten = 0; + size_t len = 10; + unsigned char *string = (unsigned char*)"(NULL URI)"; + raptor_world* world = NULL; + + if(uri) { + world = uri->world; + string = raptor_uri_as_counted_string((raptor_uri*)uri, &len); + } + + nwritten = fwrite(string, 1, len, stream); + if(nwritten != len) + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, + NULL, "fwrite failed - %s", strerror(errno)); + + return (nwritten == len); +} + + +/** + * raptor_uri_to_counted_string: + * @uri: #raptor_uri object + * @len_p: Pointer to length (or NULL) + * + * Get a new counted string for a URI. + * + * If @len_p is not NULL, the length of the string is stored in it. + * + * The memory allocated must be freed by the caller and + * raptor_free_memory() should be used for best portability. 
+ * + * Return value: new string or NULL on failure + **/ +unsigned char* +raptor_uri_to_counted_string(raptor_uri *uri, size_t *len_p) +{ + size_t len; + unsigned char *string; + unsigned char *new_string; + + if(!uri) + return NULL; + + string = raptor_uri_as_counted_string(uri, &len); + if(!string) + return NULL; + + new_string = RAPTOR_MALLOC(unsigned char*, len + 1); /* +1 for NULL termination */ + if(!new_string) + return NULL; + + memcpy(new_string, string, len+1); + + if(len_p) + *len_p=len; + return new_string; +} + + +/** + * raptor_uri_to_string: + * @uri: #raptor_uri object + * + * Get a new string for a URI. + * + * The memory allocated must be freed by the caller and + * raptor_free_memory() should be used for best portability. + * + * Return value: new string or NULL on failure + **/ +unsigned char* +raptor_uri_to_string(raptor_uri *uri) +{ + return raptor_uri_to_counted_string(uri, NULL); +} + + +/** + * raptor_new_uri_from_rdf_ordinal: + * @world: raptor_world object + * @ordinal: integer rdf:_n + * + * Internal - convert an integer rdf:_n ordinal to the resource URI + * + * Return value: new URI object or NULL on failure + **/ +raptor_uri* +raptor_new_uri_from_rdf_ordinal(raptor_world* world, int ordinal) +{ + /* strlen(rdf namespace URI) + _ + decimal int number + \0 */ + unsigned char uri_string[43 + 1 + MAX_ASCII_INT_SIZE + 1]; + unsigned char *p = uri_string; + + memcpy(p, raptor_rdf_namespace_uri, raptor_rdf_namespace_uri_len); + p += raptor_rdf_namespace_uri_len; + *p++ = '_'; + (void)raptor_format_integer(RAPTOR_GOOD_CAST(char*, p), + MAX_ASCII_INT_SIZE + 1, ordinal, /* base */ 10, + -1, '\0'); + + return raptor_new_uri(world, uri_string); +} + + +/** + * raptor_uri_get_world: + * @uri: #raptor_uri object + * + * Get the raptor_world object associated with a raptor_uri. 
+ * + * Return value: raptor_world object + **/ +raptor_world* +raptor_uri_get_world(raptor_uri *uri) +{ + return uri->world; +} + + +/** + * raptor_uri_filename_exists: + * @path: file path + * + * Check if @path points to a file that exists + * + * Return value: > 0 if file exists, 0 if does not exist, < 0 on error + **/ +int +raptor_uri_filename_exists(const unsigned char* path) +{ + int exists = -1; +#ifdef HAVE_STAT + struct stat stat_buffer; +#endif + + if(!path) + return -1; + +#ifdef HAVE_STAT + if(!stat((const char*)path, &stat_buffer)) + exists = S_ISREG(stat_buffer.st_mode); +#else + exists = (access(path, R_OK) < 0) ? -1 : 1; +#endif + + return exists; +} + + +/** + * raptor_uri_file_exists: + * @uri: URI string + * + * Check if a file: URI is a file that exists + * + * Return value: > 0 if file exists, 0 if does not exist, < 0 if not a file URI or error + **/ +int +raptor_uri_file_exists(raptor_uri* uri) +{ + const unsigned char* uri_string; + + if(!uri) + return -1; + + uri_string = raptor_uri_as_string(uri); + if(!raptor_uri_uri_string_is_file_uri(uri_string)) + return -1; + + return raptor_uri_filename_exists(uri_string + 6); +} + + + +/** + * raptor_uri_escaped_write: + * @uri: uri to write + * @base_uri: base uri to write relative to (or NULL) + * @flags: bit flags - see #raptor_escaped_write_bitflags + * @iostr: raptor iostream + * + * Write a #raptor_uri formatted with escapes to a #raptor_iostream + * + * Return value: non-0 on failure + **/ +int +raptor_uri_escaped_write(raptor_uri* uri, + raptor_uri* base_uri, + unsigned int flags, + raptor_iostream *iostr) +{ + unsigned char *uri_str; + int uri_str_owned = 0; + size_t len; + + if(!uri) + return 1; + + raptor_iostream_write_byte('<', iostr); + if(base_uri) { + uri_str = raptor_uri_to_relative_counted_uri_string(base_uri, uri, &len); + if(!uri_str) + return 1; + + uri_str_owned = 1; + } else { + uri_str = raptor_uri_as_counted_string(uri, &len); + } + if(uri_str) + 
raptor_string_escaped_write(uri_str, len, '>', flags, iostr); + raptor_iostream_write_byte('>', iostr); + + if(uri_str_owned && uri_str) + RAPTOR_FREE(char*, uri_str); + + return 0; +} + + +/** + * raptor_uri_uri_string_is_absolute: + * @uri_string: uri to check write + * + * Check if a uri string is an absolute URI + * + * Return value: >0 if absolute, 0 if not, < 0 on failure + **/ +int +raptor_uri_uri_string_is_absolute(const unsigned char* uri_string) +{ + const unsigned char* s = uri_string; + + /* + * scheme = alpha *( alpha | digit | "+" | "-" | "." ) + * RFC 2396 section 3.1 Scheme Component + */ + if(*s && isalpha((int)*s)) { + s++; + + while(*s && (isalnum((int)*s) || + (*s == '+') || (*s == '-') || (*s == '.'))) + s++; + + if(*s == ':') + return 1; + } + + + return 0; +} + + +#endif /* !STANDALONE */ + + +#ifdef STANDALONE + +#include <stdio.h> +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +/* one more prototype */ +int main(int argc, char *argv[]); + +static const char *program; + + +static int +assert_uri_is_valid(raptor_uri* uri) +{ + if(strlen((const char*)uri->string) != uri->length) { + fprintf(stderr, + "%s: URI with string '%s' is invalid. 
length is %d, recorded in object as %d\n", + program, uri->string, + (int)strlen((const char*)uri->string), + (int)uri->length); + return 0; + } + + return 1; +} + + +static int +assert_filename_to_uri (const char *filename, const char *reference_uri) +{ + unsigned char *uri; + + uri = raptor_uri_filename_to_uri_string(filename); + + if(!uri || strcmp((const char*)uri, (const char*)reference_uri)) { + fprintf(stderr, + "%s: raptor_uri_filename_to_uri_string(%s) FAILED gaving URI %s != %s\n", + program, filename, uri, reference_uri); + if(uri) + RAPTOR_FREE(char*, uri); + return 1; + } + + RAPTOR_FREE(char*, uri); + return 0; +} + + +static int +assert_uri_to_filename (const char *uri, const char *reference_filename) +{ + char *filename; + + filename = raptor_uri_uri_string_to_filename((const unsigned char*)uri); + + if(filename && !reference_filename) { + fprintf(stderr, + "%s: raptor_uri_uri_string_to_filename(%s) FAILED giving filename %s != NULL\n", + program, uri, filename); + if(filename) + RAPTOR_FREE(char*, filename); + return 1; + } else if(filename && strcmp(filename, reference_filename)) { + fprintf(stderr, + "%s: raptor_uri_uri_string_to_filename(%s) FAILED gaving filename %s != %s\n", + program, uri, filename, reference_filename); + if(filename) + RAPTOR_FREE(char*, filename); + return 1; + } + + RAPTOR_FREE(char*, filename); + return 0; +} + + +static int +assert_uri_to_relative(raptor_world *world, const char *base, const char *uri, const char *relative) +{ + unsigned char *output; + int result; + raptor_uri* base_uri = NULL; + raptor_uri* reference_uri = raptor_new_uri(world, (const unsigned char*)uri); + size_t length = 0; + + if(!assert_uri_is_valid(reference_uri)) + return 1; + + if(base) { + base_uri = raptor_new_uri(world, (const unsigned char*)base); + if(base_uri && !assert_uri_is_valid(base_uri)) { + raptor_free_uri(reference_uri); + raptor_free_uri(base_uri); + return 1; + } + } + + output = 
raptor_uri_to_relative_counted_uri_string(base_uri, reference_uri, + &length); + result = strcmp(relative, (const char*)output); + if(result) { + fprintf(stderr, + "%s: raptor_uri_string_to_relative_uri_string FAILED: base='%s', uri='%s', expected='%s', got='%s'\n", + program, base, uri, relative, output); + RAPTOR_FREE(char*, output); + return 1; + } + RAPTOR_FREE(char*, output); + if(base_uri) + raptor_free_uri(base_uri); + raptor_free_uri(reference_uri); + return 0; +} + + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *base_uri = "http://example.org/bpath/cpath/d;p?querystr#frag"; + const char *base_uri_xmlbase = "http://example.org/bpath/cpath/d;p"; + const char *base_uri_retrievable = "http://example.org/bpath/cpath/d;p?querystr"; +#ifndef WIN32 +#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) + const char* dirs[6] = { "/etc", "/bin", "/tmp", "/lib", "/var", NULL }; + #define URI_BUFFER_LEN 16 + unsigned char uri_buffer[URI_BUFFER_LEN]; /* strlen("file:///DIR/foo")+1 */ + int i; + const char *dir; +#endif +#endif + unsigned char *str; + raptor_uri *uri1, *uri2, *uri3; + + int failures = 0; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + if((program = strrchr(argv[0], '/'))) + program++; + else if((program = strrchr(argv[0], '\\'))) + program++; + else + program = argv[0]; + +#ifdef WIN32 + failures += assert_filename_to_uri ("c:\\windows\\system", "file:///c:/windows/system"); + failures += assert_filename_to_uri ("\\\\server\\share\\file.doc", "file://server/share/file.doc"); + failures += assert_filename_to_uri ("a:foo", "file:///a:./foo"); + + failures += assert_filename_to_uri ("C:\\Documents and Settings\\myapp\\foo.bat", "file:///C:/Documents%20and%20Settings/myapp/foo.bat"); + failures += assert_filename_to_uri ("C:\\My Documents\\%age.txt", "file:///C:/My%20Documents/%25age.txt"); + + failures += assert_uri_to_filename ("file:///c|/windows/system", "c:\\windows\\system"); + 
failures += assert_uri_to_filename ("file:///c:/windows/system", "c:\\windows\\system"); + failures += assert_uri_to_filename ("file://server/share/file.doc", "\\\\server\\share\\file.doc"); + failures += assert_uri_to_filename ("file:///a:./foo", "a:foo"); + failures += assert_uri_to_filename ("file:///C:/Documents%20and%20Settings/myapp/foo.bat", "C:\\Documents and Settings\\myapp\\foo.bat"); + failures += assert_uri_to_filename ("file:///C:/My%20Documents/%25age.txt", "C:\\My Documents\\%age.txt"); + + + failures += assert_uri_to_filename ("file:c:\\thing", "c:\\thing"); + failures += assert_uri_to_filename ("file:/c:\\thing", "c:\\thing"); + failures += assert_uri_to_filename ("file://c:\\thing", NULL); + failures += assert_uri_to_filename ("file:///c:\\thing", "c:\\thing"); + failures += assert_uri_to_filename ("file://localhost/", NULL); + failures += assert_uri_to_filename ("file://c:\\foo\\bar\\x.rdf", NULL); + +#else + + failures += assert_filename_to_uri ("/path/to/file", "file:///path/to/file"); + failures += assert_filename_to_uri ("/path/to/file with spaces", "file:///path/to/file%20with%20spaces"); + failures += assert_uri_to_filename ("file:///path/to/file", "/path/to/file"); + failures += assert_uri_to_filename ("file:///path/to/file%20with%20spaces", "/path/to/file with spaces"); + + /* Tests for Issue#0000268 http://bugs.librdf.org/mantis/view.php?id = 268 */ + failures += assert_uri_to_filename ("file:///path/to/http%253A%252F%252Fwww.example.org%252Fa%252Fb%252Fc", "/path/to/http%3A%2F%2Fwww.example.org%2Fa%2Fb%2Fc"); + failures += assert_filename_to_uri ("/path/to/http%3A%2F%2Fwww.example.org%2Fa%2Fb%2Fc", "file:///path/to/http%253A%252F%252Fwww.example.org%252Fa%252Fb%252Fc"); + +#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) + /* Need to test this with a real dir (preferably not /) + * This is just a test so pretty likely to work on all development systems + * that are not WIN32 + */ + + for(i = 0; (dir = dirs[i]); i++) { + struct 
stat buf; + if(!lstat(dir, &buf) && S_ISDIR(buf.st_mode) && !S_ISLNK(buf.st_mode)) { + if(!chdir(dir)) + break; + } + } + if(!dir) + fprintf(stderr, + "%s: WARNING: Found no convenient directory - not testing relative files\n", + program); + else { + snprintf((char*)uri_buffer, URI_BUFFER_LEN, "file://%s/foo", dir); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, + "%s: Checking relative file name 'foo' in dir %s expecting URI %s\n", + program, dir, uri_buffer); +#endif + failures += assert_filename_to_uri ("foo", (const char*)uri_buffer); + } +#endif + +#endif + + uri1 = raptor_new_uri(world, (const unsigned char*)base_uri); + + str = raptor_uri_as_string(uri1); + if(strcmp((const char*)str, base_uri)) { + fprintf(stderr, + "%s: raptor_uri_as_string(%s) FAILED gaving %s != %s\n", + program, base_uri, str, base_uri); + failures++; + } + + uri2 = raptor_new_uri_for_xmlbase(uri1); + str = raptor_uri_as_string(uri2); + if(strcmp((const char*)str, base_uri_xmlbase)) { + fprintf(stderr, + "%s: raptor_new_uri_for_xmlbase(URI %s) FAILED giving %s != %s\n", + program, base_uri, str, base_uri_xmlbase); + failures++; + } + + uri3 = raptor_new_uri_for_retrieval(uri1); + + str = raptor_uri_as_string(uri3); + if(strcmp((const char*)str, base_uri_retrievable)) { + fprintf(stderr, + "%s: raptor_new_uri_for_retrievable(%s) FAILED gaving %s != %s\n", + program, base_uri, str, base_uri_retrievable); + failures++; + } + + raptor_free_uri(uri3); + raptor_free_uri(uri2); + raptor_free_uri(uri1); + + failures += assert_uri_to_relative(world, NULL, "http://example.com/foo/bar", "http://example.com/foo/bar"); + failures += assert_uri_to_relative(world, "", "http://example.com/foo/bar", "http://example.com/foo/bar"); + failures += assert_uri_to_relative(world, "foo:", "http://example.com/foo/bar", "http://example.com/foo/bar"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo?foo#foo", "http://example.com/base/bar?bar#bar", "bar?bar#bar"); + 
failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/foo/", "foo/"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/foo/.foo", "foo/.foo"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/foo/.foo#bar", "foo/.foo#bar"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/foo/bar", "foo/bar"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/foo#bar", "#bar"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/bar#foo", "bar#foo"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/otherbase/foo", "../otherbase/foo"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/otherbase/bar", "../otherbase/bar"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example.com/base/#foo", ".#foo"); + failures += assert_uri_to_relative(world, "http://example.com/base/foo", "http://example2.com/base/bar", "http://example2.com/base/bar"); + failures += assert_uri_to_relative(world, "http://example.com/base/one?path=/should/be/ignored", "http://example.com/base/two?path=/should/be/ignored", "two?path=/should/be/ignored"); + failures += assert_uri_to_relative(world, "http://example.org/base#", "http://www.foo.org", "http://www.foo.org"); + failures += assert_uri_to_relative(world, "http://example.org", "http://a.example.org/", "http://a.example.org/"); + failures += assert_uri_to_relative(world, "http://example.org", "http://a.example.org", "http://a.example.org"); + failures += assert_uri_to_relative(world, "http://abcdefgh.example.org/foo/bar/", "http://ijklmnop.example.org/", "http://ijklmnop.example.org/"); + failures += assert_uri_to_relative(world, 
"http://example.org", "http://example.org/a/b/c/d/efgh", "/a/b/c/d/efgh"); + + if(1) { + int ret; + raptor_uri* u1; + raptor_uri* u2; + + u1 = raptor_new_uri(world, (const unsigned char *)"http://example.org/abc"); + u2 = raptor_new_uri(world, (const unsigned char *)"http://example.org/def"); + + ret = raptor_uri_compare(u1, u2); + if(!(ret < 0)) { + fprintf(stderr, + "%s: raptor_uri_compare(%s, %s) FAILED gave %d expected <0\n", + program, raptor_uri_as_string(u1), raptor_uri_as_string(u2), + ret); + failures++; + } + + raptor_free_uri(u1); + raptor_free_uri(u2); + } + + raptor_free_world(world); + + return failures ; +} + +#endif /* STANDALONE */ diff --git a/src/raptor_win32.c b/src/raptor_win32.c new file mode 100644 index 0000000..d41eaf2 --- /dev/null +++ b/src/raptor_win32.c @@ -0,0 +1,44 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_win32.c - Raptor WIN32 support functions + * + * Copyright (C) 2002-2006, David Beckett http://www.dajobe.org/ + * Copyright (C) 2002-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef WIN32 + +/* Only on WIN32 systems */ + + +/* DLL entry point */ +BOOL APIENTRY +DllMain(HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReserved) +{ + return TRUE; +} + + +/* end if WIN32 */ +#endif diff --git a/src/raptor_www.c b/src/raptor_www.c new file mode 100644 index 0000000..26c2fa2 --- /dev/null +++ b/src/raptor_www.c @@ -0,0 +1,896 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_www.c - Raptor WWW retrieval core + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_SYS_STAT_H +#include <sys/stat.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static int raptor_www_file_fetch(raptor_www* www); + + + +/* + * raptor_www_init: + * @world: raptor_world object + * + * INTERNAL - Initialise the WWW class. + * + * Must be called before creating any #raptor_www object. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_www_init(raptor_world* world) +{ + int rc = 0; + + if(world->www_initialized) + return 0; + + if(!world->www_skip_www_init_finish) { +#ifdef RAPTOR_WWW_LIBCURL + rc = curl_global_init(CURL_GLOBAL_ALL); +#endif + } + + world->www_initialized = 1; + return rc; +} + + +/* + * raptor_www_finish: + * @world: raptor_world object + * + * INTERNAL - Terminate the WWW class. + * + * Must be called to clean any resources used by the WWW implementation. + * + **/ +void +raptor_www_finish(raptor_world* world) +{ + if(!world->www_skip_www_init_finish) { +#ifdef RAPTOR_WWW_LIBCURL + curl_global_cleanup(); +#endif + } +} + + +/** + * raptor_new_www_with_connection: + * @world: raptor_world object + * @connection: external WWW connection object. + * + * Constructor - create a new #raptor_www object over an existing WWW connection. + * + * At present this only works with a libcurl CURL handle object + * when raptor is compiled with libcurl suppport. Otherwise the + * @connection is ignored. This allows such things as setting + * up special flags on the curl handle before passing into the constructor. + * + * Return value: a new #raptor_www object or NULL on failure. 
+ **/ +raptor_www* +raptor_new_www_with_connection(raptor_world* world, void *connection) +{ + raptor_www* www; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + www = RAPTOR_CALLOC(raptor_www*, 1, sizeof(*www)); + if(!www) + return NULL; + + www->world = world; + www->type = NULL; + www->free_type = 1; /* default is to free content type */ + www->total_bytes = 0; + www->failed = 0; + www->status_code = 0; + www->write_bytes = NULL; + www->content_type = NULL; + www->uri_filter = NULL; + www->connection_timeout = 10; + www->cache_control = NULL; + +#ifdef RAPTOR_WWW_LIBCURL + www->curl_handle = (CURL*)connection; + if(raptor_www_curl_init(www)) { + raptor_free_www(www); + www = NULL; + } +#endif +#ifdef RAPTOR_WWW_LIBXML + raptor_www_libxml_init(www); +#endif +#ifdef RAPTOR_WWW_LIBFETCH + raptor_www_libfetch_init(www); +#endif + + return www; +} + + +/** + * raptor_new_www: + * @world: raptor_world object + * + * Constructor - create a new #raptor_www object. + * + * Return value: a new #raptor_www or NULL on failure. + **/ +raptor_www* +raptor_new_www(raptor_world* world) +{ + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + raptor_world_open(world); + + return raptor_new_www_with_connection(world, NULL); +} + + +/** + * raptor_free_www: + * @www: WWW object. + * + * Destructor - destroy a #raptor_www object. 
+ **/ +void +raptor_free_www(raptor_www* www) +{ + /* free context */ + if(www->type) { + if(www->free_type) + RAPTOR_FREE(char*, www->type); + www->type = NULL; + } + + if(www->user_agent) { + RAPTOR_FREE(char*, www->user_agent); + www->user_agent = NULL; + } + + if(www->cache_control) { + RAPTOR_FREE(char*, www->cache_control); + www->cache_control = NULL; + } + + if(www->proxy) { + RAPTOR_FREE(char*, www->proxy); + www->proxy = NULL; + } + + if(www->http_accept) { + RAPTOR_FREE(char*, www->http_accept); + www->http_accept = NULL; + } + +#ifdef RAPTOR_WWW_LIBCURL + raptor_www_curl_free(www); +#endif +#ifdef RAPTOR_WWW_LIBXML + raptor_www_libxml_free(www); +#endif +#ifdef RAPTOR_WWW_LIBFETCH + raptor_www_libfetch_free(www); +#endif + + if(www->uri) + raptor_free_uri(www->uri); + + if(www->final_uri) + raptor_free_uri(www->final_uri); + + RAPTOR_FREE(www, www); +} + + + +/** + * raptor_www_set_write_bytes_handler: + * @www: WWW object + * @handler: bytes handler function + * @user_data: bytes handler data + * + * Set the handler to receive bytes written by the #raptor_www implementation. + * + **/ +void +raptor_www_set_write_bytes_handler(raptor_www* www, + raptor_www_write_bytes_handler handler, + void *user_data) +{ + www->write_bytes = handler; + www->write_bytes_userdata = user_data; +} + + +/** + * raptor_www_set_content_type_handler: + * @www: WWW object + * @handler: content type handler function + * @user_data: content type handler data + * + * Set the handler to receive the HTTP Content-Type header value. + * + * This is called if or when the value is discovered during retrieval + * by the raptor_www implementation. Not all implementations provide + * access to this. 
+ **/ +void +raptor_www_set_content_type_handler(raptor_www* www, + raptor_www_content_type_handler handler, + void *user_data) +{ + www->content_type = handler; + www->content_type_userdata = user_data; +} + + +/** + * raptor_www_set_user_agent2: + * @www: WWW object + * @user_agent: User-Agent string + * @user_agent_len: Length of @user_agent string or 0 to count it here. + * + * Set the user agent value, for HTTP requests typically. + * + * Return value: non-0 on failure + **/ +int +raptor_www_set_user_agent2(raptor_www* www, const char *user_agent, + size_t user_agent_len) +{ + char *ua_copy = NULL; + + if(!user_agent || !*user_agent) { + www->user_agent = NULL; + return 0; + } + + if(user_agent_len == 0) + user_agent_len = strlen(user_agent); + + ua_copy = RAPTOR_MALLOC(char*, user_agent_len + 1); + if(!ua_copy) + return 1; + + memcpy(ua_copy, user_agent, user_agent_len + 1); /* copy NUL */ + + www->user_agent = ua_copy; + + return 0; +} + + +/** + * raptor_www_set_user_agent: + * @www: WWW object + * @user_agent: User-Agent string + * + * Set the user agent value, for HTTP requests typically. + * + * @Deprecated: use raptor_www_set_user_agent2() which takes a length + * parameter and returns a value to singify failure. + * + **/ +void +raptor_www_set_user_agent(raptor_www* www, const char *user_agent) +{ + (void)raptor_www_set_user_agent2(www, user_agent, 0); +} + + +/** + * raptor_www_set_proxy2: + * @www: WWW object + * @proxy: proxy string. + * @proxy_len: Length of @proxy string or 0 to count it here. + * + * Set the proxy for the WWW object. + * + * The @proxy usually a string of the form http://server.domain:port. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_www_set_proxy2(raptor_www* www, const char *proxy, + size_t proxy_len) +{ + char *proxy_copy; + + if(!proxy) + return 1; + + if(proxy_len == 0) + proxy_len = strlen(proxy); + + proxy_copy = RAPTOR_MALLOC(char*, proxy_len + 1); + if(!proxy_copy) + return 1; + + memcpy(proxy_copy, proxy, proxy_len + 1); /* copy NUL */ + + www->proxy = proxy_copy; + + return 0; +} + + +/** + * raptor_www_set_proxy: + * @www: WWW object + * @proxy: proxy string. + * + * Set the proxy for the WWW object. + * + * The @proxy usually a string of the form http://server.domain:port. + * + * @Deprecated: use raptor_www_set_proxy2() which takes an length + * parameter and returns a value to singify failure. + * + **/ +void +raptor_www_set_proxy(raptor_www* www, const char *proxy) +{ + (void)raptor_www_set_proxy2(www, proxy, 0); +} + + +/** + * raptor_www_set_http_accept2: + * @www: #raptor_www class + * @value: Accept: header value or NULL to have an empty one. + * @value_len: Length of @value string or 0 to count it here. + * + * Set HTTP Accept header. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_www_set_http_accept2(raptor_www* www, const char *value, + size_t value_len) +{ + char *value_copy; + size_t len = 8; /* strlen("Accept:")+1 */ + + if(value) { + if (value_len == 0) + value_len = strlen(value); + len += 1 + value_len; /* " "+value */ + } + + value_copy = RAPTOR_MALLOC(char*, len); + if(!value_copy) + return 1; + www->http_accept = value_copy; + + /* copy header name */ + memcpy(value_copy, "Accept:", 7); /* Do not copy NUL */ + value_copy += 7; + + /* copy header value */ + if(value) { + *value_copy++ = ' '; + memcpy(value_copy, value, value_len + 1); /* Copy NUL */ + } else { + /* Ensure value is NUL terminated */ + *value_copy = '\0'; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Using Accept header: '%s'\n", www->http_accept); +#endif + + return 0; +} + + +/** + * raptor_www_set_http_accept: + * @www: #raptor_www class + * @value: Accept: header value or NULL to have an empty one. + * + * Set HTTP Accept header. + * + * @Deprecated: use raptor_www_set_http_accept2() which takes an + * length parameter and returns a value to singify failure. + * + **/ +void +raptor_www_set_http_accept(raptor_www* www, const char *value) +{ + (void)raptor_www_set_http_accept2(www, value, 0); +} + + +/** + * raptor_www_set_connection_timeout: + * @www: WWW object + * @timeout: Timeout in seconds + * + * Set WWW connection timeout + **/ +void +raptor_www_set_connection_timeout(raptor_www* www, int timeout) +{ + www->connection_timeout = timeout; +} + + +/** + * raptor_www_set_http_cache_control: + * @www: WWW object + * @cache_control: Cache-Control header value (or NULL to disable) + * + * Set HTTP Cache-Control:header (default none) + * + * The @cache_control value can be a string to set it, "" to send + * a blank header or NULL to not set the header at all. 
+ * + * Return value: non-0 on failure + **/ +int +raptor_www_set_http_cache_control(raptor_www* www, const char* cache_control) +{ + char *cache_control_copy; + const char* const header="Cache-Control:"; + const size_t header_len = 14; /* strlen("Cache-Control:") */ + size_t len; + size_t cc_len; + + RAPTOR_ASSERT_RETURN((strlen(header) != header_len), "Cache-Control header length is wrong", 1); + + if(www->cache_control) { + RAPTOR_FREE(char*, www->cache_control); + www->cache_control = NULL; + } + + if(!cache_control) { + www->cache_control = NULL; + return 0; + } + + cc_len = strlen(cache_control); + len = header_len + 1 + cc_len + 1; /* header+" "+cache_control+"\0" */ + + cache_control_copy = RAPTOR_MALLOC(char*, len); + if(!cache_control_copy) + return 1; + + www->cache_control = cache_control_copy; + + /* copy header name */ + memcpy(cache_control_copy, header, header_len); /* Do not copy NUL */ + cache_control_copy += header_len; + + /* copy header value */ + if(*cache_control) { + *cache_control_copy ++= ' '; + memcpy(cache_control_copy, cache_control, cc_len + 1); /* Copy NUL */ + } else { + /* Ensure value is NUL terminated */ + *cache_control_copy = '\0'; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("Using Cache-Control header: '%s'\n", www->cache_control); +#endif + + return 0; +} + + +/** + * raptor_www_set_uri_filter: + * @www: WWW object + * @filter: URI filter function + * @user_data: User data to pass to filter function + * + * Set URI filter function for WWW retrieval. 
+ **/ +void +raptor_www_set_uri_filter(raptor_www* www, + raptor_uri_filter_func filter, + void *user_data) +{ + www->uri_filter = filter; + www->uri_filter_user_data = user_data; +} + + +/** + * raptor_www_set_ssl_cert_options: + * @www: WWW object + * @cert_filename: SSL client certificate file + * @cert_type: SSL client certificate type (default is "PEM") + * @cert_passphrase: SSL client certificate password + * + * Set SSL client certificate options (where supported) + * + * Return value: non-0 when setting options is not supported + **/ +int +raptor_www_set_ssl_cert_options(raptor_www* www, + const char* cert_filename, + const char* cert_type, + const char* cert_passphrase) +{ +#ifdef RAPTOR_WWW_LIBCURL + return raptor_www_curl_set_ssl_cert_options(www, cert_filename, cert_type, + cert_passphrase); +#else + return 1; +#endif +} + + +/** + * raptor_www_set_ssl_verify_options: + * @www: WWW object + * @verify_peer: SSL verify peer - non-0 to verify peer SSL certificate (default) + * @verify_host: SSL verify host - 0 none, non-0 to require a CN match (default). + * + * Set whether SSL verifies the authenticity of the peer's certificate + * + * These options correspond to setting the curl + * CURLOPT_SSL_VERIFYPEER and CURLOPT_SSL_VERIFYHOST options. + * + * Return value: non-0 on failure + **/ +int +raptor_www_set_ssl_verify_options(raptor_www* www, int verify_peer, + int verify_host) +{ +#ifdef RAPTOR_WWW_LIBCURL + return raptor_www_curl_set_ssl_verify_options(www, verify_peer, + verify_host); +#else + return 1; +#endif +} + + + +/** + * raptor_www_get_connection: + * @www: #raptor_www object + * + * Get WWW library connection object. + * + * Return the internal WWW connection handle. For libcurl, this + * returns the CURL handle and for libxml the context. Otherwise + * it returns NULL. 
+ * + * Return value: connection pointer + **/ +void* +raptor_www_get_connection(raptor_www* www) +{ +#if defined(RAPTOR_WWW_LIBCURL) + return www->curl_handle; +#elif defined(RAPTOR_WWW_LIBXML) + return www->ctxt; +#else + return NULL; +#endif +} + + +/** + * raptor_www_abort: + * @www: WWW object + * @reason: abort reason message + * + * Abort an ongoing raptor WWW operation and pass back a reason. + * + * This is typically used within one of the raptor WWW handlers + * when retrieval need no longer continue due to another + * processing issue or error. + **/ +void +raptor_www_abort(raptor_www* www, const char *reason) +{ + www->failed = 1; +} + + +void +raptor_www_error(raptor_www* www, const char *message, ...) +{ + va_list arguments; + + va_start(arguments, message); + + raptor_log_error_varargs(www->world, + RAPTOR_LOG_LEVEL_ERROR, + &www->locator, + message, arguments); + + va_end(arguments); +} + + +static int +raptor_www_file_handle_fetch(raptor_www* www, FILE* fh) +{ + while(!feof(fh)) { + size_t len = fread(www->buffer, 1, RAPTOR_WWW_BUFFER_SIZE, fh); + if(len > 0) { + www->total_bytes += len; + www->buffer[len]='\0'; + + if(www->write_bytes) + www->write_bytes(www, www->write_bytes_userdata, www->buffer, len, 1); + } + + if(feof(fh) || www->failed) + break; + } + + if(!www->failed) + www->status_code = 200; + + return www->failed; +} + + +static int +raptor_www_file_fetch(raptor_www* www) +{ + char *filename; + FILE *fh; + unsigned char *uri_string = raptor_uri_as_string(www->uri); +#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) + struct stat buf; +#endif + + www->status_code = 200; + + filename = raptor_uri_uri_string_to_filename(uri_string); + if(!filename) { + raptor_www_error(www, "Not a file: URI"); + return 1; + } + +#if defined(HAVE_UNISTD_H) && defined(HAVE_SYS_STAT_H) + if(!stat(filename, &buf) && S_ISDIR(buf.st_mode)) { + raptor_www_error(www, "Cannot read from a directory '%s'", filename); + RAPTOR_FREE(char*, filename); + 
www->status_code = 404; + return 1; + } +#endif + + fh = fopen(filename, "rb"); + if(!fh) { + raptor_www_error(www, "file '%s' open failed - %s", + filename, strerror(errno)); + RAPTOR_FREE(char*, filename); + www->status_code = (errno == EACCES) ? 403: 404; + www->failed = 1; + + return www->failed; + } + + raptor_www_file_handle_fetch(www, fh); + fclose(fh); + + RAPTOR_FREE(char*, filename); + + return www->failed; +} + + +/** +* raptor_www_fetch: +* @www: WWW object +* @uri: URI to read from +* +* Start a WWW content retrieval for the given URI, returning data via the write_bytes handler. +* +* Return value: non-0 on failure. +**/ +int +raptor_www_fetch(raptor_www *www, raptor_uri *uri) +{ + int status = 1; + + www->uri = raptor_new_uri_for_retrieval(uri); + + www->locator.uri = uri; + www->locator.line= -1; + www->locator.column= -1; + + if(www->uri_filter) { + int rc = www->uri_filter(www->uri_filter_user_data, uri); + if(rc) + return rc; + } + +#ifdef RAPTOR_WWW_NONE + status = raptor_www_file_fetch(www); +#else + + if(raptor_uri_uri_string_is_file_uri(raptor_uri_as_string(www->uri))) + status = raptor_www_file_fetch(www); + else { +#ifdef RAPTOR_WWW_LIBCURL + status = raptor_www_curl_fetch(www); +#endif + +#ifdef RAPTOR_WWW_LIBXML + status = raptor_www_libxml_fetch(www); +#endif + +#ifdef RAPTOR_WWW_LIBFETCH + status = raptor_www_libfetch_fetch(www); +#endif + } + +#endif + if(!status && www->status_code && www->status_code != 200){ + raptor_www_error(www, "Resolving URI failed with HTTP status %d", + www->status_code); + status = 1; + } + + www->failed = status; + + return www->failed; +} + + +static void +raptor_www_fetch_to_string_write_bytes(raptor_www* www, void *userdata, + const void *ptr, size_t size, + size_t nmemb) +{ + raptor_stringbuffer* sb = (raptor_stringbuffer*)userdata; + size_t len = size * nmemb; + + raptor_stringbuffer_append_counted_string(sb, (unsigned char*)ptr, len, 1); +} + + +/** + * raptor_www_fetch_to_string: + * @www: raptor_www 
object + * @uri: raptor_uri to retrieve + * @string_p: pointer to location to hold string + * @length_p: pointer to location to hold length of string (or NULL) + * @malloc_handler: pointer to malloc() to use to make string (or NULL) + * + * Start a WWW content retrieval for the given URI, returning the data in a new string. + * + * If @malloc_handler is null, raptor will allocate it using it's + * own memory allocator. *string_p is set to NULL on failure (and + * *length_p to 0 if length_p is not NULL). + * + * Return value: non-0 on failure + **/ +RAPTOR_EXTERN_C +int +raptor_www_fetch_to_string(raptor_www *www, raptor_uri *uri, + void **string_p, size_t *length_p, + raptor_data_malloc_handler const malloc_handler) +{ + raptor_stringbuffer *sb = NULL; + void *str = NULL; + raptor_www_write_bytes_handler saved_write_bytes; + void *saved_write_bytes_userdata; + + sb = raptor_new_stringbuffer(); + if(!sb) + return 1; + + if(length_p) + *length_p=0; + + saved_write_bytes = www->write_bytes; + saved_write_bytes_userdata = www->write_bytes_userdata; + raptor_www_set_write_bytes_handler(www, raptor_www_fetch_to_string_write_bytes, sb); + + if(raptor_www_fetch(www, uri)) + str = NULL; + else { + size_t len = raptor_stringbuffer_length(sb); + if(len) { + str = (void*)malloc_handler(len+1); + if(str) { + raptor_stringbuffer_copy_to_string(sb, (unsigned char*)str, len+1); + *string_p=str; + if(length_p) + *length_p=len; + } + } + } + + if(sb) + raptor_free_stringbuffer(sb); + + raptor_www_set_write_bytes_handler(www, saved_write_bytes, saved_write_bytes_userdata); + + return (str == NULL); +} + + +/** + * raptor_www_get_final_uri: + * @www: #raptor_www object + * + * Get the WWW final resolved URI. + * + * This returns the URI used after any protocol redirection. + * + * Return value: a new URI or NULL if not known. + **/ +raptor_uri* +raptor_www_get_final_uri(raptor_www* www) +{ + return www->final_uri ? 
raptor_uri_copy(www->final_uri) : NULL; +} + + +/** + * raptor_www_set_final_uri_handler: + * @www: WWW object + * @handler: final URI handler function + * @user_data: final URI handler data + * + * Set the handler to receive the final resolved URI. + * + * This is called if or when the value is discovered during retrieval + * by the raptor_www implementation. Not all implementations provide + * access to this. + **/ +void +raptor_www_set_final_uri_handler(raptor_www* www, + raptor_www_final_uri_handler handler, + void *user_data) +{ + www->final_uri_handler = handler; + www->final_uri_userdata = user_data; +} diff --git a/src/raptor_www_curl.c b/src/raptor_www_curl.c new file mode 100644 index 0000000..d689f2b --- /dev/null +++ b/src/raptor_www_curl.c @@ -0,0 +1,388 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_www_curl.c - Raptor WWW retrieval via libcurl + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef RAPTOR_WWW_LIBCURL + +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +#include <strings.h> +#endif +#include <stdarg.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static void +raptor_www_curl_update_status(raptor_www* www) +{ + char* final_uri; + + if(www->failed) + return; + + if(www->checked_status++) + return; + + if(!www->final_uri) { + /* If not already found in headers by + * raptor_www_curl_header_callback() which overrides what libcurl + * found in HTTP status line (3xx) + */ + + if(curl_easy_getinfo(www->curl_handle, CURLINFO_EFFECTIVE_URL, + &final_uri) == CURLE_OK) { + www->final_uri = raptor_new_uri(www->world, (const unsigned char*)final_uri); + if(www->final_uri_handler) + www->final_uri_handler(www, www->final_uri_userdata, www->final_uri); + } + } + +} + + +static size_t +raptor_www_curl_write_callback(void *ptr, size_t size, size_t nmemb, void *userdata) +{ + raptor_www* www = (raptor_www*)userdata; + size_t bytes = size * nmemb; + + /* If WWW has been aborted, return nothing so that + * libcurl will abort the transfer + */ + if(www->failed) + return 0; + + raptor_www_curl_update_status(www); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG2("Got %d bytes\n", bytes); +#endif + + if(www->write_bytes) + www->write_bytes(www, www->write_bytes_userdata, ptr, size, nmemb); + www->total_bytes += bytes; + return bytes; +} + + +static size_t +raptor_www_curl_header_callback(void* ptr, size_t size, size_t nmemb, + void *userdata) +{ + raptor_www* www = (raptor_www*)userdata; + size_t bytes = size * nmemb; + int c; + + /* If WWW has been aborted, return nothing so that + * libcurl will abort the transfer + */ + if(www->failed) + return 0; + +#define CONTENT_TYPE_LEN 14 + if(!raptor_strncasecmp((char*)ptr, "Content-Type: ", CONTENT_TYPE_LEN)) { + size_t len = bytes - CONTENT_TYPE_LEN - 2; /* for \r\n */ + 
char *type_buffer = RAPTOR_MALLOC(char*, len + 1); + memcpy(type_buffer, (char*)ptr + 14, len); + type_buffer[len]='\0'; + if(www->type) + RAPTOR_FREE(char*, www->type); + www->type = type_buffer; + www->free_type = 1; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + RAPTOR_DEBUG3("Got content type header '%s' (%d bytes)\n", type_buffer, len); +#endif + if(www->content_type) + www->content_type(www, www->content_type_userdata, www->type); + } + + +#define CONTENT_LOCATION_LEN 18 + if(!raptor_strncasecmp((char*)ptr, "Content-Location: ", + CONTENT_LOCATION_LEN)) { + size_t uri_len = bytes - CONTENT_LOCATION_LEN - 2; /* for \r\n */ + unsigned char* uri_str = (unsigned char*)ptr + CONTENT_LOCATION_LEN; + + if(www->final_uri) + raptor_free_uri(www->final_uri); + + /* Ensure it is NUL terminated */ + c = uri_str[uri_len]; + uri_str[uri_len] = '\0'; + www->final_uri = raptor_new_uri_relative_to_base_counted(www->world, + www->uri, + uri_str, uri_len); + uri_str[uri_len] = RAPTOR_GOOD_CAST(unsigned char, c); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + if(www->final_uri) + RAPTOR_DEBUG2("Got content location header '%s'\n", + raptor_uri_as_string(www->final_uri)); +#endif + if(www->final_uri_handler) + www->final_uri_handler(www, www->final_uri_userdata, www->final_uri); + } + + return bytes; +} + + +/* Return non-0 on failure */ +int +raptor_www_curl_init(raptor_www *www) +{ + CURLcode res; + +#define curl_init_setopt_or_fail(h, k, v) do { \ + res = curl_easy_setopt(h, k, v); \ + if(res != CURLE_OK) \ + return 1; \ + } while(0) + + if(!www->curl_handle) { + www->curl_handle = curl_easy_init(); + www->curl_init_here = 1; + } + + +#ifndef CURLOPT_WRITEDATA +#define CURLOPT_WRITEDATA CURLOPT_FILE +#endif + + /* send all data to this function */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEFUNCTION, + raptor_www_curl_write_callback); + /* ... 
using this data pointer */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEDATA, www); + + + /* send all headers to this function */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_HEADERFUNCTION, + raptor_www_curl_header_callback); + /* ... using this data pointer */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_WRITEHEADER, www); + + /* Make it follow Location: headers */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_FOLLOWLOCATION, 1); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_VERBOSE, (void*)1); +#endif + + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_ERRORBUFFER, + www->error_buffer); + + /* Connection timeout in seconds */ + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_CONNECTTIMEOUT, + www->connection_timeout); + curl_init_setopt_or_fail(www->curl_handle, CURLOPT_NOSIGNAL, 1); + + return 0; +} + + +void +raptor_www_curl_free(raptor_www *www) +{ + /* only tidy up if we did all the work */ + if(www->curl_init_here && www->curl_handle) { + curl_easy_cleanup(www->curl_handle); + www->curl_handle = NULL; + } +} + + +int +raptor_www_curl_fetch(raptor_www *www) +{ + CURLcode res = CURLE_OK; + struct curl_slist *slist = NULL; + + if(www->proxy) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_PROXY, www->proxy); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting proxy to %s failed", www->proxy); + return 1; + } + } + + if(www->user_agent) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_USERAGENT, www->user_agent); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting user agent to %s failed", www->user_agent); + return 1; + } + } + + if(www->http_accept) + slist = curl_slist_append(slist, (const char*)www->http_accept); + + /* ALWAYS disable curl default "Pragma: no-cache" */ + slist = curl_slist_append(slist, "Pragma:"); + if(www->cache_control) + slist = curl_slist_append(slist, (const 
char*)www->cache_control); + + if(slist) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_HTTPHEADER, slist); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request http headers failed"); + return 1; + } + } + + /* specify URL to get */ + res = curl_easy_setopt(www->curl_handle, CURLOPT_URL, + raptor_uri_as_string(www->uri)); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request URL failed"); + return 1; + } + + if(curl_easy_perform(www->curl_handle)) { + /* failed */ + www->failed = 1; + raptor_www_error(www, "Resolving URI failed: %s", www->error_buffer); + } else { + long lstatus; + +#ifndef CURLINFO_RESPONSE_CODE +#define CURLINFO_RESPONSE_CODE CURLINFO_HTTP_CODE +#endif + + /* Requires pointer to a long */ + if(curl_easy_getinfo(www->curl_handle, CURLINFO_RESPONSE_CODE, &lstatus) == CURLE_OK) + /* CURL status code will always fit in an int */ + www->status_code = RAPTOR_GOOD_CAST(int, lstatus); + + } + + if(slist) + curl_slist_free_all(slist); + + return www->failed; +} + + +int +raptor_www_curl_set_ssl_cert_options(raptor_www* www, + const char* cert_filename, + const char* cert_type, + const char* cert_passphrase) +{ + CURLcode res; + + /* client certificate file name */ + if(cert_filename) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSLCERT, cert_filename); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL cert filename to %s failed", + cert_filename); + return 1; + } + } + + /* curl default is "PEM" */ + if(cert_type) { + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSLCERTTYPE, cert_type); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL cert type to %s failed", + cert_type); + return 1; + } + } + + /* passphrase */ + /* Removed in 7.16.4 */ +#if LIBCURL_VERSION_NUM < 0x071004 +#define CURLOPT_KEYPASSWD CURLOPT_SSLKEYPASSWD +#endif + if(cert_passphrase) { + res = curl_easy_setopt(www->curl_handle, 
CURLOPT_KEYPASSWD, cert_passphrase); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL cert pass phrase failed"); + return 1; + } + } + + return 0; +} + + +int +raptor_www_curl_set_ssl_verify_options(raptor_www* www, int verify_peer, + int verify_host) +{ + CURLcode res; + + if(verify_peer) + verify_peer = 1; + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSL_VERIFYPEER, verify_peer); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL verify peer flag %d failed", + verify_peer); + return 1; + } + + /* curl 7.28.1 removed the value 1 from being legal: + * http://daniel.haxx.se/blog/2012/10/25/libcurl-claimed-to-be-dangerous/ + * + * CURL GIT commit da82f59b697310229ccdf66104d5d65a44dfab98 + * Sat Oct 27 12:31:39 2012 +0200 + * + * Legal values are: + * 0 to disable host verifying + * 2 (default) to enable host verifyinging + */ + if(verify_host) + verify_host = 2; + res = curl_easy_setopt(www->curl_handle, CURLOPT_SSL_VERIFYHOST, verify_host); + if(res != CURLE_OK) { + www->failed = 1; + raptor_www_error(www, "Setting request SSL verify host flag %d failed", + verify_host); + return 1; + } + + return 0; +} + + +#endif /* RAPTOR_WWW_LIBCURL */ diff --git a/src/raptor_www_libfetch.c b/src/raptor_www_libfetch.c new file mode 100644 index 0000000..55f85a5 --- /dev/null +++ b/src/raptor_www_libfetch.c @@ -0,0 +1,97 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_www_libfetch.c - Raptor WWW retrieval via libfetch + * + * Copyright (C) 2003-2006, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef RAPTOR_WWW_LIBFETCH + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> +#include <errno.h> + +#ifdef HAVE_SYS_PARAM_H +#include <sys/param.h> +#endif +#include <fetch.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +void +raptor_www_libfetch_init(raptor_www *www) +{ +} + + +void +raptor_www_libfetch_free(raptor_www *www) +{ +} + + +int +raptor_www_libfetch_fetch(raptor_www *www) +{ + FILE *stream; + + if(www->proxy) { + setenv("HTTP_PROXY", www->proxy, 0); + setenv("FTP_PROXY", www->proxy, 0); + } + + if(www->user_agent) + setenv("HTTP_USER_AGENT", www->user_agent, 0); + + stream = fetchXGetURL((const char*)raptor_uri_as_string(www->uri), NULL, NULL); + if(!stream) { + www->failed = 1; + raptor_www_error(www, "%s", fetchLastErrString); + return 1; + } + + /* fetch does not give us access to this */ + www->status_code = 200; + + while(!feof(stream)) { + size_t len = fread(www->buffer, 1, RAPTOR_WWW_BUFFER_SIZE, stream); + + www->total_bytes += len; + + if(www->write_bytes) + www->write_bytes(www, www->write_bytes_userdata, www->buffer, len, 1); + + if(len < RAPTOR_WWW_BUFFER_SIZE) + break; + } + fclose(stream); + + return www->failed; +} + +#endif /* RAPTOR_WWW_LIBFETCH */ diff --git a/src/raptor_www_libxml.c b/src/raptor_www_libxml.c new file mode 100644 index 0000000..e2b9f3e --- /dev/null +++ b/src/raptor_www_libxml.c @@ -0,0 +1,159 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_www_libxml.c - Raptor WWW retrieval via libxml2 + * + * Copyright (C) 2003-2008, 
David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#ifdef RAPTOR_WWW_LIBXML + +void +raptor_www_libxml_init(raptor_www *www) +{ + xmlNanoHTTPInit(); + www->ctxt = NULL; +} + + +void +raptor_www_libxml_free(raptor_www *www) +{ + xmlNanoHTTPCleanup(); +} + + +int +raptor_www_libxml_fetch(raptor_www *www) +{ + char* headers = NULL; + + if(www->proxy) + xmlNanoHTTPScanProxy(www->proxy); + + if(www->http_accept || www->user_agent) { + size_t accept_len = 0; + size_t ua_len = 0; + size_t cc_len = 0; + size_t len = 0; + char *p; + + if(www->http_accept) { + accept_len = strlen(www->http_accept); + len += accept_len+2; /* \r\n */ + } + + if(www->user_agent) { + ua_len = strlen(www->user_agent); + len += 12+ua_len+2; /* strlen("User-Agent: ") + \r\n */ + } + + if(www->cache_control) { + cc_len = strlen(www->cache_control); + len += cc_len+2; /* \r\n */ + } + + headers = RAPTOR_MALLOC(char*, len + 1); + if(!headers) + return 1; + + p = headers; + if(www->http_accept) { + memcpy(p, www->http_accept, accept_len); + p+= accept_len; + *p++='\r'; 
+ *p++='\n'; + } + if(www->user_agent) { + memcpy(p, "User-Agent: ", 12); + p += 12; + memcpy(p, www->user_agent, ua_len); + p+= ua_len; + *p++='\r'; + *p++='\n'; + } + if(www->cache_control) { + memcpy(p, www->cache_control, cc_len); + p+= cc_len; + *p++='\r'; + *p++='\n'; + } + *p='\0'; + } + + www->ctxt = xmlNanoHTTPMethod((const char*)raptor_uri_as_string(www->uri), + NULL, /* HTTP method (default GET) */ + NULL, /* input string */ + &www->type, + headers, + 0); /* input length - ilen */ + + if(headers) + RAPTOR_FREE(char*, headers); + + if(!www->ctxt) + return 1; + + if(www->type) { + if(www->content_type) { + www->content_type(www, www->content_type_userdata, www->type); + if(www->failed) { + xmlNanoHTTPClose(www->ctxt); + return 1; + } + } + xmlFree(www->type); + www->type = NULL; + } + + www->status_code = xmlNanoHTTPReturnCode(www->ctxt); + + while(1) { + int len = xmlNanoHTTPRead(www->ctxt, www->buffer, RAPTOR_WWW_BUFFER_SIZE); + if(len < 0) + break; + + www->total_bytes += len; + + if(www->write_bytes) + www->write_bytes(www, www->write_bytes_userdata, www->buffer, len, 1); + + if(len < RAPTOR_WWW_BUFFER_SIZE || www->failed) + break; + } + + xmlNanoHTTPClose(www->ctxt); + + return www->failed; +} + +#endif /* #ifdef RAPTOR_WWW_LIBXML*/ diff --git a/src/raptor_www_test.c b/src/raptor_www_test.c new file mode 100644 index 0000000..060626f --- /dev/null +++ b/src/raptor_www_test.c @@ -0,0 +1,105 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_www_test.c - Raptor WWW retrieval test code + * + * Copyright (C) 2003-2006, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <stdarg.h> + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +static void +write_content_type(raptor_www* www, + void *userdata, const char *content_type) +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf((FILE*)userdata, "Content Type: %s\n", content_type); +#endif +} + + +int main (int argc, char *argv[]) +{ + const char *program = raptor_basename(argv[0]); + raptor_world *world; + const char *uri_string; + raptor_www *www; + const char *user_agent = "raptor_www_test/0.1"; + raptor_uri *uri; + void *string = NULL; + size_t string_length = 0; + + if(argc > 1) + uri_string = argv[1]; + else + uri_string = "http://librdf.org/"; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + uri = raptor_new_uri(world, (const unsigned char*)uri_string); + if(!uri) { + fprintf(stderr, "%s: Failed to create Raptor URI for %s\n", + program, uri_string); + exit(1); + } + + www = raptor_new_www(world); + + raptor_www_set_content_type_handler(www, write_content_type, (void*)stderr); + raptor_www_set_user_agent2(www, user_agent, 0); + + /* start retrieval (always a GET) */ + + if(raptor_www_fetch_to_string(www, uri, + &string, &string_length, malloc)) { + fprintf(stderr, "%s: WWW fetch failed\n", program); + } else { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: HTTP response status %d\n", + program, www->status_code); + + fprintf(stderr, "%s: Returned %d bytes of content\n", + program, 
(int)string_length); +#endif + } + if(string) + free(string); + + raptor_free_www(www); + + raptor_free_uri(uri); + + raptor_free_world(world); + + return 0; +} diff --git a/src/raptor_xml.c b/src/raptor_xml.c new file mode 100644 index 0000000..d89030c --- /dev/null +++ b/src/raptor_xml.c @@ -0,0 +1,1100 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_xml.c - Raptor XML routines + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +/** + * raptor_new_xml_element: + * @name: The XML element name + * @xml_language: the in-scope XML language (or NULL) + * @xml_base: the in-scope XML base URI (or NULL) + * + * Constructor - create a new XML element from a QName + * + * The @xml_language and @xml_base become owned by the new object. 
+ * + * Return value: a new #raptor_xml_element or NULL on failure + **/ +raptor_xml_element* +raptor_new_xml_element(raptor_qname *name, + const unsigned char *xml_language, + raptor_uri *xml_base) +{ + raptor_xml_element* xml_element; + + xml_element = RAPTOR_CALLOC(raptor_xml_element*, 1, sizeof(*xml_element)); + if(!xml_element) + return NULL; + + /* Element name */ + xml_element->name = name; + xml_element->xml_language = xml_language; + xml_element->base_uri = xml_base; + + xml_element->declared_nspaces = NULL; + + xml_element->content_cdata_sb = raptor_new_stringbuffer(); + if(!xml_element->content_cdata_sb) { + RAPTOR_FREE(raptor_xml_element, xml_element); + xml_element = NULL; + } + + return xml_element; +} + + +/** + * raptor_new_xml_element_from_namespace_local_name: + * @ns: namespace + * @name: the XML element local name + * @xml_language: the in-scope XML language (or NULL) + * @xml_base: base uri (or NULL) + * + * Constructor - create a new XML element from an XML namespace and a local name + * + * Added in 1.4.16. + * + * Return value: a new #raptor_xml_element or NULL on failure + */ +raptor_xml_element* +raptor_new_xml_element_from_namespace_local_name(raptor_namespace *ns, + const unsigned char *name, + const unsigned char *xml_language, + raptor_uri *xml_base) +{ + raptor_uri *base_uri_copy; + raptor_qname *qname; + raptor_xml_element *element = NULL; + + qname = raptor_new_qname_from_namespace_local_name(ns->nstack->world, ns, + name, NULL); + if(qname) { + base_uri_copy = xml_base ? raptor_uri_copy(xml_base) : NULL; + element = raptor_new_xml_element(qname, xml_language, base_uri_copy); + if(!element) { + raptor_free_qname(qname); + if(base_uri_copy) + raptor_free_uri(base_uri_copy); + } + } + return element; +} + + +/** + * raptor_free_xml_element: + * @element: XML Element + * + * Destructor - destroy a raptor_xml_element object. 
+ **/ +void +raptor_free_xml_element(raptor_xml_element *element) +{ + unsigned int i; + + if(!element) + return; + + for(i = 0; i < element->attribute_count; i++) + if(element->attributes[i]) + raptor_free_qname(element->attributes[i]); + + if(element->attributes) + RAPTOR_FREE(raptor_qname_array, element->attributes); + + if(element->content_cdata_sb) + raptor_free_stringbuffer(element->content_cdata_sb); + + if(element->base_uri) + raptor_free_uri(element->base_uri); + + if(element->xml_language) + RAPTOR_FREE(char*, element->xml_language); + + raptor_free_qname(element->name); + + if(element->declared_nspaces) + raptor_free_sequence(element->declared_nspaces); + + RAPTOR_FREE(raptor_element, element); +} + + +/** + * raptor_xml_element_get_name: + * @xml_element: XML Element + * + * Get the XML Name of an XML element + * + * Return value: The Name. + **/ +raptor_qname* +raptor_xml_element_get_name(raptor_xml_element *xml_element) +{ + return xml_element->name; +} + + +/** + * raptor_xml_element_set_attributes: + * @xml_element: XML Element + * @attributes: Array of XML Qname attributes with values + * @count: Length of array + * + * Set the attributes on an XML element. + * + * The @attributes array becomes owned by the element after this function. + **/ +void +raptor_xml_element_set_attributes(raptor_xml_element* xml_element, + raptor_qname **attributes, int count) +{ + xml_element->attributes = attributes; + xml_element->attribute_count = count; +} + + +/** + * raptor_xml_element_get_attributes: + * @xml_element: XML Element + * + * Get the array of attributes on the XML element. + * + * Use raptor_xml_element_get_attributes_count() to get the count + * of the array size. + * + * Return value: the array of qnames or NULL if none are present. 
+ **/ +raptor_qname** +raptor_xml_element_get_attributes(raptor_xml_element* xml_element) +{ + return xml_element->attributes; +} + + +/** + * raptor_xml_element_get_attributes_count: + * @xml_element: XML Element + * + * Get the number of attributes on the XML element. + * + * Return value: Integer number of attributes - 0 or more. + **/ +int +raptor_xml_element_get_attributes_count(raptor_xml_element* xml_element) +{ + return xml_element->attribute_count; +} + + +/** + * raptor_xml_element_declare_namespace: + * @xml_element: XML Element + * @nspace: raptor_namespace to declare + * + * Declare a namespace on the XML Element. + * + * Return value: non-0 if namespace cannot be declared + **/ +int +raptor_xml_element_declare_namespace(raptor_xml_element* xml_element, + raptor_namespace *nspace) +{ + int i; + const raptor_namespace *ns; + + if(!xml_element->declared_nspaces) + xml_element->declared_nspaces = raptor_new_sequence(NULL, NULL); + + if((ns = xml_element->name->nspace)) { + /* Cannot have same namespace already seen */ + if(ns == nspace || + /* ... or two default nspaces */ + (!ns->prefix && !nspace->prefix) || + /* ... or two same prefixes */ + (ns->prefix && nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) + ) + return 1; + } + + + for(i = 0; + (ns = (const raptor_namespace*)raptor_sequence_get_at(xml_element->declared_nspaces, i)); + i++) { + /* Cannot have same namespace already seen */ + if(ns == nspace || + /* ... or two default nspaces */ + (!ns->prefix && !nspace->prefix) || + /* ... 
or two same prefixes */ + (ns->prefix && nspace->prefix && + !strcmp((const char*)ns->prefix, (const char*)nspace->prefix)) + ) + return 1; + } + + raptor_sequence_push(xml_element->declared_nspaces, nspace); + + return 0; +} + + +#ifdef RAPTOR_DEBUG +void +raptor_print_xml_element(raptor_xml_element *element, FILE* stream) +{ + raptor_qname_print(stream, element->name); + fputc('\n', stream); + + if(element->attribute_count) { + unsigned int i; + int printed = 0; + + fputs(" attributes: ", stream); + for(i = 0; i < element->attribute_count; i++) { + if(element->attributes[i]) { + if(printed) + fputc(' ', stream); + raptor_qname_print(stream, element->attributes[i]); + fprintf(stream, "='%s'", element->attributes[i]->value); + printed = 1; + } + } + fputc('\n', stream); + } +} +#endif + + +struct nsd +{ + const raptor_namespace *nspace; + unsigned char *declaration; + size_t length; +}; + + +static int +raptor_nsd_compare(const void *a, const void *b) +{ + struct nsd* nsd_a = (struct nsd*)a; + struct nsd* nsd_b = (struct nsd*)b; + + /* Sort NULLs earlier */ + if(!nsd_a->declaration) + return -1; + else if(!nsd_b->declaration) + return 1; + return strcmp((const char*)nsd_a->declaration, (const char*)nsd_b->declaration); +} + + +/** + * raptor_xml_element_write: + * @element: XML element to format + * @nstack: Namespace stack context to use in formatting + * @is_empty: non-0 if element is empty + * @is_end: non-0 if this is an end element (else is a start element) + * @depth: XML element depth + * @iostr: iostream object + * + * Write a formatted XML element to a #raptor_iostream + * + * Return value: non-0 on failure +*/ +int +raptor_xml_element_write(raptor_xml_element *element, + raptor_namespace_stack *nstack, + int is_empty, + int is_end, + int depth, + raptor_iostream* iostr) +{ + struct nsd *nspace_declarations = NULL; + size_t nspace_declarations_count = 0; + unsigned int i; + + /* max is 1 per element and 1 for each attribute + size of declared */ + 
if(nstack) { + int nspace_max_count = element->attribute_count+1; + if(element->declared_nspaces) + nspace_max_count += raptor_sequence_size(element->declared_nspaces); + + nspace_declarations = RAPTOR_CALLOC(struct nsd*, nspace_max_count, + sizeof(struct nsd)); + } + + if(element->name->nspace) { + if(!is_end && nstack && + !raptor_namespaces_namespace_in_scope(nstack, element->name->nspace)) { + nspace_declarations[0].declaration= + raptor_namespace_format_as_xml(element->name->nspace, + &nspace_declarations[0].length); + nspace_declarations[0].nspace = element->name->nspace; + nspace_declarations_count++; + } + } + + if(!is_end && element->attributes) { + for(i = 0; i < element->attribute_count; i++) { + /* qname */ + if(element->attributes[i]->nspace) { + if(nstack && + !raptor_namespaces_namespace_in_scope(nstack, element->attributes[i]->nspace) && element->attributes[i]->nspace != element->name->nspace) { + /* not in scope and not same as element (so already going to be declared)*/ + unsigned int j; + int declare_me = 1; + + /* check it wasn't an earlier declaration too */ + for(j = 0; j < nspace_declarations_count; j++) + if(nspace_declarations[j].nspace == element->attributes[j]->nspace) { + declare_me = 0; + break; + } + + if(declare_me) { + nspace_declarations[nspace_declarations_count].declaration= + raptor_namespace_format_as_xml(element->attributes[i]->nspace, + &nspace_declarations[nspace_declarations_count].length); + nspace_declarations[nspace_declarations_count].nspace = element->attributes[i]->nspace; + nspace_declarations_count++; + } + } + + } + } + } + + + if(!is_end && nstack && element->declared_nspaces && + raptor_sequence_size(element->declared_nspaces) > 0) { + for(i = 0; i< (unsigned int)raptor_sequence_size(element->declared_nspaces); i++) { + raptor_namespace* nspace = (raptor_namespace*)raptor_sequence_get_at(element->declared_nspaces, i); + unsigned int j; + int declare_me = 1; + + /* check it wasn't an earlier declaration too */ + 
for(j = 0; j < nspace_declarations_count; j++) + if(nspace_declarations[j].nspace == nspace) { + declare_me = 0; + break; + } + + if(declare_me) { + nspace_declarations[nspace_declarations_count].declaration= + raptor_namespace_format_as_xml(nspace, + &nspace_declarations[nspace_declarations_count].length); + nspace_declarations[nspace_declarations_count].nspace = nspace; + nspace_declarations_count++; + } + + } + } + + + + raptor_iostream_write_byte('<', iostr); + if(is_end) + raptor_iostream_write_byte('/', iostr); + + if(element->name->nspace && element->name->nspace->prefix_length > 0) { + raptor_iostream_counted_string_write((const char*)element->name->nspace->prefix, + element->name->nspace->prefix_length, + iostr); + raptor_iostream_write_byte(':', iostr); + } + raptor_iostream_counted_string_write((const char*)element->name->local_name, + element->name->local_name_length, + iostr); + + /* declare namespaces */ + if(nspace_declarations_count) { + /* sort them into the canonical order */ + qsort((void*)nspace_declarations, + nspace_declarations_count, sizeof(struct nsd), + raptor_nsd_compare); + /* add them */ + for(i = 0; i < nspace_declarations_count; i++) { + raptor_iostream_write_byte(' ', iostr); + raptor_iostream_counted_string_write((const char*)nspace_declarations[i].declaration, + nspace_declarations[i].length, + iostr); + RAPTOR_FREE(char*, nspace_declarations[i].declaration); + nspace_declarations[i].declaration = NULL; + + raptor_namespace_stack_start_namespace(nstack, + (raptor_namespace*)nspace_declarations[i].nspace, + depth); + } + } + + + if(!is_end && element->attributes) { + for(i = 0; i < element->attribute_count; i++) { + raptor_iostream_write_byte(' ', iostr); + + if(element->attributes[i]->nspace && + element->attributes[i]->nspace->prefix_length > 0) { + raptor_iostream_counted_string_write((char*)element->attributes[i]->nspace->prefix, + element->attributes[i]->nspace->prefix_length, + iostr); + raptor_iostream_write_byte(':', iostr); 
+ } + + raptor_iostream_counted_string_write((const char*)element->attributes[i]->local_name, + element->attributes[i]->local_name_length, + iostr); + + raptor_iostream_counted_string_write("=\"", 2, iostr); + + raptor_xml_escape_string_write(element->attributes[i]->value, + element->attributes[i]->value_length, + '"', + iostr); + raptor_iostream_write_byte('"', iostr); + } + } + + if(is_empty) + raptor_iostream_write_byte('/', iostr); + + raptor_iostream_write_byte('>', iostr); + + if(nstack) + RAPTOR_FREE(stringarray, nspace_declarations); + + return 0; +} + + +/** + * raptor_xml_element_get_language: + * @xml_element: XML Element + * + * Get the XML language of the element. + * + * Return value: XML language or NULL if none in scope + **/ +const unsigned char* +raptor_xml_element_get_language(raptor_xml_element* xml_element) +{ + return xml_element->xml_language; +} + + +/** + * raptor_valid_xml_ID: + * @rdf_parser: RDF parser + * @string: The string to check. + * + * Check the string matches the xml:ID value constraints. 
+ * + * This checks the syntax part of the xml:ID validity constraint, + * that it matches [ VC: Name Token ] as amended by XML Namespaces: + * + * See <ulink url="http://www.w3.org/TR/REC-xml-names/#NT-NCName">Namespaces in XML NCName<ulink> + * + * Return value: non-zero if the ID string is valid + **/ +int +raptor_valid_xml_ID(raptor_parser *rdf_parser, const unsigned char *string) +{ + size_t len = strlen((const char*)string); +#ifdef RAPTOR_XML_1_1 + #define XML_ID_XML_VERSION 11 +#else + #define XML_ID_XML_VERSION 10 +#endif + + return raptor_xml_name_check(string, len, XML_ID_XML_VERSION); +} + + +/** + * raptor_xml_escape_string_any: + * @world: raptor world + * @string: string to XML escape (UTF-8) + * @len: length of string + * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length + * @length: buffer size + * @quote: optional quote character to escape for attribute content, or 0 + * @xml_version: XML 1.0 (10) or XML 1.1 (11) + * + * Return an XML-escaped version a string. + * + * Follows + * <ulink url="http://www.w3.org/TR/xml-c14n#ProcessingModel">Canonical XML rules on Text Nodes and Attribute Nodes</ulink> + * + * Both: + * Replaces <literal>&</literal> and <literal><</literal> + * with <literal>&amp;</literal> and <literal>&lt;</literal> + * respectively, preserving other characters. + * + * Text Nodes: + * <literal>></literal> is turned into <literal>&gt;</literal> + * ##xD is turned into <literal>&##xD;</literal> + * + * Attribute Nodes: + * <literal>></literal> is generated not <literal>&gt</literal>. + * ##x9, ##xA and ##xD are turned into + * <literal>&##x9;</literal>, + * <literal>&##xA;</literal> and + * <literal>&##xD;</literal> + * entities. + * + * If @quote is given it can be either of '\'' or '\"' + * which will be turned into <literal>&apos;</literal> or + * <literal>&quot;</literal> respectively. + * ASCII NUL ('\0') or any other character will not be escaped. 
+ * + * If @buffer is NULL, no work is done but the size of buffer + * required is returned. The output in buffer remains in UTF-8. + * + * If the input @string is empty, a single NUL will be written to the + * buffer. + * + * Return value: the number of bytes required / used or <0 on failure. + **/ +int +raptor_xml_escape_string_any(raptor_world *world, + const unsigned char *string, size_t len, + unsigned char *buffer, size_t length, + char quote, + int xml_version) +{ + size_t l; + size_t new_len = 0; + const unsigned char *p; + unsigned char *q; + int unichar_len; + raptor_unichar unichar; + + if(!string) + return -1; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); + + raptor_world_open(world); + + if(quote != '\"' && quote != '\'') + quote='\0'; + + for(l = len, p = string; l; p++, l--) { + if(*p > 0x7f) { + unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > l) { + raptor_log_error(world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Bad UTF-8 encoding."); + return -1; + } + } else { + unichar=*p; + unichar_len = 1; + } + + if(unichar == '&') + /* & */ + new_len+= 5; + else if(unichar == '<' || (!quote && unichar == '>')) + /* < or > */ + new_len+= 4; + else if(quote && unichar == (unsigned long)quote) + /* ' or " */ + new_len+= 6; + else if(unichar == 0x0d || + (quote && (unichar == 0x09 || unichar == 0x0a))) + /* 
 or 	 or &xA; */ + new_len+= 5; + else if(unichar == 0x7f || + (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) { + if(!unichar || xml_version < 11) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot write illegal XML 1.0 character U+%6lX.", + unichar); + } else { + /* &#xX; */ + new_len+= 5; + if(unichar > 0x0f) + new_len++; + } + } else + new_len+= unichar_len; + + unichar_len--; /* since loop does len-- */ + p += unichar_len; l -= unichar_len; + } + + if(length && new_len > length) + return 0; + + if(!buffer) + return RAPTOR_BAD_CAST(int, new_len); + + for(l = len, p = string, q = buffer; l; p++, l--) { + if(*p > 0x7f) { + unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar); + /* if the UTF-8 encoding is bad, we already did return -1 above */ + } else { + unichar=*p; + unichar_len = 1; + } + + if(unichar == '&') { + memcpy(q, "&", 5); + q+= 5; + } else if(unichar == '<') { + memcpy(q, "<", 4); + q+= 4; + } else if(!quote && unichar == '>') { + memcpy(q, ">", 4); + q+= 4; + } else if(quote && unichar == RAPTOR_GOOD_CAST(unsigned long, quote)) { + if(quote == '\'') + memcpy(q, "'", 6); + else + memcpy(q, """, 6); + q+= 6; + } else if(unichar == 0x0d || + (quote && (unichar == 0x09 || unichar == 0x0a))) { + /* &#xX; */ + *q++='&'; + *q++='#'; + *q++='x'; + if(unichar == 0x09) + *q++ = '9'; + else + *q++ = RAPTOR_GOOD_CAST(unsigned char, 'A' + (RAPTOR_GOOD_CAST(char, unichar) - 0x0a)); + *q++= ';'; + } else if(unichar == 0x7f || + (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) { + if(!unichar || xml_version < 11) { + raptor_log_error_formatted(world, RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot write illegal XML 1.0 character U+%6lX.", + unichar); + } else { + /* &#xX; */ + *q++ = '&'; + *q++ = '#'; + *q++ = 'x'; + q += raptor_format_integer((char*)q, 3, + RAPTOR_GOOD_CAST(unsigned int, unichar), + /* base */ 16, -1, '\0'); + *q++ = ';'; + } + } else { + /* coverity[negative_returns] + * negative unichar_len values 
are checked and cause return -1 above */ + memcpy(q, p, unichar_len); + q+= unichar_len; + } + + unichar_len--; /* since loop does len-- */ + p += unichar_len; l -= unichar_len; + } + + /* Terminate new string */ + *q = '\0'; + + return RAPTOR_BAD_CAST(int, new_len); +} + + +/** + * raptor_xml_escape_string: + * @world: raptor world + * @string: string to XML 1.0 escape (UTF-8) + * @len: length of string + * @buffer: the buffer to use for new string (UTF-8) or NULL to just calculate expected length. + * @length: buffer size + * @quote: optional quote character to escape for attribute content, or 0 + * + * Return an XML 1.0-escaped version a string. + * + * See raptor_xml_escape_string_any() for the conditions on parameters. + * + * Return value: the number of bytes required / used or <0 on failure. + **/ +int +raptor_xml_escape_string(raptor_world *world, + const unsigned char *string, size_t len, + unsigned char *buffer, size_t length, + char quote) +{ + if(!string) + return -1; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(world, raptor_world, -1); + + raptor_world_open(world); + + return raptor_xml_escape_string_any(world, string, len, + buffer, length, + quote, + 10); +} + + +/** + * raptor_xml_escape_string_any_write: + * @string: string to XML escape (UTF-8) + * @len: length of string + * @quote: optional quote character to escape for attribute content, or 0 + * @xml_version: XML version - 10 (XML 1.0) or 11 (XML 1.1) + * @iostr: the #raptor_iostream to write to + * + * Write an XML-escaped version of a string to an iostream. + * + * See raptor_xml_escape_string() for the escapes performed and + * the conditions on @quote and @string. XML 1.1 allows additional + * characters in XML such as U+0001 to U+001F inclusive. 
+ * + * Return value: non 0 on failure + **/ +int +raptor_xml_escape_string_any_write(const unsigned char *string, + size_t len, + char quote, + int xml_version, + raptor_iostream* iostr) +{ + size_t l; + const unsigned char *p; + + if(xml_version != 10) + xml_version = 11; + + if(quote != '\"' && quote != '\'') + quote='\0'; + + for(l = len, p = string; l; p++, l--) { + int unichar_len = 1; + raptor_unichar unichar=*p; + + if(*p > 0x7f) { + unichar_len = raptor_unicode_utf8_string_get_char(p, l, &unichar); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > l) { + raptor_log_error(raptor_iostream_get_world(iostr), + RAPTOR_LOG_LEVEL_ERROR, NULL, + "Bad UTF-8 encoding."); + return 1; + } + } + + if(unichar == '&') + raptor_iostream_counted_string_write("&", 5, iostr); + else if(unichar == '<') + raptor_iostream_counted_string_write("<", 4, iostr); + else if(!quote && unichar == '>') + raptor_iostream_counted_string_write(">", 4, iostr); + else if(quote && unichar == (unsigned long)quote) { + if(quote == '\'') + raptor_iostream_counted_string_write("'", 6, iostr); + else + raptor_iostream_counted_string_write(""", 6, iostr); + } else if(unichar == 0x0d || + (quote && (unichar == 0x09 || unichar == 0x0a))) { + /* &#xX; */ + raptor_iostream_counted_string_write("&#x", 3, iostr); + if(unichar == 0x09) + raptor_iostream_write_byte('9', iostr); + else + raptor_iostream_write_byte('A'+ ((char)unichar-0x0a), iostr); + raptor_iostream_write_byte(';', iostr); + } else if(unichar == 0x7f || + (unichar < 0x20 && unichar != 0x09 && unichar != 0x0a)) { + if(!unichar || xml_version < 11) { + raptor_log_error_formatted(raptor_iostream_get_world(iostr), + RAPTOR_LOG_LEVEL_ERROR, NULL, + "Cannot write illegal XML 1.0 character U+%6lX.", + unichar); + } else { + int width = (unichar < 0x10) ? 
1 : 2; + + /* &#xX; */ + raptor_iostream_counted_string_write("&#x", 3, iostr); + raptor_iostream_hexadecimal_write(RAPTOR_GOOD_CAST(unsigned int, unichar), width, iostr); + raptor_iostream_write_byte(';', iostr); + } + } else + raptor_iostream_counted_string_write((const char*)p, unichar_len, iostr); + + unichar_len--; /* since loop does len-- */ + p += unichar_len; l -= unichar_len; + } + + return 0; +} + + +/** + * raptor_xml_escape_string_write: + * @string: string to XML 1.0 escape (UTF-8) + * @len: length of string + * @quote: optional quote character to escape for attribute content, or 0 + * @iostr: the #raptor_iostream to write to + * + * Write an XML 1.0-escaped version of a string to an iostream. + * + * See raptor_xml_escape_string_any_write() for the escapes + * performed and the conditions on @quote and @string. + * + * Return value: non 0 on failure + **/ +int +raptor_xml_escape_string_write(const unsigned char *string, + size_t len, + char quote, + raptor_iostream* iostr) +{ + return raptor_xml_escape_string_any_write(string, len, quote, 10, + iostr); +} + + +/** + * raptor_xml_name_check: + * @string: UTF-8 name string + * @length: length of string + * @xml_version: XML version + * + * Check a string is a legal XML name (and legal UTF8). + * + * xml_version is either 10 (for XML 1.0) or 11 for (XML 1.1). Any + * other version fails. 
+ * + * Return value: Non 0 if the string is a legal XML name + **/ +int +raptor_xml_name_check(const unsigned char *string, size_t length, + int xml_version) +{ + int pos; + + if(xml_version != 10 && xml_version != 11) + return 0; + + for(pos = 0; length > 0; pos++) { + raptor_unichar unichar = 0; + + int unichar_len; + unichar_len = raptor_unicode_utf8_string_get_char(string, length, &unichar); + if(unichar_len < 0 || RAPTOR_GOOD_CAST(size_t, unichar_len) > length) + return 0; + + if(unichar > raptor_unicode_max_codepoint) + return 0; + + if(!pos) { + /* start of name */ + if(xml_version == 10) { + if(!raptor_unicode_is_xml10_namestartchar(unichar)) + return 0; + } else { + if(!raptor_unicode_is_xml11_namestartchar(unichar)) + return 0; + } + } else { + /* rest of name */ + if(xml_version == 10) { + if(!raptor_unicode_is_xml10_namechar(unichar)) + return 0; + } else { + if(!raptor_unicode_is_xml11_namechar(unichar)) + return 0; + } + } + + string += unichar_len; + length -= unichar_len; + } + return 1; +} + + +#endif + + + + +#ifdef STANDALONE + +/* static prototypes */ +void raptor_bad_string_print(const unsigned char *input, FILE *stream); +int main(int argc, char *argv[]); + +void +raptor_bad_string_print(const unsigned char *input, FILE *stream) +{ + while(*input) { + char c=(char)*input; + if(isprint(c)) + fputc(c, stream); + else + fprintf(stream, "\\x%02X", (c & 0xff)); + input++; + } +} + + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = raptor_basename(argv[0]); + struct tv { + const char *string; + const char quote; + const char *result; + }; + struct tv *t; + struct tv test_values[]={ + {"", 0, ""}, + + {"&", 0, "&"}, + {"<", 0, "<"}, + {">", 0, ">"}, + {"\x09", 0, "\x09"}, + {"\x0a", 0, "\x0a"}, + {"\x0d", 0, "
"}, + + {"'&'", '\'', "'&'"}, + {"'<'", '\'', "'<'"}, + {"'>'", '\'', "'>'"}, + {"\x09", '\'', "	"}, + {"\x0a", '\'', "
"}, + {"\x0d", '\'', "
"}, + + {"\"&\"", '\"', ""&""}, + {"\"<\"", '\"', ""<""}, + {"\">\"", '\"', "">""}, + {"\x09", '\"', "	"}, + {"\x0a", '\"', "
"}, + {"\x0d", '\"', "
"}, + + {"&", 0, "&amp;"}, + {"<foo>", 0, "<foo>"}, +#if 0 + {"\x1f", 0, ""}, + {"\xc2\x80", 0, "€"}, + {"\xe0\xa0\x80", 0, "ࠀ"}, + {"\xf0\x90\x80\x80", 0, "𐀀"}, + + {"\x7f", 0, ""}, + {"\xdf\xbf", 0, "߿"}, + {"\xef\xbf\xbd", 0, "�"}, + {"\xf4\x8f\xbf\xbf", 0, ""}, + + {"\xc3\xbf", 0, "ÿ"}, + {"\xf0\x8f\xbf\xbf", 0, ""}, +#endif + {NULL, 0, 0} + }; + int i; + int failures = 0; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + for(i = 0; (t=&test_values[i]) && t->string; i++) { + const unsigned char *utf8_string = (const unsigned char*)t->string; + char quote = t->quote; + size_t utf8_string_len = strlen((const char*)utf8_string); + unsigned char *xml_string; + int xml_string_len = 0; + + xml_string_len = raptor_xml_escape_string(world, + utf8_string, utf8_string_len, + NULL, 0, quote); + if(xml_string_len < 0) { + fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '", + program); + raptor_bad_string_print(utf8_string, stderr); + fputs("'\n", stderr); + failures++; + continue; + } + + xml_string = RAPTOR_MALLOC(unsigned char*, xml_string_len + 1); + + xml_string_len = raptor_xml_escape_string(world, + utf8_string, utf8_string_len, + xml_string, xml_string_len, quote); + if(xml_string_len < 0) { + fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '", + program); + raptor_bad_string_print(utf8_string, stderr); + fputs("'\n", stderr); + failures++; + continue; + } + if(strcmp((const char*)xml_string, t->result)) { + fprintf(stderr, "%s: raptor_xml_escape_string FAILED to escape string '", + program); + raptor_bad_string_print(utf8_string, stderr); + fprintf(stderr, "', expected '%s', result was '%s'\n", + t->result, xml_string); + failures++; + continue; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: raptor_xml_escape_string escaped string to '%s' ok\n", + program, xml_string); +#endif + RAPTOR_FREE(char*, xml_string); + } + +#if defined(RAPTOR_DEBUG) && 
RAPTOR_DEBUG > 1 + if(!failures) + fprintf(stderr, "%s: raptor_xml_escape_string all tests OK\n", program); +#endif + + raptor_free_world(world); + + return failures; +} + +#endif diff --git a/src/raptor_xml_writer.c b/src/raptor_xml_writer.c new file mode 100644 index 0000000..4426d38 --- /dev/null +++ b/src/raptor_xml_writer.c @@ -0,0 +1,1046 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_xml_writer.c - Raptor XML Writer for SAX2 events API + * + * Copyright (C) 2003-2010, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#ifndef STANDALONE + + +#define XML_WRITER_AUTO_INDENT(xml_writer) RAPTOR_OPTIONS_GET_NUMERIC(xml_writer, RAPTOR_OPTION_WRITER_AUTO_INDENT) +#define XML_WRITER_AUTO_EMPTY(xml_writer) RAPTOR_OPTIONS_GET_NUMERIC(xml_writer, RAPTOR_OPTION_WRITER_AUTO_EMPTY) +#define XML_WRITER_INDENT(xml_writer) RAPTOR_OPTIONS_GET_NUMERIC(xml_writer, RAPTOR_OPTION_WRITER_INDENT_WIDTH) +#define XML_WRITER_XML_VERSION(xml_writer) RAPTOR_OPTIONS_GET_NUMERIC(xml_writer, RAPTOR_OPTION_WRITER_XML_VERSION) + + +#define XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer) \ + if((XML_WRITER_AUTO_EMPTY(xml_writer)) && \ + xml_writer->current_element && \ + !(xml_writer->current_element->content_cdata_seen || \ + xml_writer->current_element->content_element_seen)) { \ + raptor_iostream_write_byte('>', xml_writer->iostr); \ +} + + +/* Define this for far too much output */ +#undef RAPTOR_DEBUG_CDATA + + +struct raptor_xml_writer_s { + raptor_world *world; + + int canonicalize; + + int depth; + + int my_nstack; + raptor_namespace_stack *nstack; + int nstack_depth; + + raptor_xml_element* current_element; + + /* outputting to this iostream */ + raptor_iostream *iostr; + + /* Has writing the XML declaration writing been checked? 
*/ + int xml_declaration_checked; + + /* An extra newline is wanted */ + int pending_newline; + + /* Options (per-object) */ + raptor_object_options options; +}; + + +/* 16 spaces */ +#define SPACES_BUFFER_SIZE sizeof(spaces_buffer) +static const unsigned char spaces_buffer[] = { + ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ', + ' ', ' ', ' ', ' ' +}; + + + +/* helper functions */ + +/* Handle printing a pending newline OR newline with indenting */ +static int +raptor_xml_writer_indent(raptor_xml_writer *xml_writer) +{ + int num_spaces; + + if(!XML_WRITER_AUTO_INDENT(xml_writer)) { + if(xml_writer->pending_newline) { + raptor_iostream_write_byte('\n', xml_writer->iostr); + xml_writer->pending_newline = 0; + + if(xml_writer->current_element) + xml_writer->current_element->content_cdata_seen = 1; + } + return 0; + } + + num_spaces = xml_writer->depth * XML_WRITER_INDENT(xml_writer); + + /* Do not write an extra newline at the start of the document + * (after the XML declaration or XMP processing instruction has + * been writtten) + */ + if(xml_writer->xml_declaration_checked == 1) + xml_writer->xml_declaration_checked++; + else { + raptor_iostream_write_byte('\n', xml_writer->iostr); + xml_writer->pending_newline = 0; + } + + while(num_spaces > 0) { + + int count = (num_spaces > RAPTOR_GOOD_CAST(int, SPACES_BUFFER_SIZE)) ? 
+ RAPTOR_GOOD_CAST(int, SPACES_BUFFER_SIZE) : num_spaces; + + raptor_iostream_counted_string_write(spaces_buffer, count, + xml_writer->iostr); + + num_spaces -= count; + } + + if(xml_writer->current_element) + xml_writer->current_element->content_cdata_seen = 1; + + return 0; +} + + +struct nsd { + const raptor_namespace *nspace; + unsigned char *declaration; + size_t length; +}; + + +static int +raptor_xml_writer_nsd_compare(const void *a, const void *b) +{ + struct nsd* nsd_a = (struct nsd*)a; + struct nsd* nsd_b = (struct nsd*)b; + + /* Sort NULLs earlier */ + if(!nsd_a->declaration) + return -1; + else if(!nsd_b->declaration) + return 1; + return strcmp((const char*)nsd_a->declaration, (const char*)nsd_b->declaration); +} + + +static int +raptor_xml_writer_start_element_common(raptor_xml_writer* xml_writer, + raptor_xml_element* element, + int auto_empty) +{ + raptor_iostream* iostr = xml_writer->iostr; + raptor_namespace_stack *nstack = xml_writer->nstack; + int depth = xml_writer->depth; + int auto_indent = XML_WRITER_AUTO_INDENT(xml_writer); + struct nsd *nspace_declarations = NULL; + size_t nspace_declarations_count = 0; + unsigned int i; + + if(nstack) { + int nspace_max_count = element->attribute_count * 2; /* attr and value */ + if(element->name->nspace) + nspace_max_count++; + if(element->declared_nspaces) + nspace_max_count += raptor_sequence_size(element->declared_nspaces); + if(element->xml_language) + nspace_max_count++; + + nspace_declarations = RAPTOR_CALLOC(struct nsd*, nspace_max_count, + sizeof(struct nsd)); + if(!nspace_declarations) + return 1; + } + + if(element->name->nspace) { + if(nstack && !raptor_namespaces_namespace_in_scope(nstack, element->name->nspace)) { + nspace_declarations[0].declaration= + raptor_namespace_format_as_xml(element->name->nspace, + &nspace_declarations[0].length); + if(!nspace_declarations[0].declaration) + goto error; + nspace_declarations[0].nspace = element->name->nspace; + nspace_declarations_count++; + } + } + 
+ if(nstack && element->attributes) { + for(i = 0; i < element->attribute_count; i++) { + /* qname */ + if(element->attributes[i]->nspace) { + /* Check if we need a namespace declaration attribute */ + if(nstack && + !raptor_namespaces_namespace_in_scope(nstack, element->attributes[i]->nspace) && element->attributes[i]->nspace != element->name->nspace) { + /* not in scope and not same as element (so already going to be declared)*/ + unsigned int j; + int declare_me = 1; + + /* check it wasn't an earlier declaration too */ + for(j = 0; j < nspace_declarations_count; j++) + if(nspace_declarations[j].nspace == element->attributes[i]->nspace) { + declare_me = 0; + break; + } + + if(declare_me) { + nspace_declarations[nspace_declarations_count].declaration= + raptor_namespace_format_as_xml(element->attributes[i]->nspace, + &nspace_declarations[nspace_declarations_count].length); + if(!nspace_declarations[nspace_declarations_count].declaration) + goto error; + nspace_declarations[nspace_declarations_count].nspace = element->attributes[i]->nspace; + nspace_declarations_count++; + } + } + } + + /* Add the attribute's value */ + nspace_declarations[nspace_declarations_count].declaration= + raptor_qname_format_as_xml(element->attributes[i], + &nspace_declarations[nspace_declarations_count].length); + if(!nspace_declarations[nspace_declarations_count].declaration) + goto error; + nspace_declarations[nspace_declarations_count].nspace = NULL; + nspace_declarations_count++; + + } + } + + if(nstack && element->declared_nspaces && + raptor_sequence_size(element->declared_nspaces) > 0) { + for(i = 0; i< (unsigned int)raptor_sequence_size(element->declared_nspaces); i++) { + raptor_namespace* nspace = (raptor_namespace*)raptor_sequence_get_at(element->declared_nspaces, i); + unsigned int j; + int declare_me = 1; + + /* check it wasn't an earlier declaration too */ + for(j = 0; j < nspace_declarations_count; j++) + if(nspace_declarations[j].nspace == nspace) { + declare_me = 0; + 
break; + } + + if(declare_me) { + nspace_declarations[nspace_declarations_count].declaration= + raptor_namespace_format_as_xml(nspace, + &nspace_declarations[nspace_declarations_count].length); + if(!nspace_declarations[nspace_declarations_count].declaration) + goto error; + nspace_declarations[nspace_declarations_count].nspace = nspace; + nspace_declarations_count++; + } + + } + } + + if(nstack && element->xml_language) { + size_t lang_len = strlen(RAPTOR_GOOD_CAST(char*, element->xml_language)); +#define XML_LANG_PREFIX "xml:lang=\"" +#define XML_LANG_PREFIX_LEN 10 + size_t buf_length = XML_LANG_PREFIX_LEN + lang_len + 1; + unsigned char* buffer = RAPTOR_MALLOC(unsigned char*, buf_length + 1); + const char quote = '\"'; + unsigned char* p; + + memcpy(buffer, XML_LANG_PREFIX, XML_LANG_PREFIX_LEN); + p = buffer + XML_LANG_PREFIX_LEN; + p += raptor_xml_escape_string(xml_writer->world, + element->xml_language, lang_len, + p, buf_length, quote); + *p++ = quote; + *p = '\0'; + + nspace_declarations[nspace_declarations_count].declaration = buffer; + nspace_declarations[nspace_declarations_count].length = buf_length; + nspace_declarations[nspace_declarations_count].nspace = NULL; + nspace_declarations_count++; + } + + + raptor_iostream_write_byte('<', iostr); + + if(element->name->nspace && element->name->nspace->prefix_length > 0) { + raptor_iostream_counted_string_write((const char*)element->name->nspace->prefix, + element->name->nspace->prefix_length, + iostr); + raptor_iostream_write_byte(':', iostr); + } + raptor_iostream_counted_string_write((const char*)element->name->local_name, + element->name->local_name_length, + iostr); + + /* declare namespaces and attributes */ + if(nspace_declarations_count) { + int need_indent = 0; + + /* sort them into the canonical order */ + qsort((void*)nspace_declarations, + nspace_declarations_count, sizeof(struct nsd), + raptor_xml_writer_nsd_compare); + + /* declare namespaces first */ + for(i = 0; i < nspace_declarations_count; 
i++) { + if(!nspace_declarations[i].nspace) + continue; + + if(auto_indent && need_indent) { + /* indent attributes */ + raptor_xml_writer_newline(xml_writer); + xml_writer->depth++; + raptor_xml_writer_indent(xml_writer); + xml_writer->depth--; + } + raptor_iostream_write_byte(' ', iostr); + raptor_iostream_counted_string_write((const char*)nspace_declarations[i].declaration, + nspace_declarations[i].length, + iostr); + RAPTOR_FREE(char*, nspace_declarations[i].declaration); + nspace_declarations[i].declaration = NULL; + need_indent = 1; + + if(raptor_namespace_stack_start_namespace(nstack, + (raptor_namespace*)nspace_declarations[i].nspace, + depth)) + goto error; + } + + /* declare attributes */ + for(i = 0; i < nspace_declarations_count; i++) { + if(nspace_declarations[i].nspace) + continue; + + if(auto_indent && need_indent) { + /* indent attributes */ + raptor_xml_writer_newline(xml_writer); + xml_writer->depth++; + raptor_xml_writer_indent(xml_writer); + xml_writer->depth--; + } + raptor_iostream_write_byte(' ', iostr); + raptor_iostream_counted_string_write((const char*)nspace_declarations[i].declaration, + nspace_declarations[i].length, + iostr); + need_indent = 1; + + RAPTOR_FREE(char*, nspace_declarations[i].declaration); + nspace_declarations[i].declaration = NULL; + } + } + + if(!auto_empty) + raptor_iostream_write_byte('>', iostr); + + if(nstack) + RAPTOR_FREE(stringarray, nspace_declarations); + + return 0; + + /* Clean up nspace_declarations on error */ + error: + + for(i = 0; i < nspace_declarations_count; i++) { + if(nspace_declarations[i].declaration) + RAPTOR_FREE(char*, nspace_declarations[i].declaration); + } + + RAPTOR_FREE(stringarray, nspace_declarations); + + return 1; +} + + +static int +raptor_xml_writer_end_element_common(raptor_xml_writer* xml_writer, + raptor_xml_element *element, + int is_empty) +{ + raptor_iostream* iostr = xml_writer->iostr; + + if(is_empty) + raptor_iostream_write_byte('/', iostr); + else { + + 
raptor_iostream_write_byte('<', iostr); + + raptor_iostream_write_byte('/', iostr); + + if(element->name->nspace && element->name->nspace->prefix_length > 0) { + raptor_iostream_counted_string_write((const char*)element->name->nspace->prefix, + element->name->nspace->prefix_length, + iostr); + raptor_iostream_write_byte(':', iostr); + } + raptor_iostream_counted_string_write((const char*)element->name->local_name, + element->name->local_name_length, + iostr); + } + + raptor_iostream_write_byte('>', iostr); + + return 0; + +} + + +/** + * raptor_new_xml_writer: + * @world: raptor_world object + * @nstack: Namespace stack for the writer to start with (or NULL) + * @iostr: I/O stream to write to + * + * Constructor - Create a new XML Writer writing XML to a raptor_iostream + * + * Return value: a new #raptor_xml_writer object or NULL on failure + **/ +raptor_xml_writer* +raptor_new_xml_writer(raptor_world* world, + raptor_namespace_stack *nstack, + raptor_iostream* iostr) +{ + raptor_xml_writer* xml_writer; + + RAPTOR_CHECK_CONSTRUCTOR_WORLD(world); + + if(!iostr) + return NULL; + + raptor_world_open(world); + + xml_writer = RAPTOR_CALLOC(raptor_xml_writer*, 1, sizeof(*xml_writer)); + if(!xml_writer) + return NULL; + + xml_writer->world = world; + + xml_writer->nstack_depth = 0; + + xml_writer->nstack = nstack; + if(!xml_writer->nstack) { + xml_writer->nstack = raptor_new_namespaces(world, 1); + xml_writer->my_nstack = 1; + } + + xml_writer->iostr = iostr; + + raptor_object_options_init(&xml_writer->options, + RAPTOR_OPTION_AREA_XML_WRITER); + + return xml_writer; +} + + +/** + * raptor_free_xml_writer: + * @xml_writer: XML writer object + * + * Destructor - Free XML Writer + * + **/ +void +raptor_free_xml_writer(raptor_xml_writer* xml_writer) +{ + if(!xml_writer) + return; + + if(xml_writer->nstack && xml_writer->my_nstack) + raptor_free_namespaces(xml_writer->nstack); + + raptor_object_options_clear(&xml_writer->options); + + RAPTOR_FREE(raptor_xml_writer, 
xml_writer); +} + + +static void +raptor_xml_writer_write_xml_declaration(raptor_xml_writer* xml_writer) +{ + if(!xml_writer->xml_declaration_checked) { + /* check that it should be written once only */ + xml_writer->xml_declaration_checked = 1; + + if(RAPTOR_OPTIONS_GET_NUMERIC(xml_writer, + RAPTOR_OPTION_WRITER_XML_DECLARATION)) { + raptor_iostream_string_write((const unsigned char*)"<?xml version=\"", + xml_writer->iostr); + raptor_iostream_counted_string_write((XML_WRITER_XML_VERSION(xml_writer) == 10) ? + (const unsigned char*)"1.0" : + (const unsigned char*)"1.1", + 3, xml_writer->iostr); + raptor_iostream_string_write((const unsigned char*)"\" encoding=\"utf-8\"?>\n", + xml_writer->iostr); + } + } + +} + + +/** + * raptor_xml_writer_empty_element: + * @xml_writer: XML writer object + * @element: XML element object + * + * Write an empty XML element to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. + **/ +void +raptor_xml_writer_empty_element(raptor_xml_writer* xml_writer, + raptor_xml_element *element) +{ + raptor_xml_writer_write_xml_declaration(xml_writer); + + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + if(xml_writer->pending_newline || XML_WRITER_AUTO_INDENT(xml_writer)) + raptor_xml_writer_indent(xml_writer); + + raptor_xml_writer_start_element_common(xml_writer, element, 1); + + raptor_xml_writer_end_element_common(xml_writer, element, 1); + + raptor_namespaces_end_for_depth(xml_writer->nstack, xml_writer->depth); +} + + +/** + * raptor_xml_writer_start_element: + * @xml_writer: XML writer object + * @element: XML element object + * + * Write a start XML element to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. + * + * Indents the start element if XML writer option AUTO_INDENT is enabled. 
+ **/ +void +raptor_xml_writer_start_element(raptor_xml_writer* xml_writer, + raptor_xml_element *element) +{ + raptor_xml_writer_write_xml_declaration(xml_writer); + + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + if(xml_writer->pending_newline || XML_WRITER_AUTO_INDENT(xml_writer)) + raptor_xml_writer_indent(xml_writer); + + raptor_xml_writer_start_element_common(xml_writer, element, + XML_WRITER_AUTO_EMPTY(xml_writer)); + + xml_writer->depth++; + + /* SJS Note: This "if" clause is necessary because raptor_rdfxml.c + * uses xml_writer for parseType="literal" and passes in elements + * whose parent field is already set. The first time this function + * is called, it sets element->parent to 0, causing the warn-07.rdf + * test to fail. Subsequent calls to this function set + * element->parent to its existing value. + */ + if(xml_writer->current_element) + element->parent = xml_writer->current_element; + + xml_writer->current_element = element; + if(element->parent) + element->parent->content_element_seen = 1; +} + + +/** + * raptor_xml_writer_end_element: + * @xml_writer: XML writer object + * @element: XML element object + * + * Write an end XML element to the XML writer. + * + * Indents the end element if XML writer option AUTO_INDENT is enabled. + **/ +void +raptor_xml_writer_end_element(raptor_xml_writer* xml_writer, + raptor_xml_element* element) +{ + int is_empty; + + xml_writer->depth--; + + if(xml_writer->pending_newline || + (XML_WRITER_AUTO_INDENT(xml_writer) && element->content_element_seen)) + raptor_xml_writer_indent(xml_writer); + + is_empty = XML_WRITER_AUTO_EMPTY(xml_writer) ? 
+ !(element->content_cdata_seen || element->content_element_seen) : 0; + + raptor_xml_writer_end_element_common(xml_writer, element, is_empty); + + raptor_namespaces_end_for_depth(xml_writer->nstack, xml_writer->depth); + + if(xml_writer->current_element) + xml_writer->current_element = xml_writer->current_element->parent; +} + + +/** + * raptor_xml_writer_newline: + * @xml_writer: XML writer object + * + * Write a newline to the XML writer. + * + * Indents the next line if XML writer option AUTO_INDENT is enabled. + **/ +void +raptor_xml_writer_newline(raptor_xml_writer* xml_writer) +{ + xml_writer->pending_newline = 1; +} + + +/** + * raptor_xml_writer_cdata: + * @xml_writer: XML writer object + * @s: string to XML escape and write + * + * Write CDATA XML-escaped to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. + * + **/ +void +raptor_xml_writer_cdata(raptor_xml_writer* xml_writer, + const unsigned char *s) +{ + raptor_xml_writer_write_xml_declaration(xml_writer); + + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + raptor_xml_escape_string_any_write(s, strlen((const char*)s), + '\0', + XML_WRITER_XML_VERSION(xml_writer), + xml_writer->iostr); + + if(xml_writer->current_element) + xml_writer->current_element->content_cdata_seen = 1; +} + + +/** + * raptor_xml_writer_cdata_counted: + * @xml_writer: XML writer object + * @s: string to XML escape and write + * @len: length of string + * + * Write counted CDATA XML-escaped to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. 
+ * + **/ +void +raptor_xml_writer_cdata_counted(raptor_xml_writer* xml_writer, + const unsigned char *s, unsigned int len) +{ + raptor_xml_writer_write_xml_declaration(xml_writer); + + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + raptor_xml_escape_string_any_write(s, len, + '\0', + XML_WRITER_XML_VERSION(xml_writer), + xml_writer->iostr); + + if(xml_writer->current_element) + xml_writer->current_element->content_cdata_seen = 1; +} + + +/** + * raptor_xml_writer_raw: + * @xml_writer: XML writer object + * @s: string to write + * + * Write a string raw to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. + * + **/ +void +raptor_xml_writer_raw(raptor_xml_writer* xml_writer, + const unsigned char *s) +{ + raptor_xml_writer_write_xml_declaration(xml_writer); + + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + raptor_iostream_string_write(s, xml_writer->iostr); + + if(xml_writer->current_element) + xml_writer->current_element->content_cdata_seen = 1; +} + + +/** + * raptor_xml_writer_raw_counted: + * @xml_writer: XML writer object + * @s: string to write + * @len: length of string + * + * Write a counted string raw to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. + * + **/ +void +raptor_xml_writer_raw_counted(raptor_xml_writer* xml_writer, + const unsigned char *s, unsigned int len) +{ + raptor_xml_writer_write_xml_declaration(xml_writer); + + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + raptor_iostream_counted_string_write(s, len, xml_writer->iostr); + + if(xml_writer->current_element) + xml_writer->current_element->content_cdata_seen = 1; +} + + +/** + * raptor_xml_writer_comment: + * @xml_writer: XML writer object + * @s: comment string to write + * + * Write an XML comment to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. 
+ * + **/ +void +raptor_xml_writer_comment(raptor_xml_writer* xml_writer, + const unsigned char *s) +{ + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"<!-- ", 5); + raptor_xml_writer_cdata(xml_writer, s); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)" -->", 4); +} + + +/** + * raptor_xml_writer_comment_counted: + * @xml_writer: XML writer object + * @s: comment string to write + * @len: length of string + * + * Write a counted XML comment to the XML writer. + * + * Closes any previous empty element if XML writer option AUTO_EMPTY + * is enabled. + * + **/ +void +raptor_xml_writer_comment_counted(raptor_xml_writer* xml_writer, + const unsigned char *s, unsigned int len) +{ + XML_WRITER_FLUSH_CLOSE_BRACKET(xml_writer); + + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)"<!-- ", 5); + raptor_xml_writer_cdata_counted(xml_writer, s, len); + raptor_xml_writer_raw_counted(xml_writer, (const unsigned char*)" -->", 4); +} + + +/** + * raptor_xml_writer_flush: + * @xml_writer: XML writer object + * + * Finish the XML writer. + * + **/ +void +raptor_xml_writer_flush(raptor_xml_writer* xml_writer) +{ + if(xml_writer->pending_newline) { + raptor_iostream_write_byte('\n', xml_writer->iostr); + xml_writer->pending_newline = 0; + } +} + + +/** + * raptor_xml_writer_set_option: + * @xml_writer: #raptor_xml_writer xml_writer object + * @option: option to set from enumerated #raptor_option values + * @string: string option value (or NULL) + * @integer: integer option value + * + * Set xml_writer option. + * + * If @string is not NULL and the option type is numeric, the string + * value is converted to an integer and used in preference to @integer. + * + * If @string is NULL and the option type is not numeric, an error is + * returned. + * + * The @string values used are copied. + * + * The allowed options are available via + * raptor_world_get_option_description(). 
+ * + * Return value: non 0 on failure or if the option is unknown + **/ +int +raptor_xml_writer_set_option(raptor_xml_writer *xml_writer, + raptor_option option, char* string, int integer) +{ + return raptor_object_options_set_option(&xml_writer->options, option, + string, integer); +} + + +/** + * raptor_xml_writer_get_option: + * @xml_writer: #raptor_xml_writer xml_writer object + * @option: option to get value + * @string_p: pointer to where to store string value + * @integer_p: pointer to where to store integer value + * + * Get xml_writer option. + * + * Any string value returned in *@string_p is shared and must + * be copied by the caller. + * + * The allowed options are available via + * raptor_world_get_option_description(). + * + * Return value: option value or < 0 for an illegal option + **/ +int +raptor_xml_writer_get_option(raptor_xml_writer *xml_writer, + raptor_option option, + char** string_p, int* integer_p) +{ + return raptor_object_options_get_option(&xml_writer->options, option, + string_p, integer_p); +} + + +/** + * raptor_xml_writer_get_depth: + * @xml_writer: #raptor_xml_writer xml writer object + * + * Get the current XML Writer element depth + * + * Return value: element stack depth + */ +int +raptor_xml_writer_get_depth(raptor_xml_writer *xml_writer) +{ + return xml_writer->depth; +} + + +#endif + + + +#ifdef STANDALONE + +/* one more prototype */ +int main(int argc, char *argv[]); + + +const unsigned char *base_uri_string = (const unsigned char*)"http://example.org/base#"; + +#define OUT_BYTES_COUNT 135 + +int +main(int argc, char *argv[]) +{ + raptor_world *world; + const char *program = raptor_basename(argv[0]); + raptor_iostream *iostr; + raptor_namespace_stack *nstack; + raptor_namespace* foo_ns; + raptor_xml_writer* xml_writer; + raptor_uri* base_uri; + raptor_qname* el_name; + raptor_xml_element *element; + unsigned long offset; + raptor_qname **attrs; + raptor_uri* base_uri_copy = NULL; + + /* for raptor_new_iostream_to_string */ 
+ void *string = NULL; + size_t string_len = 0; + + world = raptor_new_world(); + if(!world || raptor_world_open(world)) + exit(1); + + iostr = raptor_new_iostream_to_string(world, &string, &string_len, NULL); + if(!iostr) { + fprintf(stderr, "%s: Failed to create iostream to string\n", program); + exit(1); + } + + nstack = raptor_new_namespaces(world, 1); + + xml_writer = raptor_new_xml_writer(world, nstack, iostr); + if(!xml_writer) { + fprintf(stderr, "%s: Failed to create xml_writer to iostream\n", program); + exit(1); + } + + base_uri = raptor_new_uri(world, base_uri_string); + + foo_ns = raptor_new_namespace(nstack, + (const unsigned char*)"foo", + (const unsigned char*)"http://example.org/foo-ns#", + 0); + + + el_name = raptor_new_qname_from_namespace_local_name(world, + foo_ns, + (const unsigned char*)"bar", + NULL); + base_uri_copy = base_uri ? raptor_uri_copy(base_uri) : NULL; + element = raptor_new_xml_element(el_name, + NULL, /* language */ + base_uri_copy); + + raptor_xml_writer_start_element(xml_writer, element); + raptor_xml_writer_cdata_counted(xml_writer, (const unsigned char*)"hello\n", 6); + raptor_xml_writer_comment_counted(xml_writer, (const unsigned char*)"comment", 7); + raptor_xml_writer_cdata(xml_writer, (const unsigned char*)"\n"); + raptor_xml_writer_end_element(xml_writer, element); + + raptor_free_xml_element(element); + + raptor_xml_writer_cdata(xml_writer, (const unsigned char*)"\n"); + + el_name = raptor_new_qname(nstack, + (const unsigned char*)"blah", + NULL /* no attribute value - element */); + base_uri_copy = base_uri ? 
raptor_uri_copy(base_uri) : NULL; + element = raptor_new_xml_element(el_name, + NULL, /* language */ + base_uri_copy); + + attrs = RAPTOR_CALLOC(raptor_qname**, 1, sizeof(raptor_qname*)); + attrs[0] = raptor_new_qname(nstack, + (const unsigned char*)"a", + (const unsigned char*)"b" /* attribute value */); + raptor_xml_element_set_attributes(element, attrs, 1); + + raptor_xml_writer_empty_element(xml_writer, element); + + raptor_xml_writer_cdata(xml_writer, (const unsigned char*)"\n"); + + raptor_free_xml_writer(xml_writer); + + raptor_free_xml_element(element); + + raptor_free_namespace(foo_ns); + + raptor_free_namespaces(nstack); + + raptor_free_uri(base_uri); + + + offset = raptor_iostream_tell(iostr); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Freeing iostream\n", program); +#endif + raptor_free_iostream(iostr); + + if(offset != OUT_BYTES_COUNT) { + fprintf(stderr, "%s: I/O stream wrote %d bytes, expected %d\n", program, + (int)offset, (int)OUT_BYTES_COUNT); + fputs("[[", stderr); + (void)fwrite(string, 1, string_len, stderr); + fputs("]]\n", stderr); + return 1; + } + + if(!string) { + fprintf(stderr, "%s: I/O stream failed to create a string\n", program); + return 1; + } + string_len = strlen((const char*)string); + if(string_len != offset) { + fprintf(stderr, "%s: I/O stream created a string length %d, expected %d\n", program, (int)string_len, (int)offset); + return 1; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fprintf(stderr, "%s: Made XML string of %d bytes\n", program, (int)string_len); + fputs("[[", stderr); + (void)fwrite(string, 1, string_len, stderr); + fputs("]]\n", stderr); +#endif + + raptor_free_memory(string); + + raptor_free_world(world); + + /* keep gcc -Wall happy */ + return(0); +} + +#endif diff --git a/src/snprintf.c b/src/snprintf.c new file mode 100644 index 0000000..9d79d0c --- /dev/null +++ b/src/snprintf.c @@ -0,0 +1,449 @@ +/* + * This file is in the Public Domain + * + * Based on code from 
Public Domain snprintf.c from mutt + * http://dev.mutt.org/hg/mutt/file/55cd4cb611d9/snprintf.c + * Tue Aug 08 22:49:12 2006 +0000 + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef HAVE_VASPRINTF +#ifndef _GNU_SOURCE +#define _GNU_SOURCE /* to get vasprintf() available */ +#endif +#endif +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + + + +/* + * Thanks to the patch in this Debian bug for the solution + * to the crash inside vsnprintf on some architectures. + * + * "reuse of args inside the while(1) loop is in violation of the + * specs and only happens to work by accident on other systems." + * + * http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=104325 + */ + +#ifndef va_copy +#ifdef __va_copy +#define va_copy(dest, src) __va_copy(dest,src) +#else +#define va_copy(dest, src) (dest) = (src) +#endif +#endif + + +#ifdef CHECK_VSNPRINTF_RUNTIME +static int vsnprintf_checked = -1; + +static int +vsnprintf_check_is_c99(char *buf, const char *s, ...) +{ + va_list args; + int r; + va_start(args, s); + r = vsnprintf(buf, buf ? 5 : 0, s, args); + va_end(args); + + return (r == 7); +} + +static int +vsnprintf_is_c99(void) +{ + if(vsnprintf_checked < 0) { + char buffer[32]; + vsnprintf_checked = (vsnprintf_check_is_c99(NULL, "1234567") && + vsnprintf_check_is_c99(buffer, "1234567")) + ? 
1 : 0; + } + + return vsnprintf_checked; +} +#endif /* CHECK_VSNPRINTF_RUNTIME */ + + +#define VSNPRINTF_C99_BLOCK(len, buffer, size, format, arguments) \ + do { \ + len = vsnprintf(buffer, size, format, arguments); \ + } while(0) + +#define VSNPRINTF_NOT_C99_BLOCK(len, buffer, size, format, arguments) \ + do { \ + if((buffer == NULL) || !size) { \ + /* This vsnprintf doesn't return number of bytes required */ \ + size = 2 + strlen(format); \ + while(1) { \ + va_list args_copy; \ + char* tmp_buffer = RAPTOR_MALLOC(char*, size + 1); \ + \ + if(!tmp_buffer) \ + break; \ + \ + /* copy for re-use */ \ + va_copy(args_copy, arguments); \ + len = vsnprintf(tmp_buffer, size, format, args_copy); \ + va_end(args_copy); \ + \ + /* On windows, vsnprintf() returns -1 if the buffer does not \ + * fit. If the buffer exactly fits the string without a NULL \ + * terminator, it returns the string length and it ends up \ + * with an unterminated string. The added check makes sure \ + * the string returned is terminated - otherwise more buffer \ + * space is allocated and the while() loop retries. \ + * \ + * On tru64, vsnprintf() returns the buffer size minus 1 if \ + * the buffer is too small, leaving room for the terminator. \ + */ \ + if((len >= 0) && \ + (RAPTOR_GOOD_CAST(size_t, len) + 1 < size) && \ + (tmp_buffer[len] == '\0')) { \ + len = RAPTOR_BAD_CAST(int, strlen(tmp_buffer)); \ + RAPTOR_FREE(char*, tmp_buffer); \ + break; \ + } \ + RAPTOR_FREE(char*, tmp_buffer); \ + size += (size >> 1); \ + } \ + } \ + \ + if(buffer != NULL) \ + len = vsnprintf(buffer, size, format, arguments); \ + } while(0) + +#ifndef STANDALONE + +/** + * raptor_vsnprintf2: + * @buffer: buffer (or NULL) + * @size: size of buffer (or 0) + * @format: printf-style format string + * @arguments: variable arguments list + * + * Format output for a variable arguments list into an allocated sized buffer. 
+ * + * This is a wrapper around system versions of vsnprintf with + * different call and return conventions. + * + * If @buffer is NULL or size is 0 or the buffer size is too small, + * returns the number of bytes that would be needed for buffer + * + * Return value: number of bytes allocated (excluding NUL) or <0 on failure + **/ +int +raptor_vsnprintf2(char *buffer, size_t size, + const char *format, va_list arguments) +{ + int len = -1; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(format, char*, -1); + +#ifdef CHECK_VSNPRINTF_RUNTIME + + if(vsnprintf_is_c99()) + VSNPRINTF_C99_BLOCK(len, buffer, size, format, arguments) ; + else + VSNPRINTF_NOT_C99_BLOCK(len, buffer, size, format, arguments) ; + +#else + +#ifdef HAVE_C99_VSNPRINTF + VSNPRINTF_C99_BLOCK(len, buffer, size, format, arguments) ; +#else + VSNPRINTF_NOT_C99_BLOCK(len, buffer, size, format, arguments) ; +#endif + +#endif + + return len; +} + + +/** + * raptor_vsnprintf: + * @format: printf-style format string + * @arguments: variable arguments list + * + * Format output for a variable arguments list into a newly allocated buffer + * + * @Deprecated: This does not actually conform to vsnprintf's calling + * convention and does not return the allocated buffer length. Use + * raptor_vsnprintf2() or raptor_vasprintf() instead. + * + * Return value: a newly allocated string as the formatted result or NULL on failure + **/ +char* +raptor_vsnprintf(const char *format, va_list arguments) +{ + int len; + char *buffer = NULL; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(format, char*, NULL); + + len = raptor_vasprintf(&buffer, format, arguments); + if(len < 0) + return NULL; + + return buffer; +} + + +/** + * raptor_snprintf: + * @buffer: buffer (or NULL) + * @size: bufer size (or 0) + * @format: printf-style format string + * @...: format arguments + * + * Format output into an allocated sized buffer + * + * This provides a portable version snprintf() over variants on + * different systems. 
+ * + * If @buffer is NULL, calculates the number of bytes needed to + * allocate for buffer and do no formatting. + * + * Return value: number of bytes allocated (excluding NUL) or 0 on failure + **/ +int +raptor_snprintf(char *buffer, size_t size, const char *format, ...) +{ + va_list arguments; + int length; + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(format, char*, 0); + + va_start(arguments, format); + length = raptor_vsnprintf2(buffer, size, format, arguments); + va_end(arguments); + + return length; +} + + +/** + * raptor_vasprintf: + * @ret: pointer to store buffer + * @format: printf-style format string + * @arguments: format arguments list + * + * Format output into a new buffer and return it + * + * This is a wrapper around the (GNU) vasprintf function that is not + * always avaiable. + * + * Return value: number of bytes allocated (excluding NUL) or < 0 on failure + **/ +int +raptor_vasprintf(char **ret, const char *format, va_list arguments) +{ + int length; +#ifndef HAVE_VASPRINTF + va_list args_copy; +#endif + + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(ret, char**, -1); + RAPTOR_ASSERT_OBJECT_POINTER_RETURN_VALUE(format, char*, -1); + +#ifdef HAVE_VASPRINTF + length = vasprintf(ret, format, arguments); +#else + va_copy(args_copy, arguments); + length = raptor_vsnprintf2(NULL, 0, format, args_copy); + va_end(args_copy); + if(length < 0) { + *ret = NULL; + return length; + } + *ret = RAPTOR_MALLOC(char*, length + 1); + if(!*ret) + return -1; + + va_copy(args_copy, arguments); + length = raptor_vsnprintf2(*ret, length + 1, format, args_copy); + va_end(args_copy); +#endif + + return length; +} + + +static const char digits[] = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ"; + +/** + * raptor_format_integer: + * @buffer: buffer (or NULL) + * @bufsize: size of above (or 0) + * @integer: integer value to format + * @base: numeric base up to 36 + * @width: field width (or -1) + * @padding: padding char (or \0) + * + * INTERNAL - Format an integer as a decimal 
into a buffer or + * calculate the size needed. + * + * Works Like the C99 snprintf() but just for integers. + * + * If @buffer is NULL or the @bufsize is too small, the number of + * bytes needed (excluding NUL) is returned and no formatting is done. + * + * NOTE: Does NOT add a '\0' at end of string. + * + * Return value: number of bytes needed or written (excluding NUL) or 0 on failure + */ +size_t +raptor_format_integer(char* buffer, size_t bufsize, int integer, + unsigned int base, int width, char padding) +{ + size_t len = 1; + char *p; + unsigned int value; + + if(integer < 0) { + value = (unsigned int)-integer; + len++; + width++; + } else + value = (unsigned int)integer; + while(value /= base) + len++; + + if(width > 0 && RAPTOR_GOOD_CAST(size_t, width) > len) + len = width; + + if(!buffer || bufsize < RAPTOR_GOOD_CAST(size_t, (len + 1))) /* +1 for NUL */ + return len; + + if(!padding) + padding = ' '; + + if(integer < 0) + value = (unsigned int)-integer; + else + value = (unsigned int)integer; + + p = &buffer[len]; + *p-- = '\0'; + while(value > 0 && p >= buffer) { + *p-- = digits[value % base]; + value /= base; + } + while(p >= buffer) + *p-- = padding; + if(integer < 0) + *buffer = '-'; + + return len; +} + + +#else /* STANDALONE */ + + +int main(int argc, char *argv[]); +static int test_snprintf_real(int len_ref, const char *format, va_list arguments) RAPTOR_PRINTF_FORMAT(2, 0); +static int test_snprintf(size_t len_ref, const char *format, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); + +static const char* program; + + +static int +test_snprintf_real(int len_ref, const char *format, va_list arguments) +{ + int len = -2; + size_t size = 0; + + VSNPRINTF_NOT_C99_BLOCK(len, NULL, size, format, arguments); + + if(len != len_ref) { + fprintf(stderr, + "%s: VSNPRINTF_NOT_C99_BLOCK(len=%d, size=%d, format=\"%s\") failed : expected %d, got %d\n", + program, len, (int)size, format, (int)len_ref, (int)len); + return 1; + } + + return 0; +} + + +static int +test_snprintf(size_t len_ref, const char *format, ...) +{ + va_list arguments; + int rc; + + va_start(arguments, format); + rc = test_snprintf_real(RAPTOR_BAD_CAST(int, len_ref), format, arguments); + va_end(arguments); + + return rc; +} + + +#define FMT_LEN_MAX 128 +#define ARG_LEN_MAX 128 + +int +main(int argc, char *argv[]) +{ + char fmt[FMT_LEN_MAX + 1]; + char arg[ARG_LEN_MAX + 1]; + size_t x, y; + int errors = 0; + + program = raptor_basename(argv[0]); + + for(x = 2; x < FMT_LEN_MAX; x++) { + for(y = 0; y < ARG_LEN_MAX; y++) { + size_t len_ref = x + y - 2; + + /* fmt = "xxxxxxxx%s" + * (number of 'x' characters varies) + */ + memset(fmt, 'x', x - 2); + fmt[x - 2] = '%'; + fmt[x - 1] = 's'; + fmt[x] = '\0'; + + /* arg = "yyyyyyyy" + * (number of 'y' characters varies) + */ + memset(arg, 'y', y); + arg[y] = '\0'; + + /* assert(strlen(fmt) == x); */ + /* assert(strlen(arg) == y); */ + + /* len_ref = sprintf(buf_ref, fmt, arg); + assert((size_t)len_ref == x + y - 2); */ + + PRAGMA_IGNORE_WARNING_FORMAT_NONLITERAL_START + if(test_snprintf(len_ref, fmt, arg)) + errors++; + PRAGMA_IGNORE_WARNING_END + } + } + + return errors; +} + + +#endif /* STANDALONE */ diff --git a/src/sort_r.c b/src/sort_r.c new file mode 100644 index 0000000..b1c8c84 --- /dev/null +++ b/src/sort_r.c @@ -0,0 +1,135 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * sort_r.c - Portable sort_r + * + * Copyright (C) 2014, David Beckett http://www.dajobe.org/ + * + * This package is Free Software and part of 
Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + + +#include <stdio.h> +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + + +#ifndef STANDALONE + +#if defined(HAVE_QSORT_R) || defined(HAVE_QSORT_S) +/* Include inline code */ +#include "sort_r.h" +#else +#include "ssort.h" +#endif + + +/** + * raptor_sort_r: + * @base: the array to be sorted + * @nel: the number of elements in the array + * @width: the size in bytes of each element of the array + * @compar: comparison function + * @user_data: a pointer to be passed to the comparison function + * + * Sort an array with an extra user data arg for the comparison funciton. + * + * Sorts data at @base of @nel elememnts of width @width using + * comparison function @comp that takes args (void* data1, void* + * data2, @user_data) and returns <0, 0, or >0 for object comparison. 
+ * + */ +void +raptor_sort_r(void *base, size_t nel, size_t width, + raptor_data_compare_arg_handler compar, void *user_data) +{ +#if defined(HAVE_QSORT_R) || defined(HAVE_QSORT_S) + sort_r(base, nel, width, compar, user_data); +#else + ssort_r(base, nel, width, compar, user_data); +#endif +} + + +#endif + + + +#ifdef STANDALONE + +/* one more prototype */ +int main(int argc, char *argv[]); + + +/* Public Domain licensed example code by Isaac Turner from + * https://github.com/noporpoise/sort_r + */ + +/* Isaac Turner 18 Nov 2013 Public Domain */ + +/* +Comparison function to sort an array of int, inverting a given region. +`arg` should be of type int[2], with the elements representing the start and end +of the region to invert (inclusive). +*/ +static int sort_r_cmp(const void *aa, const void *bb, void *arg) +{ + const int *a = (const int*)aa; + const int *b = (const int*)bb; + const int *interval = (const int*)arg; + int cmp = *a - *b; + int inv_start = interval[0], inv_end = interval[1]; + char norm = (*a < inv_start || *a > inv_end || *b < inv_start || *b > inv_end); + return norm ? 
cmp : -cmp; +} + +int +main(int argc, char *argv[]) +{ + const char *program = raptor_basename(argv[0]); + + int i; + /* sort 1..19, 30..20, 30..100 */ + int arr[18] = {1, 5, 28, 4, 3, 2, 10, 20, 18, 25, 21, 29, 34, 35, 14, 100, 27, 19}; + int tru[18] = {1, 2, 3, 4, 5, 10, 14, 18, 19, 29, 28, 27, 25, 21, 20, 34, 35, 100}; + + /* Region to invert: 20-30 (inclusive) */ + int interval[2] = {20, 30}; + int failures = 0; + + raptor_sort_r(arr, 18, sizeof(int), sort_r_cmp, interval); + + /* Check PASS/FAIL */ + for(i = 0; i < 18; i++) { + if(arr[i] != tru[i]) { + printf("%s: sort_r() result %i: got %d expected %d", program, + i, arr[i], tru[i]); + failures++; + } + } + + return failures; +} + +#endif diff --git a/src/sort_r.h b/src/sort_r.h new file mode 100644 index 0000000..8d8b580 --- /dev/null +++ b/src/sort_r.h @@ -0,0 +1,125 @@ +/* Isaac Turner 29 April 2014 Public Domain */ +#ifndef SORT_R_H_ +#define SORT_R_H_ + +#include <stdlib.h> + +/* +sort_r function to be exported. + +Parameters: + base is the array to be sorted + nel is the number of elements in the array + width is the size in bytes of each element of the array + compar is the comparison function + arg is a pointer to be passed to the comparison function + +void sort_r(void *base, size_t nel, size_t width, + int (*compar)(const void *_a, const void *_b, void *_arg), + void *arg); +*/ + +#if (defined __APPLE__ || defined __MACH__ || defined __DARWIN__ || \ + defined __FreeBSD__ || defined __BSD__ || defined __bsdi__ || \ + defined OpenBSD3_1 || defined OpenBSD3_9 || defined __OpenBSD__ || \ + defined __NetBSD__ || \ + defined __DragonFly__ || \ + defined AMIGA) +# define _SORT_R_BSD +#elif (defined _GNU_SOURCE || defined __gnu_hurd__ || defined __GNU__ || \ + defined __linux__ || defined __MINGW32__ || defined __GLIBC__ || \ + defined __CYGWIN__) +# define _SORT_R_LINUX +#elif (defined _WIN32 || defined _WIN64 || defined __WINDOWS__) +# define _SORT_R_WINDOWS +#else +# error Cannot detect operating system 
+#endif + +#if (defined NESTED_QSORT && NESTED_QSORT == 0) +# undef NESTED_QSORT +#elif (!defined NESTED_QSORT && \ + defined __GLIBC__ && __GLIBC__ == 2 && __GLIBC_MINOR__ < 8) +/* no qsort_r in glibc before 2.8 */ +# define NESTED_QSORT +#endif + + +#if defined NESTED_QSORT + + static inline void sort_r(void *base, size_t nel, size_t width, + int (*compar)(const void *_a, const void *_b, void *aarg), + void *arg) + { + int nested_cmp(const void *a, const void *b) + { + return compar(a, b, arg); + } + + qsort(base, nel, width, nested_cmp); + } + +#else /* !NESTED_QSORT */ + + /* Declare structs and functions */ + #if defined _SORT_R_BSD + + /* BSD requires argument swap */ + extern void qsort_r(void *base, size_t nel, size_t width, void *thunk, + int (*compar)(void *_thunk, const void *_a, const void *_b)); + + struct sort_r_data + { + void *arg; + int (*compar)(const void *_a, const void *_b, void *_arg); + }; + + static inline int sort_r_arg_swap(void *s, const void *a, const void *b) + { + struct sort_r_data *ss = (struct sort_r_data*)s; + return (ss->compar)(a, b, ss->arg); + } + + #elif defined _SORT_R_LINUX + + typedef int(* __compar_d_fn_t)(const void *, const void *, void *); + extern void qsort_r(void *base, size_t nel, size_t width, + __compar_d_fn_t __compar, void *arg) + __attribute__((nonnull (1, 4))); + + #endif + + /* implementation */ + + static inline void sort_r(void *base, size_t nel, size_t width, + int (*compar)(const void *_a, const void *_b, void *_arg), + void *arg) + { + #if defined _SORT_R_LINUX + + qsort_r(base, nel, width, compar, arg); + + #elif defined _SORT_R_BSD + + struct sort_r_data tmp; + tmp.arg = arg; + tmp.compar = compar; + qsort_r(base, nel, width, &tmp, sort_r_arg_swap); + + #else /* defined _SORT_R_WINDOWS */ + + struct sort_r_data tmp; + tmp.arg = arg; + tmp.compar = compar; + qsort_s(base, nel, width, sort_r_arg_swap, &tmp); + + #endif + } + +#endif /* !NESTED_QSORT */ + +#undef _SORT_R_WINDOWS +#undef _SORT_R_LINUX 
+#undef _SORT_R_BSD + +#endif /* SORT_R_H_ */ diff --git a/src/ssort.h b/src/ssort.h new file mode 100644 index 0000000..7cc1572 --- /dev/null +++ b/src/ssort.h @@ -0,0 +1,73 @@ +/* +** ssort() -- Fast, small, qsort()-compatible Shell sort +** +** by Ray Gardner, public domain 5/90 +*/ + +#include <stddef.h> + +/** + * ssort_r: + * @base: base data + * @nel: number of elements at @base + * @width: width of an element + * @comp: comparison function taking args (a, b, @arg) + * @arg: user data (thunk) for the comparison function @comp + * + * Fast, small shell sort compatible to qsort_r() taking an extra thunk / user data arg. + * + * Sorts data at @base of @nel elements of width @width using + * comparison function @comp that takes args (a, b, @arg). + * + * Return value: non-0 on failure + * +*/ +static int +ssort_r(void* base, size_t nel, size_t width, + raptor_data_compare_arg_handler comp, + void* arg) +{ + size_t wnel, gap, k; + + /* bad args */ + if(!base || !width || !comp) + return -1; + + /* nothing to do */ + if(nel < 2) + return 0; + + wnel = width * nel; + for(gap = 0; ++gap < nel;) + gap *= 3; + + while((gap /= 3) != 0) { + size_t wgap = width * gap; + size_t i; + + for(i = wgap; i < wnel; i += width) { + size_t j = i; + do { + char* a; + char* b; + + j -= wgap; + a = j + (char *)base; + b = a + wgap; + + if ((*comp)(a, b, arg) <= 0) + break; + + k = width; + do { + char tmp = *a; + *a++ = *b; + *b++ = tmp; + } while (--k); + + } while(j >= wgap); + } + } + + return 0; +} diff --git a/src/strcasecmp.c b/src/strcasecmp.c new file mode 100644 index 0000000..a1a3605 --- /dev/null +++ b/src/strcasecmp.c @@ -0,0 +1,108 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * strcasecmp.c - strcasecmp compatibility + * + * This file is in the public domain. 
+ * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> + +int raptor_strcasecmp(const char* s1, const char* s2); +int raptor_strncasecmp(const char* s1, const char* s2, size_t n); + + +int +raptor_strcasecmp(const char* s1, const char* s2) +{ + register int c1, c2; + + while(*s1 && *s2) { + c1 = tolower((int)*s1); + c2 = tolower((int)*s2); + if(c1 != c2) + return (c1 - c2); + s1++; + s2++; + } + return (int) (*s1 - *s2); +} + + +int +raptor_strncasecmp(const char* s1, const char* s2, size_t n) +{ + register int c1, c2; + + while(*s1 && *s2 && n) { + c1 = tolower((int)*s1); + c2 = tolower((int)*s2); + if(c1 != c2) + return (c1 - c2); + s1++; + s2++; + n--; + } + return 0; +} + + + +#ifdef STANDALONE + + +static int +assert_strcasecmp (const char *s1, const char *s2, int expected) +{ + int result = raptor_strcasecmp(s1, s2); + result = (result > 0) ? 1 : ((result <0) ? -1 : 0); + + if(result != expected) + { + fprintf(stderr, "FAIL strcasecmp (%s, %s) gave %d != %d\n", + s1, s2, result, expected); + return 1; + } + return 0; +} + + +static int +assert_strncasecmp (const char *s1, const char *s2, size_t size, int expected) +{ + int result = raptor_strncasecmp(s1, s2, size); + result = (result > 0) ? 1 : ((result <0) ? 
-1 : 0); + + if(result != expected) + { + fprintf(stderr, "FAIL strncasecmp (%s, %s, %d) gave %d != %d\n", + s1, s2, (unsigned int)size, result, expected); + return 1; + } + return 0; +} + + +int +main(int argc, char *argv[]) +{ + int failures = 0; + + failures += assert_strcasecmp("foo", "foo", 0); + failures += assert_strcasecmp("foo", "FOO", 0); + failures += assert_strcasecmp("foo", "BaR", 1); + + failures += assert_strncasecmp("foo", "foobar", 3, 0); + failures += assert_strncasecmp("foo", "FOOxyz", 3, 0); + failures += assert_strncasecmp("foo", "BaRfoo", 3, 1); + + return failures; +} + +#endif diff --git a/src/turtle_common.c b/src/turtle_common.c new file mode 100644 index 0000000..c822b34 --- /dev/null +++ b/src/turtle_common.c @@ -0,0 +1,336 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_common.c - Raptor Turtle common code + * + * Copyright (C) 2003-2007, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + */ + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +/* Raptor includes */ +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> +#define YY_NO_UNISTD_H 1 +#define YYSTYPE TURTLE_PARSER_STYPE +#include <turtle_lexer.h> +#include <turtle_common.h> + +/** + * raptor_stringbuffer_append_turtle_string: + * @stringbuffer: String buffer to add to + * @text: turtle string to decode + * @len: length of string + * @delim: terminating delimiter for string - only ', " or > are allowed + * @error_handler: error handling function + * @error_data: error handler data + * + * Append to a stringbuffer a Turtle-escaped string. + * + * The passed in string is handled according to the Turtle string + * escape rules giving a UTF-8 encoded output of the Unicode codepoints. + * + * The Turtle escapes are \b \f \n \r \t \\ + * \uXXXX \UXXXXXXXX where X is [A-F0-9] + * + * Turtle 2013 allows \ with -_~.!$&\'()*+,;=/?#@% + * + * URIs may not have \t \b \n \r \f or raw ' ' or \u0020 or \u003C or \u003E + * + * Return value: non-0 on failure + **/ +int +raptor_stringbuffer_append_turtle_string(raptor_stringbuffer* stringbuffer, + const unsigned char *text, + size_t len, int delim, + raptor_simple_message_handler error_handler, + void *error_data, + int is_uri) +{ + size_t i; + const unsigned char *s; + unsigned char *d; + unsigned char *string = RAPTOR_MALLOC(unsigned char*, len + 1); + const char* label = (is_uri ? 
"URI" : "string"); + + if(!string) + return -1; + + for(s = text, d = string, i = 0; i < len; s++, i++) { + unsigned char c=*s; + + if(c == ' ' && is_uri) { + error_handler(error_data, + "Turtle %s error - character '%c'", label, c); + RAPTOR_FREE(char*, string); + return 1; + } + + if(c == '\\' ) { + s++; i++; + c = *s; + if(c == 'n' || c == 'r' || c == 't' || c == 'b' || c == 'f') { + if(is_uri) { + error_handler(error_data, + "Turtle %s error - illegal URI escape '\\%c'", label, c); + RAPTOR_FREE(char*, string); + return 1; + } + if(c == 'n') + *d++ = '\n'; + else if(c == 'r') + *d++ = '\r'; + else if(c == 't') + *d++ = '\t'; + else if(c == 'b') + *d++ = '\b'; + else /* 'f' */ + *d++ = '\f'; + } else if(c == '\\' || c == delim || + c == '-' || c == '_' || c == '~' || c == '.' || c == '!' || + c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || + c == '*' || c == '+' || c == ',' || c == ';' ||c == '=' || + c == '/' || c == '?' || c == '#' || c == '@' ||c == '%') + *d++ = c; + else if(c == 'u' || c == 'U') { + size_t ulen = (c == 'u') ? 4 : 8; + unsigned long unichar = 0; + int n; + int unichar_width; + size_t ii; + + s++; i++; + if(i+ulen > len) { + error_handler(error_data, + "Turtle %s error - \\%c over end of line", label, c); + RAPTOR_FREE(char*, string); + return 1; + } + + for(ii = 0; ii < ulen; ii++) { + char cc = s[ii]; + if(!isxdigit(RAPTOR_GOOD_CAST(char, cc))) { + error_handler(error_data, + "Turtle %s error - illegal hex digit %c in Unicode escape '%c%s...'", + label, cc, c, s); + RAPTOR_FREE(char*, string); + return 1; + } + } + + n = sscanf((const char*)s, ((ulen == 4) ? 
"%04lx" : "%08lx"), &unichar); + if(n != 1) { + error_handler(error_data, + "Turtle %s error - illegal Unicode escape '%c%s...'", + label, c, s); + RAPTOR_FREE(char*, string); + return 1; + } + + s+= ulen-1; + i+= ulen-1; + + if(is_uri && (unichar == 0x0020 || unichar == 0x003C || unichar == 0x003E)) { + error_handler(error_data, + "Turtle %s error - illegal Unicode escape \\u%04lX in URI.", label, unichar); + break; + } + + if(unichar > raptor_unicode_max_codepoint) { + error_handler(error_data, + "Turtle %s error - illegal Unicode character with code point #x%lX (max #x%lX).", + label, unichar, raptor_unicode_max_codepoint); + RAPTOR_FREE(char*, string); + return 1; + } + + unichar_width = raptor_unicode_utf8_string_put_char(unichar, d, + len-(d-string)); + if(unichar_width < 0) { + error_handler(error_data, + "Turtle %s error - illegal Unicode character with code point #x%lX.", + label, unichar); + RAPTOR_FREE(char*, string); + return 1; + } + d += (size_t)unichar_width; + + } else { + /* don't handle \x where x isn't one of: \t \n \r \\ (delim) */ + error_handler(error_data, + "Turtle %s error - illegal escape \\%c (#x%02X) in \"%s\"", + label, c, c, text); + } + } else + *d++=c; + } + *d='\0'; + + /* calculate output string size */ + len = d-string; + +#ifdef __clang_analyzer__ + /* clang --analyze does not know about ownership of next call */ + free(string); string = NULL; +#endif + /* string gets owned by the stringbuffer after this */ + return raptor_stringbuffer_append_counted_string(stringbuffer, + string, len, 0); + +} + + +/** + * raptor_turtle_expand_qname_escapes: + * @name: turtle qname string to decode + * @len: length of name + * @error_handler: error handling function + * @error_data: error handler data + * + * Expands Turtle escapes for the given turtle qname string + * + * The passed in string is handled according to the Turtle string + * escape rules giving a UTF-8 encoded output of the Unicode codepoints. 
+ * + * The Turtle escapes are \b \f \n \r \t \\ + * \uXXXX \UXXXXXXXX where X is [A-F0-9] + * + * Turtle 2013 allows \ with -_~.!$&\'()*+,;=/?#@% + * + * Return value: new length or 0 on failure + **/ +size_t +raptor_turtle_expand_qname_escapes(unsigned char *name, + size_t len, + raptor_simple_message_handler error_handler, + void *error_data) +{ + size_t i; + const unsigned char *s; + unsigned char *d; + + if(!name) + return 0; + + for(s = name, d = name, i = 0; i < len; s++, i++) { + unsigned char c=*s; + + if(c == '\\' ) { + s++; i++; + c = *s; + if(c == 'n') + *d++ = '\n'; + else if(c == 'r') + *d++ = '\r'; + else if(c == 't') + *d++ = '\t'; + else if(c == 'b') + *d++ = '\b'; + else if(c == 'f') + *d++ = '\f'; + else if(c == '\\' || + c == '-' || c == '_' || c == '~' || c == '.' || c == '!' || + c == '$' || c == '&' || c == '\'' || c == '(' || c == ')' || + c == '*' || c == '+' || c == ',' || c == ';' ||c == '=' || + c == '/' || c == '?' || c == '#' || c == '@' ||c == '%') + *d++ = c; + else if(c == 'u' || c == 'U') { + size_t ulen = (c == 'u') ? 4 : 8; + unsigned long unichar = 0; + int n; + int unichar_width; + size_t ii; + + s++; i++; + if(i+ulen > len) { + error_handler(error_data, + "Turtle name error - \\%c over end of line", c); + return 0; + } + + for(ii = 0; ii < ulen; ii++) { + char cc = s[ii]; + if(!isxdigit(RAPTOR_GOOD_CAST(char, cc))) { + error_handler(error_data, + "Turtle name error - illegal hex digit %c in Unicode escape '%c%s...'", + cc, c, s); + return 0; + } + } + + n = sscanf((const char*)s, ((ulen == 4) ? 
"%04lx" : "%08lx"), &unichar); + if(n != 1) { + error_handler(error_data, + "Turtle name error - illegal Uncode escape '%c%s...'", + c, s); + return 0; + } + + s+= ulen-1; + i+= ulen-1; + + if(unichar > raptor_unicode_max_codepoint) { + error_handler(error_data, + "Turtle name error - illegal Unicode character with code point #x%lX (max #x%lX).", + unichar, raptor_unicode_max_codepoint); + return 0; + } + + unichar_width = raptor_unicode_utf8_string_put_char(unichar, d, + len - (d-name)); + if(unichar_width < 0) { + error_handler(error_data, + "Turtle name error - illegal Unicode character with code point #x%lX.", + unichar); + return 0; + } + d += (size_t)unichar_width; + + } else { + /* don't handle \x where x isn't one of: \t \n \r \\ (delim) */ + error_handler(error_data, + "Turtle name error - illegal escape \\%c (#x%02X) in \"%s\"", + c, c, name); + } + } else + *d++ = c; + } + *d='\0'; + + /* calculate output string size */ + len = d - name; + + /* string gets owned by the stringbuffer after this */ + return len; +} diff --git a/src/turtle_common.h b/src/turtle_common.h new file mode 100644 index 0000000..b869eba --- /dev/null +++ b/src/turtle_common.h @@ -0,0 +1,98 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_common.h - Turtle lexer/parser shared internals + * + * Copyright (C) 2003-2008, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2004, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. 
+ * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + */ + +#ifndef TURTLE_COMMON_H +#define TURTLE_COMMON_H + +#ifdef __cplusplus +extern "C" { +#endif + + +/* turtle_parser.y */ +RAPTOR_INTERNAL_API int turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); +RAPTOR_INTERNAL_API raptor_uri* turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len); +RAPTOR_INTERNAL_API size_t raptor_turtle_expand_qname_escapes(unsigned char *name, size_t len, raptor_simple_message_handler error_handler, void *error_data); /* expands the escapes in name in place; returns the new length or 0 on failure */ + +/* turtle_lexer.l */ +extern void turtle_token_free(raptor_world* world, int token, TURTLE_PARSER_STYPE *lval); + + +/* + * Turtle parser object: state shared between the Turtle lexer and parser + */ +struct raptor_turtle_parser_s { + /* buffer */ + char *buffer; + + /* buffer length */ + size_t buffer_length; + + raptor_namespace_stack namespaces; /* static: held by value in this struct, not through a pointer */ + + /* for lexer to store result in */ + TURTLE_PARSER_STYPE lval; + + /* STATIC lexer */ + yyscan_t scanner; + + int scanner_set; /* NOTE(review): presumably non-0 once scanner has been set up - confirm */ + + int lineno; /* NOTE(review): presumably the current input line number - confirm */ + int lineno_last_good; /* NOTE(review): presumably the line number of the last good input - confirm */ + + /* for the chunk parser, how much of the input has been consumed */ + size_t consumed; + /* likewise, how much of the input has been successfully processed */ + size_t processed; + /* indicates what can be processed at most */ + size_t consumable; + /* real end-of-buffer indicator, as we kill the last line */ + size_t end_of_buffer; + + /* a sequence holding deferred statements */ + raptor_sequence *deferred; + + /* for creating long literals */ + raptor_stringbuffer* sb; + + /* count of errors in current parse */ + int error_count; + + /* TRIG graph name */ + raptor_term* graph_name; + + /* Allow TRIG extensions */ + int trig; + + /* Last run of many (NOTE(review): presumably set for the final chunk of a chunked parse - confirm) */ + int is_end; +}; + + +#ifdef __cplusplus +} +#endif + +#endif diff --git 
a/src/turtle_lexer.c b/src/turtle_lexer.c new file mode 100644 index 0000000..ee28f5e --- /dev/null +++ b/src/turtle_lexer.c @@ -0,0 +1,3532 @@ +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#line 6 "turtle_lexer.c" + +#line 8 "turtle_lexer.c" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define turtle_lexer__create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer turtle_lexer__create_buffer +#endif + +#ifdef yy_delete_buffer +#define turtle_lexer__delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer turtle_lexer__delete_buffer +#endif + +#ifdef yy_scan_buffer +#define turtle_lexer__scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer turtle_lexer__scan_buffer +#endif + +#ifdef yy_scan_string +#define turtle_lexer__scan_string_ALREADY_DEFINED +#else +#define yy_scan_string turtle_lexer__scan_string +#endif + +#ifdef yy_scan_bytes +#define turtle_lexer__scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes turtle_lexer__scan_bytes +#endif + +#ifdef yy_init_buffer +#define turtle_lexer__init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer turtle_lexer__init_buffer +#endif + +#ifdef yy_flush_buffer +#define turtle_lexer__flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer turtle_lexer__flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define turtle_lexer__load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state turtle_lexer__load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define turtle_lexer__switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer turtle_lexer__switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define turtle_lexer_push_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state 
turtle_lexer_push_buffer_state +#endif + +#ifdef yypop_buffer_state +#define turtle_lexer_pop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state turtle_lexer_pop_buffer_state +#endif + +#ifdef yyensure_buffer_stack +#define turtle_lexer_ensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack turtle_lexer_ensure_buffer_stack +#endif + +#ifdef yylex +#define turtle_lexer_lex_ALREADY_DEFINED +#else +#define yylex turtle_lexer_lex +#endif + +#ifdef yyrestart +#define turtle_lexer_restart_ALREADY_DEFINED +#else +#define yyrestart turtle_lexer_restart +#endif + +#ifdef yylex_init +#define turtle_lexer_lex_init_ALREADY_DEFINED +#else +#define yylex_init turtle_lexer_lex_init +#endif + +#ifdef yylex_init_extra +#define turtle_lexer_lex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra turtle_lexer_lex_init_extra +#endif + +#ifdef yylex_destroy +#define turtle_lexer_lex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy turtle_lexer_lex_destroy +#endif + +#ifdef yyget_debug +#define turtle_lexer_get_debug_ALREADY_DEFINED +#else +#define yyget_debug turtle_lexer_get_debug +#endif + +#ifdef yyset_debug +#define turtle_lexer_set_debug_ALREADY_DEFINED +#else +#define yyset_debug turtle_lexer_set_debug +#endif + +#ifdef yyget_extra +#define turtle_lexer_get_extra_ALREADY_DEFINED +#else +#define yyget_extra turtle_lexer_get_extra +#endif + +#ifdef yyset_extra +#define turtle_lexer_set_extra_ALREADY_DEFINED +#else +#define yyset_extra turtle_lexer_set_extra +#endif + +#ifdef yyget_in +#define turtle_lexer_get_in_ALREADY_DEFINED +#else +#define yyget_in turtle_lexer_get_in +#endif + +#ifdef yyset_in +#define turtle_lexer_set_in_ALREADY_DEFINED +#else +#define yyset_in turtle_lexer_set_in +#endif + +#ifdef yyget_out +#define turtle_lexer_get_out_ALREADY_DEFINED +#else +#define yyget_out turtle_lexer_get_out +#endif + +#ifdef yyset_out +#define turtle_lexer_set_out_ALREADY_DEFINED +#else +#define yyset_out turtle_lexer_set_out +#endif + 
+#ifdef yyget_leng +#define turtle_lexer_get_leng_ALREADY_DEFINED +#else +#define yyget_leng turtle_lexer_get_leng +#endif + +#ifdef yyget_text +#define turtle_lexer_get_text_ALREADY_DEFINED +#else +#define yyget_text turtle_lexer_get_text +#endif + +#ifdef yyget_lineno +#define turtle_lexer_get_lineno_ALREADY_DEFINED +#else +#define yyget_lineno turtle_lexer_get_lineno +#endif + +#ifdef yyset_lineno +#define turtle_lexer_set_lineno_ALREADY_DEFINED +#else +#define yyset_lineno turtle_lexer_set_lineno +#endif + +#ifdef yyget_column +#define turtle_lexer_get_column_ALREADY_DEFINED +#else +#define yyget_column turtle_lexer_get_column +#endif + +#ifdef yyset_column +#define turtle_lexer_set_column_ALREADY_DEFINED +#else +#define yyset_column turtle_lexer_set_column +#endif + +#ifdef yywrap +#define turtle_lexer_wrap_ALREADY_DEFINED +#else +#define yywrap turtle_lexer_wrap +#endif + +#ifdef yyget_lval +#define turtle_lexer_get_lval_ALREADY_DEFINED +#else +#define yyget_lval turtle_lexer_get_lval +#endif + +#ifdef yyset_lval +#define turtle_lexer_set_lval_ALREADY_DEFINED +#else +#define yyset_lval turtle_lexer_set_lval +#endif + +#ifdef yyalloc +#define turtle_lexer_alloc_ALREADY_DEFINED +#else +#define yyalloc turtle_lexer_alloc +#endif + +#ifdef yyrealloc +#define turtle_lexer_realloc_ALREADY_DEFINED +#else +#define yyrealloc turtle_lexer_realloc +#endif + +#ifdef yyfree +#define turtle_lexer_free_ALREADY_DEFINED +#else +#define yyfree turtle_lexer_free +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. 
*/ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. */ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an + * integer in range [0..255] for use as an array index. + */ +#define YY_SC_TO_UI(c) ((YY_CHAR) (c)) + +/* An opaque pointer. 
*/ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yyg->yy_start = 1 + 2 * +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yyg->yy_start - 1) / 2) +#define YYSTATE YY_START +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin , yyscanner ) +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +/* The state buf must be large enough to hold one state per character in the main buffer. 
+ */ +#define YY_STATE_BUF_SIZE ((YY_BUF_SIZE + 2) * sizeof(yy_state_type)) + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + + #define YY_LESS_LINENO(n) + #define YY_LINENO_REWIND_TO(ptr) + +/* Return all but the first "n" matched characters back to the input stream. */ +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + *yy_cp = yyg->yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yyg->yy_c_buf_p = yy_cp = yy_bp + yyless_macro_arg - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) +#define unput(c) yyunput( c, yyg->yytext_ptr , yyscanner ) + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. 
+ */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + * + * Returns the top of the stack, or NULL. + */ +#define YY_CURRENT_BUFFER ( yyg->yy_buffer_stack \ + ? yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] \ + : NULL) +/* Same as previous macro, but useful when we know that the buffer stack is not + * NULL or when we need an lvalue. For internal use only. 
+ */ +#define YY_CURRENT_BUFFER_LVALUE yyg->yy_buffer_stack[yyg->yy_buffer_stack_top] + +void yyrestart ( FILE *input_file , yyscan_t yyscanner ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size , yyscan_t yyscanner ); +void yy_delete_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yy_flush_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +void yypop_buffer_state ( yyscan_t yyscanner ); + +static void yyensure_buffer_stack ( yyscan_t yyscanner ); +static void yy_load_buffer_state ( yyscan_t yyscanner ); +static void yy_init_buffer ( YY_BUFFER_STATE b, FILE *file , yyscan_t yyscanner ); +#define YY_FLUSH_BUFFER yy_flush_buffer( YY_CURRENT_BUFFER , yyscanner) + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, yy_size_t len , yyscan_t yyscanner ); + +void *yyalloc ( yy_size_t , yyscan_t yyscanner ); +void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner ); +void yyfree ( void * , yyscan_t yyscanner ); + +#define yy_new_buffer yy_create_buffer +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! YY_CURRENT_BUFFER ){ \ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_is_interactive = is_interactive; \ + } +#define yy_set_bol(at_bol) \ + { \ + if ( ! 
YY_CURRENT_BUFFER ){\ + yyensure_buffer_stack (yyscanner); \ + YY_CURRENT_BUFFER_LVALUE = \ + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); \ + } \ + YY_CURRENT_BUFFER_LVALUE->yy_at_bol = at_bol; \ + } +#define YY_AT_BOL() (YY_CURRENT_BUFFER_LVALUE->yy_at_bol) + +/* Begin user sect3 */ +typedef flex_uint8_t YY_CHAR; + +typedef int yy_state_type; + +#define yytext_ptr yytext_r + +static yy_state_type yy_get_previous_state ( yyscan_t yyscanner ); +static yy_state_type yy_try_NUL_trans ( yy_state_type current_state , yyscan_t yyscanner); +static int yy_get_next_buffer ( yyscan_t yyscanner ); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. + */ +#define YY_DO_BEFORE_ACTION \ + yyg->yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yyg->yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yyg->yy_c_buf_p = yy_cp; +#define YY_NUM_RULES 46 +#define YY_END_OF_BUFFER 47 +/* This struct is not used in this scanner, + but its presence is necessary. 
*/ +struct yy_trans_info + { + flex_int32_t yy_verify; + flex_int32_t yy_nxt; + }; +static const flex_int16_t yy_accept[162] = + { 0, + 0, 0, 0, 0, 24, 24, 28, 28, 47, 45, + 2, 1, 1, 45, 44, 45, 14, 15, 45, 5, + 4, 34, 31, 6, 45, 45, 45, 45, 45, 7, + 8, 45, 45, 3, 45, 45, 16, 17, 38, 35, + 37, 38, 24, 24, 24, 25, 28, 28, 28, 29, + 2, 1, 0, 20, 0, 44, 43, 43, 0, 21, + 0, 0, 34, 32, 0, 0, 0, 0, 31, 0, + 0, 40, 0, 41, 0, 42, 42, 42, 0, 0, + 31, 0, 0, 13, 0, 0, 0, 35, 0, 0, + 36, 24, 24, 0, 0, 28, 28, 0, 0, 20, + + 22, 21, 26, 0, 32, 0, 0, 33, 0, 0, + 31, 0, 0, 0, 0, 0, 39, 0, 0, 42, + 42, 42, 0, 0, 30, 0, 0, 23, 27, 0, + 33, 0, 33, 0, 0, 0, 0, 42, 42, 12, + 0, 30, 0, 0, 18, 0, 0, 11, 42, 0, + 19, 0, 0, 42, 10, 0, 9, 0, 0, 0, + 0 + } ; + +static const YY_CHAR yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 2, 1, 4, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 5, 6, 7, 5, 8, 5, 9, 10, + 11, 5, 12, 13, 14, 15, 5, 16, 16, 16, + 16, 16, 16, 16, 16, 16, 16, 17, 18, 19, + 20, 21, 5, 22, 23, 24, 25, 25, 26, 27, + 28, 28, 29, 28, 28, 28, 28, 28, 28, 30, + 28, 31, 32, 28, 33, 28, 28, 34, 28, 28, + 35, 36, 37, 38, 39, 1, 40, 41, 25, 25, + + 42, 43, 28, 28, 44, 28, 28, 45, 28, 28, + 28, 46, 28, 47, 48, 49, 50, 28, 28, 51, + 28, 28, 52, 1, 53, 5, 54, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55, 55, 55, 55, 55, 55, + 55, 55, 55, 55, 55 + } ; + +static const YY_CHAR yy_meta[56] = + { 0, + 1, 2, 3, 3, 4, 5, 4, 6, 7, 4, + 4, 4, 4, 8, 9, 10, 11, 4, 1, 6, + 12, 4, 13, 13, 13, 13, 13, 14, 14, 14, + 14, 14, 14, 14, 12, 15, 
12, 1, 16, 13, + 13, 13, 13, 14, 14, 14, 14, 14, 14, 14, + 14, 2, 1, 12, 17 + } ; + +static const flex_int16_t yy_base[195] = + { 0, + 0, 0, 55, 0, 108, 109, 110, 113, 568, 620, + 559, 620, 553, 111, 117, 114, 620, 620, 109, 620, + 538, 111, 128, 620, 97, 88, 123, 137, 124, 620, + 620, 515, 523, 141, 142, 146, 620, 620, 620, 519, + 620, 150, 132, 139, 508, 163, 140, 148, 502, 169, + 502, 620, 156, 495, 0, 170, 620, 481, 161, 443, + 0, 402, 170, 163, 165, 186, 206, 0, 212, 215, + 0, 620, 167, 219, 154, 0, 339, 314, 184, 196, + 226, 218, 221, 620, 0, 225, 226, 353, 234, 237, + 620, 221, 241, 250, 343, 252, 256, 222, 325, 620, + + 620, 620, 620, 267, 249, 268, 302, 297, 0, 0, + 291, 270, 0, 283, 294, 298, 620, 0, 0, 0, + 256, 261, 290, 293, 274, 300, 309, 620, 620, 253, + 252, 245, 244, 0, 326, 0, 0, 203, 194, 304, + 308, 191, 175, 316, 323, 0, 0, 0, 139, 330, + 324, 0, 0, 77, 327, 0, 0, 0, 0, 0, + 620, 381, 398, 415, 432, 449, 465, 479, 181, 489, + 499, 516, 533, 299, 549, 563, 572, 579, 343, 344, + 593, 349, 350, 602, 355, 356, 357, 361, 362, 363, + 367, 459, 460, 464 + + } ; + +static const flex_int16_t yy_def[195] = + { 0, + 161, 1, 161, 3, 162, 162, 163, 163, 161, 161, + 161, 161, 161, 164, 165, 166, 161, 161, 161, 161, + 161, 161, 167, 161, 168, 169, 170, 170, 170, 161, + 161, 161, 161, 170, 170, 170, 161, 161, 161, 161, + 161, 171, 172, 172, 161, 172, 173, 173, 161, 173, + 161, 161, 164, 161, 164, 165, 161, 161, 166, 161, + 166, 161, 161, 161, 161, 161, 161, 174, 175, 161, + 176, 161, 168, 161, 161, 177, 177, 177, 170, 170, + 167, 170, 170, 161, 178, 170, 170, 161, 171, 171, + 161, 172, 172, 172, 161, 173, 173, 173, 161, 161, + + 161, 161, 161, 161, 161, 161, 161, 161, 179, 180, + 175, 111, 181, 161, 161, 161, 161, 182, 183, 177, + 177, 177, 170, 170, 184, 170, 170, 161, 161, 161, + 161, 161, 161, 185, 161, 186, 187, 177, 177, 170, + 170, 184, 184, 170, 170, 188, 189, 177, 177, 170, + 170, 190, 191, 177, 170, 192, 177, 193, 194, 191, + 0, 161, 161, 161, 161, 
161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161 + + } ; + +static const flex_int16_t yy_nxt[676] = + { 0, + 10, 11, 12, 13, 10, 14, 15, 10, 16, 17, + 18, 19, 20, 19, 21, 22, 23, 24, 25, 10, + 10, 26, 27, 28, 27, 27, 27, 27, 27, 29, + 27, 27, 27, 27, 30, 10, 31, 32, 33, 34, + 28, 27, 35, 27, 27, 29, 27, 27, 36, 27, + 27, 37, 38, 10, 27, 39, 40, 39, 39, 39, + 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 39, 41, 39, 39, 39, 39, 39, 42, 42, 42, + 42, 42, 42, 42, 42, 42, 42, 42, 42, 39, + 39, 39, 39, 39, 42, 42, 42, 42, 42, 42, + + 42, 42, 42, 42, 42, 42, 39, 39, 39, 42, + 44, 44, 48, 45, 45, 48, 54, 74, 49, 57, + 58, 49, 60, 62, 63, 65, 63, 157, 77, 67, + 67, 67, 75, 78, 93, 68, 66, 80, 80, 81, + 81, 93, 97, 46, 46, 50, 55, 70, 50, 61, + 97, 80, 66, 81, 83, 80, 80, 81, 81, 82, + 80, 100, 81, 71, 90, 161, 91, 94, 92, 102, + 83, 161, 57, 58, 94, 98, 82, 96, 64, 72, + 105, 86, 154, 98, 65, 63, 118, 74, 104, 143, + 106, 55, 87, 76, 76, 66, 61, 107, 80, 107, + + 81, 108, 75, 119, 104, 143, 106, 67, 67, 67, + 80, 66, 161, 67, 67, 67, 114, 114, 114, 110, + 115, 115, 115, 93, 161, 70, 112, 67, 67, 67, + 96, 70, 80, 68, 81, 80, 149, 81, 116, 80, + 80, 81, 81, 93, 148, 70, 124, 113, 90, 123, + 91, 90, 161, 161, 97, 92, 94, 72, 97, 133, + 133, 71, 124, 72, 105, 123, 72, 131, 131, 126, + 117, 161, 161, 161, 106, 127, 94, 72, 130, 132, + 130, 132, 131, 133, 114, 114, 114, 98, 143, 161, + 106, 98, 67, 67, 67, 115, 115, 115, 110, 135, + + 135, 135, 139, 138, 80, 112, 81, 80, 109, 81, + 70, 109, 108, 116, 80, 140, 81, 108, 80, 141, + 81, 161, 80, 80, 81, 81, 113, 135, 135, 135, + 80, 140, 81, 129, 72, 141, 150, 80, 80, 81, + 81, 80, 72, 81, 80, 117, 81, 144, 128, 117, + 145, 150, 69, 134, 88, 69, 134, 151, 136, 137, + 122, 136, 137, 155, 111, 146, 147, 111, 146, 147, + 152, 153, 156, 152, 153, 156, 73, 117, 121, 73, + 155, 43, 43, 43, 43, 43, 43, 43, 43, 43, + 43, 43, 43, 43, 43, 43, 43, 
43, 47, 47, + + 47, 47, 47, 47, 47, 47, 47, 47, 47, 47, + 47, 47, 47, 47, 47, 53, 53, 64, 53, 53, + 53, 53, 53, 53, 53, 53, 53, 53, 53, 53, + 53, 53, 56, 56, 56, 56, 56, 56, 56, 56, + 56, 56, 56, 56, 56, 56, 56, 56, 56, 59, + 59, 103, 59, 59, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 59, 59, 59, 69, 69, 158, 159, + 69, 158, 159, 160, 69, 69, 160, 69, 69, 69, + 69, 69, 73, 57, 73, 73, 73, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 79, 79, 79, 79, + + 101, 79, 79, 51, 79, 79, 89, 89, 89, 89, + 99, 89, 89, 95, 89, 89, 92, 92, 92, 92, + 88, 92, 92, 92, 92, 92, 92, 92, 92, 92, + 92, 92, 92, 96, 96, 96, 96, 96, 96, 85, + 96, 96, 96, 96, 96, 96, 96, 96, 96, 96, + 111, 111, 84, 64, 111, 52, 111, 111, 111, 111, + 51, 111, 111, 111, 111, 111, 69, 161, 69, 69, + 69, 69, 161, 161, 161, 161, 161, 161, 69, 120, + 161, 120, 161, 161, 120, 120, 161, 120, 125, 161, + 161, 125, 125, 161, 125, 125, 111, 161, 111, 111, + + 111, 111, 161, 161, 161, 161, 161, 161, 111, 142, + 142, 142, 161, 161, 142, 142, 161, 142, 142, 9, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161 + } ; + +static const flex_int16_t yy_chk[676] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + + 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, + 5, 6, 7, 5, 6, 8, 14, 25, 7, 15, + 15, 8, 16, 19, 19, 22, 22, 154, 26, 23, + 23, 23, 25, 26, 43, 23, 22, 27, 29, 27, + 29, 44, 47, 5, 6, 7, 14, 23, 8, 16, + 48, 28, 22, 28, 29, 34, 35, 34, 35, 28, + 36, 53, 36, 23, 42, 46, 42, 43, 46, 59, + 29, 50, 56, 56, 44, 47, 28, 50, 64, 23, + 65, 35, 
149, 48, 63, 63, 75, 73, 64, 143, + 65, 53, 36, 169, 169, 63, 59, 66, 79, 66, + + 79, 66, 73, 75, 64, 142, 65, 67, 67, 67, + 80, 63, 80, 69, 69, 69, 70, 70, 70, 69, + 74, 74, 74, 92, 98, 67, 69, 81, 81, 81, + 98, 69, 82, 81, 82, 83, 139, 83, 74, 86, + 87, 86, 87, 93, 138, 81, 83, 69, 89, 82, + 89, 90, 94, 90, 96, 94, 92, 67, 97, 133, + 132, 81, 83, 69, 105, 82, 70, 131, 130, 86, + 74, 112, 112, 112, 105, 87, 93, 81, 104, 106, + 104, 106, 104, 106, 114, 114, 114, 96, 125, 112, + 105, 97, 111, 111, 111, 115, 115, 115, 111, 116, + + 116, 116, 122, 121, 123, 111, 123, 124, 174, 124, + 111, 174, 108, 115, 126, 123, 126, 107, 140, 124, + 140, 112, 141, 127, 141, 127, 111, 135, 135, 135, + 144, 123, 144, 99, 114, 124, 141, 145, 151, 145, + 151, 155, 111, 155, 150, 115, 150, 126, 95, 116, + 127, 141, 179, 180, 88, 179, 180, 144, 182, 183, + 78, 182, 183, 150, 185, 186, 187, 185, 186, 187, + 188, 189, 190, 188, 189, 190, 191, 135, 77, 191, + 150, 162, 162, 162, 162, 162, 162, 162, 162, 162, + 162, 162, 162, 162, 162, 162, 162, 162, 163, 163, + + 163, 163, 163, 163, 163, 163, 163, 163, 163, 163, + 163, 163, 163, 163, 163, 164, 164, 62, 164, 164, + 164, 164, 164, 164, 164, 164, 164, 164, 164, 164, + 164, 164, 165, 165, 165, 165, 165, 165, 165, 165, + 165, 165, 165, 165, 165, 165, 165, 165, 165, 166, + 166, 60, 166, 166, 166, 166, 166, 166, 166, 166, + 166, 166, 166, 166, 166, 166, 167, 167, 192, 193, + 167, 192, 193, 194, 167, 167, 194, 167, 167, 167, + 167, 167, 168, 58, 168, 168, 168, 168, 168, 168, + 168, 168, 168, 168, 168, 168, 170, 170, 170, 170, + + 54, 170, 170, 51, 170, 170, 171, 171, 171, 171, + 49, 171, 171, 45, 171, 171, 172, 172, 172, 172, + 40, 172, 172, 172, 172, 172, 172, 172, 172, 172, + 172, 172, 172, 173, 173, 173, 173, 173, 173, 33, + 173, 173, 173, 173, 173, 173, 173, 173, 173, 173, + 175, 175, 32, 21, 175, 13, 175, 175, 175, 175, + 11, 175, 175, 175, 175, 175, 176, 9, 176, 176, + 176, 176, 0, 0, 0, 0, 0, 0, 176, 177, + 0, 177, 0, 0, 177, 177, 0, 177, 
178, 0, + 0, 178, 178, 0, 178, 178, 181, 0, 181, 181, + + 181, 181, 0, 0, 0, 0, 0, 0, 181, 184, + 184, 184, 0, 0, 184, 184, 0, 184, 184, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161, 161, 161, 161, 161, 161, + 161, 161, 161, 161, 161 + } ; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. + */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +#line 1 "./turtle_lexer.l" +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_lexer.l - Raptor Turtle lexer - making tokens for turtle grammar generator + * + * Copyright (C) 2003-2013, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. + * + * + * Turtle is defined in http://www.dajobe.org/2004/01/turtle/ + * + * To generate the C files from this source, rather than use the + * shipped turtle_lexer.c/.h needs a patched version of flex 2.5.31 such + * as the one available in Debian GNU/Linux. Details below + * near the %option descriptions. 
+ * + */ +/* recognise 8-bits */ +/* all symbols prefixed by this */ +/* This is not needed, flex is invoked -oturtle_lexer.c */ +/* %option outfile="turtle_lexer.c" */ +/* Emit a C header file for prototypes + * Only available in flex 2.5.13 or newer. + * It was renamed to header-file in flex 2.5.19 + */ +/* Do not emit #include <unistd.h> + * Only available in flex 2.5.7 or newer. + * Broken in flex 2.5.31 without patches. + */ +#define YY_NO_UNISTD_H 1 +/* Never interactive */ +/* No isatty() check */ +/* Batch scanner */ +/* Never use yyunput */ +/* Supply our own alloc/realloc/free functions */ +/* Re-entrant scanner */ +/* Makes yyget_lval() yyset_lval() and yylval appear */ +/* Makes yyget_lloc() yyset_lloc() and yylloc appear */ +/* %option bison-locations */ +#line 79 "./turtle_lexer.l" + /* definitions */ + +/* NOTE: These headers are NOT included here but are inserted by + * fix-flex since otherwise it appears far too late in the generated C + */ + +/* +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif +*/ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SETJMP_H +#include <setjmp.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> +#include <turtle_common.h> + +#define YYSTYPE TURTLE_PARSER_STYPE + +/* Prototypes */ +static unsigned char *turtle_copy_token(unsigned char *text, size_t len); +static unsigned char *turtle_copy_string_token(raptor_parser* rdf_parser, unsigned char *text, size_t len, int delim); +void turtle_lexer_syntax_error(void* ctx, const char *message, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); + +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval); +#endif + +#ifdef __cplusplus +#define INPUT_FN yyinput +#else +#define INPUT_FN input +#endif + + +#if FLEX_VERSION_DECIMAL < 20536 +/* debian flex 2.5.35-10.1 added these column header prototypes in + * re-entrant mode. standard flex omits them + */ +void turtle_lexer_set_column(int column_no, yyscan_t yyscanner); +int turtle_lexer_get_column(yyscan_t yyscanner); +#endif + +static void turtle_lexer_cleanup(yyscan_t yyscanner); +#undef yycleanup +#define yycleanup turtle_lexer_cleanup + +#ifdef HAVE_SETJMP +static jmp_buf turtle_lexer_fatal_error_longjmp_env; + +/* fatal error handler declaration */ +#define YY_FATAL_ERROR(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, msg); \ + longjmp(turtle_lexer_fatal_error_longjmp_env, 1); \ +} while(0) +#else +#define YY_FATAL_ERROR(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, msg); \ + abort(); \ +} while(0) +#endif + +/* Remove the re-fill function since it should never be called */ +#define YY_INPUT(buf,result,max_size) { return YY_NULL; } + +static void turtle_lexer_error(yyscan_t yyscanner, raptor_log_level level, yyconst char *message, ...) 
RAPTOR_PRINTF_FORMAT(3, 4); + +/* Fatal error handler that returns EOF instead of abort()/longjmp() + * so that parser can clean up properly. + * + * Logs msg at RAPTOR_LOG_LEVEL_FATAL through a "%s" format and then + * expands yyterminate() (by default a return statement), so it can only + * be used inside the scanner function. */ +#define YY_FATAL_ERROR_EOF(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, "%s", msg); \ + yyterminate(); \ +} while(0) + +/* Out-of-memory reporting macro: reports turtle_lexer_oom_text through + * YY_FATAL_ERROR_EOF */ +#define TURTLE_LEXER_OOM() YY_FATAL_ERROR_EOF(turtle_lexer_oom_text) +static char turtle_lexer_oom_text[]="turtle_lexer: Out of memory"; + +/* Do not need input() to read from stdin */ +#define YY_NO_INPUT 1 + /* Expanded by YY_RULE_SETUP before every rule action: adds the length of the matched text to turtle_parser->consumed */ +#define YY_USER_ACTION \ + turtle_parser->consumed += yyleng; + +#line 1006 "turtle_lexer.c" +/* Tokens from Turtle 2013 spec - lex-ifyed to remove unicode ranges */ +/* flex: only 1 level of definition expansion so have to expand PLX */ + +#line 1010 "turtle_lexer.c" + /* Scanner start conditions. Rules 9 and 10 switch to PREF when they return PREFIX / SPARQL_PREFIX; rules 22 and 26 switch to LONG_DLITERAL / LONG_SLITERAL, which stay active while a long literal is accumulated in turtle_parser->sb */ +#define INITIAL 0 +#define PREF 1 +#define LONG_DLITERAL 2 +#define LONG_SLITERAL 3 + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. + * The user has a chance to override it with an option. + */ +#ifndef YY_NO_UNISTD_H +#include <unistd.h> +#endif +#endif + +#define YY_EXTRA_TYPE raptor_parser* + +/* Holds the entire state of the reentrant scanner. */ +struct yyguts_t + { + + /* User-defined. Not touched by flex. */ + YY_EXTRA_TYPE yyextra_r; + + /* The rest are the same as the globals declared in the non-reentrant scanner. */ + FILE *yyin_r, *yyout_r; + size_t yy_buffer_stack_top; /**< index of top of stack. */ + size_t yy_buffer_stack_max; /**< capacity of stack. */ + YY_BUFFER_STATE * yy_buffer_stack; /**< Stack as an array. 
*/ + char yy_hold_char; + int yy_n_chars; + int yyleng_r; + char *yy_c_buf_p; + int yy_init; + int yy_start; + int yy_did_buffer_switch_on_eof; + int yy_start_stack_ptr; + int yy_start_stack_depth; + int *yy_start_stack; + yy_state_type yy_last_accepting_state; + char* yy_last_accepting_cpos; + + int yylineno_r; + int yy_flex_debug_r; + + char *yytext_r; + int yy_more_flag; + int yy_more_len; + + YYSTYPE * yylval_r; + + }; /* end struct yyguts_t */ + +static int yy_init_globals ( yyscan_t yyscanner ); + + /* This must go here because YYSTYPE and YYLTYPE are included + * from bison output in section 1.*/ + # define yylval yyg->yylval_r + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( yyscan_t yyscanner ); + +int yyget_debug ( yyscan_t yyscanner ); + +void yyset_debug ( int debug_flag , yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t yyscanner ); + +FILE *yyget_in ( yyscan_t yyscanner ); + +void yyset_in ( FILE * _in_str , yyscan_t yyscanner ); + +FILE *yyget_out ( yyscan_t yyscanner ); + +void yyset_out ( FILE * _out_str , yyscan_t yyscanner ); + + int yyget_leng ( yyscan_t yyscanner ); + +char *yyget_text ( yyscan_t yyscanner ); + +int yyget_lineno ( yyscan_t yyscanner ); + +void yyset_lineno ( int _line_number , yyscan_t yyscanner ); + +int yyget_column ( yyscan_t yyscanner ); + +void yyset_column ( int _column_no , yyscan_t yyscanner ); + +YYSTYPE * yyget_lval ( yyscan_t yyscanner ); + +void yyset_lval ( YYSTYPE * yylval_param , yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. 
+ */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( yyscan_t yyscanner ); +#else +extern int yywrap ( yyscan_t yyscanner ); +#endif +#endif + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int , yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * , yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput ( yyscan_t yyscanner ); +#else +static int input ( yyscan_t yyscanner ); +#endif + +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Copy whatever the last rule matched to the standard output. */ +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO do { if (fwrite( yytext, (size_t) yyleng, 1, yyout )) {} } while (0) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". 
+ */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( YY_CURRENT_BUFFER_LVALUE->yy_is_interactive ) \ + { \ + int c = '*'; \ + int n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = (int) fread(buf, 1, (yy_size_t) max_size, yyin)) == 0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + }\ +\ + +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg , yyscanner) +#endif + +/* end tables serialization structures and prototypes */ + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. 
*/ +#ifndef YY_BREAK +#define YY_BREAK /*LINTED*/break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +/** The main scanner function which does all the work. + */ +YY_DECL +{ + yy_state_type yy_current_state; + char *yy_cp, *yy_bp; + int yy_act; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yylval = yylval_param; + + if ( !yyg->yy_init ) + { + yyg->yy_init = 1; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yyg->yy_start ) + yyg->yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! YY_CURRENT_BUFFER ) { + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); + } + + yy_load_buffer_state( yyscanner ); + } + + { +#line 209 "./turtle_lexer.l" + +#line 211 "./turtle_lexer.l" + /* rules */ + + + raptor_parser *rdf_parser = yyextra; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + +#ifdef HAVE_SETJMP + if(setjmp(turtle_lexer_fatal_error_longjmp_env)) + return 1; +#endif + + + +#line 1301 "turtle_lexer.c" + + while ( /*CONSTCOND*/1 ) /* loops until end-of-file is reached */ + { + yy_cp = yyg->yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yyg->yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yyg->yy_start; +yy_match: + do + { + YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)] ; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 162 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + ++yy_cp; + } + while ( yy_current_state != 161 ); + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + + YY_DO_BEFORE_ACTION; + +do_action: /* This label is used only to access EOF actions. */ + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yyg->yy_hold_char; + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + +case 1: +/* rule 1 can match eol */ +YY_RULE_SETUP +#line 224 "./turtle_lexer.l" +{ turtle_parser->lineno++; } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 226 "./turtle_lexer.l" +{ /* empty */ } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 229 "./turtle_lexer.l" +{ return A; } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 231 "./turtle_lexer.l" +{ return DOT; } + YY_BREAK +case 5: +YY_RULE_SETUP +#line 232 "./turtle_lexer.l" +{ return COMMA; } + YY_BREAK +case 6: +YY_RULE_SETUP +#line 233 "./turtle_lexer.l" +{ return SEMICOLON; } + YY_BREAK +case 7: +YY_RULE_SETUP +#line 234 "./turtle_lexer.l" +{ return LEFT_SQUARE; } + YY_BREAK +case 8: +YY_RULE_SETUP +#line 235 "./turtle_lexer.l" +{ return RIGHT_SQUARE; } + YY_BREAK +case 9: +YY_RULE_SETUP +#line 236 "./turtle_lexer.l" +{ BEGIN(PREF); return PREFIX; } + YY_BREAK +case 10: +YY_RULE_SETUP +#line 237 "./turtle_lexer.l" +{ BEGIN(PREF); + return 
SPARQL_PREFIX; } + YY_BREAK +case 11: +YY_RULE_SETUP +#line 239 "./turtle_lexer.l" +{ return BASE; } + YY_BREAK +case 12: +YY_RULE_SETUP +#line 240 "./turtle_lexer.l" +{ return SPARQL_BASE; } + YY_BREAK +case 13: +YY_RULE_SETUP +#line 241 "./turtle_lexer.l" +{ return HAT; } + YY_BREAK +case 14: +YY_RULE_SETUP +#line 242 "./turtle_lexer.l" +{ return LEFT_ROUND; } + YY_BREAK +case 15: +YY_RULE_SETUP +#line 243 "./turtle_lexer.l" +{ return RIGHT_ROUND; } + YY_BREAK +case 16: +YY_RULE_SETUP +#line 244 "./turtle_lexer.l" +{ return LEFT_CURLY; } + YY_BREAK +case 17: +YY_RULE_SETUP +#line 245 "./turtle_lexer.l" +{ return RIGHT_CURLY; } + YY_BREAK +case 18: +YY_RULE_SETUP +#line 246 "./turtle_lexer.l" +{ return TRUE_TOKEN; } + YY_BREAK +case 19: +YY_RULE_SETUP +#line 247 "./turtle_lexer.l" +{ return FALSE_TOKEN; } + YY_BREAK +case 20: +YY_RULE_SETUP +#line 250 "./turtle_lexer.l" +{ yylval->string = turtle_copy_string_token(rdf_parser, (unsigned char*)yytext+1, yyleng-2, '"'); /* ' */ + if(!yylval->string) + yyterminate(); + + return STRING_LITERAL; } + YY_BREAK +case 21: +YY_RULE_SETUP +#line 256 "./turtle_lexer.l" +{ yylval->string = turtle_copy_string_token(rdf_parser, (unsigned char*)yytext+1, yyleng-2, '"'); /* ' */ + if(!yylval->string) + yyterminate(); + + return STRING_LITERAL; } + YY_BREAK +case 22: +YY_RULE_SETUP +#line 262 "./turtle_lexer.l" +{ BEGIN(LONG_DLITERAL); + turtle_parser->sb = raptor_new_stringbuffer(); + if(!turtle_parser->sb) + TURTLE_LEXER_OOM(); + } + YY_BREAK +case 23: +YY_RULE_SETUP +#line 268 "./turtle_lexer.l" +{ + size_t len; + + BEGIN(INITIAL); + len = raptor_stringbuffer_length(turtle_parser->sb); + yylval->string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!yylval->string) + TURTLE_LEXER_OOM(); + raptor_stringbuffer_copy_to_string(turtle_parser->sb, (unsigned char*)yylval->string, len); + yylval->string[len]='\0'; + + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return STRING_LITERAL; } + YY_BREAK +case 24: 
+/* rule 24 can match eol */ +YY_RULE_SETUP +#line 283 "./turtle_lexer.l" +{ + char *p; + + if(*yytext == EOF) { + BEGIN(INITIAL); + turtle_syntax_error(rdf_parser, "End of file in middle of literal"); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return EOF; + } + + for(p = yytext; *p; p++) { + if(*p == '\n') + turtle_parser->lineno++; + } + + if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + + } + YY_BREAK +case 25: +YY_RULE_SETUP +#line 308 "./turtle_lexer.l" +{ + /* this should only happen if \ is at the end of the file so the Turtle doc is illegal anyway */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + turtle_syntax_error(rdf_parser, "End of file in middle of \"\"\"literal\"\"\""); + yyterminate(); +} + YY_BREAK +case YY_STATE_EOF(LONG_DLITERAL): +#line 317 "./turtle_lexer.l" +{ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + if(!turtle_parser->is_end) { + /* next run will fix things, hopefully */ + return EOF; + } + /* otherwise abort */ + turtle_syntax_error(rdf_parser, "End of file in middle of \"\"\"literal\"\"\""); + yyterminate(); +} + YY_BREAK +case 26: +YY_RULE_SETUP +#line 330 "./turtle_lexer.l" +{ BEGIN(LONG_SLITERAL); + turtle_parser->sb = raptor_new_stringbuffer(); + if(!turtle_parser->sb) + TURTLE_LEXER_OOM(); + } + YY_BREAK +case 27: +YY_RULE_SETUP +#line 336 "./turtle_lexer.l" +{ + size_t len; + + BEGIN(INITIAL); + len = raptor_stringbuffer_length(turtle_parser->sb); + yylval->string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!yylval->string) + TURTLE_LEXER_OOM(); + 
raptor_stringbuffer_copy_to_string(turtle_parser->sb, (unsigned char*)yylval->string, len); + yylval->string[len]='\0'; + + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return STRING_LITERAL; } + YY_BREAK +case 28: +/* rule 28 can match eol */ +YY_RULE_SETUP +#line 351 "./turtle_lexer.l" +{ + char *p; + + if(*yytext == EOF) { + BEGIN(INITIAL); + turtle_syntax_error(rdf_parser, "End of file in middle of \'\'\'literal\'\'\'"); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return EOF; + } + + for(p = yytext; *p; p++) { + if(*p == '\n') + turtle_parser->lineno++; + } + + if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + + } + YY_BREAK +case 29: +YY_RULE_SETUP +#line 376 "./turtle_lexer.l" +{ + /* this should only happen if \ is at the end of the file so the Turtle doc is illegal anyway */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + turtle_syntax_error(rdf_parser, "End of file in middle of '''literal'''"); + yyterminate(); +} + YY_BREAK +case YY_STATE_EOF(LONG_SLITERAL): +#line 385 "./turtle_lexer.l" +{ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + if(!turtle_parser->is_end) { + /* next run will fix things, hopefully */ + return EOF; + } + /* otherwise abort */ + turtle_syntax_error(rdf_parser, "End of file in middle of '''literal'''"); + yyterminate(); +} + YY_BREAK +case 30: +YY_RULE_SETUP +#line 398 "./turtle_lexer.l" +{ yylval->string = turtle_copy_token((unsigned char*)yytext+2, yyleng-2); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return BLANK_LITERAL; } + YY_BREAK +case 31: 
+YY_RULE_SETUP +#line 403 "./turtle_lexer.l" +{ yylval->uri = turtle_qname_to_uri(rdf_parser, (unsigned char*)yytext, yyleng); + if(!yylval->uri) { + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_ERROR, "Failed to convert qname %s to URI", yytext); + yyterminate(); + } + + return QNAME_LITERAL; } + YY_BREAK +case 32: +YY_RULE_SETUP +#line 411 "./turtle_lexer.l" +{ yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return DECIMAL_LITERAL; +} + YY_BREAK +case 33: +YY_RULE_SETUP +#line 417 "./turtle_lexer.l" +{ yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return FLOATING_LITERAL; +} + YY_BREAK +case 34: +YY_RULE_SETUP +#line 423 "./turtle_lexer.l" +{ yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return INTEGER_LITERAL; } + YY_BREAK +case 35: +YY_RULE_SETUP +#line 428 "./turtle_lexer.l" +{ /* eat up leading whitespace */ } + YY_BREAK +case 36: +YY_RULE_SETUP +#line 429 "./turtle_lexer.l" +{ yylval->string=turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + BEGIN(INITIAL); + return IDENTIFIER; } + YY_BREAK +case 37: +YY_RULE_SETUP +#line 434 "./turtle_lexer.l" +{ BEGIN(INITIAL); + yylval->string = turtle_copy_token((unsigned char*)yytext, 0); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return IDENTIFIER; } + YY_BREAK +case 38: +/* rule 38 can match eol */ +YY_RULE_SETUP +#line 440 "./turtle_lexer.l" +{ BEGIN(INITIAL); + if(*yytext == EOF) + return EOF; + + turtle_syntax_error(rdf_parser, "syntax error at '%c'", *yytext); + yyterminate(); } + YY_BREAK +case 39: +/* rule 39 can match eol */ +YY_RULE_SETUP +#line 448 "./turtle_lexer.l" +{ + raptor_stringbuffer* sb; + unsigned char* 
uri_string; + + /* make length just the IRI */ + while(yytext[yyleng - 1] != '>') + yyleng--; + + sb = raptor_new_stringbuffer(); + if(!sb) + TURTLE_LEXER_OOM(); + + /* start at yytext + 1 to skip '<' and operate over + * length-2 bytes to skip '<' and '>' + */ + if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-2, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) { + raptor_free_stringbuffer(sb); + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + uri_string = raptor_stringbuffer_as_string(sb); + + if(!*uri_string) + yylval->uri = raptor_uri_copy(rdf_parser->base_uri); + else + yylval->uri = raptor_new_uri_relative_to_base(rdf_parser->world, rdf_parser->base_uri, uri_string); + + raptor_free_stringbuffer(sb); + + if(!yylval->uri) + TURTLE_LEXER_OOM(); + return GRAPH_NAME_LEFT_CURLY; } + YY_BREAK +case 40: +/* rule 40 can match eol */ +YY_RULE_SETUP +#line 480 "./turtle_lexer.l" +{ /* drop the trailing '{', whitespace and '=' characters from the match, then terminate yytext right after the qname */ + while(1) { + int c = yytext[yyleng - 1]; + if(c == '{' || c == ' ' || c=='\t' || c == '\v' || c == '\n' || + c == '=') { + yyleng--; + } else + break; + } + yytext[yyleng] = '\0'; + + yylval->uri = turtle_qname_to_uri(rdf_parser, (unsigned char*)yytext, yyleng); + if(!yylval->uri) { + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_ERROR, "Failed to convert qname %s to URI", yytext); + yyterminate(); + } + + return GRAPH_NAME_LEFT_CURLY; } + YY_BREAK +case 41: +YY_RULE_SETUP +#line 499 "./turtle_lexer.l" +{ if(yyleng == 2) + yylval->uri = raptor_uri_copy(rdf_parser->base_uri); + else { + raptor_stringbuffer* sb; + unsigned char* uri_string; + + yytext[yyleng-1]='\0'; + sb = raptor_new_stringbuffer(); + if(!sb) + TURTLE_LEXER_OOM(); + /* start at yytext + 1 to skip '<' and operate over yyleng-2 bytes + * so that neither '<' nor the terminator that replaced '>' is + * appended to the string buffer + */ + if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-2, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) { + raptor_free_stringbuffer(sb); + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + 
} + uri_string = raptor_stringbuffer_as_string(sb); + yylval->uri = raptor_new_uri_relative_to_base(rdf_parser->world, rdf_parser->base_uri, uri_string); + if(!yylval->uri) { + raptor_free_stringbuffer(sb); + TURTLE_LEXER_OOM(); + } + raptor_free_stringbuffer(sb); + } + return URI_LITERAL; } + YY_BREAK +case 42: +YY_RULE_SETUP +#line 523 "./turtle_lexer.l" +{ yylval->string = turtle_copy_token((unsigned char*)yytext+1, yyleng-1); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return LANGTAG; } + YY_BREAK +case 43: +/* rule 43 can match eol */ +YY_RULE_SETUP +#line 528 "./turtle_lexer.l" +{ /* # comment */ + turtle_parser->lineno++; + } + YY_BREAK +case 44: +YY_RULE_SETUP +#line 532 "./turtle_lexer.l" +{ /* # comment on the last line with no terminating newline */ + } + YY_BREAK +case 45: +YY_RULE_SETUP +#line 535 "./turtle_lexer.l" +{ if(*yytext == EOF) + return EOF; + + turtle_syntax_error(rdf_parser, "syntax error at '%c'", *yytext); + yyterminate(); + } + YY_BREAK +case 46: +YY_RULE_SETUP +#line 542 "./turtle_lexer.l" +YY_FATAL_ERROR( "flex scanner jammed" ); + YY_BREAK +#line 1837 "turtle_lexer.c" +case YY_STATE_EOF(INITIAL): +case YY_STATE_EOF(PREF): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. */ + int yy_amount_of_matched_text = (int) (yy_cp - yyg->yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yyg->yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between YY_CURRENT_BUFFER and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. 
+ */ + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + YY_CURRENT_BUFFER_LVALUE->yy_input_file = yyin; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yyg->yy_c_buf_p <= &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yyg->yy_c_buf_p = yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state , yyscanner); + + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. */ + yy_cp = ++yyg->yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yyg->yy_last_accepting_cpos; + yy_current_state = yyg->yy_last_accepting_state; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_END_OF_FILE: + { + yyg->yy_did_buffer_switch_on_eof = 0; + + if ( yywrap( yyscanner ) ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. 
+ */ + yyg->yy_c_buf_p = yyg->yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = + yyg->yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yyg->yy_c_buf_p = + &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars]; + + yy_current_state = yy_get_previous_state( yyscanner ); + + yy_cp = yyg->yy_c_buf_p; + yy_bp = yyg->yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of user's declarations */ +} /* end of yylex */ + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ +static int yy_get_next_buffer (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + char *dest = YY_CURRENT_BUFFER_LVALUE->yy_ch_buf; + char *source = yyg->yytext_ptr; + int number_to_move, i; + int ret_val; + + if ( yyg->yy_c_buf_p > &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yyg->yy_c_buf_p - yyg->yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. 
+ */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr - 1); + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( YY_CURRENT_BUFFER_LVALUE->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars = 0; + + else + { + int num_to_read = + YY_CURRENT_BUFFER_LVALUE->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = YY_CURRENT_BUFFER_LVALUE; + + int yy_c_buf_p_offset = + (int) (yyg->yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yyrealloc( (void *) b->yy_ch_buf, + (yy_size_t) (b->yy_buf_size + 2) , yyscanner ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = NULL; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yyg->yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = YY_CURRENT_BUFFER_LVALUE->yy_buf_size - + number_to_move - 1; + + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[number_to_move]), + yyg->yy_n_chars, num_to_read ); + + + } + + if ( yyg->yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin , yyscanner); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + YY_CURRENT_BUFFER_LVALUE->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + if ((yyg->yy_n_chars + number_to_move) > YY_CURRENT_BUFFER_LVALUE->yy_buf_size) { + /* Extend the array by 50%, plus the number we really need. */ + int new_size = yyg->yy_n_chars + number_to_move + (yyg->yy_n_chars >> 1); + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf = (char *) yyrealloc( + (void *) YY_CURRENT_BUFFER_LVALUE->yy_ch_buf, (yy_size_t) new_size , yyscanner ); + if ( ! YY_CURRENT_BUFFER_LVALUE->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_get_next_buffer()" ); + /* "- 2" to take care of EOB's */ + YY_CURRENT_BUFFER_LVALUE->yy_buf_size = (int) (new_size - 2); + } + + yyg->yy_n_chars += number_to_move; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] = YY_END_OF_BUFFER_CHAR; + YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yyg->yytext_ptr = &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[0]; + + return ret_val; +} + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + + static yy_state_type yy_get_previous_state (yyscan_t yyscanner) +{ + yy_state_type yy_current_state; + char *yy_cp; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_current_state = yyg->yy_start; + + for ( yy_cp = yyg->yytext_ptr + YY_MORE_ADJ; yy_cp < yyg->yy_c_buf_p; ++yy_cp ) + { + YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 162 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + } + + return yy_current_state; +} + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + static yy_state_type yy_try_NUL_trans (yy_state_type yy_current_state , yyscan_t yyscanner) +{ + int yy_is_jam; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; /* This var may be unused depending upon options. */ + char *yy_cp = yyg->yy_c_buf_p; + + YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yyg->yy_last_accepting_state = yy_current_state; + yyg->yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 162 ) + yy_c = yy_meta[yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + yy_c]; + yy_is_jam = (yy_current_state == 161); + + (void)yyg; + return yy_is_jam ? 0 : yy_current_state; +} + +#ifndef YY_NO_UNPUT + +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus + static int yyinput (yyscan_t yyscanner) +#else + static int input (yyscan_t yyscanner) +#endif + +{ + int c; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + *yyg->yy_c_buf_p = yyg->yy_hold_char; + + if ( *yyg->yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. 
+ */ + if ( yyg->yy_c_buf_p < &YY_CURRENT_BUFFER_LVALUE->yy_ch_buf[yyg->yy_n_chars] ) + /* This was really a NUL. */ + *yyg->yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = (int) (yyg->yy_c_buf_p - yyg->yytext_ptr); + ++yyg->yy_c_buf_p; + + switch ( yy_get_next_buffer( yyscanner ) ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin , yyscanner); + + /*FALLTHROUGH*/ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap( yyscanner ) ) + return 0; + + if ( ! yyg->yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(yyscanner); +#else + return input(yyscanner); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yyg->yy_c_buf_p = yyg->yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yyg->yy_c_buf_p; /* cast for 8-bit char's */ + *yyg->yy_c_buf_p = '\0'; /* preserve yytext */ + yyg->yy_hold_char = *++yyg->yy_c_buf_p; + + return c; +} +#endif /* ifndef YY_NO_INPUT */ + +/** Immediately switch to a different input stream. + * @param input_file A readable stream. + * @param yyscanner The scanner object. + * @note This function does not reset the start condition to @c INITIAL . + */ + void yyrestart (FILE * input_file , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! yyg->yy_buffer_stack ){ + yyensure_buffer_stack (yyscanner); + YY_CURRENT_BUFFER_LVALUE = + yy_create_buffer( yyin, YY_BUF_SIZE , yyscanner); + } + + yy_init_buffer( YY_CURRENT_BUFFER, input_file , yyscanner); + yy_load_buffer_state( yyscanner ); +} + +/** Switch to a different input buffer. + * @param new_buffer The new input buffer. + * @param yyscanner The scanner object. 
+ */ + void yy_switch_to_buffer (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* TODO. We should be able to replace this entire function body + * with + * yypop_buffer_state(); + * yypush_buffer_state(new_buffer); + */ + yyensure_buffer_stack (yyscanner); + if ( YY_CURRENT_BUFFER_LVALUE == new_buffer ) + return; + + if ( YY_CURRENT_BUFFER_LVALUE ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + YY_CURRENT_BUFFER_LVALUE = new_buffer; + yy_load_buffer_state( yyscanner ); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yyg->yy_did_buffer_switch_on_eof = 1; +} + +static void yy_load_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyg->yy_n_chars = YY_CURRENT_BUFFER_LVALUE->yy_n_chars; + yyg->yytext_ptr = yyg->yy_c_buf_p = YY_CURRENT_BUFFER_LVALUE->yy_buf_pos; + yyin = YY_CURRENT_BUFFER_LVALUE->yy_input_file; + yyg->yy_hold_char = *yyg->yy_c_buf_p; +} + +/** Allocate and initialize an input buffer state. + * @param file A readable stream. + * @param size The character buffer size in bytes. When in doubt, use @c YY_BUF_SIZE. + * @param yyscanner The scanner object. + * @return the allocated buffer state. + */ + YY_BUFFER_STATE yy_create_buffer (FILE * file, int size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. 
+ */ + b->yy_ch_buf = (char *) yyalloc( (yy_size_t) (b->yy_buf_size + 2) , yyscanner ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file , yyscanner); + + return b; +} + +/** Destroy the buffer. + * @param b a buffer created with yy_create_buffer() + * @param yyscanner The scanner object. + */ + void yy_delete_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if ( ! b ) + return; + + if ( b == YY_CURRENT_BUFFER ) /* Not sure if we should pop here. */ + YY_CURRENT_BUFFER_LVALUE = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yyfree( (void *) b->yy_ch_buf , yyscanner ); + + yyfree( (void *) b , yyscanner ); +} + +/* Initializes or reinitializes a buffer. + * This function is sometimes called more than once on the same buffer, + * such as during a yyrestart() or at EOF. + */ + static void yy_init_buffer (YY_BUFFER_STATE b, FILE * file , yyscan_t yyscanner) + +{ + int oerrno = errno; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + yy_flush_buffer( b , yyscanner); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + + /* If b is the current buffer, then yy_init_buffer was _probably_ + * called from yyrestart() or through yy_get_next_buffer. + * In that case, we don't want to reset the lineno or column. + */ + if (b != YY_CURRENT_BUFFER){ + b->yy_bs_lineno = 1; + b->yy_bs_column = 0; + } + + b->yy_is_interactive = 0; + + errno = oerrno; +} + +/** Discard all buffered characters. On the next scan, YY_INPUT will be called. + * @param b the buffer state to be flushed, usually @c YY_CURRENT_BUFFER. + * @param yyscanner The scanner object. + */ + void yy_flush_buffer (YY_BUFFER_STATE b , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. 
The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. + */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == YY_CURRENT_BUFFER ) + yy_load_buffer_state( yyscanner ); +} + +/** Pushes the new state onto the stack. The new state becomes + * the current state. This function will allocate the stack + * if necessary. + * @param new_buffer The new state. + * @param yyscanner The scanner object. + */ +void yypush_buffer_state (YY_BUFFER_STATE new_buffer , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (new_buffer == NULL) + return; + + yyensure_buffer_stack(yyscanner); + + /* This block is copied from yy_switch_to_buffer. */ + if ( YY_CURRENT_BUFFER_LVALUE ) + { + /* Flush out information for old buffer. */ + *yyg->yy_c_buf_p = yyg->yy_hold_char; + YY_CURRENT_BUFFER_LVALUE->yy_buf_pos = yyg->yy_c_buf_p; + YY_CURRENT_BUFFER_LVALUE->yy_n_chars = yyg->yy_n_chars; + } + + /* Only push if top exists. Otherwise, replace top. */ + if (YY_CURRENT_BUFFER_LVALUE) + yyg->yy_buffer_stack_top++; + YY_CURRENT_BUFFER_LVALUE = new_buffer; + + /* copied from yy_switch_to_buffer. */ + yy_load_buffer_state( yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; +} + +/** Removes and deletes the top of the stack, if present. + * The next element becomes the new top. + * @param yyscanner The scanner object. 
+ */ +void yypop_buffer_state (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + if (!YY_CURRENT_BUFFER) + return; + + yy_delete_buffer(YY_CURRENT_BUFFER , yyscanner); + YY_CURRENT_BUFFER_LVALUE = NULL; + if (yyg->yy_buffer_stack_top > 0) + --yyg->yy_buffer_stack_top; + + if (YY_CURRENT_BUFFER_LVALUE) { + yy_load_buffer_state( yyscanner ); + yyg->yy_did_buffer_switch_on_eof = 1; + } +} + +/* Allocates the stack if it does not exist. + * Guarantees space for at least one push. + */ +static void yyensure_buffer_stack (yyscan_t yyscanner) +{ + yy_size_t num_to_alloc; + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (!yyg->yy_buffer_stack) { + + /* First allocation is just for 2 elements, since we don't know if this + * scanner will even need a stack. We use 2 instead of 1 to avoid an + * immediate realloc on the next call. + */ + num_to_alloc = 1; /* After all that talk, this was set to 1 anyways... */ + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyalloc + (num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + memset(yyg->yy_buffer_stack, 0, num_to_alloc * sizeof(struct yy_buffer_state*)); + + yyg->yy_buffer_stack_max = num_to_alloc; + yyg->yy_buffer_stack_top = 0; + return; + } + + if (yyg->yy_buffer_stack_top >= (yyg->yy_buffer_stack_max) - 1){ + + /* Increase the buffer to prepare for a possible push. */ + yy_size_t grow_size = 8 /* arbitrary grow size */; + + num_to_alloc = yyg->yy_buffer_stack_max + grow_size; + yyg->yy_buffer_stack = (struct yy_buffer_state**)yyrealloc + (yyg->yy_buffer_stack, + num_to_alloc * sizeof(struct yy_buffer_state*) + , yyscanner); + if ( ! 
yyg->yy_buffer_stack ) + YY_FATAL_ERROR( "out of dynamic memory in yyensure_buffer_stack()" ); + + /* zero only the new slots.*/ + memset(yyg->yy_buffer_stack + yyg->yy_buffer_stack_max, 0, grow_size * sizeof(struct yy_buffer_state*)); + yyg->yy_buffer_stack_max = num_to_alloc; + } +} + +/** Setup the input buffer state to scan directly from a user-specified character buffer. + * @param base the character buffer + * @param size the size in bytes of the character buffer + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_buffer (char * base, yy_size_t size , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return NULL; + + b = (YY_BUFFER_STATE) yyalloc( sizeof( struct yy_buffer_state ) , yyscanner ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = (int) (size - 2); /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = NULL; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b , yyscanner ); + + return b; +} + +/** Setup the input buffer state to scan a string. The next call to yylex() will + * scan from a @e copy of @a str. + * @param yystr a NUL-terminated string to scan + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + * @note If you want to scan bytes that may contain NUL values, then use + * yy_scan_bytes() instead. + */ +YY_BUFFER_STATE yy_scan_string (const char * yystr , yyscan_t yyscanner) +{ + + return yy_scan_bytes( yystr, (int) strlen(yystr) , yyscanner); +} + +/** Setup the input buffer state to scan the given bytes. 
The next call to yylex() will + * scan from a @e copy of @a bytes. + * @param yybytes the byte buffer to scan + * @param _yybytes_len the number of bytes in the buffer pointed to by @a bytes. + * @param yyscanner The scanner object. + * @return the newly allocated buffer state object. + */ +YY_BUFFER_STATE yy_scan_bytes (const char * yybytes, yy_size_t _yybytes_len , yyscan_t yyscanner) +{ + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + yy_size_t i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = (yy_size_t) (_yybytes_len + 2); + buf = (char *) yyalloc( n , yyscanner ); + if ( ! buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < _yybytes_len; ++i ) + buf[i] = yybytes[i]; + + buf[_yybytes_len] = buf[_yybytes_len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n , yyscanner); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; +} + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + int yyless_macro_arg = (n); \ + YY_LESS_LINENO(yyless_macro_arg);\ + yytext[yyleng] = yyg->yy_hold_char; \ + yyg->yy_c_buf_p = yytext + yyless_macro_arg; \ + yyg->yy_hold_char = *yyg->yy_c_buf_p; \ + *yyg->yy_c_buf_p = '\0'; \ + yyleng = yyless_macro_arg; \ + } \ + while ( 0 ) + +/* Accessor methods (get/set functions) to struct members. */ + +/** Get the user-defined data for this scanner. + * @param yyscanner The scanner object. + */ +YY_EXTRA_TYPE yyget_extra (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyextra; +} + +/** Get the current line number. + * @param yyscanner The scanner object. 
+ */ +int yyget_lineno (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yylineno; +} + +/** Get the current column number. + * @param yyscanner The scanner object. + */ +int yyget_column (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + if (! YY_CURRENT_BUFFER) + return 0; + + return yycolumn; +} + +/** Get the input stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_in (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyin; +} + +/** Get the output stream. + * @param yyscanner The scanner object. + */ +FILE *yyget_out (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyout; +} + +/** Get the length of the current token. + * @param yyscanner The scanner object. + */ +int yyget_leng (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yyleng; +} + +/** Get the current token. + * @param yyscanner The scanner object. + */ + +char *yyget_text (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yytext; +} + +/** Set the user-defined data. This data is never touched by the scanner. + * @param user_defined The data to be associated with this scanner. + * @param yyscanner The scanner object. + */ +void yyset_extra (YY_EXTRA_TYPE user_defined , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyextra = user_defined ; +} + +/** Set the current line number. + * @param _line_number line number + * @param yyscanner The scanner object. + */ +void yyset_lineno (int _line_number , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* lineno is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_lineno called with no buffer" ); + + yylineno = _line_number; +} + +/** Set the current column. 
+ * @param _column_no column number + * @param yyscanner The scanner object. + */ +void yyset_column (int _column_no , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* column is only valid if an input buffer exists. */ + if (! YY_CURRENT_BUFFER ) + YY_FATAL_ERROR( "yyset_column called with no buffer" ); + + yycolumn = _column_no; +} + +/** Set the input stream. This does not discard the current + * input buffer. + * @param _in_str A readable stream. + * @param yyscanner The scanner object. + * @see yy_switch_to_buffer + */ +void yyset_in (FILE * _in_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyin = _in_str ; +} + +void yyset_out (FILE * _out_str , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yyout = _out_str ; +} + +int yyget_debug (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yy_flex_debug; +} + +void yyset_debug (int _bdebug , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yy_flex_debug = _bdebug ; +} + +/* Accessor methods for yylval and yylloc */ + +YYSTYPE * yyget_lval (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + return yylval; +} + +void yyset_lval (YYSTYPE * yylval_param , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + yylval = yylval_param; +} + +/* User-visible API */ + +/* yylex_init is special because it creates the scanner itself, so it is + * the ONLY reentrant function that doesn't take the scanner as the last argument. + * That's why we explicitly handle the declaration, instead of using our macros. 
+ */ +int yylex_init(yyscan_t* ptr_yy_globals) +{ + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), NULL ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + return yy_init_globals ( *ptr_yy_globals ); +} + +/* yylex_init_extra has the same functionality as yylex_init, but follows the + * convention of taking the scanner as the last argument. Note however, that + * this is a *pointer* to a scanner, as it will be allocated by this call (and + * is the reason, too, why this function also must handle its own declaration). + * The user defined value in the first argument will be available to yyalloc in + * the yyextra field. + */ +int yylex_init_extra( YY_EXTRA_TYPE yy_user_defined, yyscan_t* ptr_yy_globals ) +{ + struct yyguts_t dummy_yyguts; + + yyset_extra (yy_user_defined, &dummy_yyguts); + + if (ptr_yy_globals == NULL){ + errno = EINVAL; + return 1; + } + + *ptr_yy_globals = (yyscan_t) yyalloc ( sizeof( struct yyguts_t ), &dummy_yyguts ); + + if (*ptr_yy_globals == NULL){ + errno = ENOMEM; + return 1; + } + + /* By setting to 0xAA, we expose bugs in + yy_init_globals. Leave at 0x00 for releases. */ + memset(*ptr_yy_globals,0x00,sizeof(struct yyguts_t)); + + yyset_extra (yy_user_defined, *ptr_yy_globals); + + return yy_init_globals ( *ptr_yy_globals ); +} + +static int yy_init_globals (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + /* Initialization is the same as for the non-reentrant scanner. + * This function is called from yylex_destroy(), so don't allocate here. 
+ */ + + yyg->yy_buffer_stack = NULL; + yyg->yy_buffer_stack_top = 0; + yyg->yy_buffer_stack_max = 0; + yyg->yy_c_buf_p = NULL; + yyg->yy_init = 0; + yyg->yy_start = 0; + + yyg->yy_start_stack_ptr = 0; + yyg->yy_start_stack_depth = 0; + yyg->yy_start_stack = NULL; + +/* Defined in main.c */ +#ifdef YY_STDINIT + yyin = stdin; + yyout = stdout; +#else + yyin = NULL; + yyout = NULL; +#endif + + /* For future reference: Set errno on error, since we are called by + * yylex_init() + */ + return 0; +} + +/* yylex_destroy is for both reentrant and non-reentrant scanners. */ +int yylex_destroy (yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + + /* Pop the buffer stack, destroying each element. */ + while(YY_CURRENT_BUFFER){ + yy_delete_buffer( YY_CURRENT_BUFFER , yyscanner ); + YY_CURRENT_BUFFER_LVALUE = NULL; + yypop_buffer_state(yyscanner); + } + + /* Destroy the stack itself. */ + yyfree(yyg->yy_buffer_stack , yyscanner); + yyg->yy_buffer_stack = NULL; + + /* Destroy the start condition stack. */ + yyfree( yyg->yy_start_stack , yyscanner ); + yyg->yy_start_stack = NULL; + + /* Reset the globals. This is important in a non-reentrant scanner so the next time + * yylex() is called, initialization will occur. */ + yy_init_globals( yyscanner); + + /* Destroy the main struct (reentrant only). */ + /* clean up leaks if any before freeing yyscanner */ + yycleanup(yyscanner); + yyfree ( yyscanner , yyscanner ); + yyscanner = NULL; + return 0; +} + +/* + * Internal utility routines. 
+ */ + +#ifndef yytext_ptr +static void yy_flex_strncpy (char* s1, const char * s2, int n , yyscan_t yyscanner) +{ + struct yyguts_t * yyg = (struct yyguts_t*)yyscanner; + (void)yyg; + + int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; +} +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen (const char * s , yyscan_t yyscanner) +{ + int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; +} +#endif + +#define YYTABLES_NAME "yytables" + +#line 542 "./turtle_lexer.l" + + /* user code */ + +int +yywrap (yyscan_t yyscanner) { + return 1; +} + + +static unsigned char * +turtle_copy_token(unsigned char *text, size_t len) +{ + unsigned char *s; + if(!len) + len = strlen((const char*)text); + s = RAPTOR_MALLOC(unsigned char*, len + 1); + if(s) { + memcpy(s, text, len); + s[len] = '\0'; + } + return s; +} + + +static unsigned char * +turtle_copy_string_token(raptor_parser* rdf_parser, + unsigned char *string, size_t len, int delim) +{ + raptor_stringbuffer* sb = NULL; + int rc; + + if(len) { + sb = raptor_new_stringbuffer(); + if(!sb) + return NULL; + + rc = raptor_stringbuffer_append_turtle_string(sb, string, len, delim, + (raptor_simple_message_handler)turtle_lexer_syntax_error, + rdf_parser, 0); + if(rc) { + raptor_free_stringbuffer(sb); + return NULL; + } + + len = raptor_stringbuffer_length(sb); + } + + string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(string) { + if(sb) + raptor_stringbuffer_copy_to_string(sb, string, len+1); + string[len]='\0'; + } + + if(sb) + raptor_free_stringbuffer(sb); + + return string; +} + + +void +turtle_lexer_syntax_error(void* ctx, const char *message, ...) 
+{ + raptor_parser* rdf_parser = (raptor_parser *)ctx; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + va_list arguments; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + va_start(arguments, message); + raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser), + RAPTOR_LOG_LEVEL_ERROR, message, arguments); + + va_end(arguments); +} + + +/* + * turtle_lexer_error: + * @yyscanner: scanner object + * @level: log level RAPTOR_LOG_LEVEL_FATAL otherwise error + * @message: erro message + * + * INTERNAL - replacement for the generated error handler. + */ +static void turtle_lexer_error(yyscan_t yyscanner, + raptor_log_level level, + yyconst char *message, ...) +{ + raptor_parser *rdf_parser = NULL; + va_list arguments; + + va_start(arguments, message); + + if(yyscanner) + rdf_parser = (raptor_parser*)turtle_lexer_get_extra(yyscanner); + + /* This handles NULL rdf_parser properly */ + raptor_parser_log_error_varargs(rdf_parser, level, message, arguments); + + va_end(arguments); +} + + +/* Define LEXER_ALLOC_TRACKING to enable allocated memory tracking + * - fixes lexer memory leak when ensure_buffer_stack fails + */ + +#ifdef LEXER_ALLOC_TRACKING +typedef struct { + /* Number of void* slots allocated */ + int lexer_allocs_size; + /* Allocted void* slots follow in memory after this header */ +} lexer_alloc_tracker_header; + +/* Initial alloc tracker slot array size - 2 seems to be enough for almost all cases */ +static const int initial_lexer_allocs_size = 2; +#endif + +/* + * turtle_lexer_cleanup: + * @yyscanner: + * + * INTERNAL - Clean up unfreed lexer allocs if LEXER_ALLOC_TRACKING is enabled. 
+ */ +static void turtle_lexer_cleanup(yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + + if(!yyscanner) + return; + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + return; + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + return; + lexer_allocs = (void**)&tracker[1]; + + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i]) + free(lexer_allocs[i]); + lexer_allocs[i] = NULL; + } + free(rdf_parser->lexer_user_data); + rdf_parser->lexer_user_data = NULL; +#endif +} + + +/* + * turtle_lexer_alloc: + * @size + * @yyscanner + * + * INTERNAL - alloc replacement. + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void *turtle_lexer_alloc(yy_size_t size, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + void *ptr; + + /* yyscanner not initialized -> probably initializing yyscanner itself + * -> just malloc without tracking + */ + if(!yyscanner) + return malloc(size); + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + YY_FATAL_ERROR("lexer_alloc: yyscanner extra not initialized"); + + /* try to allocate tracker if it does not exist */ + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) { + /* allocate tracker header + array of void* slots */ + tracker = (lexer_alloc_tracker_header*)calloc(1, sizeof(lexer_alloc_tracker_header)+initial_lexer_allocs_size*sizeof(void*)); + if(!tracker) + YY_FATAL_ERROR("lexer_alloc: cannot allocate tracker"); + tracker->lexer_allocs_size = initial_lexer_allocs_size; + rdf_parser->lexer_user_data = (void *)tracker; + } + lexer_allocs = (void**)&tracker[1]; + + /* allocate memory */ + ptr = malloc(size); + + /* find a free slot for ptr */ + for(i = 0; i < 
tracker->lexer_allocs_size; ++i) { + if(!lexer_allocs[i]) { + lexer_allocs[i] = ptr; + break; + } + } + + /* no free slots -> grow tracker slot array */ + if(i>=tracker->lexer_allocs_size) { + int j; + void **dest; + tracker = (lexer_alloc_tracker_header*)calloc(1, sizeof(lexer_alloc_tracker_header)+i*2*sizeof(void*)); + if(!tracker) { + if(ptr) + free(ptr); + YY_FATAL_ERROR("lexer_alloc: cannot grow tracker"); + } + tracker->lexer_allocs_size = i*2; + + /* copy data from old tracker */ + dest = (void**)&tracker[1]; + for(j = 0; j < i; ++j) { + dest[j] = lexer_allocs[j]; + } + + /* set new item to first free slot */ + dest[j] = ptr; + + /* free old tracker and replace with new one */ + free(rdf_parser->lexer_user_data); + rdf_parser->lexer_user_data = tracker; + } + + return ptr; +#else + return malloc(size); +#endif +} + + +/* + * turtle_lexer_realloc: + * + * INTERNAL - realloc replacement + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void *turtle_lexer_realloc(void *ptr, yy_size_t size, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + void *newptr; + + if(!yyscanner) + YY_FATAL_ERROR("lexer_realloc: yyscanner not initialized"); + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + YY_FATAL_ERROR("lexer_realloc: yyscanner extra not initialized"); + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + YY_FATAL_ERROR("lexer_realloc: no alloc tracker"); + lexer_allocs = (void**)&tracker[1]; + + /* find the old slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i] == ptr) + break; + } + + /* no old slot -> error */ + if(i>=tracker->lexer_allocs_size) + YY_FATAL_ERROR("lexer_realloc: cell not in tracker"); + + /* realloc */ + newptr = realloc((char*)ptr, size); + + /* replace entry in tracker */ + lexer_allocs[i] = newptr; + + return 
newptr; +#else + return realloc((char*)ptr, size); +#endif +} + + +/* + * turtle_lexer_free: + * + * INTERNAL - free replacement. + * Checks for NULL pointer to be freed unlike the default lexer free function. + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void turtle_lexer_free(void *ptr, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + + /* do not free NULL */ + if(!ptr) + return; + + /* free ptr even if we would encounter an error */ + free(ptr); + + /* yyscanner is allocated with turtle_lexer_alloc() but it's never stored in the tracker + * - we need yyscanner to access the tracker */ + if(!yyscanner || ptr == yyscanner) + return; + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + return; + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + return; + lexer_allocs = (void**)&tracker[1]; + + /* find the slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i] == ptr) + break; + } + + /* no slot -> error */ + if(i>=tracker->lexer_allocs_size) + YY_FATAL_ERROR("lexer_free: cell not in tracker"); + + /* remove entry from tracker */ + lexer_allocs[i] = NULL; +#else + if(ptr) + free(ptr); +#endif +} + + +#ifdef RAPTOR_DEBUG + +const char * +turtle_token_print(raptor_world* world, int token, YYSTYPE *lval) +{ + #define TTP_DEBUG_BUFFER_SIZE 2048 + static char buffer[TTP_DEBUG_BUFFER_SIZE]; + + if(!token) + return "<<EOF>>"; + + switch(token) { + case PREFIX: + return "PREFIX"; + + case BASE: + return "BASE"; + + case A: + return "A"; + + case DOT: + return "DOT"; + + case COMMA: + return "COMMA"; + + case SEMICOLON: + return "SEMICOLON"; + + case LEFT_SQUARE: + return "LEFT_SQUARE"; + + case RIGHT_SQUARE: + return "RIGHT_SQUARE"; + + case HAT: + return "HAT"; + + case STRING_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, 
"STRING_LITERAL(%s)", + lval->string); + return buffer; + + case URI_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "URI_LITERAL(%s)", + (lval->uri ? (char*)raptor_uri_as_string(lval->uri) : "")); + return buffer; + + case BLANK_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "BLANK_LITERAL(%s)", + lval->string); + return buffer; + + case QNAME_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "QNAME_LITERAL(%s)", + (lval->uri ? (char*)raptor_uri_as_string(lval->uri) : "")); + return buffer; + + case INTEGER_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "INTEGER_LITERAL(%s)", + lval->string); + return buffer; + + case FLOATING_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "FLOATING_LITERAL(%s)", + lval->string); + return buffer; + + case IDENTIFIER: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "IDENTIFIER(%s)", + (lval->string ? (char*)lval->string : "")); + return buffer; + + case LANGTAG: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "LANGTAG(%s)", + (lval->string ? (char*)lval->string : "")); + return buffer; + + case DECIMAL_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "DECIMAL_LITERAL(%s)", + lval->string); + return buffer; + + case ERROR_TOKEN: + return "ERROR"; + + case LEFT_CURLY: + return "{"; + + case RIGHT_CURLY: + return "}"; + + case GRAPH_NAME_LEFT_CURLY: + return "GRAPH_NAME {"; + + default: + RAPTOR_DEBUG2("UNKNOWN token %d - add a new case\n", token); + return "(UNKNOWN)"; + } +} +#endif + + + +void +turtle_token_free(raptor_world* world, int token, YYSTYPE *lval) +{ + if(!token) + return; + + switch(token) { + case STRING_LITERAL: + case BLANK_LITERAL: + case IDENTIFIER: + if(lval->string) + RAPTOR_FREE(char*, lval->string); + break; + + case URI_LITERAL: + case QNAME_LITERAL: + if(lval->uri) + raptor_free_uri(lval->uri); + break; + default: + break; + } +} + + +#ifdef STANDALONE + +#define FILE_READ_BUF_SIZE 4096 + +int +main(int argc, char *argv[]) +{ + char *turtle_string = NULL; + raptor_parser rdf_parser; + 
raptor_turtle_parser turtle_parser; + yyscan_t scanner; + int token = EOF; + YYSTYPE lval; + const unsigned char *uri_string; + const char *filename = NULL; + char *buf = NULL; + size_t len; + raptor_world* world; + FILE *fh; + + world = raptor_new_world(); + + if(argc > 1) { + filename = argv[1]; + fh = fopen(filename, "r"); + if(!fh) { + fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename, + strerror(errno)); + exit(1); + } + } else { + filename="<stdin>"; + fh = (FILE*)stdin; + } + + turtle_string = RAPTOR_CALLOC(char*, FILE_READ_BUF_SIZE, 1); + fread(turtle_string, FILE_READ_BUF_SIZE, 1, fh); + fclose(fh); + + memset(&rdf_parser, 0, sizeof(rdf_parser)); + memset(&turtle_parser, 0, sizeof(turtle_parser)); + + rdf_parser.world = world; + + /* discard namespace errors - caused by not interpreting @prefix + * and hence causing failed qname construction + */ + raptor_namespaces_init(rdf_parser.world, &turtle_parser.namespaces, 0); + + yylex_init(&turtle_parser.scanner); + scanner = turtle_parser.scanner; + + len = strlen(RAPTOR_GOOD_CAST(const char*, turtle_string)); + buf = RAPTOR_MALLOC(char*, len + 3); + memcpy(buf, turtle_string, len); + buf[len] = ' '; + buf[len + 1] = buf[len + 2] = '\0'; /* YY_END_OF_BUFFER_CHAR; */ + (void)turtle_lexer__scan_buffer(buf, len + 3, scanner); + + turtle_lexer_set_extra(&rdf_parser, scanner); + + /* Initialise enough of the parser and locator to get error messages */ + rdf_parser.context = &turtle_parser; + turtle_parser.lineno = 1; + rdf_parser.locator.file = filename; + rdf_parser.locator.column = -1; + + uri_string = raptor_uri_filename_to_uri_string(filename); + rdf_parser.base_uri = raptor_new_uri(world, uri_string); + RAPTOR_FREE(char*, uri_string); + + while(1) { + memset(&lval, 0, sizeof(YYSTYPE)); + if(turtle_lexer_get_text(scanner) != NULL) + printf("yyinput '%s'\n", turtle_lexer_get_text(scanner)); + token = yylex(&lval, scanner); +#ifdef RAPTOR_DEBUG + printf("token %s\n", turtle_token_print(world, 
token, &lval)); +#else + printf("token %d\n", token); +#endif + turtle_token_free(world, token, &lval); + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } + + if(buf) + RAPTOR_FREE(char*, buf); + + yylex_destroy(scanner); + + raptor_namespaces_clear(&turtle_parser.namespaces); + + raptor_free_uri(rdf_parser.base_uri); + + RAPTOR_FREE(char*, turtle_string); + + raptor_free_world(world); + + + if(token == ERROR_TOKEN) + return 1; + + return 0; +} +#endif + diff --git a/src/turtle_lexer.h b/src/turtle_lexer.h new file mode 100644 index 0000000..85a3a9e --- /dev/null +++ b/src/turtle_lexer.h @@ -0,0 +1,724 @@ +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifndef turtle_lexer_HEADER_H +#define turtle_lexer_HEADER_H 1 +#define turtle_lexer_IN_HEADER 1 + +#line 10 "turtle_lexer.h" + +#line 12 "turtle_lexer.h" + +#define YY_INT_ALIGNED short int + +/* A lexical scanner generated by flex */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 6 +#define YY_FLEX_SUBMINOR_VERSION 4 +#if YY_FLEX_SUBMINOR_VERSION > 0 +#define FLEX_BETA +#endif + +#ifdef yy_create_buffer +#define turtle_lexer__create_buffer_ALREADY_DEFINED +#else +#define yy_create_buffer turtle_lexer__create_buffer +#endif + +#ifdef yy_delete_buffer +#define turtle_lexer__delete_buffer_ALREADY_DEFINED +#else +#define yy_delete_buffer turtle_lexer__delete_buffer +#endif + +#ifdef yy_scan_buffer +#define turtle_lexer__scan_buffer_ALREADY_DEFINED +#else +#define yy_scan_buffer turtle_lexer__scan_buffer +#endif + +#ifdef yy_scan_string +#define turtle_lexer__scan_string_ALREADY_DEFINED +#else +#define yy_scan_string turtle_lexer__scan_string +#endif + +#ifdef yy_scan_bytes +#define turtle_lexer__scan_bytes_ALREADY_DEFINED +#else +#define yy_scan_bytes turtle_lexer__scan_bytes +#endif + +#ifdef yy_init_buffer +#define turtle_lexer__init_buffer_ALREADY_DEFINED +#else +#define yy_init_buffer turtle_lexer__init_buffer +#endif + +#ifdef yy_flush_buffer 
+#define turtle_lexer__flush_buffer_ALREADY_DEFINED +#else +#define yy_flush_buffer turtle_lexer__flush_buffer +#endif + +#ifdef yy_load_buffer_state +#define turtle_lexer__load_buffer_state_ALREADY_DEFINED +#else +#define yy_load_buffer_state turtle_lexer__load_buffer_state +#endif + +#ifdef yy_switch_to_buffer +#define turtle_lexer__switch_to_buffer_ALREADY_DEFINED +#else +#define yy_switch_to_buffer turtle_lexer__switch_to_buffer +#endif + +#ifdef yypush_buffer_state +#define turtle_lexer_push_buffer_state_ALREADY_DEFINED +#else +#define yypush_buffer_state turtle_lexer_push_buffer_state +#endif + +#ifdef yypop_buffer_state +#define turtle_lexer_pop_buffer_state_ALREADY_DEFINED +#else +#define yypop_buffer_state turtle_lexer_pop_buffer_state +#endif + +#ifdef yyensure_buffer_stack +#define turtle_lexer_ensure_buffer_stack_ALREADY_DEFINED +#else +#define yyensure_buffer_stack turtle_lexer_ensure_buffer_stack +#endif + +#ifdef yylex +#define turtle_lexer_lex_ALREADY_DEFINED +#else +#define yylex turtle_lexer_lex +#endif + +#ifdef yyrestart +#define turtle_lexer_restart_ALREADY_DEFINED +#else +#define yyrestart turtle_lexer_restart +#endif + +#ifdef yylex_init +#define turtle_lexer_lex_init_ALREADY_DEFINED +#else +#define yylex_init turtle_lexer_lex_init +#endif + +#ifdef yylex_init_extra +#define turtle_lexer_lex_init_extra_ALREADY_DEFINED +#else +#define yylex_init_extra turtle_lexer_lex_init_extra +#endif + +#ifdef yylex_destroy +#define turtle_lexer_lex_destroy_ALREADY_DEFINED +#else +#define yylex_destroy turtle_lexer_lex_destroy +#endif + +#ifdef yyget_debug +#define turtle_lexer_get_debug_ALREADY_DEFINED +#else +#define yyget_debug turtle_lexer_get_debug +#endif + +#ifdef yyset_debug +#define turtle_lexer_set_debug_ALREADY_DEFINED +#else +#define yyset_debug turtle_lexer_set_debug +#endif + +#ifdef yyget_extra +#define turtle_lexer_get_extra_ALREADY_DEFINED +#else +#define yyget_extra turtle_lexer_get_extra +#endif + +#ifdef yyset_extra +#define 
turtle_lexer_set_extra_ALREADY_DEFINED +#else +#define yyset_extra turtle_lexer_set_extra +#endif + +#ifdef yyget_in +#define turtle_lexer_get_in_ALREADY_DEFINED +#else +#define yyget_in turtle_lexer_get_in +#endif + +#ifdef yyset_in +#define turtle_lexer_set_in_ALREADY_DEFINED +#else +#define yyset_in turtle_lexer_set_in +#endif + +#ifdef yyget_out +#define turtle_lexer_get_out_ALREADY_DEFINED +#else +#define yyget_out turtle_lexer_get_out +#endif + +#ifdef yyset_out +#define turtle_lexer_set_out_ALREADY_DEFINED +#else +#define yyset_out turtle_lexer_set_out +#endif + +#ifdef yyget_leng +#define turtle_lexer_get_leng_ALREADY_DEFINED +#else +#define yyget_leng turtle_lexer_get_leng +#endif + +#ifdef yyget_text +#define turtle_lexer_get_text_ALREADY_DEFINED +#else +#define yyget_text turtle_lexer_get_text +#endif + +#ifdef yyget_lineno +#define turtle_lexer_get_lineno_ALREADY_DEFINED +#else +#define yyget_lineno turtle_lexer_get_lineno +#endif + +#ifdef yyset_lineno +#define turtle_lexer_set_lineno_ALREADY_DEFINED +#else +#define yyset_lineno turtle_lexer_set_lineno +#endif + +#ifdef yyget_column +#define turtle_lexer_get_column_ALREADY_DEFINED +#else +#define yyget_column turtle_lexer_get_column +#endif + +#ifdef yyset_column +#define turtle_lexer_set_column_ALREADY_DEFINED +#else +#define yyset_column turtle_lexer_set_column +#endif + +#ifdef yywrap +#define turtle_lexer_wrap_ALREADY_DEFINED +#else +#define yywrap turtle_lexer_wrap +#endif + +#ifdef yyget_lval +#define turtle_lexer_get_lval_ALREADY_DEFINED +#else +#define yyget_lval turtle_lexer_get_lval +#endif + +#ifdef yyset_lval +#define turtle_lexer_set_lval_ALREADY_DEFINED +#else +#define yyset_lval turtle_lexer_set_lval +#endif + +#ifdef yyalloc +#define turtle_lexer_alloc_ALREADY_DEFINED +#else +#define yyalloc turtle_lexer_alloc +#endif + +#ifdef yyrealloc +#define turtle_lexer_realloc_ALREADY_DEFINED +#else +#define yyrealloc turtle_lexer_realloc +#endif + +#ifdef yyfree +#define 
turtle_lexer_free_ALREADY_DEFINED +#else +#define yyfree turtle_lexer_free +#endif + +/* First, we deal with platform-specific or compiler-specific issues. */ + +/* begin standard C headers. */ +#include <stdio.h> +#include <string.h> +#include <errno.h> +#include <stdlib.h> + +/* end standard C headers. */ + +/* flex integer type definitions */ + +#ifndef FLEXINT_H +#define FLEXINT_H + +/* C99 systems have <inttypes.h>. Non-C99 systems may or may not. */ + +#if defined (__STDC_VERSION__) && __STDC_VERSION__ >= 199901L + +/* C99 says to define __STDC_LIMIT_MACROS before including stdint.h, + * if you want the limit (max/min) macros for int types. + */ +#ifndef __STDC_LIMIT_MACROS +#define __STDC_LIMIT_MACROS 1 +#endif + +#include <inttypes.h> +typedef int8_t flex_int8_t; +typedef uint8_t flex_uint8_t; +typedef int16_t flex_int16_t; +typedef uint16_t flex_uint16_t; +typedef int32_t flex_int32_t; +typedef uint32_t flex_uint32_t; +#else +typedef signed char flex_int8_t; +typedef short int flex_int16_t; +typedef int flex_int32_t; +typedef unsigned char flex_uint8_t; +typedef unsigned short int flex_uint16_t; +typedef unsigned int flex_uint32_t; + +/* Limits of integral types. */ +#ifndef INT8_MIN +#define INT8_MIN (-128) +#endif +#ifndef INT16_MIN +#define INT16_MIN (-32767-1) +#endif +#ifndef INT32_MIN +#define INT32_MIN (-2147483647-1) +#endif +#ifndef INT8_MAX +#define INT8_MAX (127) +#endif +#ifndef INT16_MAX +#define INT16_MAX (32767) +#endif +#ifndef INT32_MAX +#define INT32_MAX (2147483647) +#endif +#ifndef UINT8_MAX +#define UINT8_MAX (255U) +#endif +#ifndef UINT16_MAX +#define UINT16_MAX (65535U) +#endif +#ifndef UINT32_MAX +#define UINT32_MAX (4294967295U) +#endif + +#ifndef SIZE_MAX +#define SIZE_MAX (~(size_t)0) +#endif + +#endif /* ! C99 */ + +#endif /* ! FLEXINT_H */ + +/* begin standard C++ headers. 
*/ + +/* TODO: this is always defined, so inline it */ +#define yyconst const + +#if defined(__GNUC__) && __GNUC__ >= 3 +#define yynoreturn __attribute__((__noreturn__)) +#else +#define yynoreturn +#endif + +/* An opaque pointer. */ +#ifndef YY_TYPEDEF_YY_SCANNER_T +#define YY_TYPEDEF_YY_SCANNER_T +typedef void* yyscan_t; +#endif + +/* For convenience, these vars (plus the bison vars far below) + are macros in the reentrant scanner. */ +#define yyin yyg->yyin_r +#define yyout yyg->yyout_r +#define yyextra yyg->yyextra_r +#define yyleng yyg->yyleng_r +#define yytext yyg->yytext_r +#define yylineno (YY_CURRENT_BUFFER_LVALUE->yy_bs_lineno) +#define yycolumn (YY_CURRENT_BUFFER_LVALUE->yy_bs_column) +#define yy_flex_debug yyg->yy_flex_debug_r + +/* Size of default input buffer. */ +#ifndef YY_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k. + * Moreover, YY_BUF_SIZE is 2*YY_READ_BUF_SIZE in the general case. + * Ditto for the __ia64__ case accordingly. + */ +#define YY_BUF_SIZE 32768 +#else +#define YY_BUF_SIZE 16384 +#endif /* __ia64__ */ +#endif + +#ifndef YY_TYPEDEF_YY_BUFFER_STATE +#define YY_TYPEDEF_YY_BUFFER_STATE +typedef struct yy_buffer_state *YY_BUFFER_STATE; +#endif + +#ifndef YY_TYPEDEF_YY_SIZE_T +#define YY_TYPEDEF_YY_SIZE_T +typedef size_t yy_size_t; +#endif + +#ifndef YY_STRUCT_YY_BUFFER_STATE +#define YY_STRUCT_YY_BUFFER_STATE +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + int yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. 
+ */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + int yy_bs_lineno; /**< The line count. */ + int yy_bs_column; /**< The column count. */ + + /* Whether to try to fill the input buffer when we reach the + * end of it. + */ + int yy_fill_buffer; + + int yy_buffer_status; + + }; +#endif /* !YY_STRUCT_YY_BUFFER_STATE */ + +void yyrestart ( FILE *input_file , yyscan_t yyscanner ); +void yy_switch_to_buffer ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_create_buffer ( FILE *file, int size , yyscan_t yyscanner ); +void yy_delete_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yy_flush_buffer ( YY_BUFFER_STATE b , yyscan_t yyscanner ); +void yypush_buffer_state ( YY_BUFFER_STATE new_buffer , yyscan_t yyscanner ); +void yypop_buffer_state ( yyscan_t yyscanner ); + +YY_BUFFER_STATE yy_scan_buffer ( char *base, yy_size_t size , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_string ( const char *yy_str , yyscan_t yyscanner ); +YY_BUFFER_STATE yy_scan_bytes ( const char *bytes, yy_size_t len , yyscan_t yyscanner ); + +void *yyalloc ( yy_size_t , yyscan_t yyscanner ); +void *yyrealloc ( void *, yy_size_t , yyscan_t yyscanner ); +void yyfree ( void * , yyscan_t yyscanner ); + +/* Begin user sect3 */ + +#define yytext_ptr yytext_r + +#ifdef YY_HEADER_EXPORT_START_CONDITIONS +#define INITIAL 0 +#define PREF 1 +#define LONG_DLITERAL 2 +#define LONG_SLITERAL 3 + +#endif + +#ifndef YY_NO_UNISTD_H +/* Special case for "unistd.h", since it is non-ANSI. We include it way + * down here because we want the user's section 1 to have been scanned first. 
+ * The user has a chance to override it with an option. + */ +#ifndef YY_NO_UNISTD_H +#include <unistd.h> +#endif +#endif + +#define YY_EXTRA_TYPE raptor_parser* + +int yylex_init (yyscan_t* scanner); + +int yylex_init_extra ( YY_EXTRA_TYPE user_defined, yyscan_t* scanner); + +/* Accessor methods to globals. + These are made visible to non-reentrant scanners for convenience. */ + +int yylex_destroy ( yyscan_t yyscanner ); + +int yyget_debug ( yyscan_t yyscanner ); + +void yyset_debug ( int debug_flag , yyscan_t yyscanner ); + +YY_EXTRA_TYPE yyget_extra ( yyscan_t yyscanner ); + +void yyset_extra ( YY_EXTRA_TYPE user_defined , yyscan_t yyscanner ); + +FILE *yyget_in ( yyscan_t yyscanner ); + +void yyset_in ( FILE * _in_str , yyscan_t yyscanner ); + +FILE *yyget_out ( yyscan_t yyscanner ); + +void yyset_out ( FILE * _out_str , yyscan_t yyscanner ); + + int yyget_leng ( yyscan_t yyscanner ); + +char *yyget_text ( yyscan_t yyscanner ); + +int yyget_lineno ( yyscan_t yyscanner ); + +void yyset_lineno ( int _line_number , yyscan_t yyscanner ); + +int yyget_column ( yyscan_t yyscanner ); + +void yyset_column ( int _column_no , yyscan_t yyscanner ); + +YYSTYPE * yyget_lval ( yyscan_t yyscanner ); + +void yyset_lval ( YYSTYPE * yylval_param , yyscan_t yyscanner ); + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap ( yyscan_t yyscanner ); +#else +extern int yywrap ( yyscan_t yyscanner ); +#endif +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy ( char *, const char *, int , yyscan_t yyscanner); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen ( const char * , yyscan_t yyscanner); +#endif + +#ifndef YY_NO_INPUT + +#endif + +/* Amount of stuff to slurp up with each read. 
*/ +#ifndef YY_READ_BUF_SIZE +#ifdef __ia64__ +/* On IA-64, the buffer size is 16k, not 8k */ +#define YY_READ_BUF_SIZE 16384 +#else +#define YY_READ_BUF_SIZE 8192 +#endif /* __ia64__ */ +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. + */ +#ifndef YY_DECL +#define YY_DECL_IS_OURS 1 + +extern int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner); + +#define YY_DECL int yylex \ + (YYSTYPE * yylval_param , yyscan_t yyscanner) +#endif /* !YY_DECL */ + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +#undef YY_NEW_FILE +#undef YY_FLUSH_BUFFER +#undef yy_set_bol +#undef yy_new_buffer +#undef yy_set_interactive +#undef YY_DO_BEFORE_ACTION + +#ifdef YY_DECL_IS_OURS +#undef YY_DECL_IS_OURS +#undef YY_DECL +#endif + +#ifndef turtle_lexer__create_buffer_ALREADY_DEFINED +#undef yy_create_buffer +#endif +#ifndef turtle_lexer__delete_buffer_ALREADY_DEFINED +#undef yy_delete_buffer +#endif +#ifndef turtle_lexer__scan_buffer_ALREADY_DEFINED +#undef yy_scan_buffer +#endif +#ifndef turtle_lexer__scan_string_ALREADY_DEFINED +#undef yy_scan_string +#endif +#ifndef turtle_lexer__scan_bytes_ALREADY_DEFINED +#undef yy_scan_bytes +#endif +#ifndef turtle_lexer__init_buffer_ALREADY_DEFINED +#undef yy_init_buffer +#endif +#ifndef turtle_lexer__flush_buffer_ALREADY_DEFINED +#undef yy_flush_buffer +#endif +#ifndef turtle_lexer__load_buffer_state_ALREADY_DEFINED +#undef yy_load_buffer_state +#endif +#ifndef turtle_lexer__switch_to_buffer_ALREADY_DEFINED +#undef yy_switch_to_buffer +#endif +#ifndef turtle_lexer_push_buffer_state_ALREADY_DEFINED +#undef yypush_buffer_state +#endif +#ifndef turtle_lexer_pop_buffer_state_ALREADY_DEFINED +#undef yypop_buffer_state +#endif +#ifndef turtle_lexer_ensure_buffer_stack_ALREADY_DEFINED +#undef yyensure_buffer_stack +#endif 
+#ifndef turtle_lexer_lex_ALREADY_DEFINED +#undef yylex +#endif +#ifndef turtle_lexer_restart_ALREADY_DEFINED +#undef yyrestart +#endif +#ifndef turtle_lexer_lex_init_ALREADY_DEFINED +#undef yylex_init +#endif +#ifndef turtle_lexer_lex_init_extra_ALREADY_DEFINED +#undef yylex_init_extra +#endif +#ifndef turtle_lexer_lex_destroy_ALREADY_DEFINED +#undef yylex_destroy +#endif +#ifndef turtle_lexer_get_debug_ALREADY_DEFINED +#undef yyget_debug +#endif +#ifndef turtle_lexer_set_debug_ALREADY_DEFINED +#undef yyset_debug +#endif +#ifndef turtle_lexer_get_extra_ALREADY_DEFINED +#undef yyget_extra +#endif +#ifndef turtle_lexer_set_extra_ALREADY_DEFINED +#undef yyset_extra +#endif +#ifndef turtle_lexer_get_in_ALREADY_DEFINED +#undef yyget_in +#endif +#ifndef turtle_lexer_set_in_ALREADY_DEFINED +#undef yyset_in +#endif +#ifndef turtle_lexer_get_out_ALREADY_DEFINED +#undef yyget_out +#endif +#ifndef turtle_lexer_set_out_ALREADY_DEFINED +#undef yyset_out +#endif +#ifndef turtle_lexer_get_leng_ALREADY_DEFINED +#undef yyget_leng +#endif +#ifndef turtle_lexer_get_text_ALREADY_DEFINED +#undef yyget_text +#endif +#ifndef turtle_lexer_get_lineno_ALREADY_DEFINED +#undef yyget_lineno +#endif +#ifndef turtle_lexer_set_lineno_ALREADY_DEFINED +#undef yyset_lineno +#endif +#ifndef turtle_lexer_get_column_ALREADY_DEFINED +#undef yyget_column +#endif +#ifndef turtle_lexer_set_column_ALREADY_DEFINED +#undef yyset_column +#endif +#ifndef turtle_lexer_wrap_ALREADY_DEFINED +#undef yywrap +#endif +#ifndef turtle_lexer_get_lval_ALREADY_DEFINED +#undef yyget_lval +#endif +#ifndef turtle_lexer_set_lval_ALREADY_DEFINED +#undef yyset_lval +#endif +#ifndef turtle_lexer_get_lloc_ALREADY_DEFINED +#undef yyget_lloc +#endif +#ifndef turtle_lexer_set_lloc_ALREADY_DEFINED +#undef yyset_lloc +#endif +#ifndef turtle_lexer_alloc_ALREADY_DEFINED +#undef yyalloc +#endif +#ifndef turtle_lexer_realloc_ALREADY_DEFINED +#undef yyrealloc +#endif +#ifndef turtle_lexer_free_ALREADY_DEFINED +#undef yyfree +#endif 
+#ifndef turtle_lexer_text_ALREADY_DEFINED +#undef yytext +#endif +#ifndef turtle_lexer_leng_ALREADY_DEFINED +#undef yyleng +#endif +#ifndef turtle_lexer_in_ALREADY_DEFINED +#undef yyin +#endif +#ifndef turtle_lexer_out_ALREADY_DEFINED +#undef yyout +#endif +#ifndef turtle_lexer__flex_debug_ALREADY_DEFINED +#undef yy_flex_debug +#endif +#ifndef turtle_lexer_lineno_ALREADY_DEFINED +#undef yylineno +#endif +#ifndef turtle_lexer_tables_fload_ALREADY_DEFINED +#undef yytables_fload +#endif +#ifndef turtle_lexer_tables_destroy_ALREADY_DEFINED +#undef yytables_destroy +#endif +#ifndef turtle_lexer_TABLES_NAME_ALREADY_DEFINED +#undef yyTABLES_NAME +#endif + +#line 542 "./turtle_lexer.l" + + +#line 723 "turtle_lexer.h" +#undef turtle_lexer_IN_HEADER +#endif /* turtle_lexer_HEADER_H */ diff --git a/src/turtle_lexer.l b/src/turtle_lexer.l new file mode 100644 index 0000000..8d0c53e --- /dev/null +++ b/src/turtle_lexer.l @@ -0,0 +1,1124 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_lexer.l - Raptor Turtle lexer - making tokens for turtle grammar generator + * + * Copyright (C) 2003-2013, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + * Turtle is defined in http://www.dajobe.org/2004/01/turtle/ + * + * To generate the C files from this source, rather than use the + * shipped turtle_lexer.c/.h needs a patched version of flex 2.5.31 such + * as the one available in Debian GNU/Linux. Details below + * near the %option descriptions. + * + */ + + +/* recognise 8-bits */ +%option 8bit +%option warn nodefault + +/* all symbols prefixed by this */ +%option prefix="turtle_lexer_" + +/* This is not needed, flex is invoked -oturtle_lexer.c */ +/* %option outfile="turtle_lexer.c" */ + +/* Emit a C header file for prototypes + * Only available in flex 2.5.13 or newer. + * It was renamed to header-file in flex 2.5.19 + */ +%option header-file="turtle_lexer.h" + +/* Do not emit #include <unistd.h> + * Only available in flex 2.5.7 or newer. + * Broken in flex 2.5.31 without patches. + */ +%option nounistd + +/* Never interactive */ +/* No isatty() check */ +%option never-interactive + +/* Batch scanner */ +%option batch + +/* Never use yyunput */ +%option nounput + +/* Supply our own alloc/realloc/free functions */ +%option noyyalloc noyyrealloc noyyfree + +/* Re-entrant scanner */ +%option reentrant + +%option extra-type="raptor_parser*" + +/* Makes yyget_lval() yyset_lval() and yylval appear */ +%option bison-bridge +/* Makes yyget_lloc() yyset_lloc() and yylloc appear */ +/* %option bison-locations */ + + /* definitions */ + +%{ + +/* NOTE: These headers are NOT included here but are inserted by + * fix-flex since otherwise it appears far too late in the generated C + */ + +/* +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif +*/ + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif +#ifdef HAVE_SETJMP_H +#include <setjmp.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> +#include <turtle_common.h> + +#define YYSTYPE 
TURTLE_PARSER_STYPE + +/* Prototypes */ +static unsigned char *turtle_copy_token(unsigned char *text, size_t len); +static unsigned char *turtle_copy_string_token(raptor_parser* rdf_parser, unsigned char *text, size_t len, int delim); +void turtle_lexer_syntax_error(void* ctx, const char *message, ...) RAPTOR_PRINTF_FORMAT(2, 3); + +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, YYSTYPE *lval); +#endif + +#ifdef __cplusplus +#define INPUT_FN yyinput +#else +#define INPUT_FN input +#endif + + +#if FLEX_VERSION_DECIMAL < 20536 +/* debian flex 2.5.35-10.1 added these column header prototypes in + * re-entrant mode. standard flex omits them + */ +void turtle_lexer_set_column(int column_no, yyscan_t yyscanner); +int turtle_lexer_get_column(yyscan_t yyscanner); +#endif + +static void turtle_lexer_cleanup(yyscan_t yyscanner); +#undef yycleanup +#define yycleanup turtle_lexer_cleanup + +#ifdef HAVE_SETJMP +static jmp_buf turtle_lexer_fatal_error_longjmp_env; + +/* fatal error handler declaration */ +#define YY_FATAL_ERROR(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, msg); \ + longjmp(turtle_lexer_fatal_error_longjmp_env, 1); \ +} while(0) +#else +#define YY_FATAL_ERROR(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, msg); \ + abort(); \ +} while(0) +#endif + +/* Remove the re-fill function since it should never be called */ +#define YY_INPUT(buf,result,max_size) { return YY_NULL; } + +static void turtle_lexer_error(yyscan_t yyscanner, raptor_log_level level, yyconst char *message, ...) 
RAPTOR_PRINTF_FORMAT(3, 4); + +/* Fatal error handler that returns EOF instead of abort()/longjmp() + * so that parser can clean up properly */ +#define YY_FATAL_ERROR_EOF(msg) do { \ + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_FATAL, "%s", msg); \ + yyterminate(); \ +} while(0) + +/* Out-of-memory reporting macro */ +#define TURTLE_LEXER_OOM() YY_FATAL_ERROR_EOF(turtle_lexer_oom_text) +static char turtle_lexer_oom_text[]="turtle_lexer: Out of memory"; + +/* Do not need input() to to read from stdin */ +#define YY_NO_INPUT 1 + +#define YY_USER_ACTION \ + turtle_parser->consumed += yyleng; + +%} + +/* Tokens from Turtle 2013 spec - lex-ifyed to remove unicode ranges */ +PN_CHARS_BASE [A-Za-z\x80-\xff] +PN_CHARS {PN_CHARS_BASE}|"_"|"-"|[0-9] +BS_ESCAPES [-_~\.!$&\'()*+,;=/?#@%] +HEX [0-9A-Fa-f] +PLX "%"{HEX}{HEX})|("\\"{BS_ESCAPES} + +LANGTAG "@"[A-Za-z][-A-Z_a-z0-9]* + +/* flex: only 1 level of definition expansion so have to expand PLX */ +BN_LABEL ({PN_CHARS_BASE}|"_"|[0-9])(({PN_CHARS}|".")*({PN_CHARS}))* +PN_PREFIX ({PN_CHARS_BASE})(({PN_CHARS}|".")*({PN_CHARS}))* +PN_LOCAL ({PN_CHARS_BASE}|"_"|[0-9]|":"|{PLX})(({PN_CHARS}|"."|":"|{PLX})*({PN_CHARS}|":"|{PLX}))* + +QNAME {PN_PREFIX}?":"{PN_LOCAL}? + +UCHAR "\\u"{HEX}{HEX}{HEX}{HEX}|"\\U"{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX}{HEX} +IRI "<"([^\x00-\x20<>\"{}\|^`\\]|{UCHAR})*">" + +INTEGER [-+]?[0-9]+ +DECIMAL [-+]?[0-9]*"."[0-9]+ +DOUBLE [-+]?([0-9]+"."[0-9]*{EXPONENT}|"."[0-9]+{EXPONENT}|[0-9]+{EXPONENT}) +EXPONENT [eE][+-]?[0-9]+ + + +%x PREF LONG_DLITERAL LONG_SLITERAL + + +%% + /* rules */ + +%{ + raptor_parser *rdf_parser = yyextra; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + +#ifdef HAVE_SETJMP + if(setjmp(turtle_lexer_fatal_error_longjmp_env)) + return 1; +#endif +%} + + +\r\n|\r|\n { turtle_parser->lineno++; } + +[\ \t\v]+ { /* empty */ } + + +"a" { return A; } + +"." 
{ return DOT; } +"," { return COMMA; } +";" { return SEMICOLON; } +"[" { return LEFT_SQUARE; } +"]" { return RIGHT_SQUARE; } +"@prefix" { BEGIN(PREF); return PREFIX; } +[Pp][Rr][Ee][Ff][Ii][Xx] { BEGIN(PREF); + return SPARQL_PREFIX; } +"@base" { return BASE; } +[Bb][Aa][Ss][Ee] { return SPARQL_BASE; } +"^^" { return HAT; } +"(" { return LEFT_ROUND; } +")" { return RIGHT_ROUND; } +"{" { return LEFT_CURLY; } +"}" { return RIGHT_CURLY; } +"true" { return TRUE_TOKEN; } +"false" { return FALSE_TOKEN; } + + +\"([^\"\\\n\r]|\\[^\n\r])*\" { yylval->string = turtle_copy_string_token(rdf_parser, (unsigned char*)yytext+1, yyleng-2, '"'); /* ' */ + if(!yylval->string) + yyterminate(); + + return STRING_LITERAL; } + +\'([^\'\\\n\r]|\\[^\n\r])*\' { yylval->string = turtle_copy_string_token(rdf_parser, (unsigned char*)yytext+1, yyleng-2, '"'); /* ' */ + if(!yylval->string) + yyterminate(); + + return STRING_LITERAL; } + +\"\"\" { BEGIN(LONG_DLITERAL); + turtle_parser->sb = raptor_new_stringbuffer(); + if(!turtle_parser->sb) + TURTLE_LEXER_OOM(); + } + +<LONG_DLITERAL>\"\"\" { + size_t len; + + BEGIN(INITIAL); + len = raptor_stringbuffer_length(turtle_parser->sb); + yylval->string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!yylval->string) + TURTLE_LEXER_OOM(); + raptor_stringbuffer_copy_to_string(turtle_parser->sb, (unsigned char*)yylval->string, len); + yylval->string[len]='\0'; + + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return STRING_LITERAL; } + +<LONG_DLITERAL>\"|(\\.|[^\"\\]|\n)* { + char *p; + + if(*yytext == EOF) { + BEGIN(INITIAL); + turtle_syntax_error(rdf_parser, "End of file in middle of literal"); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return EOF; + } + + for(p = yytext; *p; p++) { + if(*p == '\n') + turtle_parser->lineno++; + } + + if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, 
rdf_parser, 0)) { /* " */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + + } + +<LONG_DLITERAL>\\ { + /* this should only happen if \ is at the end of the file so the Turtle doc is illegal anyway */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + turtle_syntax_error(rdf_parser, "End of file in middle of \"\"\"literal\"\"\""); + yyterminate(); +} + +<LONG_DLITERAL><<EOF>> { + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + if(!turtle_parser->is_end) { + /* next run will fix things, hopefully */ + return EOF; + } + /* otherwise abort */ + turtle_syntax_error(rdf_parser, "End of file in middle of \"\"\"literal\"\"\""); + yyterminate(); +} + +\'\'\' { BEGIN(LONG_SLITERAL); + turtle_parser->sb = raptor_new_stringbuffer(); + if(!turtle_parser->sb) + TURTLE_LEXER_OOM(); + } + +<LONG_SLITERAL>\'\'\' { + size_t len; + + BEGIN(INITIAL); + len = raptor_stringbuffer_length(turtle_parser->sb); + yylval->string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(!yylval->string) + TURTLE_LEXER_OOM(); + raptor_stringbuffer_copy_to_string(turtle_parser->sb, (unsigned char*)yylval->string, len); + yylval->string[len]='\0'; + + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return STRING_LITERAL; } + +<LONG_SLITERAL>\'|(\\.|[^\'\\]|\n)* { + char *p; + + if(*yytext == EOF) { + BEGIN(INITIAL); + turtle_syntax_error(rdf_parser, "End of file in middle of \'\'\'literal\'\'\'"); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + return EOF; + } + + for(p = yytext; *p; p++) { + if(*p == '\n') + turtle_parser->lineno++; + } + + if(raptor_stringbuffer_append_turtle_string(turtle_parser->sb, (unsigned char*)yytext, yyleng, '"', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 0)) { /* " */ + BEGIN(INITIAL); + 
raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + + } + +<LONG_SLITERAL>\\ { + /* this should only happen if \ is at the end of the file so the Turtle doc is illegal anyway */ + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + turtle_syntax_error(rdf_parser, "End of file in middle of '''literal'''"); + yyterminate(); +} + +<LONG_SLITERAL><<EOF>> { + BEGIN(INITIAL); + raptor_free_stringbuffer(turtle_parser->sb); + turtle_parser->sb = NULL; + if(!turtle_parser->is_end) { + /* next run will fix things, hopefully */ + return EOF; + } + /* otherwise abort */ + turtle_syntax_error(rdf_parser, "End of file in middle of '''literal'''"); + yyterminate(); +} + +"_:"{BN_LABEL} { yylval->string = turtle_copy_token((unsigned char*)yytext+2, yyleng-2); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return BLANK_LITERAL; } + +{QNAME} { yylval->uri = turtle_qname_to_uri(rdf_parser, (unsigned char*)yytext, yyleng); + if(!yylval->uri) { + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_ERROR, "Failed to convert qname %s to URI", yytext); + yyterminate(); + } + + return QNAME_LITERAL; } + +{DECIMAL} { yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return DECIMAL_LITERAL; +} + +{DOUBLE} { yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return FLOATING_LITERAL; +} + +{INTEGER} { yylval->string = turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return INTEGER_LITERAL; } + +<PREF>[\ \t\v]+ { /* eat up leading whitespace */ } +<PREF>{PN_PREFIX}":" { yylval->string=turtle_copy_token((unsigned char*)yytext, yyleng); + if(!yylval->string) + 
YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + BEGIN(INITIAL); + return IDENTIFIER; } +<PREF>":" { BEGIN(INITIAL); + yylval->string = turtle_copy_token((unsigned char*)yytext, 0); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return IDENTIFIER; } + +<PREF>(.|\n) { BEGIN(INITIAL); + if(*yytext == EOF) + return EOF; + + turtle_syntax_error(rdf_parser, "syntax error at '%c'", *yytext); + yyterminate(); } + + +{IRI}[\ \t\v\r\n]*("=")?[\ \t\v\r\n]*"{" { + raptor_stringbuffer* sb; + unsigned char* uri_string; + + /* make length just the IRI */ + while(yytext[yyleng - 1] != '>') + yyleng--; + + sb = raptor_new_stringbuffer(); + if(!sb) + TURTLE_LEXER_OOM(); + + /* start at yytext + 1 to skip '<' and operate over + * length-2 bytes to skip '<' and '>' + */ + if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-2, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) { + raptor_free_stringbuffer(sb); + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + uri_string = raptor_stringbuffer_as_string(sb); + + if(!*uri_string) + yylval->uri = raptor_uri_copy(rdf_parser->base_uri); + else + yylval->uri = raptor_new_uri_relative_to_base(rdf_parser->world, rdf_parser->base_uri, uri_string); + + raptor_free_stringbuffer(sb); + + if(!yylval->uri) + TURTLE_LEXER_OOM(); + return GRAPH_NAME_LEFT_CURLY; } + +{QNAME}[\ \t\v\r\n]*("=")?[\ \t\v\r\n]*"{" { + while(1) { + int c = yytext[yyleng - 1]; + if(c == '{' || c == ' ' || c=='\t' || c == '\v' || c == '\n' || + c == '=') { + yyleng--; + } else + break; + } + yytext[yyleng] = '\0'; + + yylval->uri = turtle_qname_to_uri(rdf_parser, (unsigned char*)yytext, yyleng); + if(!yylval->uri) { + turtle_lexer_error(yyscanner, RAPTOR_LOG_LEVEL_ERROR, "Failed to convert qname %s to URI", yytext); + yyterminate(); + } + + return GRAPH_NAME_LEFT_CURLY; } + +{IRI} { if(yyleng == 2) + yylval->uri = raptor_uri_copy(rdf_parser->base_uri); + else { + 
raptor_stringbuffer* sb; + unsigned char* uri_string; + + yytext[yyleng-1]='\0'; + sb = raptor_new_stringbuffer(); + if(!sb) + TURTLE_LEXER_OOM(); + if(raptor_stringbuffer_append_turtle_string(sb, (unsigned char*)yytext+1, yyleng-1, '>', (raptor_simple_message_handler)turtle_lexer_syntax_error, rdf_parser, 1)) { + raptor_free_stringbuffer(sb); + YY_FATAL_ERROR_EOF("raptor_stringbuffer_append_turtle_string failed"); + } + uri_string = raptor_stringbuffer_as_string(sb); + yylval->uri = raptor_new_uri_relative_to_base(rdf_parser->world, rdf_parser->base_uri, uri_string); + if(!yylval->uri) { + raptor_free_stringbuffer(sb); + TURTLE_LEXER_OOM(); + } + raptor_free_stringbuffer(sb); + } + return URI_LITERAL; } + +{LANGTAG} { yylval->string = turtle_copy_token((unsigned char*)yytext+1, yyleng-1); + if(!yylval->string) + YY_FATAL_ERROR_EOF("turtle_copy_token failed"); + return LANGTAG; } + +\#[^\r\n]*(\r\n|\r|\n) { /* # comment */ + turtle_parser->lineno++; + } + +\#[^\r\n]* { /* # comment on the last line with no terminating newline */ + } + +. 
{ if(*yytext == EOF) + return EOF; + + turtle_syntax_error(rdf_parser, "syntax error at '%c'", *yytext); + yyterminate(); + } + +%% + /* user code */ + +int +yywrap (yyscan_t yyscanner) { + return 1; +} + + +static unsigned char * +turtle_copy_token(unsigned char *text, size_t len) +{ + unsigned char *s; + if(!len) + len = strlen((const char*)text); + s = RAPTOR_MALLOC(unsigned char*, len + 1); + if(s) { + memcpy(s, text, len); + s[len] = '\0'; + } + return s; +} + + +static unsigned char * +turtle_copy_string_token(raptor_parser* rdf_parser, + unsigned char *string, size_t len, int delim) +{ + raptor_stringbuffer* sb = NULL; + int rc; + + if(len) { + sb = raptor_new_stringbuffer(); + if(!sb) + return NULL; + + rc = raptor_stringbuffer_append_turtle_string(sb, string, len, delim, + (raptor_simple_message_handler)turtle_lexer_syntax_error, + rdf_parser, 0); + if(rc) { + raptor_free_stringbuffer(sb); + return NULL; + } + + len = raptor_stringbuffer_length(sb); + } + + string = RAPTOR_MALLOC(unsigned char*, len + 1); + if(string) { + if(sb) + raptor_stringbuffer_copy_to_string(sb, string, len+1); + string[len]='\0'; + } + + if(sb) + raptor_free_stringbuffer(sb); + + return string; +} + + +void +turtle_lexer_syntax_error(void* ctx, const char *message, ...) +{ + raptor_parser* rdf_parser = (raptor_parser *)ctx; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + va_list arguments; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + va_start(arguments, message); + raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser), + RAPTOR_LOG_LEVEL_ERROR, message, arguments); + + va_end(arguments); +} + + +/* + * turtle_lexer_error: + * @yyscanner: scanner object + * @level: log level RAPTOR_LOG_LEVEL_FATAL otherwise error + * @message: erro message + * + * INTERNAL - replacement for the generated error handler. 
+ */ +static void turtle_lexer_error(yyscan_t yyscanner, + raptor_log_level level, + yyconst char *message, ...) +{ + raptor_parser *rdf_parser = NULL; + va_list arguments; + + va_start(arguments, message); + + if(yyscanner) + rdf_parser = (raptor_parser*)turtle_lexer_get_extra(yyscanner); + + /* This handles NULL rdf_parser properly */ + raptor_parser_log_error_varargs(rdf_parser, level, message, arguments); + + va_end(arguments); +} + + +/* Define LEXER_ALLOC_TRACKING to enable allocated memory tracking + * - fixes lexer memory leak when ensure_buffer_stack fails + */ + +#ifdef LEXER_ALLOC_TRACKING +typedef struct { + /* Number of void* slots allocated */ + int lexer_allocs_size; + /* Allocted void* slots follow in memory after this header */ +} lexer_alloc_tracker_header; + +/* Initial alloc tracker slot array size - 2 seems to be enough for almost all cases */ +static const int initial_lexer_allocs_size = 2; +#endif + +/* + * turtle_lexer_cleanup: + * @yyscanner: + * + * INTERNAL - Clean up unfreed lexer allocs if LEXER_ALLOC_TRACKING is enabled. + */ +static void turtle_lexer_cleanup(yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + + if(!yyscanner) + return; + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + return; + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + return; + lexer_allocs = (void**)&tracker[1]; + + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i]) + free(lexer_allocs[i]); + lexer_allocs[i] = NULL; + } + free(rdf_parser->lexer_user_data); + rdf_parser->lexer_user_data = NULL; +#endif +} + + +/* + * turtle_lexer_alloc: + * @size + * @yyscanner + * + * INTERNAL - alloc replacement. + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. 
+ */ +void *turtle_lexer_alloc(yy_size_t size, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + void *ptr; + + /* yyscanner not initialized -> probably initializing yyscanner itself + * -> just malloc without tracking + */ + if(!yyscanner) + return malloc(size); + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + YY_FATAL_ERROR("lexer_alloc: yyscanner extra not initialized"); + + /* try to allocate tracker if it does not exist */ + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) { + /* allocate tracker header + array of void* slots */ + tracker = (lexer_alloc_tracker_header*)calloc(1, sizeof(lexer_alloc_tracker_header)+initial_lexer_allocs_size*sizeof(void*)); + if(!tracker) + YY_FATAL_ERROR("lexer_alloc: cannot allocate tracker"); + tracker->lexer_allocs_size = initial_lexer_allocs_size; + rdf_parser->lexer_user_data = (void *)tracker; + } + lexer_allocs = (void**)&tracker[1]; + + /* allocate memory */ + ptr = malloc(size); + + /* find a free slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(!lexer_allocs[i]) { + lexer_allocs[i] = ptr; + break; + } + } + + /* no free slots -> grow tracker slot array */ + if(i>=tracker->lexer_allocs_size) { + int j; + void **dest; + tracker = (lexer_alloc_tracker_header*)calloc(1, sizeof(lexer_alloc_tracker_header)+i*2*sizeof(void*)); + if(!tracker) { + if(ptr) + free(ptr); + YY_FATAL_ERROR("lexer_alloc: cannot grow tracker"); + } + tracker->lexer_allocs_size = i*2; + + /* copy data from old tracker */ + dest = (void**)&tracker[1]; + for(j = 0; j < i; ++j) { + dest[j] = lexer_allocs[j]; + } + + /* set new item to first free slot */ + dest[j] = ptr; + + /* free old tracker and replace with new one */ + free(rdf_parser->lexer_user_data); + rdf_parser->lexer_user_data = tracker; + } + + return ptr; +#else + return malloc(size); +#endif 
+} + + +/* + * turtle_lexer_realloc: + * + * INTERNAL - realloc replacement + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. + */ +void *turtle_lexer_realloc(void *ptr, yy_size_t size, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + void *newptr; + + if(!yyscanner) + YY_FATAL_ERROR("lexer_realloc: yyscanner not initialized"); + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + YY_FATAL_ERROR("lexer_realloc: yyscanner extra not initialized"); + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + YY_FATAL_ERROR("lexer_realloc: no alloc tracker"); + lexer_allocs = (void**)&tracker[1]; + + /* find the old slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i] == ptr) + break; + } + + /* no old slot -> error */ + if(i>=tracker->lexer_allocs_size) + YY_FATAL_ERROR("lexer_realloc: cell not in tracker"); + + /* realloc */ + newptr = realloc((char*)ptr, size); + + /* replace entry in tracker */ + lexer_allocs[i] = newptr; + + return newptr; +#else + return realloc((char*)ptr, size); +#endif +} + + +/* + * turtle_lexer_free: + * + * INTERNAL - free replacement. + * Checks for NULL pointer to be freed unlike the default lexer free function. + * Tracks allocated cells if LEXER_ALLOC_TRACKING is enabled. 
+ */ +void turtle_lexer_free(void *ptr, yyscan_t yyscanner) +{ +#ifdef LEXER_ALLOC_TRACKING + raptor_parser *rdf_parser; + lexer_alloc_tracker_header *tracker; + void **lexer_allocs; + int i; + + /* do not free NULL */ + if(!ptr) + return; + + /* free ptr even if we would encounter an error */ + free(ptr); + + /* yyscanner is allocated with turtle_lexer_alloc() but it's never stored in the tracker + * - we need yyscanner to access the tracker */ + if(!yyscanner || ptr == yyscanner) + return; + + rdf_parser = (raptor_parser *)turtle_lexer_get_extra(yyscanner); + if(!rdf_parser) + return; + + tracker = (lexer_alloc_tracker_header *)rdf_parser->lexer_user_data; + if(!tracker) + return; + lexer_allocs = (void**)&tracker[1]; + + /* find the slot for ptr */ + for(i = 0; i < tracker->lexer_allocs_size; ++i) { + if(lexer_allocs[i] == ptr) + break; + } + + /* no slot -> error */ + if(i>=tracker->lexer_allocs_size) + YY_FATAL_ERROR("lexer_free: cell not in tracker"); + + /* remove entry from tracker */ + lexer_allocs[i] = NULL; +#else + if(ptr) + free(ptr); +#endif +} + + +#ifdef RAPTOR_DEBUG + +const char * +turtle_token_print(raptor_world* world, int token, YYSTYPE *lval) +{ + #define TTP_DEBUG_BUFFER_SIZE 2048 + static char buffer[TTP_DEBUG_BUFFER_SIZE]; + + if(!token) + return "<<EOF>>"; + + switch(token) { + case PREFIX: + return "PREFIX"; + + case BASE: + return "BASE"; + + case A: + return "A"; + + case DOT: + return "DOT"; + + case COMMA: + return "COMMA"; + + case SEMICOLON: + return "SEMICOLON"; + + case LEFT_SQUARE: + return "LEFT_SQUARE"; + + case RIGHT_SQUARE: + return "RIGHT_SQUARE"; + + case HAT: + return "HAT"; + + case STRING_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "STRING_LITERAL(%s)", + lval->string); + return buffer; + + case URI_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "URI_LITERAL(%s)", + (lval->uri ? 
(char*)raptor_uri_as_string(lval->uri) : "")); + return buffer; + + case BLANK_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "BLANK_LITERAL(%s)", + lval->string); + return buffer; + + case QNAME_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "QNAME_LITERAL(%s)", + (lval->uri ? (char*)raptor_uri_as_string(lval->uri) : "")); + return buffer; + + case INTEGER_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "INTEGER_LITERAL(%s)", + lval->string); + return buffer; + + case FLOATING_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "FLOATING_LITERAL(%s)", + lval->string); + return buffer; + + case IDENTIFIER: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "IDENTIFIER(%s)", + (lval->string ? (char*)lval->string : "")); + return buffer; + + case LANGTAG: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "LANGTAG(%s)", + (lval->string ? (char*)lval->string : "")); + return buffer; + + case DECIMAL_LITERAL: + snprintf(buffer, TTP_DEBUG_BUFFER_SIZE, "DECIMAL_LITERAL(%s)", + lval->string); + return buffer; + + case ERROR_TOKEN: + return "ERROR"; + + case LEFT_CURLY: + return "{"; + + case RIGHT_CURLY: + return "}"; + + case GRAPH_NAME_LEFT_CURLY: + return "GRAPH_NAME {"; + + default: + RAPTOR_DEBUG2("UNKNOWN token %d - add a new case\n", token); + return "(UNKNOWN)"; + } +} +#endif + + + +void +turtle_token_free(raptor_world* world, int token, YYSTYPE *lval) +{ + if(!token) + return; + + switch(token) { + case STRING_LITERAL: + case BLANK_LITERAL: + case IDENTIFIER: + if(lval->string) + RAPTOR_FREE(char*, lval->string); + break; + + case URI_LITERAL: + case QNAME_LITERAL: + if(lval->uri) + raptor_free_uri(lval->uri); + break; + default: + break; + } +} + + +#ifdef STANDALONE + +#define FILE_READ_BUF_SIZE 4096 + +int +main(int argc, char *argv[]) +{ + char *turtle_string = NULL; + raptor_parser rdf_parser; + raptor_turtle_parser turtle_parser; + yyscan_t scanner; + int token = EOF; + YYSTYPE lval; + const unsigned char *uri_string; + const char *filename = NULL; + char *buf = 
NULL; + size_t len; + raptor_world* world; + FILE *fh; + + world = raptor_new_world(); + + if(argc > 1) { + filename = argv[1]; + fh = fopen(filename, "r"); + if(!fh) { + fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename, + strerror(errno)); + exit(1); + } + } else { + filename="<stdin>"; + fh = (FILE*)stdin; + } + + turtle_string = RAPTOR_CALLOC(char*, FILE_READ_BUF_SIZE, 1); + fread(turtle_string, FILE_READ_BUF_SIZE, 1, fh); + fclose(fh); + + memset(&rdf_parser, 0, sizeof(rdf_parser)); + memset(&turtle_parser, 0, sizeof(turtle_parser)); + + rdf_parser.world = world; + + /* discard namespace errors - caused by not interpreting @prefix + * and hence causing failed qname construction + */ + raptor_namespaces_init(rdf_parser.world, &turtle_parser.namespaces, 0); + + yylex_init(&turtle_parser.scanner); + scanner = turtle_parser.scanner; + + len = strlen(RAPTOR_GOOD_CAST(const char*, turtle_string)); + buf = RAPTOR_MALLOC(char*, len + 3); + memcpy(buf, turtle_string, len); + buf[len] = ' '; + buf[len + 1] = buf[len + 2] = '\0'; /* YY_END_OF_BUFFER_CHAR; */ + (void)turtle_lexer__scan_buffer(buf, len + 3, scanner); + + turtle_lexer_set_extra(&rdf_parser, scanner); + + /* Initialise enough of the parser and locator to get error messages */ + rdf_parser.context = &turtle_parser; + turtle_parser.lineno = 1; + rdf_parser.locator.file = filename; + rdf_parser.locator.column = -1; + + uri_string = raptor_uri_filename_to_uri_string(filename); + rdf_parser.base_uri = raptor_new_uri(world, uri_string); + RAPTOR_FREE(char*, uri_string); + + while(1) { + memset(&lval, 0, sizeof(YYSTYPE)); + if(turtle_lexer_get_text(scanner) != NULL) + printf("yyinput '%s'\n", turtle_lexer_get_text(scanner)); + token = yylex(&lval, scanner); +#ifdef RAPTOR_DEBUG + printf("token %s\n", turtle_token_print(world, token, &lval)); +#else + printf("token %d\n", token); +#endif + turtle_token_free(world, token, &lval); + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } + + 
if(buf) + RAPTOR_FREE(char*, buf); + + yylex_destroy(scanner); + + raptor_namespaces_clear(&turtle_parser.namespaces); + + raptor_free_uri(rdf_parser.base_uri); + + RAPTOR_FREE(char*, turtle_string); + + raptor_free_world(world); + + + if(token == ERROR_TOKEN) + return 1; + + return 0; +} +#endif diff --git a/src/turtle_parser.c b/src/turtle_parser.c new file mode 100644 index 0000000..bb2fbba --- /dev/null +++ b/src/turtle_parser.c @@ -0,0 +1,4037 @@ +/* A Bison parser, made by GNU Bison 3.8.2. */ + +/* Bison implementation for Yacc-like parsers in C + + Copyright (C) 1984, 1989-1990, 2000-2015, 2018-2021 Free Software Foundation, + Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. 
*/ + +/* C LALR(1) parser skeleton written by Richard Stallman, by + simplifying the original so-called "semantic" parser. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +/* All symbols defined below should begin with yy or YY, to avoid + infringing on user name space. This should be done even for local + variables, as they might otherwise be expanded by user macros. + There are some unavoidable exceptions within include files to + define necessary library symbols; they are noted "INFRINGES ON + USER NAME SPACE" below. */ + +/* Identify Bison output, and Bison version. */ +#define YYBISON 30802 + +/* Bison version string. */ +#define YYBISON_VERSION "3.8.2" + +/* Skeleton name. */ +#define YYSKELETON_NAME "yacc.c" + +/* Pure parsers. */ +#define YYPURE 2 + +/* Push parsers. */ +#define YYPUSH 0 + +/* Pull parsers. */ +#define YYPULL 1 + +/* Substitute the type names. */ +#define YYSTYPE TURTLE_PARSER_STYPE +/* Substitute the variable and function names. */ +#define yyparse turtle_parser_parse +#define yylex turtle_parser_lex +#define yyerror turtle_parser_error +#define yydebug turtle_parser_debug +#define yynerrs turtle_parser_nerrs + +/* First part of user prologue. 
*/ +#line 31 "./turtle_parser.y" + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> + +#define YY_NO_UNISTD_H 1 +#undef yylex +#include <turtle_lexer.h> + +#include <turtle_common.h> + + +/* Set RAPTOR_DEBUG to 3 for super verbose parsing - watching the shift/reduces */ +#if 0 +#undef RAPTOR_DEBUG +#define RAPTOR_DEBUG 3 +#endif + + +/* Fail with an debug error message if RAPTOR_DEBUG > 1 */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +#define YYERROR_MSG(msg) do { fputs("** YYERROR ", RAPTOR_DEBUG_FH); fputs(msg, RAPTOR_DEBUG_FH); fputc('\n', RAPTOR_DEBUG_FH); YYERROR; } while(0) +#else +#define YYERROR_MSG(ignore) YYERROR +#endif +#define YYERR_MSG_GOTO(label,msg) do { errmsg = msg; goto label; } while(0) + +/* Slow down the grammar operation and watch it work */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 +#undef YYDEBUG +#define YYDEBUG 1 +#endif + +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, + TURTLE_PARSER_STYPE *lval); +#endif + + +/* the lexer does not seem to track this */ +#undef RAPTOR_TURTLE_USE_ERROR_COLUMNS + +/* set api.push-pull to "push" if this is defined */ +#undef TURTLE_PUSH_PARSE + +/* Prototypes */ +int turtle_parser_error(raptor_parser* rdf_parser, void* scanner, const char *msg); +static void turtle_parser_error_simple(void* user_data, const char *msg, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); + +/* Make lex/yacc interface as small as possible */ +#undef yylex +#define yylex turtle_lexer_lex + +/* Prototypes for local functions */ +static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *triple); + +static void raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *triple); + +static void raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *triple); + + +#line 155 "turtle_parser.c" + +# ifndef YY_CAST +# ifdef __cplusplus +# define YY_CAST(Type, Val) static_cast<Type> (Val) +# define YY_REINTERPRET_CAST(Type, Val) reinterpret_cast<Type> (Val) +# else +# define YY_CAST(Type, Val) ((Type) (Val)) +# define YY_REINTERPRET_CAST(Type, Val) ((Type) (Val)) +# endif +# endif +# ifndef YY_NULLPTR +# if defined __cplusplus +# if 201103L <= __cplusplus +# define YY_NULLPTR nullptr +# else +# define YY_NULLPTR 0 +# endif +# else +# define YY_NULLPTR ((void*)0) +# endif +# endif + +#include <turtle_parser.h> +/* Symbol kind. */ +enum yysymbol_kind_t +{ + YYSYMBOL_YYEMPTY = -2, + YYSYMBOL_YYEOF = 0, /* "end of file" */ + YYSYMBOL_YYerror = 1, /* error */ + YYSYMBOL_YYUNDEF = 2, /* "invalid token" */ + YYSYMBOL_A = 3, /* "a" */ + YYSYMBOL_HAT = 4, /* "^" */ + YYSYMBOL_DOT = 5, /* "." 
*/ + YYSYMBOL_COMMA = 6, /* "," */ + YYSYMBOL_SEMICOLON = 7, /* ";" */ + YYSYMBOL_LEFT_SQUARE = 8, /* "[" */ + YYSYMBOL_RIGHT_SQUARE = 9, /* "]" */ + YYSYMBOL_LEFT_ROUND = 10, /* "(" */ + YYSYMBOL_RIGHT_ROUND = 11, /* ")" */ + YYSYMBOL_LEFT_CURLY = 12, /* "{" */ + YYSYMBOL_RIGHT_CURLY = 13, /* "}" */ + YYSYMBOL_TRUE_TOKEN = 14, /* "true" */ + YYSYMBOL_FALSE_TOKEN = 15, /* "false" */ + YYSYMBOL_PREFIX = 16, /* "@prefix" */ + YYSYMBOL_BASE = 17, /* "@base" */ + YYSYMBOL_SPARQL_PREFIX = 18, /* "PREFIX" */ + YYSYMBOL_SPARQL_BASE = 19, /* "BASE" */ + YYSYMBOL_STRING_LITERAL = 20, /* "string literal" */ + YYSYMBOL_IDENTIFIER = 21, /* "identifier" */ + YYSYMBOL_LANGTAG = 22, /* "langtag" */ + YYSYMBOL_INTEGER_LITERAL = 23, /* "integer literal" */ + YYSYMBOL_FLOATING_LITERAL = 24, /* "floating point literal" */ + YYSYMBOL_DECIMAL_LITERAL = 25, /* "decimal literal" */ + YYSYMBOL_BLANK_LITERAL = 26, /* "blank node" */ + YYSYMBOL_URI_LITERAL = 27, /* "URI literal" */ + YYSYMBOL_GRAPH_NAME_LEFT_CURLY = 28, /* "Graph URI literal {" */ + YYSYMBOL_QNAME_LITERAL = 29, /* "QName" */ + YYSYMBOL_ERROR_TOKEN = 30, /* ERROR_TOKEN */ + YYSYMBOL_YYACCEPT = 31, /* $accept */ + YYSYMBOL_Document = 32, /* Document */ + YYSYMBOL_graph = 33, /* graph */ + YYSYMBOL_34_1 = 34, /* $@1 */ + YYSYMBOL_35_2 = 35, /* $@2 */ + YYSYMBOL_graphBody = 36, /* graphBody */ + YYSYMBOL_triplesList = 37, /* triplesList */ + YYSYMBOL_dotTriplesList = 38, /* dotTriplesList */ + YYSYMBOL_statementList = 39, /* statementList */ + YYSYMBOL_statement = 40, /* statement */ + YYSYMBOL_triples = 41, /* triples */ + YYSYMBOL_objectList = 42, /* objectList */ + YYSYMBOL_itemList = 43, /* itemList */ + YYSYMBOL_verb = 44, /* verb */ + YYSYMBOL_predicateObjectList = 45, /* predicateObjectList */ + YYSYMBOL_directive = 46, /* directive */ + YYSYMBOL_prefix = 47, /* prefix */ + YYSYMBOL_base = 48, /* base */ + YYSYMBOL_subject = 49, /* subject */ + YYSYMBOL_predicate = 50, /* predicate */ + YYSYMBOL_object = 51, /* object */ 
+ YYSYMBOL_literal = 52, /* literal */ + YYSYMBOL_resource = 53, /* resource */ + YYSYMBOL_predicateObjectListOpt = 54, /* predicateObjectListOpt */ + YYSYMBOL_blankNode = 55, /* blankNode */ + YYSYMBOL_blankNodePropertyList = 56, /* blankNodePropertyList */ + YYSYMBOL_collection = 57 /* collection */ +}; +typedef enum yysymbol_kind_t yysymbol_kind_t; + + + + +#ifdef short +# undef short +#endif + +/* On compilers that do not define __PTRDIFF_MAX__ etc., make sure + <limits.h> and (if available) <stdint.h> are included + so that the code can choose integer types of a good width. */ + +#ifndef __PTRDIFF_MAX__ +# include <limits.h> /* INFRINGES ON USER NAME SPACE */ +# if defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stdint.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_STDINT_H +# endif +#endif + +/* Narrow types that promote to a signed type and that can represent a + signed or unsigned integer of at least N bits. In tables they can + save space and decrease cache pressure. Promoting to a signed type + helps avoid bugs in integer arithmetic. */ + +#ifdef __INT_LEAST8_MAX__ +typedef __INT_LEAST8_TYPE__ yytype_int8; +#elif defined YY_STDINT_H +typedef int_least8_t yytype_int8; +#else +typedef signed char yytype_int8; +#endif + +#ifdef __INT_LEAST16_MAX__ +typedef __INT_LEAST16_TYPE__ yytype_int16; +#elif defined YY_STDINT_H +typedef int_least16_t yytype_int16; +#else +typedef short yytype_int16; +#endif + +/* Work around bug in HP-UX 11.23, which defines these macros + incorrectly for preprocessor constants. This workaround can likely + be removed in 2023, as HPE has promised support for HP-UX 11.23 + (aka HP-UX 11i v2) only through the end of 2022; see Table 2 of + <https://h20195.www2.hpe.com/V2/getpdf.aspx/4AA4-7673ENW.pdf>. 
*/ +#ifdef __hpux +# undef UINT_LEAST8_MAX +# undef UINT_LEAST16_MAX +# define UINT_LEAST8_MAX 255 +# define UINT_LEAST16_MAX 65535 +#endif + +#if defined __UINT_LEAST8_MAX__ && __UINT_LEAST8_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST8_TYPE__ yytype_uint8; +#elif (!defined __UINT_LEAST8_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST8_MAX <= INT_MAX) +typedef uint_least8_t yytype_uint8; +#elif !defined __UINT_LEAST8_MAX__ && UCHAR_MAX <= INT_MAX +typedef unsigned char yytype_uint8; +#else +typedef short yytype_uint8; +#endif + +#if defined __UINT_LEAST16_MAX__ && __UINT_LEAST16_MAX__ <= __INT_MAX__ +typedef __UINT_LEAST16_TYPE__ yytype_uint16; +#elif (!defined __UINT_LEAST16_MAX__ && defined YY_STDINT_H \ + && UINT_LEAST16_MAX <= INT_MAX) +typedef uint_least16_t yytype_uint16; +#elif !defined __UINT_LEAST16_MAX__ && USHRT_MAX <= INT_MAX +typedef unsigned short yytype_uint16; +#else +typedef int yytype_uint16; +#endif + +#ifndef YYPTRDIFF_T +# if defined __PTRDIFF_TYPE__ && defined __PTRDIFF_MAX__ +# define YYPTRDIFF_T __PTRDIFF_TYPE__ +# define YYPTRDIFF_MAXIMUM __PTRDIFF_MAX__ +# elif defined PTRDIFF_MAX +# ifndef ptrdiff_t +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# endif +# define YYPTRDIFF_T ptrdiff_t +# define YYPTRDIFF_MAXIMUM PTRDIFF_MAX +# else +# define YYPTRDIFF_T long +# define YYPTRDIFF_MAXIMUM LONG_MAX +# endif +#endif + +#ifndef YYSIZE_T +# ifdef __SIZE_TYPE__ +# define YYSIZE_T __SIZE_TYPE__ +# elif defined size_t +# define YYSIZE_T size_t +# elif defined __STDC_VERSION__ && 199901 <= __STDC_VERSION__ +# include <stddef.h> /* INFRINGES ON USER NAME SPACE */ +# define YYSIZE_T size_t +# else +# define YYSIZE_T unsigned +# endif +#endif + +#define YYSIZE_MAXIMUM \ + YY_CAST (YYPTRDIFF_T, \ + (YYPTRDIFF_MAXIMUM < YY_CAST (YYSIZE_T, -1) \ + ? YYPTRDIFF_MAXIMUM \ + : YY_CAST (YYSIZE_T, -1))) + +#define YYSIZEOF(X) YY_CAST (YYPTRDIFF_T, sizeof (X)) + + +/* Stored state numbers (used for stacks). 
*/ +typedef yytype_int8 yy_state_t; + +/* State numbers in computations. */ +typedef int yy_state_fast_t; + +#ifndef YY_ +# if defined YYENABLE_NLS && YYENABLE_NLS +# if ENABLE_NLS +# include <libintl.h> /* INFRINGES ON USER NAME SPACE */ +# define YY_(Msgid) dgettext ("bison-runtime", Msgid) +# endif +# endif +# ifndef YY_ +# define YY_(Msgid) Msgid +# endif +#endif + + +#ifndef YY_ATTRIBUTE_PURE +# if defined __GNUC__ && 2 < __GNUC__ + (96 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_PURE __attribute__ ((__pure__)) +# else +# define YY_ATTRIBUTE_PURE +# endif +#endif + +#ifndef YY_ATTRIBUTE_UNUSED +# if defined __GNUC__ && 2 < __GNUC__ + (7 <= __GNUC_MINOR__) +# define YY_ATTRIBUTE_UNUSED __attribute__ ((__unused__)) +# else +# define YY_ATTRIBUTE_UNUSED +# endif +#endif + +/* Suppress unused-variable warnings by "using" E. */ +#if ! defined lint || defined __GNUC__ +# define YY_USE(E) ((void) (E)) +#else +# define YY_USE(E) /* empty */ +#endif + +/* Suppress an incorrect diagnostic about yylval being uninitialized. */ +#if defined __GNUC__ && ! defined __ICC && 406 <= __GNUC__ * 100 + __GNUC_MINOR__ +# if __GNUC__ * 100 + __GNUC_MINOR__ < 407 +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") +# else +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuninitialized\"") \ + _Pragma ("GCC diagnostic ignored \"-Wmaybe-uninitialized\"") +# endif +# define YY_IGNORE_MAYBE_UNINITIALIZED_END \ + _Pragma ("GCC diagnostic pop") +#else +# define YY_INITIAL_VALUE(Value) Value +#endif +#ifndef YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +# define YY_IGNORE_MAYBE_UNINITIALIZED_END +#endif +#ifndef YY_INITIAL_VALUE +# define YY_INITIAL_VALUE(Value) /* Nothing. */ +#endif + +#if defined __cplusplus && defined __GNUC__ && ! 
defined __ICC && 6 <= __GNUC__ +# define YY_IGNORE_USELESS_CAST_BEGIN \ + _Pragma ("GCC diagnostic push") \ + _Pragma ("GCC diagnostic ignored \"-Wuseless-cast\"") +# define YY_IGNORE_USELESS_CAST_END \ + _Pragma ("GCC diagnostic pop") +#endif +#ifndef YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_BEGIN +# define YY_IGNORE_USELESS_CAST_END +#endif + + +#define YY_ASSERT(E) ((void) (0 && (E))) + +#if 1 + +/* The parser invokes alloca or malloc; define the necessary symbols. */ + +# ifdef YYSTACK_USE_ALLOCA +# if YYSTACK_USE_ALLOCA +# ifdef __GNUC__ +# define YYSTACK_ALLOC __builtin_alloca +# elif defined __BUILTIN_VA_ARG_INCR +# include <alloca.h> /* INFRINGES ON USER NAME SPACE */ +# elif defined _AIX +# define YYSTACK_ALLOC __alloca +# elif defined _MSC_VER +# include <malloc.h> /* INFRINGES ON USER NAME SPACE */ +# define alloca _alloca +# else +# define YYSTACK_ALLOC alloca +# if ! defined _ALLOCA_H && ! defined EXIT_SUCCESS +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ + /* Use EXIT_SUCCESS as a witness for stdlib.h. */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# endif +# endif +# endif + +# ifdef YYSTACK_ALLOC + /* Pacify GCC's 'empty if-body' warning. */ +# define YYSTACK_FREE(Ptr) do { /* empty */; } while (0) +# ifndef YYSTACK_ALLOC_MAXIMUM + /* The OS might guarantee only one guard page at the bottom of the stack, + and a page size can be as small as 4096 bytes. So we cannot safely + invoke alloca (N) if N exceeds 4096. Use a slightly smaller number + to allow for a few compiler-allocated temporary stack slots. */ +# define YYSTACK_ALLOC_MAXIMUM 4032 /* reasonable circa 2006 */ +# endif +# else +# define YYSTACK_ALLOC YYMALLOC +# define YYSTACK_FREE YYFREE +# ifndef YYSTACK_ALLOC_MAXIMUM +# define YYSTACK_ALLOC_MAXIMUM YYSIZE_MAXIMUM +# endif +# if (defined __cplusplus && ! defined EXIT_SUCCESS \ + && ! 
((defined YYMALLOC || defined malloc) \ + && (defined YYFREE || defined free))) +# include <stdlib.h> /* INFRINGES ON USER NAME SPACE */ +# ifndef EXIT_SUCCESS +# define EXIT_SUCCESS 0 +# endif +# endif +# ifndef YYMALLOC +# define YYMALLOC malloc +# if ! defined malloc && ! defined EXIT_SUCCESS +# endif +# endif +# ifndef YYFREE +# define YYFREE free +# if ! defined free && ! defined EXIT_SUCCESS +# endif +# endif +# endif +#endif /* 1 */ + +#if (! defined yyoverflow \ + && (! defined __cplusplus \ + || (defined TURTLE_PARSER_STYPE_IS_TRIVIAL && TURTLE_PARSER_STYPE_IS_TRIVIAL))) + +/* A type that is properly aligned for any stack member. */ +union yyalloc +{ + yy_state_t yyss_alloc; + YYSTYPE yyvs_alloc; +}; + +/* The size of the maximum gap between one aligned stack and the next. */ +# define YYSTACK_GAP_MAXIMUM (YYSIZEOF (union yyalloc) - 1) + +/* The size of an array large to enough to hold all stacks, each with + N elements. */ +# define YYSTACK_BYTES(N) \ + ((N) * (YYSIZEOF (yy_state_t) + YYSIZEOF (YYSTYPE)) \ + + YYSTACK_GAP_MAXIMUM) + +# define YYCOPY_NEEDED 1 + +/* Relocate STACK from its old location to the new one. The + local variables YYSIZE and YYSTACKSIZE give the old and new number of + elements in the stack, and YYPTR gives the new location of the + stack. Advance YYPTR to a properly aligned location for the next + stack. */ +# define YYSTACK_RELOCATE(Stack_alloc, Stack) \ + do \ + { \ + YYPTRDIFF_T yynewbytes; \ + YYCOPY (&yyptr->Stack_alloc, Stack, yysize); \ + Stack = &yyptr->Stack_alloc; \ + yynewbytes = yystacksize * YYSIZEOF (*Stack) + YYSTACK_GAP_MAXIMUM; \ + yyptr += yynewbytes / YYSIZEOF (*yyptr); \ + } \ + while (0) + +#endif + +#if defined YYCOPY_NEEDED && YYCOPY_NEEDED +/* Copy COUNT objects from SRC to DST. The source and destination do + not overlap. 
*/ +# ifndef YYCOPY +# if defined __GNUC__ && 1 < __GNUC__ +# define YYCOPY(Dst, Src, Count) \ + __builtin_memcpy (Dst, Src, YY_CAST (YYSIZE_T, (Count)) * sizeof (*(Src))) +# else +# define YYCOPY(Dst, Src, Count) \ + do \ + { \ + YYPTRDIFF_T yyi; \ + for (yyi = 0; yyi < (Count); yyi++) \ + (Dst)[yyi] = (Src)[yyi]; \ + } \ + while (0) +# endif +# endif +#endif /* !YYCOPY_NEEDED */ + +/* YYFINAL -- State number of the termination state. */ +#define YYFINAL 3 +/* YYLAST -- Last index in YYTABLE. */ +#define YYLAST 147 + +/* YYNTOKENS -- Number of terminals. */ +#define YYNTOKENS 31 +/* YYNNTS -- Number of nonterminals. */ +#define YYNNTS 27 +/* YYNRULES -- Number of rules. */ +#define YYNRULES 64 +/* YYNSTATES -- Number of states. */ +#define YYNSTATES 88 + +/* YYMAXUTOK -- Last valid token kind. */ +#define YYMAXUTOK 285 + + +/* YYTRANSLATE(TOKEN-NUM) -- Symbol number corresponding to TOKEN-NUM + as returned by yylex, with out-of-bounds checking. */ +#define YYTRANSLATE(YYX) \ + (0 <= (YYX) && (YYX) <= YYMAXUTOK \ + ? YY_CAST (yysymbol_kind_t, yytranslate[YYX]) \ + : YYSYMBOL_YYUNDEF) + +/* YYTRANSLATE[TOKEN-NUM] -- Symbol number corresponding to TOKEN-NUM + as returned by yylex. 
*/ +static const yytype_int8 yytranslate[] = +{ + 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 2, 2, 2, 2, 2, 1, 2, 3, 4, + 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, + 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, + 25, 26, 27, 28, 29, 30 +}; + +#if TURTLE_PARSER_DEBUG +/* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ +static const yytype_int16 yyrline[] = +{ + 0, 218, 218, 223, 222, 255, 254, 280, 281, 284, + 285, 288, 300, 314, 324, 325, 328, 329, 330, 363, + 399, 435, 442, 482, 526, 566, 610, 620, 633, 694, + 726, 737, 737, 740, 774, 811, 819, 829, 833, 837, + 844, 851, 855, 859, 863, 867, 880, 892, 915, 938, + 954, 969, 980, 993, 1006, 1023, 1036, 1052, 1066, 1083, + 1087, 1094, 1112, 1166, 1268 +}; +#endif + +/** Accessing symbol of state STATE. */ +#define YY_ACCESSING_SYMBOL(State) YY_CAST (yysymbol_kind_t, yystos[State]) + +#if 1 +/* The user-facing name of the symbol whose (internal) number is + YYSYMBOL. No bounds checking. */ +static const char *yysymbol_name (yysymbol_kind_t yysymbol) YY_ATTRIBUTE_UNUSED; + +/* YYTNAME[SYMBOL-NUM] -- String name of the symbol SYMBOL-NUM. + First, the terminals, then, starting at YYNTOKENS, nonterminals. 
*/ +static const char *const yytname[] = +{ + "\"end of file\"", "error", "\"invalid token\"", "\"a\"", "\"^\"", + "\".\"", "\",\"", "\";\"", "\"[\"", "\"]\"", "\"(\"", "\")\"", "\"{\"", + "\"}\"", "\"true\"", "\"false\"", "\"@prefix\"", "\"@base\"", + "\"PREFIX\"", "\"BASE\"", "\"string literal\"", "\"identifier\"", + "\"langtag\"", "\"integer literal\"", "\"floating point literal\"", + "\"decimal literal\"", "\"blank node\"", "\"URI literal\"", + "\"Graph URI literal {\"", "\"QName\"", "ERROR_TOKEN", "$accept", + "Document", "graph", "$@1", "$@2", "graphBody", "triplesList", + "dotTriplesList", "statementList", "statement", "triples", "objectList", + "itemList", "verb", "predicateObjectList", "directive", "prefix", "base", + "subject", "predicate", "object", "literal", "resource", + "predicateObjectListOpt", "blankNode", "blankNodePropertyList", + "collection", YY_NULLPTR +}; + +static const char * +yysymbol_name (yysymbol_kind_t yysymbol) +{ + return yytname[yysymbol]; +} +#endif + +#define YYPACT_NINF (-16) + +#define yypact_value_is_default(Yyn) \ + ((Yyn) == YYPACT_NINF) + +#define YYTABLE_NINF (-11) + +#define yytable_value_is_error(Yyn) \ + 0 + +/* YYPACT[STATE-NUM] -- Index in YYTABLE of the portion describing + STATE-NUM. */ +static const yytype_int8 yypact[] = +{ + -16, 8, 9, -16, 11, 27, 78, -16, 12, 13, + 18, 14, -16, -16, -16, -16, -16, -16, 38, -16, + -16, -16, 27, -16, -16, 27, -16, -16, -16, 118, + 37, -16, -16, 36, -16, -16, -16, 7, -16, -16, + -16, 98, -16, -16, -16, -16, -16, -16, 5, 22, + 53, 33, -16, 5, -16, 37, -16, 55, -16, 27, + -16, -15, 59, -16, -16, 11, 49, -16, 60, -16, + 62, -16, -16, 51, 118, 118, -16, -16, -5, -16, + 58, -16, -16, -16, 55, -16, -16, -16 +}; + +/* YYDEFACT[STATE-NUM] -- Default reduction number in state STATE-NUM. + Performed when YYTABLE does not specify something else to do. Zero + means the default is an error. 
*/ +static const yytype_int8 yydefact[] = +{ + 15, 0, 0, 1, 14, 60, 0, 5, 0, 0, + 0, 0, 61, 57, 3, 58, 17, 13, 0, 16, + 31, 32, 0, 37, 38, 60, 39, 21, 27, 0, + 59, 26, 40, 0, 64, 55, 56, 51, 52, 53, + 54, 0, 25, 45, 41, 42, 44, 43, 0, 0, + 0, 0, 36, 0, 18, 19, 20, 29, 23, 30, + 62, 0, 46, 63, 24, 0, 0, 7, 9, 11, + 0, 35, 34, 0, 0, 0, 49, 50, 0, 6, + 0, 33, 4, 22, 28, 47, 48, 12 +}; + +/* YYPGOTO[NTERM-NUM]. */ +static const yytype_int8 yypgoto[] = +{ + -16, -16, -16, -16, -16, 16, -16, -16, -16, -16, + 3, -3, -16, 15, 48, -16, -16, -16, -16, -16, + 1, -16, -2, 52, -1, 0, 2 +}; + +/* YYDEFGOTO[NTERM-NUM]. */ +static const yytype_int8 yydefgoto[] = +{ + 0, 1, 16, 53, 48, 66, 67, 68, 2, 17, + 69, 57, 41, 29, 30, 19, 20, 21, 22, 31, + 58, 43, 44, 33, 45, 46, 47 +}; + +/* YYTABLE[YYPACT[STATE-NUM]] -- What to do in state STATE-NUM. If + positive, shift that token. If negative, reduce the rule whose + number is the opposite. If YYTABLE_NINF, syntax error. */ +static const yytype_int8 yytable[] = +{ + 23, 24, 25, 32, 26, 18, 65, 42, 3, -2, + 4, 61, 76, 5, 77, 6, 27, 5, -8, 6, + 32, 7, 85, 32, 86, 8, 9, 10, 11, 62, + 28, 12, 13, 49, 15, 12, 13, 14, 15, 51, + 50, 52, 64, 54, 59, 60, 23, 24, 25, 70, + 26, 23, 24, 25, 13, 26, 15, 32, 71, 65, + 72, 74, 79, 78, 82, 80, 5, 81, 6, 73, + 55, -10, 84, 0, 75, 83, 0, 56, 23, 24, + 25, 0, 26, 87, 12, 13, 5, 15, 6, 34, + 0, 0, 35, 36, 0, 0, 0, 0, 37, 0, + 0, 38, 39, 40, 12, 13, 5, 15, 6, 63, + 0, 0, 35, 36, 0, 0, 0, 0, 37, 0, + 0, 38, 39, 40, 12, 13, 5, 15, 6, 0, + 0, 0, 35, 36, 0, 0, 0, 0, 37, 0, + 0, 38, 39, 40, 12, 13, 0, 15 +}; + +static const yytype_int8 yycheck[] = +{ + 2, 2, 2, 5, 2, 2, 1, 6, 0, 0, + 1, 4, 27, 8, 29, 10, 5, 8, 13, 10, + 22, 12, 27, 25, 29, 16, 17, 18, 19, 22, + 3, 26, 27, 21, 29, 26, 27, 28, 29, 21, + 27, 27, 41, 5, 7, 9, 48, 48, 48, 27, + 48, 53, 53, 53, 27, 53, 29, 59, 5, 1, + 27, 6, 13, 4, 13, 5, 8, 5, 10, 53, + 22, 13, 75, -1, 59, 74, -1, 25, 80, 80, + 80, -1, 80, 80, 26, 27, 8, 29, 10, 11, + -1, -1, 14, 15, 
-1, -1, -1, -1, 20, -1, + -1, 23, 24, 25, 26, 27, 8, 29, 10, 11, + -1, -1, 14, 15, -1, -1, -1, -1, 20, -1, + -1, 23, 24, 25, 26, 27, 8, 29, 10, -1, + -1, -1, 14, 15, -1, -1, -1, -1, 20, -1, + -1, 23, 24, 25, 26, 27, -1, 29 +}; + +/* YYSTOS[STATE-NUM] -- The symbol kind of the accessing symbol of + state STATE-NUM. */ +static const yytype_int8 yystos[] = +{ + 0, 32, 39, 0, 1, 8, 10, 12, 16, 17, + 18, 19, 26, 27, 28, 29, 33, 40, 41, 46, + 47, 48, 49, 53, 55, 56, 57, 5, 3, 44, + 45, 50, 53, 54, 11, 14, 15, 20, 23, 24, + 25, 43, 51, 52, 53, 55, 56, 57, 35, 21, + 27, 21, 27, 34, 5, 45, 54, 42, 51, 7, + 9, 4, 22, 11, 51, 1, 36, 37, 38, 41, + 27, 5, 27, 36, 6, 44, 27, 29, 4, 13, + 5, 5, 13, 51, 42, 27, 29, 41 +}; + +/* YYR1[RULE-NUM] -- Symbol kind of the left-hand side of rule RULE-NUM. */ +static const yytype_int8 yyr1[] = +{ + 0, 31, 32, 34, 33, 35, 33, 36, 36, 37, + 37, 38, 38, 39, 39, 39, 40, 40, 40, 41, + 41, 41, 42, 42, 43, 43, 44, 44, 45, 45, + 45, 46, 46, 47, 47, 48, 48, 49, 49, 49, + 50, 51, 51, 51, 51, 51, 52, 52, 52, 52, + 52, 52, 52, 52, 52, 52, 52, 53, 53, 54, + 54, 55, 56, 57, 57 +}; + +/* YYR2[RULE-NUM] -- Number of symbols on the right-hand side of rule RULE-NUM. 
*/ +static const yytype_int8 yyr2[] = +{ + 0, 2, 1, 0, 4, 0, 4, 1, 0, 1, + 2, 1, 3, 2, 2, 0, 1, 1, 2, 2, + 2, 2, 3, 1, 2, 1, 1, 1, 4, 2, + 2, 1, 1, 4, 3, 3, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 2, 4, 4, 3, + 3, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 0, 1, 3, 3, 2 +}; + + +enum { YYENOMEM = -2 }; + +#define yyerrok (yyerrstatus = 0) +#define yyclearin (yychar = TURTLE_PARSER_EMPTY) + +#define YYACCEPT goto yyacceptlab +#define YYABORT goto yyabortlab +#define YYERROR goto yyerrorlab +#define YYNOMEM goto yyexhaustedlab + + +#define YYRECOVERING() (!!yyerrstatus) + +#define YYBACKUP(Token, Value) \ + do \ + if (yychar == TURTLE_PARSER_EMPTY) \ + { \ + yychar = (Token); \ + yylval = (Value); \ + YYPOPSTACK (yylen); \ + yystate = *yyssp; \ + goto yybackup; \ + } \ + else \ + { \ + yyerror (rdf_parser, yyscanner, YY_("syntax error: cannot back up")); \ + YYERROR; \ + } \ + while (0) + +/* Backward compatibility with an undocumented macro. + Use TURTLE_PARSER_error or TURTLE_PARSER_UNDEF. */ +#define YYERRCODE TURTLE_PARSER_UNDEF + + +/* Enable debugging if requested. */ +#if TURTLE_PARSER_DEBUG + +# ifndef YYFPRINTF +# include <stdio.h> /* INFRINGES ON USER NAME SPACE */ +# define YYFPRINTF fprintf +# endif + +# define YYDPRINTF(Args) \ +do { \ + if (yydebug) \ + YYFPRINTF Args; \ +} while (0) + + + + +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) \ +do { \ + if (yydebug) \ + { \ + YYFPRINTF (stderr, "%s ", Title); \ + yy_symbol_print (stderr, \ + Kind, Value, rdf_parser, yyscanner); \ + YYFPRINTF (stderr, "\n"); \ + } \ +} while (0) + + +/*-----------------------------------. +| Print this symbol's value on YYO. 
| +`-----------------------------------*/ + +static void +yy_symbol_value_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, raptor_parser* rdf_parser, void* yyscanner) +{ + FILE *yyoutput = yyo; + YY_USE (yyoutput); + YY_USE (rdf_parser); + YY_USE (yyscanner); + if (!yyvaluep) + return; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + YY_USE (yykind); + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + +/*---------------------------. +| Print this symbol on YYO. | +`---------------------------*/ + +static void +yy_symbol_print (FILE *yyo, + yysymbol_kind_t yykind, YYSTYPE const * const yyvaluep, raptor_parser* rdf_parser, void* yyscanner) +{ + YYFPRINTF (yyo, "%s %s (", + yykind < YYNTOKENS ? "token" : "nterm", yysymbol_name (yykind)); + + yy_symbol_value_print (yyo, yykind, yyvaluep, rdf_parser, yyscanner); + YYFPRINTF (yyo, ")"); +} + +/*------------------------------------------------------------------. +| yy_stack_print -- Print the state stack from its BOTTOM up to its | +| TOP (included). | +`------------------------------------------------------------------*/ + +static void +yy_stack_print (yy_state_t *yybottom, yy_state_t *yytop) +{ + YYFPRINTF (stderr, "Stack now"); + for (; yybottom <= yytop; yybottom++) + { + int yybot = *yybottom; + YYFPRINTF (stderr, " %d", yybot); + } + YYFPRINTF (stderr, "\n"); +} + +# define YY_STACK_PRINT(Bottom, Top) \ +do { \ + if (yydebug) \ + yy_stack_print ((Bottom), (Top)); \ +} while (0) + + +/*------------------------------------------------. +| Report that the YYRULE is going to be reduced. | +`------------------------------------------------*/ + +static void +yy_reduce_print (yy_state_t *yyssp, YYSTYPE *yyvsp, + int yyrule, raptor_parser* rdf_parser, void* yyscanner) +{ + int yylno = yyrline[yyrule]; + int yynrhs = yyr2[yyrule]; + int yyi; + YYFPRINTF (stderr, "Reducing stack by rule %d (line %d):\n", + yyrule - 1, yylno); + /* The symbols being reduced. 
*/ + for (yyi = 0; yyi < yynrhs; yyi++) + { + YYFPRINTF (stderr, " $%d = ", yyi + 1); + yy_symbol_print (stderr, + YY_ACCESSING_SYMBOL (+yyssp[yyi + 1 - yynrhs]), + &yyvsp[(yyi + 1) - (yynrhs)], rdf_parser, yyscanner); + YYFPRINTF (stderr, "\n"); + } +} + +# define YY_REDUCE_PRINT(Rule) \ +do { \ + if (yydebug) \ + yy_reduce_print (yyssp, yyvsp, Rule, rdf_parser, yyscanner); \ +} while (0) + +/* Nonzero means print parse trace. It is left uninitialized so that + multiple parsers can coexist. */ +int yydebug; +#else /* !TURTLE_PARSER_DEBUG */ +# define YYDPRINTF(Args) ((void) 0) +# define YY_SYMBOL_PRINT(Title, Kind, Value, Location) +# define YY_STACK_PRINT(Bottom, Top) +# define YY_REDUCE_PRINT(Rule) +#endif /* !TURTLE_PARSER_DEBUG */ + + +/* YYINITDEPTH -- initial size of the parser's stacks. */ +#ifndef YYINITDEPTH +# define YYINITDEPTH 200 +#endif + +/* YYMAXDEPTH -- maximum size the stacks can grow to (effective only + if the built-in stack extension method is used). + + Do not make this value too large; the results are undefined if + YYSTACK_ALLOC_MAXIMUM < YYSTACK_BYTES (YYMAXDEPTH) + evaluated with infinite-precision integer arithmetic. */ + +#ifndef YYMAXDEPTH +# define YYMAXDEPTH 10000 +#endif + + +/* Context of a parse error. */ +typedef struct +{ + yy_state_t *yyssp; + yysymbol_kind_t yytoken; +} yypcontext_t; + +/* Put in YYARG at most YYARGN of the expected tokens given the + current YYCTX, and return the number of tokens stored in YYARG. If + YYARG is null, return the number of expected tokens (guaranteed to + be less than YYNTOKENS). Return YYENOMEM on memory exhaustion. + Return 0 if there are more than YYARGN expected tokens, yet fill + YYARG up to YYARGN. */ +static int +yypcontext_expected_tokens (const yypcontext_t *yyctx, + yysymbol_kind_t yyarg[], int yyargn) +{ + /* Actual size of YYARG. 
*/ + int yycount = 0; + int yyn = yypact[+*yyctx->yyssp]; + if (!yypact_value_is_default (yyn)) + { + /* Start YYX at -YYN if negative to avoid negative indexes in + YYCHECK. In other words, skip the first -YYN actions for + this state because they are default actions. */ + int yyxbegin = yyn < 0 ? -yyn : 0; + /* Stay within bounds of both yycheck and yytname. */ + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? yychecklim : YYNTOKENS; + int yyx; + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + if (yycheck[yyx + yyn] == yyx && yyx != YYSYMBOL_YYerror + && !yytable_value_is_error (yytable[yyx + yyn])) + { + if (!yyarg) + ++yycount; + else if (yycount == yyargn) + return 0; + else + yyarg[yycount++] = YY_CAST (yysymbol_kind_t, yyx); + } + } + if (yyarg && yycount == 0 && 0 < yyargn) + yyarg[0] = YYSYMBOL_YYEMPTY; + return yycount; +} + + + + +#ifndef yystrlen +# if defined __GLIBC__ && defined _STRING_H +# define yystrlen(S) (YY_CAST (YYPTRDIFF_T, strlen (S))) +# else +/* Return the length of YYSTR. */ +static YYPTRDIFF_T +yystrlen (const char *yystr) +{ + YYPTRDIFF_T yylen; + for (yylen = 0; yystr[yylen]; yylen++) + continue; + return yylen; +} +# endif +#endif + +#ifndef yystpcpy +# if defined __GLIBC__ && defined _STRING_H && defined _GNU_SOURCE +# define yystpcpy stpcpy +# else +/* Copy YYSRC to YYDEST, returning the address of the terminating '\0' in + YYDEST. */ +static char * +yystpcpy (char *yydest, const char *yysrc) +{ + char *yyd = yydest; + const char *yys = yysrc; + + while ((*yyd++ = *yys++) != '\0') + continue; + + return yyd - 1; +} +# endif +#endif + +#ifndef yytnamerr +/* Copy to YYRES the contents of YYSTR after stripping away unnecessary + quotes and backslashes, so that it's suitable for yyerror. The + heuristic is that double-quoting is unnecessary unless the string + contains an apostrophe, a comma, or backslash (other than + backslash-backslash). YYSTR is taken from yytname. 
If YYRES is + null, do not copy; instead, return the length of what the result + would have been. */ +static YYPTRDIFF_T +yytnamerr (char *yyres, const char *yystr) +{ + if (*yystr == '"') + { + YYPTRDIFF_T yyn = 0; + char const *yyp = yystr; + for (;;) + switch (*++yyp) + { + case '\'': + case ',': + goto do_not_strip_quotes; + + case '\\': + if (*++yyp != '\\') + goto do_not_strip_quotes; + else + goto append; + + append: + default: + if (yyres) + yyres[yyn] = *yyp; + yyn++; + break; + + case '"': + if (yyres) + yyres[yyn] = '\0'; + return yyn; + } + do_not_strip_quotes: ; + } + + if (yyres) + return yystpcpy (yyres, yystr) - yyres; + else + return yystrlen (yystr); +} +#endif + + +static int +yy_syntax_error_arguments (const yypcontext_t *yyctx, + yysymbol_kind_t yyarg[], int yyargn) +{ + /* Actual size of YYARG. */ + int yycount = 0; + /* There are many possibilities here to consider: + - If this state is a consistent state with a default action, then + the only way this function was invoked is if the default action + is an error action. In that case, don't check for expected + tokens because there are none. + - The only way there can be no lookahead present (in yychar) is if + this state is a consistent state with a default action. Thus, + detecting the absence of a lookahead is sufficient to determine + that there is no unexpected or expected token to report. In that + case, just report a simple "syntax error". + - Don't assume there isn't a lookahead just because this state is a + consistent state with a default action. There might have been a + previous inconsistent state, consistent state with a non-default + action, or user semantic action that manipulated yychar. + - Of course, the expected token list depends on states to have + correct lookahead information, and it depends on the parser not + to perform extra reductions after fetching a lookahead from the + scanner and before detecting a syntax error. 
Thus, state merging + (from LALR or IELR) and default reductions corrupt the expected + token list. However, the list is correct for canonical LR with + one exception: it will still contain any token that will not be + accepted due to an error action in a later state. + */ + if (yyctx->yytoken != YYSYMBOL_YYEMPTY) + { + int yyn; + if (yyarg) + yyarg[yycount] = yyctx->yytoken; + ++yycount; + yyn = yypcontext_expected_tokens (yyctx, + yyarg ? yyarg + 1 : yyarg, yyargn - 1); + if (yyn == YYENOMEM) + return YYENOMEM; + else + yycount += yyn; + } + return yycount; +} + +/* Copy into *YYMSG, which is of size *YYMSG_ALLOC, an error message + about the unexpected token YYTOKEN for the state stack whose top is + YYSSP. + + Return 0 if *YYMSG was successfully written. Return -1 if *YYMSG is + not large enough to hold the message. In that case, also set + *YYMSG_ALLOC to the required number of bytes. Return YYENOMEM if the + required number of bytes is too large to store. */ +static int +yysyntax_error (YYPTRDIFF_T *yymsg_alloc, char **yymsg, + const yypcontext_t *yyctx) +{ + enum { YYARGS_MAX = 5 }; + /* Internationalized format string. */ + const char *yyformat = YY_NULLPTR; + /* Arguments of yyformat: reported tokens (one for the "unexpected", + one per "expected"). */ + yysymbol_kind_t yyarg[YYARGS_MAX]; + /* Cumulated lengths of YYARG. */ + YYPTRDIFF_T yysize = 0; + + /* Actual size of YYARG. */ + int yycount = yy_syntax_error_arguments (yyctx, yyarg, YYARGS_MAX); + if (yycount == YYENOMEM) + return YYENOMEM; + + switch (yycount) + { +#define YYCASE_(N, S) \ + case N: \ + yyformat = S; \ + break + default: /* Avoid compiler warnings. 
*/ + YYCASE_(0, YY_("syntax error")); + YYCASE_(1, YY_("syntax error, unexpected %s")); + YYCASE_(2, YY_("syntax error, unexpected %s, expecting %s")); + YYCASE_(3, YY_("syntax error, unexpected %s, expecting %s or %s")); + YYCASE_(4, YY_("syntax error, unexpected %s, expecting %s or %s or %s")); + YYCASE_(5, YY_("syntax error, unexpected %s, expecting %s or %s or %s or %s")); +#undef YYCASE_ + } + + /* Compute error message size. Don't count the "%s"s, but reserve + room for the terminator. */ + yysize = yystrlen (yyformat) - 2 * yycount + 1; + { + int yyi; + for (yyi = 0; yyi < yycount; ++yyi) + { + YYPTRDIFF_T yysize1 + = yysize + yytnamerr (YY_NULLPTR, yytname[yyarg[yyi]]); + if (yysize <= yysize1 && yysize1 <= YYSTACK_ALLOC_MAXIMUM) + yysize = yysize1; + else + return YYENOMEM; + } + } + + if (*yymsg_alloc < yysize) + { + *yymsg_alloc = 2 * yysize; + if (! (yysize <= *yymsg_alloc + && *yymsg_alloc <= YYSTACK_ALLOC_MAXIMUM)) + *yymsg_alloc = YYSTACK_ALLOC_MAXIMUM; + return -1; + } + + /* Avoid sprintf, as that infringes on the user's name space. + Don't have undefined behavior even if the translation + produced a string with the wrong number of "%s"s. */ + { + char *yyp = *yymsg; + int yyi = 0; + while ((*yyp = *yyformat) != '\0') + if (*yyp == '%' && yyformat[1] == 's' && yyi < yycount) + { + yyp += yytnamerr (yyp, yytname[yyarg[yyi++]]); + yyformat += 2; + } + else + { + ++yyp; + ++yyformat; + } + } + return 0; +} + + +/*-----------------------------------------------. +| Release the memory associated to this symbol. 
| +`-----------------------------------------------*/ + +static void +yydestruct (const char *yymsg, + yysymbol_kind_t yykind, YYSTYPE *yyvaluep, raptor_parser* rdf_parser, void* yyscanner) +{ + YY_USE (yyvaluep); + YY_USE (rdf_parser); + YY_USE (yyscanner); + if (!yymsg) + yymsg = "Deleting"; + YY_SYMBOL_PRINT (yymsg, yykind, yyvaluep, yylocationp); + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + switch (yykind) + { + case YYSYMBOL_STRING_LITERAL: /* "string literal" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1285 "turtle_parser.c" + break; + + case YYSYMBOL_IDENTIFIER: /* "identifier" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1294 "turtle_parser.c" + break; + + case YYSYMBOL_LANGTAG: /* "langtag" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1303 "turtle_parser.c" + break; + + case YYSYMBOL_INTEGER_LITERAL: /* "integer literal" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1312 "turtle_parser.c" + break; + + case YYSYMBOL_FLOATING_LITERAL: /* "floating point literal" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1321 "turtle_parser.c" + break; + + case YYSYMBOL_DECIMAL_LITERAL: /* "decimal literal" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1330 "turtle_parser.c" + break; + + case YYSYMBOL_BLANK_LITERAL: /* "blank node" */ +#line 196 "./turtle_parser.y" + { + if(((*yyvaluep).string)) + RAPTOR_FREE(char*, ((*yyvaluep).string)); +} +#line 1339 "turtle_parser.c" + break; + + case YYSYMBOL_URI_LITERAL: /* "URI literal" */ +#line 201 "./turtle_parser.y" + { + if(((*yyvaluep).uri)) + raptor_free_uri(((*yyvaluep).uri)); +} +#line 1348 
"turtle_parser.c" + break; + + case YYSYMBOL_GRAPH_NAME_LEFT_CURLY: /* "Graph URI literal {" */ +#line 201 "./turtle_parser.y" + { + if(((*yyvaluep).uri)) + raptor_free_uri(((*yyvaluep).uri)); +} +#line 1357 "turtle_parser.c" + break; + + case YYSYMBOL_QNAME_LITERAL: /* "QName" */ +#line 201 "./turtle_parser.y" + { + if(((*yyvaluep).uri)) + raptor_free_uri(((*yyvaluep).uri)); +} +#line 1366 "turtle_parser.c" + break; + + case YYSYMBOL_triples: /* triples */ +#line 211 "./turtle_parser.y" + { + if(((*yyvaluep).sequence)) + raptor_free_sequence(((*yyvaluep).sequence)); +} +#line 1375 "turtle_parser.c" + break; + + case YYSYMBOL_objectList: /* objectList */ +#line 211 "./turtle_parser.y" + { + if(((*yyvaluep).sequence)) + raptor_free_sequence(((*yyvaluep).sequence)); +} +#line 1384 "turtle_parser.c" + break; + + case YYSYMBOL_itemList: /* itemList */ +#line 211 "./turtle_parser.y" + { + if(((*yyvaluep).sequence)) + raptor_free_sequence(((*yyvaluep).sequence)); +} +#line 1393 "turtle_parser.c" + break; + + case YYSYMBOL_verb: /* verb */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1402 "turtle_parser.c" + break; + + case YYSYMBOL_predicateObjectList: /* predicateObjectList */ +#line 211 "./turtle_parser.y" + { + if(((*yyvaluep).sequence)) + raptor_free_sequence(((*yyvaluep).sequence)); +} +#line 1411 "turtle_parser.c" + break; + + case YYSYMBOL_subject: /* subject */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1420 "turtle_parser.c" + break; + + case YYSYMBOL_predicate: /* predicate */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1429 "turtle_parser.c" + break; + + case YYSYMBOL_object: /* object */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1438 "turtle_parser.c" 
+ break; + + case YYSYMBOL_literal: /* literal */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1447 "turtle_parser.c" + break; + + case YYSYMBOL_resource: /* resource */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1456 "turtle_parser.c" + break; + + case YYSYMBOL_predicateObjectListOpt: /* predicateObjectListOpt */ +#line 211 "./turtle_parser.y" + { + if(((*yyvaluep).sequence)) + raptor_free_sequence(((*yyvaluep).sequence)); +} +#line 1465 "turtle_parser.c" + break; + + case YYSYMBOL_blankNode: /* blankNode */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1474 "turtle_parser.c" + break; + + case YYSYMBOL_blankNodePropertyList: /* blankNodePropertyList */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1483 "turtle_parser.c" + break; + + case YYSYMBOL_collection: /* collection */ +#line 206 "./turtle_parser.y" + { + if(((*yyvaluep).identifier)) + raptor_free_term(((*yyvaluep).identifier)); +} +#line 1492 "turtle_parser.c" + break; + + default: + break; + } + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + + + + + + +/*----------. +| yyparse. | +`----------*/ + +int +yyparse (raptor_parser* rdf_parser, void* yyscanner) +{ +/* Lookahead token kind. */ +int yychar; + + +/* The semantic value of the lookahead symbol. */ +/* Default value used for initialization, for pacifying older GCCs + or non-GCC compilers. */ +YY_INITIAL_VALUE (static YYSTYPE yyval_default;) +YYSTYPE yylval YY_INITIAL_VALUE (= yyval_default); + + /* Number of syntax errors so far. */ + int yynerrs = 0; + + yy_state_fast_t yystate = 0; + /* Number of tokens to shift before error messages enabled. 
*/ + int yyerrstatus = 0; + + /* Refer to the stacks through separate pointers, to allow yyoverflow + to reallocate them elsewhere. */ + + /* Their size. */ + YYPTRDIFF_T yystacksize = YYINITDEPTH; + + /* The state stack: array, bottom, top. */ + yy_state_t yyssa[YYINITDEPTH]; + yy_state_t *yyss = yyssa; + yy_state_t *yyssp = yyss; + + /* The semantic value stack: array, bottom, top. */ + YYSTYPE yyvsa[YYINITDEPTH]; + YYSTYPE *yyvs = yyvsa; + YYSTYPE *yyvsp = yyvs; + + int yyn; + /* The return value of yyparse. */ + int yyresult; + /* Lookahead symbol kind. */ + yysymbol_kind_t yytoken = YYSYMBOL_YYEMPTY; + /* The variables used to return semantic value and location from the + action routines. */ + YYSTYPE yyval; + + /* Buffer for error messages, and its allocated size. */ + char yymsgbuf[128]; + char *yymsg = yymsgbuf; + YYPTRDIFF_T yymsg_alloc = sizeof yymsgbuf; + +#define YYPOPSTACK(N) (yyvsp -= (N), yyssp -= (N)) + + /* The number of symbols on the RHS of the reduced rule. + Keep to zero when no symbol should be popped. */ + int yylen = 0; + + YYDPRINTF ((stderr, "Starting parse\n")); + + yychar = TURTLE_PARSER_EMPTY; /* Cause a token to be read. */ + + goto yysetstate; + + +/*------------------------------------------------------------. +| yynewstate -- push a new state, which is found in yystate. | +`------------------------------------------------------------*/ +yynewstate: + /* In all cases, when you get here, the value and location stacks + have just been pushed. So pushing a state here evens the stacks. */ + yyssp++; + + +/*--------------------------------------------------------------------. +| yysetstate -- set current state (the top of the stack) to yystate. 
| +`--------------------------------------------------------------------*/ +yysetstate: + YYDPRINTF ((stderr, "Entering state %d\n", yystate)); + YY_ASSERT (0 <= yystate && yystate < YYNSTATES); + YY_IGNORE_USELESS_CAST_BEGIN + *yyssp = YY_CAST (yy_state_t, yystate); + YY_IGNORE_USELESS_CAST_END + YY_STACK_PRINT (yyss, yyssp); + + if (yyss + yystacksize - 1 <= yyssp) +#if !defined yyoverflow && !defined YYSTACK_RELOCATE + YYNOMEM; +#else + { + /* Get the current used size of the three stacks, in elements. */ + YYPTRDIFF_T yysize = yyssp - yyss + 1; + +# if defined yyoverflow + { + /* Give user a chance to reallocate the stack. Use copies of + these so that the &'s don't force the real ones into + memory. */ + yy_state_t *yyss1 = yyss; + YYSTYPE *yyvs1 = yyvs; + + /* Each stack pointer address is followed by the size of the + data in use in that stack, in bytes. This used to be a + conditional around just the two extra args, but that might + be undefined if yyoverflow is a macro. */ + yyoverflow (YY_("memory exhausted"), + &yyss1, yysize * YYSIZEOF (*yyssp), + &yyvs1, yysize * YYSIZEOF (*yyvsp), + &yystacksize); + yyss = yyss1; + yyvs = yyvs1; + } +# else /* defined YYSTACK_RELOCATE */ + /* Extend the stack our own way. */ + if (YYMAXDEPTH <= yystacksize) + YYNOMEM; + yystacksize *= 2; + if (YYMAXDEPTH < yystacksize) + yystacksize = YYMAXDEPTH; + + { + yy_state_t *yyss1 = yyss; + union yyalloc *yyptr = + YY_CAST (union yyalloc *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, YYSTACK_BYTES (yystacksize)))); + if (! 
yyptr) + YYNOMEM; + YYSTACK_RELOCATE (yyss_alloc, yyss); + YYSTACK_RELOCATE (yyvs_alloc, yyvs); +# undef YYSTACK_RELOCATE + if (yyss1 != yyssa) + YYSTACK_FREE (yyss1); + } +# endif + + yyssp = yyss + yysize - 1; + yyvsp = yyvs + yysize - 1; + + YY_IGNORE_USELESS_CAST_BEGIN + YYDPRINTF ((stderr, "Stack size increased to %ld\n", + YY_CAST (long, yystacksize))); + YY_IGNORE_USELESS_CAST_END + + if (yyss + yystacksize - 1 <= yyssp) + YYABORT; + } +#endif /* !defined yyoverflow && !defined YYSTACK_RELOCATE */ + + + if (yystate == YYFINAL) + YYACCEPT; + + goto yybackup; + + +/*-----------. +| yybackup. | +`-----------*/ +yybackup: + /* Do appropriate processing given the current state. Read a + lookahead token if we need one and don't already have one. */ + + /* First try to decide what to do without reference to lookahead token. */ + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yydefault; + + /* Not known => get a lookahead token if don't already have one. */ + + /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */ + if (yychar == TURTLE_PARSER_EMPTY) + { + YYDPRINTF ((stderr, "Reading a token\n")); + yychar = yylex (&yylval, yyscanner); + } + + if (yychar <= TURTLE_PARSER_EOF) + { + yychar = TURTLE_PARSER_EOF; + yytoken = YYSYMBOL_YYEOF; + YYDPRINTF ((stderr, "Now at end of input.\n")); + } + else if (yychar == TURTLE_PARSER_error) + { + /* The scanner already issued an error message, process directly + to error recovery. But do not keep the error token as + lookahead, it is too special and may lead us to an endless + loop in error recovery. */ + yychar = TURTLE_PARSER_UNDEF; + yytoken = YYSYMBOL_YYerror; + goto yyerrlab1; + } + else + { + yytoken = YYTRANSLATE (yychar); + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc); + } + + /* If the proper action on seeing token YYTOKEN is to reduce or to + detect an error, take that action. 
*/ + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yydefault; + yyn = yytable[yyn]; + if (yyn <= 0) + { + yyn = -yyn; + goto yyreduce; + } + + /* Count tokens shifted since error; after three, turn off error + status. */ + if (yyerrstatus) + yyerrstatus--; + + /* Shift the lookahead token. */ + YY_SYMBOL_PRINT ("Shifting", yytoken, &yylval, &yylloc); + yystate = yyn; + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + /* Discard the shifted token. */ + yychar = TURTLE_PARSER_EMPTY; + goto yynewstate; + + +/*-----------------------------------------------------------. +| yydefault -- do the default action for the current state. | +`-----------------------------------------------------------*/ +yydefault: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyerrlab; + goto yyreduce; + + +/*-----------------------------. +| yyreduce -- do a reduction. | +`-----------------------------*/ +yyreduce: + /* yyn is the number of a rule to reduce with. */ + yylen = yyr2[yyn]; + + /* If YYLEN is nonzero, implement the default value of the action: + '$$ = $1'. + + Otherwise, the following line sets YYVAL to garbage. + This behavior is undocumented and Bison + users should not rely upon it. Assigning to YYVAL + unconditionally makes the parser a bit smaller, and it avoids a + GCC warning that YYVAL may be used uninitialized. */ + yyval = yyvsp[1-yylen]; + + + YY_REDUCE_PRINT (yyn); + switch (yyn) + { + case 3: /* $@1: %empty */ +#line 223 "./turtle_parser.y" + { + /* action in mid-rule so this is run BEFORE the triples in graphBody */ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(!turtle_parser->trig) + turtle_parser_error(rdf_parser, yyscanner, "{ ... 
} is not allowed in Turtle"); + else { + if(turtle_parser->graph_name) + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = raptor_new_term_from_uri(rdf_parser->world, (yyvsp[0].uri)); + raptor_free_uri((yyvsp[0].uri)); + raptor_parser_start_graph(rdf_parser, + turtle_parser->graph_name->value.uri, 1); + } + } +#line 1784 "turtle_parser.c" + break; + + case 4: /* graph: "Graph URI literal {" $@1 graphBody "}" */ +#line 240 "./turtle_parser.y" +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->trig) { + raptor_parser_end_graph(rdf_parser, + turtle_parser->graph_name->value.uri, 1); + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = NULL; + rdf_parser->emitted_default_graph = 0; + } +} +#line 1802 "turtle_parser.c" + break; + + case 5: /* $@2: %empty */ +#line 255 "./turtle_parser.y" + { + /* action in mid-rule so this is run BEFORE the triples in graphBody */ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(!turtle_parser->trig) + turtle_parser_error(rdf_parser, yyscanner, "{ ... 
} is not allowed in Turtle"); + else { + raptor_parser_start_graph(rdf_parser, NULL, 1); + rdf_parser->emitted_default_graph++; + } + } +#line 1819 "turtle_parser.c" + break; + + case 6: /* graph: "{" $@2 graphBody "}" */ +#line 268 "./turtle_parser.y" +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->trig) { + raptor_parser_end_graph(rdf_parser, NULL, 1); + rdf_parser->emitted_default_graph = 0; + } +} +#line 1833 "turtle_parser.c" + break; + + case 11: /* dotTriplesList: triples */ +#line 289 "./turtle_parser.y" +{ + int i; + + if((yyvsp[0].sequence)) { + for(i = 0; i < raptor_sequence_size((yyvsp[0].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[0].sequence), i); + raptor_turtle_generate_statement(rdf_parser, t2); + } + raptor_free_sequence((yyvsp[0].sequence)); + } +} +#line 1849 "turtle_parser.c" + break; + + case 12: /* dotTriplesList: dotTriplesList "." triples */ +#line 301 "./turtle_parser.y" +{ + int i; + + if((yyvsp[0].sequence)) { + for(i = 0; i < raptor_sequence_size((yyvsp[0].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[0].sequence), i); + raptor_turtle_generate_statement(rdf_parser, t2); + } + raptor_free_sequence((yyvsp[0].sequence)); + } +} +#line 1865 "turtle_parser.c" + break; + + case 13: /* statementList: statementList statement */ +#line 315 "./turtle_parser.y" +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + /* sync up consumed/processed so we know what to unwind */ + turtle_parser->processed = turtle_parser->consumed; + turtle_parser->lineno_last_good = turtle_parser->lineno; +} +#line 1879 "turtle_parser.c" + break; + + case 18: /* statement: triples "." 
*/ +#line 331 "./turtle_parser.y" +{ + raptor_turtle_parser* turtle_parser; + int i; + + /* yield deferred statements, if any */ + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->deferred) { + raptor_sequence* def = turtle_parser->deferred; + + for(i = 0; i < raptor_sequence_size(def); i++) { + raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i); + + raptor_turtle_handle_statement(rdf_parser, t2); + } + } + + if((yyvsp[-1].sequence)) { + for(i = 0; i < raptor_sequence_size((yyvsp[-1].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[-1].sequence), i); + raptor_turtle_generate_statement(rdf_parser, t2); + } + raptor_free_sequence((yyvsp[-1].sequence)); + } + + if(turtle_parser->deferred) { + /* debrief resources */ + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } +} +#line 1914 "turtle_parser.c" + break; + + case 19: /* triples: subject predicateObjectList */ +#line 364 "./turtle_parser.y" +{ + int i; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("triples 1\n subject="); + if((yyvsp[-1].identifier)) + raptor_term_print_as_ntriples((yyvsp[-1].identifier), stdout); + else + fputs("NULL", stdout); + if((yyvsp[0].sequence)) { + printf("\n predicateObjectList (reverse order to syntax)="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n"); + } else + printf("\n and empty predicateObjectList\n"); +#endif + + if((yyvsp[-1].identifier) && (yyvsp[0].sequence)) { + /* have subject and non-empty property list, handle it */ + for(i = 0; i < raptor_sequence_size((yyvsp[0].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[0].sequence), i); + t2->subject = raptor_term_copy((yyvsp[-1].identifier)); + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution predicateObjectList="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n\n"); 
+#endif + } + + if((yyvsp[-1].identifier)) + raptor_free_term((yyvsp[-1].identifier)); + + (yyval.sequence) = (yyvsp[0].sequence); +} +#line 1954 "turtle_parser.c" + break; + + case 20: /* triples: blankNodePropertyList predicateObjectListOpt */ +#line 400 "./turtle_parser.y" +{ + int i; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("triples 2\n blankNodePropertyList="); + if((yyvsp[-1].identifier)) + raptor_term_print_as_ntriples((yyvsp[-1].identifier), stdout); + else + fputs("NULL", stdout); + if((yyvsp[0].sequence)) { + printf("\n predicateObjectListOpt (reverse order to syntax)="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n"); + } else + printf("\n and empty predicateObjectListOpt\n"); +#endif + + if((yyvsp[-1].identifier) && (yyvsp[0].sequence)) { + /* have subject and non-empty predicate object list, handle it */ + for(i = 0; i < raptor_sequence_size((yyvsp[0].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[0].sequence), i); + t2->subject = raptor_term_copy((yyvsp[-1].identifier)); + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution predicateObjectListOpt="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n\n"); +#endif + } + + if((yyvsp[-1].identifier)) + raptor_free_term((yyvsp[-1].identifier)); + + (yyval.sequence) = (yyvsp[0].sequence); +} +#line 1994 "turtle_parser.c" + break; + + case 21: /* triples: error "." 
*/ +#line 436 "./turtle_parser.y" +{ + (yyval.sequence) = NULL; +} +#line 2002 "turtle_parser.c" + break; + + case 22: /* objectList: objectList "," object */ +#line 443 "./turtle_parser.y" +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 1\n"); + if((yyvsp[0].identifier)) { + printf(" object=\n"); + raptor_term_print_as_ntriples((yyvsp[0].identifier), stdout); + printf("\n"); + } else + printf(" and empty object\n"); + if((yyvsp[-2].sequence)) { + printf(" objectList="); + raptor_sequence_print((yyvsp[-2].sequence), stdout); + printf("\n"); + } else + printf(" and empty objectList\n"); +#endif + + if(!(yyvsp[0].identifier)) + (yyval.sequence) = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, (yyvsp[0].identifier), NULL); + if(!triple) { + raptor_free_sequence((yyvsp[-2].sequence)); + YYERROR; + } + if(raptor_sequence_push((yyvsp[-2].sequence), triple)) { + raptor_free_sequence((yyvsp[-2].sequence)); + YYERROR; + } + (yyval.sequence) = (yyvsp[-2].sequence); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print((yyval.sequence), stdout); + printf("\n\n"); +#endif + } +} +#line 2046 "turtle_parser.c" + break; + + case 23: /* objectList: object */ +#line 483 "./turtle_parser.y" +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 2\n"); + if((yyvsp[0].identifier)) { + printf(" object=\n"); + raptor_term_print_as_ntriples((yyvsp[0].identifier), stdout); + printf("\n"); + } else + printf(" and empty object\n"); +#endif + + if(!(yyvsp[0].identifier)) + (yyval.sequence) = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, (yyvsp[0].identifier), NULL); + if(!triple) + YYERROR; +#ifdef RAPTOR_DEBUG + (yyval.sequence) = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, + (raptor_data_print_handler)raptor_statement_print); +#else + 
(yyval.sequence) = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); +#endif + if(!(yyval.sequence)) { + raptor_free_statement(triple); + YYERROR; + } + if(raptor_sequence_push((yyval.sequence), triple)) { + raptor_free_sequence((yyval.sequence)); + (yyval.sequence) = NULL; + YYERROR; + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print((yyval.sequence), stdout); + printf("\n\n"); +#endif + } +} +#line 2092 "turtle_parser.c" + break; + + case 24: /* itemList: itemList object */ +#line 527 "./turtle_parser.y" +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 1\n"); + if((yyvsp[0].identifier)) { + printf(" object=\n"); + raptor_term_print_as_ntriples((yyvsp[0].identifier), stdout); + printf("\n"); + } else + printf(" and empty object\n"); + if((yyvsp[-1].sequence)) { + printf(" objectList="); + raptor_sequence_print((yyvsp[-1].sequence), stdout); + printf("\n"); + } else + printf(" and empty objectList\n"); +#endif + + if(!(yyvsp[0].identifier)) + (yyval.sequence) = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, (yyvsp[0].identifier), NULL); + if(!triple) { + raptor_free_sequence((yyvsp[-1].sequence)); + YYERROR; + } + if(raptor_sequence_push((yyvsp[-1].sequence), triple)) { + raptor_free_sequence((yyvsp[-1].sequence)); + YYERROR; + } + (yyval.sequence) = (yyvsp[-1].sequence); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print((yyval.sequence), stdout); + printf("\n\n"); +#endif + } +} +#line 2136 "turtle_parser.c" + break; + + case 25: /* itemList: object */ +#line 567 "./turtle_parser.y" +{ + raptor_statement *triple; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("objectList 2\n"); + if((yyvsp[0].identifier)) { + printf(" object=\n"); + raptor_term_print_as_ntriples((yyvsp[0].identifier), stdout); + printf("\n"); + } else + 
printf(" and empty object\n"); +#endif + + if(!(yyvsp[0].identifier)) + (yyval.sequence) = NULL; + else { + triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, (yyvsp[0].identifier), NULL); + if(!triple) + YYERROR; +#ifdef RAPTOR_DEBUG + (yyval.sequence) = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, + (raptor_data_print_handler)raptor_statement_print); +#else + (yyval.sequence) = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); +#endif + if(!(yyval.sequence)) { + raptor_free_statement(triple); + YYERROR; + } + if(raptor_sequence_push((yyval.sequence), triple)) { + raptor_free_sequence((yyval.sequence)); + (yyval.sequence) = NULL; + YYERROR; + } +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" objectList is now "); + raptor_sequence_print((yyval.sequence), stdout); + printf("\n\n"); +#endif + } +} +#line 2182 "turtle_parser.c" + break; + + case 26: /* verb: predicate */ +#line 611 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("verb predicate="); + raptor_term_print_as_ntriples((yyvsp[0].identifier), stdout); + printf("\n"); +#endif + + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2196 "turtle_parser.c" + break; + + case 27: /* verb: "a" */ +#line 621 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("verb predicate = rdf:type (a)\n"); +#endif + + (yyval.identifier) = raptor_term_copy(RAPTOR_RDF_type_term(rdf_parser->world)); + if(!(yyval.identifier)) + YYERROR; +} +#line 2210 "turtle_parser.c" + break; + + case 28: /* predicateObjectList: predicateObjectList ";" verb objectList */ +#line 634 "./turtle_parser.y" +{ + int i; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("predicateObjectList 1\n verb="); + raptor_term_print_as_ntriples((yyvsp[-1].identifier), stdout); + printf("\n objectList="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n predicateObjectList="); + 
raptor_sequence_print((yyvsp[-3].sequence), stdout); + printf("\n\n"); +#endif + + if((yyvsp[0].sequence) == NULL) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" empty objectList not processed\n"); +#endif + } else if((yyvsp[-1].identifier) && (yyvsp[0].sequence)) { + /* non-empty property list, handle it */ + for(i = 0; i < raptor_sequence_size((yyvsp[0].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[0].sequence), i); + t2->predicate = raptor_term_copy((yyvsp[-1].identifier)); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n"); +#endif + } + + if((yyvsp[-3].sequence) == NULL) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" empty predicateObjectList not copied\n\n"); +#endif + } else if((yyvsp[-1].identifier) && (yyvsp[0].sequence) && (yyvsp[-3].sequence)) { + while(raptor_sequence_size((yyvsp[0].sequence))) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_unshift((yyvsp[0].sequence)); + if(raptor_sequence_push((yyvsp[-3].sequence), t2)) { + raptor_free_sequence((yyvsp[-3].sequence)); + raptor_free_term((yyvsp[-1].identifier)); + raptor_free_sequence((yyvsp[0].sequence)); + YYERROR; + } + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after appending objectList (reverse order)="); + raptor_sequence_print((yyvsp[-3].sequence), stdout); + printf("\n\n"); +#endif + + raptor_free_sequence((yyvsp[0].sequence)); + } + + if((yyvsp[-1].identifier)) + raptor_free_term((yyvsp[-1].identifier)); + + (yyval.sequence) = (yyvsp[-3].sequence); +} +#line 2275 "turtle_parser.c" + break; + + case 29: /* predicateObjectList: verb objectList */ +#line 695 "./turtle_parser.y" +{ + int i; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("predicateObjectList 2\n verb="); + raptor_term_print_as_ntriples((yyvsp[-1].identifier), stdout); + if((yyvsp[0].sequence)) { + 
printf("\n objectList="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n"); + } else + printf("\n and empty objectList\n"); +#endif + + if((yyvsp[-1].identifier) && (yyvsp[0].sequence)) { + for(i = 0; i < raptor_sequence_size((yyvsp[0].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[0].sequence), i); + t2->predicate = raptor_term_copy((yyvsp[-1].identifier)); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print((yyvsp[0].sequence), stdout); + printf("\n\n"); +#endif + } + + if((yyvsp[-1].identifier)) + raptor_free_term((yyvsp[-1].identifier)); + + (yyval.sequence) = (yyvsp[0].sequence); +} +#line 2311 "turtle_parser.c" + break; + + case 30: /* predicateObjectList: predicateObjectList ";" */ +#line 727 "./turtle_parser.y" +{ + (yyval.sequence) = (yyvsp[-1].sequence); +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("predicateObjectList 5\n trailing semicolon returning existing list "); + raptor_sequence_print((yyval.sequence), stdout); + printf("\n\n"); +#endif +} +#line 2324 "turtle_parser.c" + break; + + case 33: /* prefix: "@prefix" "identifier" "URI literal" "." */ +#line 741 "./turtle_parser.y" +{ + unsigned char *prefix = (yyvsp[-2].string); + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context); + raptor_namespace *ns; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("directive PREFIX %s %s\n",((yyvsp[-2].string) ? (char*)(yyvsp[-2].string) : "(default)"), raptor_uri_as_string((yyvsp[-1].uri))); +#endif + + if(prefix) { + size_t len = strlen((const char*)prefix); + if(prefix[len-1] == ':') { + if(len == 1) + /* declaring default namespace prefix PREFIX : ... 
*/ + prefix = NULL; + else + prefix[len-1]='\0'; + } + } + + ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, (yyvsp[-1].uri), 0); + if(ns) { + raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns); + raptor_parser_start_namespace(rdf_parser, ns); + } + + if((yyvsp[-2].string)) + RAPTOR_FREE(char*, (yyvsp[-2].string)); + raptor_free_uri((yyvsp[-1].uri)); + + if(!ns) + YYERROR; +} +#line 2362 "turtle_parser.c" + break; + + case 34: /* prefix: "PREFIX" "identifier" "URI literal" */ +#line 775 "./turtle_parser.y" +{ + unsigned char *prefix = (yyvsp[-1].string); + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context); + raptor_namespace *ns; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("directive @prefix %s %s.\n",((yyvsp[-1].string) ? (char*)(yyvsp[-1].string) : "(default)"), raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if(prefix) { + size_t len = strlen((const char*)prefix); + if(prefix[len-1] == ':') { + if(len == 1) + /* declaring default namespace prefix @prefix : ... */ + prefix = NULL; + else + prefix[len-1]='\0'; + } + } + + ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, (yyvsp[0].uri), 0); + if(ns) { + raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns); + raptor_parser_start_namespace(rdf_parser, ns); + } + + if((yyvsp[-1].string)) + RAPTOR_FREE(char*, (yyvsp[-1].string)); + raptor_free_uri((yyvsp[0].uri)); + + if(!ns) + YYERROR; +} +#line 2400 "turtle_parser.c" + break; + + case 35: /* base: "@base" "URI literal" "." 
*/ +#line 812 "./turtle_parser.y" +{ + raptor_uri *uri=(yyvsp[-1].uri); + + if(rdf_parser->base_uri) + raptor_free_uri(rdf_parser->base_uri); + rdf_parser->base_uri = uri; +} +#line 2412 "turtle_parser.c" + break; + + case 36: /* base: "BASE" "URI literal" */ +#line 820 "./turtle_parser.y" +{ + raptor_uri *uri=(yyvsp[0].uri); + + if(rdf_parser->base_uri) + raptor_free_uri(rdf_parser->base_uri); + rdf_parser->base_uri = uri; +} +#line 2424 "turtle_parser.c" + break; + + case 37: /* subject: resource */ +#line 830 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2432 "turtle_parser.c" + break; + + case 38: /* subject: blankNode */ +#line 834 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2440 "turtle_parser.c" + break; + + case 39: /* subject: collection */ +#line 838 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2448 "turtle_parser.c" + break; + + case 40: /* predicate: resource */ +#line 845 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2456 "turtle_parser.c" + break; + + case 41: /* object: resource */ +#line 852 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2464 "turtle_parser.c" + break; + + case 42: /* object: blankNode */ +#line 856 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2472 "turtle_parser.c" + break; + + case 43: /* object: collection */ +#line 860 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2480 "turtle_parser.c" + break; + + case 44: /* object: blankNodePropertyList */ +#line 864 "./turtle_parser.y" +{ + (yyval.identifier) = (yyvsp[0].identifier); +} +#line 2488 "turtle_parser.c" + break; + + case 45: /* object: literal */ +#line 868 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("object literal="); + raptor_term_print_as_ntriples((yyvsp[0].identifier), stdout); + printf("\n"); +#endif + + 
(yyval.identifier) = (yyvsp[0].identifier); +} +#line 2502 "turtle_parser.c" + break; + + case 46: /* literal: "string literal" "langtag" */ +#line 881 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + language string=\"%s\"\n", (yyvsp[-1].string)); +#endif + + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[-1].string), NULL, (yyvsp[0].string)); + RAPTOR_FREE(char*, (yyvsp[-1].string)); + RAPTOR_FREE(char*, (yyvsp[0].string)); + if(!(yyval.identifier)) + YYERROR; +} +#line 2518 "turtle_parser.c" + break; + + case 47: /* literal: "string literal" "langtag" "^" "URI literal" */ +#line 893 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + language=\"%s\" datatype string=\"%s\" uri=\"%s\"\n", (yyvsp[-3].string), (yyvsp[-2].string), raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if((yyvsp[0].uri)) { + if((yyvsp[-2].string)) { + raptor_parser_error(rdf_parser, + "Language not allowed with datatyped literal"); + RAPTOR_FREE(char*, (yyvsp[-2].string)); + (yyvsp[-2].string) = NULL; + } + + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[-3].string), (yyvsp[0].uri), NULL); + RAPTOR_FREE(char*, (yyvsp[-3].string)); + raptor_free_uri((yyvsp[0].uri)); + if(!(yyval.identifier)) + YYERROR; + } else + (yyval.identifier) = NULL; + +} +#line 2545 "turtle_parser.c" + break; + + case 48: /* literal: "string literal" "langtag" "^" "QName" */ +#line 916 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + language=\"%s\" datatype string=\"%s\" qname URI=<%s>\n", (yyvsp[-3].string), (yyvsp[-2].string), raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if((yyvsp[0].uri)) { + if((yyvsp[-2].string)) { + raptor_parser_error(rdf_parser, + "Language not allowed with datatyped literal"); + RAPTOR_FREE(char*, (yyvsp[-2].string)); + (yyvsp[-2].string) = NULL; + } + + (yyval.identifier) = 
raptor_new_term_from_literal(rdf_parser->world, (yyvsp[-3].string), (yyvsp[0].uri), NULL); + RAPTOR_FREE(char*, (yyvsp[-3].string)); + raptor_free_uri((yyvsp[0].uri)); + if(!(yyval.identifier)) + YYERROR; + } else + (yyval.identifier) = NULL; + +} +#line 2572 "turtle_parser.c" + break; + + case 49: /* literal: "string literal" "^" "URI literal" */ +#line 939 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + datatype string=\"%s\" uri=\"%s\"\n", (yyvsp[-2].string), raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if((yyvsp[0].uri)) { + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[-2].string), (yyvsp[0].uri), NULL); + RAPTOR_FREE(char*, (yyvsp[-2].string)); + raptor_free_uri((yyvsp[0].uri)); + if(!(yyval.identifier)) + YYERROR; + } else + (yyval.identifier) = NULL; + +} +#line 2592 "turtle_parser.c" + break; + + case 50: /* literal: "string literal" "^" "QName" */ +#line 955 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal + datatype string=\"%s\" qname URI=<%s>\n", (yyvsp[-2].string), raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if((yyvsp[0].uri)) { + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[-2].string), (yyvsp[0].uri), NULL); + RAPTOR_FREE(char*, (yyvsp[-2].string)); + raptor_free_uri((yyvsp[0].uri)); + if(!(yyval.identifier)) + YYERROR; + } else + (yyval.identifier) = NULL; +} +#line 2611 "turtle_parser.c" + break; + + case 51: /* literal: "string literal" */ +#line 970 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("literal string=\"%s\"\n", (yyvsp[0].string)); +#endif + + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[0].string), NULL, NULL); + RAPTOR_FREE(char*, (yyvsp[0].string)); + if(!(yyval.identifier)) + YYERROR; +} +#line 2626 "turtle_parser.c" + break; + + case 52: /* literal: "integer literal" */ +#line 981 "./turtle_parser.y" +{ + raptor_uri 
*uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource integer=%s\n", (yyvsp[0].string)); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_integer_uri); + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[0].string), uri, NULL); + RAPTOR_FREE(char*, (yyvsp[0].string)); + raptor_free_uri(uri); + if(!(yyval.identifier)) + YYERROR; +} +#line 2643 "turtle_parser.c" + break; + + case 53: /* literal: "floating point literal" */ +#line 994 "./turtle_parser.y" +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource double=%s\n", (yyvsp[0].string)); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_double_uri); + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[0].string), uri, NULL); + RAPTOR_FREE(char*, (yyvsp[0].string)); + raptor_free_uri(uri); + if(!(yyval.identifier)) + YYERROR; +} +#line 2660 "turtle_parser.c" + break; + + case 54: /* literal: "decimal literal" */ +#line 1007 "./turtle_parser.y" +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource decimal=%s\n", (yyvsp[0].string)); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_decimal_uri); + if(!uri) { + RAPTOR_FREE(char*, (yyvsp[0].string)); + YYERROR; + } + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, (yyvsp[0].string), uri, NULL); + RAPTOR_FREE(char*, (yyvsp[0].string)); + raptor_free_uri(uri); + if(!(yyval.identifier)) + YYERROR; +} +#line 2681 "turtle_parser.c" + break; + + case 55: /* literal: "true" */ +#line 1024 "./turtle_parser.y" +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fputs("resource boolean true\n", stderr); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri); + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, + (const unsigned char*)"true", uri, NULL); + raptor_free_uri(uri); + if(!(yyval.identifier)) + YYERROR; +} +#line 2698 "turtle_parser.c" + break; + + 
case 56: /* literal: "false" */ +#line 1037 "./turtle_parser.y" +{ + raptor_uri *uri; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + fputs("resource boolean false\n", stderr); +#endif + uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri); + (yyval.identifier) = raptor_new_term_from_literal(rdf_parser->world, + (const unsigned char*)"false", uri, NULL); + raptor_free_uri(uri); + if(!(yyval.identifier)) + YYERROR; +} +#line 2715 "turtle_parser.c" + break; + + case 57: /* resource: "URI literal" */ +#line 1053 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource URI=<%s>\n", raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if((yyvsp[0].uri)) { + (yyval.identifier) = raptor_new_term_from_uri(rdf_parser->world, (yyvsp[0].uri)); + raptor_free_uri((yyvsp[0].uri)); + if(!(yyval.identifier)) + YYERROR; + } else + (yyval.identifier) = NULL; +} +#line 2733 "turtle_parser.c" + break; + + case 58: /* resource: "QName" */ +#line 1067 "./turtle_parser.y" +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource qname URI=<%s>\n", raptor_uri_as_string((yyvsp[0].uri))); +#endif + + if((yyvsp[0].uri)) { + (yyval.identifier) = raptor_new_term_from_uri(rdf_parser->world, (yyvsp[0].uri)); + raptor_free_uri((yyvsp[0].uri)); + if(!(yyval.identifier)) + YYERROR; + } else + (yyval.identifier) = NULL; +} +#line 2751 "turtle_parser.c" + break; + + case 59: /* predicateObjectListOpt: predicateObjectList */ +#line 1084 "./turtle_parser.y" +{ + (yyval.sequence) = (yyvsp[0].sequence); +} +#line 2759 "turtle_parser.c" + break; + + case 60: /* predicateObjectListOpt: %empty */ +#line 1088 "./turtle_parser.y" +{ + (yyval.sequence) = NULL; +} +#line 2767 "turtle_parser.c" + break; + + case 61: /* blankNode: "blank node" */ +#line 1095 "./turtle_parser.y" +{ + const unsigned char *id; +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("subject blank=\"%s\"\n", (yyvsp[0].string)); +#endif + id = 
raptor_world_internal_generate_id(rdf_parser->world, (yyvsp[0].string)); + if(!id) + YYERROR; + + (yyval.identifier) = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + + if(!(yyval.identifier)) + YYERROR; +} +#line 2787 "turtle_parser.c" + break; + + case 62: /* blankNodePropertyList: "[" predicateObjectListOpt "]" */ +#line 1113 "./turtle_parser.y" +{ + int i; + const unsigned char *id; + + id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!id) { + if((yyvsp[-1].sequence)) + raptor_free_sequence((yyvsp[-1].sequence)); + YYERROR; + } + + (yyval.identifier) = raptor_new_term_from_blank(rdf_parser->world, id); + RAPTOR_FREE(char*, id); + if(!(yyval.identifier)) { + if((yyvsp[-1].sequence)) + raptor_free_sequence((yyvsp[-1].sequence)); + YYERROR; + } + + if((yyvsp[-1].sequence) == NULL) { +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_term_print_as_ntriples((yyval.identifier), stdout); + printf("\n"); +#endif + } else { + /* non-empty property list, handle it */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_sequence_print((yyvsp[-1].sequence), stdout); + printf("\n"); +#endif + + for(i = 0; i < raptor_sequence_size((yyvsp[-1].sequence)); i++) { + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[-1].sequence), i); + t2->subject = raptor_term_copy((yyval.identifier)); + raptor_turtle_defer_statement(rdf_parser, t2); + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print((yyvsp[-1].sequence), stdout); + printf("\n\n"); +#endif + + raptor_free_sequence((yyvsp[-1].sequence)); + + } + +} +#line 2842 "turtle_parser.c" + break; + + case 63: /* collection: "(" itemList ")" */ +#line 1167 "./turtle_parser.y" +{ + int i; + raptor_world* world = rdf_parser->world; + raptor_term* first_identifier = NULL; + raptor_term* rest_identifier = NULL; + 
raptor_term* object = NULL; + raptor_term* blank = NULL; + char const *errmsg = NULL; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("collection\n objectList="); + raptor_sequence_print((yyvsp[-1].sequence), stdout); + printf("\n"); +#endif + + first_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_first_URI(world)); + if(!first_identifier) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:first term"); + rest_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_rest_URI(world)); + if(!rest_identifier) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:rest term"); + + /* non-empty property list, handle it */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_sequence_print((yyvsp[-1].sequence), stdout); + printf("\n"); +#endif + + object = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world)); + if(!object) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:nil term"); + + for(i = raptor_sequence_size((yyvsp[-1].sequence))-1; i>=0; i--) { + raptor_term* temp; + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at((yyvsp[-1].sequence), i); + const unsigned char *blank_id; + + blank_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!blank_id) + YYERR_MSG_GOTO(err_collection, "Cannot create bnodeid"); + + blank = raptor_new_term_from_blank(rdf_parser->world, + blank_id); + RAPTOR_FREE(char*, blank_id); + if(!blank) + YYERR_MSG_GOTO(err_collection, "Cannot create bnode"); + + t2->subject = blank; + t2->predicate = first_identifier; + /* t2->object already set to the value we want */ + raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2); + + temp = t2->object; + + t2->subject = blank; + t2->predicate = rest_identifier; + t2->object = object; + raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2); + + t2->subject = NULL; + t2->predicate = NULL; + t2->object = temp; + + raptor_free_term(object); + object = blank; + blank = NULL; + } + +#if 
defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print((yyvsp[-1].sequence), stdout); + printf("\n\n"); +#endif + + raptor_free_sequence((yyvsp[-1].sequence)); + + raptor_free_term(first_identifier); + raptor_free_term(rest_identifier); + + (yyval.identifier)=object; + + err_collection: + if(errmsg) { + if(blank) + raptor_free_term(blank); + + if(object) + raptor_free_term(object); + + if(rest_identifier) + raptor_free_term(rest_identifier); + + if(first_identifier) + raptor_free_term(first_identifier); + + raptor_free_sequence((yyvsp[-1].sequence)); + + YYERROR_MSG(errmsg); + } +} +#line 2948 "turtle_parser.c" + break; + + case 64: /* collection: "(" ")" */ +#line 1269 "./turtle_parser.y" +{ + raptor_world* world = rdf_parser->world; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("collection\n empty\n"); +#endif + + (yyval.identifier) = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world)); + if(!(yyval.identifier)) + YYERROR; +} +#line 2964 "turtle_parser.c" + break; + + +#line 2968 "turtle_parser.c" + + default: break; + } + /* User semantic actions sometimes alter yychar, and that requires + that yytoken be updated with the new translation. We take the + approach of translating immediately before every use of yytoken. + One alternative is translating here after every semantic action, + but that translation would be missed if the semantic action invokes + YYABORT, YYACCEPT, or YYERROR immediately after altering yychar or + if it invokes YYBACKUP. In the case of YYABORT or YYACCEPT, an + incorrect destructor might then be invoked immediately. In the + case of YYERROR or YYBACKUP, subsequent parser actions might lead + to an incorrect destructor call or verbose syntax error message + before the lookahead is translated. 
*/ + YY_SYMBOL_PRINT ("-> $$ =", YY_CAST (yysymbol_kind_t, yyr1[yyn]), &yyval, &yyloc); + + YYPOPSTACK (yylen); + yylen = 0; + + *++yyvsp = yyval; + + /* Now 'shift' the result of the reduction. Determine what state + that goes to, based on the state we popped back to and the rule + number reduced by. */ + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *yyssp; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *yyssp + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yynewstate; + + +/*--------------------------------------. +| yyerrlab -- here on detecting error. | +`--------------------------------------*/ +yyerrlab: + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = yychar == TURTLE_PARSER_EMPTY ? YYSYMBOL_YYEMPTY : YYTRANSLATE (yychar); + /* If not already recovering from an error, report this error. */ + if (!yyerrstatus) + { + ++yynerrs; + { + yypcontext_t yyctx + = {yyssp, yytoken}; + char const *yymsgp = YY_("syntax error"); + int yysyntax_error_status; + yysyntax_error_status = yysyntax_error (&yymsg_alloc, &yymsg, &yyctx); + if (yysyntax_error_status == 0) + yymsgp = yymsg; + else if (yysyntax_error_status == -1) + { + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + yymsg = YY_CAST (char *, + YYSTACK_ALLOC (YY_CAST (YYSIZE_T, yymsg_alloc))); + if (yymsg) + { + yysyntax_error_status + = yysyntax_error (&yymsg_alloc, &yymsg, &yyctx); + yymsgp = yymsg; + } + else + { + yymsg = yymsgbuf; + yymsg_alloc = sizeof yymsgbuf; + yysyntax_error_status = YYENOMEM; + } + } + yyerror (rdf_parser, yyscanner, yymsgp); + if (yysyntax_error_status == YYENOMEM) + YYNOMEM; + } + } + + if (yyerrstatus == 3) + { + /* If just tried and failed to reuse lookahead token after an + error, discard it. */ + + if (yychar <= TURTLE_PARSER_EOF) + { + /* Return failure if at end of input. 
*/ + if (yychar == TURTLE_PARSER_EOF) + YYABORT; + } + else + { + yydestruct ("Error: discarding", + yytoken, &yylval, rdf_parser, yyscanner); + yychar = TURTLE_PARSER_EMPTY; + } + } + + /* Else will try to reuse lookahead token after shifting the error + token. */ + goto yyerrlab1; + + +/*---------------------------------------------------. +| yyerrorlab -- error raised explicitly by YYERROR. | +`---------------------------------------------------*/ +yyerrorlab: + /* Pacify compilers when the user code never invokes YYERROR and the + label yyerrorlab therefore never appears in user code. */ + if (0) + YYERROR; + ++yynerrs; + + /* Do not reclaim the symbols of the rule whose action triggered + this YYERROR. */ + YYPOPSTACK (yylen); + yylen = 0; + YY_STACK_PRINT (yyss, yyssp); + yystate = *yyssp; + goto yyerrlab1; + + +/*-------------------------------------------------------------. +| yyerrlab1 -- common code for both syntax error and YYERROR. | +`-------------------------------------------------------------*/ +yyerrlab1: + yyerrstatus = 3; /* Each real token shifted decrements this. */ + + /* Pop stack until we find a state that shifts the error token. */ + for (;;) + { + yyn = yypact[yystate]; + if (!yypact_value_is_default (yyn)) + { + yyn += YYSYMBOL_YYerror; + if (0 <= yyn && yyn <= YYLAST && yycheck[yyn] == YYSYMBOL_YYerror) + { + yyn = yytable[yyn]; + if (0 < yyn) + break; + } + } + + /* Pop the current state because it cannot handle the error token. */ + if (yyssp == yyss) + YYABORT; + + + yydestruct ("Error: popping", + YY_ACCESSING_SYMBOL (yystate), yyvsp, rdf_parser, yyscanner); + YYPOPSTACK (1); + yystate = *yyssp; + YY_STACK_PRINT (yyss, yyssp); + } + + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN + *++yyvsp = yylval; + YY_IGNORE_MAYBE_UNINITIALIZED_END + + + /* Shift the error token. */ + YY_SYMBOL_PRINT ("Shifting", YY_ACCESSING_SYMBOL (yyn), yyvsp, yylsp); + + yystate = yyn; + goto yynewstate; + + +/*-------------------------------------. 
+| yyacceptlab -- YYACCEPT comes here. | +`-------------------------------------*/ +yyacceptlab: + yyresult = 0; + goto yyreturnlab; + + +/*-----------------------------------. +| yyabortlab -- YYABORT comes here. | +`-----------------------------------*/ +yyabortlab: + yyresult = 1; + goto yyreturnlab; + + +/*-----------------------------------------------------------. +| yyexhaustedlab -- YYNOMEM (memory exhaustion) comes here. | +`-----------------------------------------------------------*/ +yyexhaustedlab: + yyerror (rdf_parser, yyscanner, YY_("memory exhausted")); + yyresult = 2; + goto yyreturnlab; + + +/*----------------------------------------------------------. +| yyreturnlab -- parsing is finished, clean up and return. | +`----------------------------------------------------------*/ +yyreturnlab: + if (yychar != TURTLE_PARSER_EMPTY) + { + /* Make sure we have latest lookahead translation. See comments at + user semantic actions for why this is necessary. */ + yytoken = YYTRANSLATE (yychar); + yydestruct ("Cleanup: discarding lookahead", + yytoken, &yylval, rdf_parser, yyscanner); + } + /* Do not reclaim the symbols of the rule whose action triggered + this YYABORT or YYACCEPT. 
*/ + YYPOPSTACK (yylen); + YY_STACK_PRINT (yyss, yyssp); + while (yyssp != yyss) + { + yydestruct ("Cleanup: popping", + YY_ACCESSING_SYMBOL (+*yyssp), yyvsp, rdf_parser, yyscanner); + YYPOPSTACK (1); + } +#ifndef yyoverflow + if (yyss != yyssa) + YYSTACK_FREE (yyss); +#endif + if (yymsg != yymsgbuf) + YYSTACK_FREE (yymsg); + return yyresult; +} + +#line 1283 "./turtle_parser.y" + + + +/* Support functions */ + +/* Error handler with scanner context, during parsing */ +int +turtle_parser_error(raptor_parser* rdf_parser, void* scanner, + const char *msg) +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->consumed == turtle_parser->consumable && + turtle_parser->processed < turtle_parser->consumed && + !turtle_parser->is_end) { + /* we encountered an error on or around the last byte of the buffer + * sorting it in the next run aye? */ + return 0; + } + + if(turtle_parser->error_count++) + return 0; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + raptor_log_error(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, + &rdf_parser->locator, msg); + + return 0; +} + + +/* Error handler within raptor functions and callbacks */ +static void +turtle_parser_error_simple(void* user_data, const char *msg, ...) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + raptor_turtle_parser* turtle_parser; + va_list args; + + va_start(args, msg); + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->consumed == turtle_parser->consumable && + turtle_parser->processed < turtle_parser->consumed && + !turtle_parser->is_end) { + /* we encountered an error on or around the last byte of the buffer + * sorting it in the next run aye? 
*/ + goto tidy; + } + + if(turtle_parser->error_count++) + goto tidy; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + raptor_log_error_varargs(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, + &rdf_parser->locator, msg, + args); + +tidy: + va_end(args); +} + + +int +turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + va_list arguments; + + if(!turtle_parser) + return 1; + + if(turtle_parser->error_count++) + return 0; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + va_start(arguments, message); + + raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser), + RAPTOR_LOG_LEVEL_ERROR, message, arguments); + + va_end(arguments); + + return 0; +} + + +raptor_uri* +turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!turtle_parser) + return NULL; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + name_len = raptor_turtle_expand_qname_escapes(name, name_len, + (raptor_simple_message_handler)turtle_parser_error_simple, rdf_parser); + if(!name_len) + return NULL; + + return raptor_qname_string_to_uri(&turtle_parser->namespaces, name, name_len); +} + + + +#ifndef TURTLE_PUSH_PARSE +static int +turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + int rc; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + 
turtle_parser->scanner_set = 1; + +#if defined(YYDEBUG) && YYDEBUG > 0 + turtle_lexer_set_debug(1 ,&turtle_parser->scanner); + turtle_parser_debug = 1; +#endif + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + (void)turtle_lexer__scan_bytes((char *)string, (yy_size_t)length, turtle_parser->scanner); + + rc = turtle_parser_parse(rdf_parser, turtle_parser->scanner); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + + return rc; +} +#endif + + +#ifdef TURTLE_PUSH_PARSE +static int +turtle_push_parse(raptor_parser *rdf_parser, + const char *string, size_t length) +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + raptor_world* world = rdf_parser->world; +#endif + raptor_turtle_parser* turtle_parser; + void *buffer; + int status; + yypstate *ps; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + turtle_parser->scanner_set = 1; + +#if defined(YYDEBUG) && YYDEBUG > 0 + turtle_lexer_set_debug(1 ,&turtle_parser->scanner); + turtle_parser_debug = 1; +#endif + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + buffer = turtle_lexer__scan_bytes(string, (yy_size_t)length, turtle_parser->scanner); + + /* returns a parser instance or 0 on out of memory */ + ps = yypstate_new(); + if(!ps) + return 1; + + do { + TURTLE_PARSER_YYSTYPE lval; + int token; + + memset(&lval, 0, sizeof(TURTLE_PARSER_YYSTYPE)); + + token = turtle_lexer_lex(&lval, turtle_parser->scanner); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("token %s\n", turtle_token_print(world, token, &lval)); +#endif + + status = yypush_parse(ps, token, &lval, rdf_parser, turtle_parser->scanner); + + /* turtle_token_free(world, token, &lval); */ + + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } while (status == YYPUSH_MORE); + yypstate_delete(ps); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + 
turtle_parser->scanner_set = 0; + + return 0; +} +#endif + + +/** + * raptor_turtle_parse_init - Initialise the Raptor Turtle parser + * + * Return value: non 0 on failure + **/ + +static int +raptor_turtle_parse_init(raptor_parser* rdf_parser, const char *name) { + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(raptor_namespaces_init(rdf_parser->world, &turtle_parser->namespaces, 0)) + return 1; + + turtle_parser->trig = !strcmp(name, "trig"); + + return 0; +} + + +/* PUBLIC FUNCTIONS */ + + +/* + * raptor_turtle_parse_terminate - Free the Raptor Turtle parser + * @rdf_parser: parser object + * + **/ +static void +raptor_turtle_parse_terminate(raptor_parser *rdf_parser) { + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + raptor_namespaces_clear(&turtle_parser->namespaces); + + if(turtle_parser->scanner_set) { + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + } + + if(turtle_parser->buffer) + RAPTOR_FREE(cdata, turtle_parser->buffer); + + if(turtle_parser->graph_name) { + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = NULL; + } +} + + +static void +raptor_turtle_clone_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)parser->context; + raptor_statement *statement = &parser->statement; + + if(!t->subject || !t->predicate || !t->object) + return; + + if(turtle_parser->trig && turtle_parser->graph_name) + statement->graph = raptor_term_copy(turtle_parser->graph_name); + + if(!parser->emitted_default_graph && !turtle_parser->graph_name) { + /* for non-TRIG - start default graph at first triple */ + raptor_parser_start_graph(parser, NULL, 0); + parser->emitted_default_graph++; + } + + /* Two choices for subject for Turtle */ + if(t->subject->type == RAPTOR_TERM_TYPE_BLANK) { + statement->subject = raptor_new_term_from_blank(parser->world, + 
t->subject->value.blank.string); + } else { + /* RAPTOR_TERM_TYPE_URI */ + RAPTOR_ASSERT(t->subject->type != RAPTOR_TERM_TYPE_URI, + "subject type is not resource"); + statement->subject = raptor_new_term_from_uri(parser->world, + t->subject->value.uri); + } + + /* Predicates are URIs but check for bad ordinals */ + if(!strncmp((const char*)raptor_uri_as_string(t->predicate->value.uri), + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) { + unsigned char* predicate_uri_string = raptor_uri_as_string(t->predicate->value.uri); + int predicate_ordinal = raptor_check_ordinal(predicate_uri_string+44); + if(predicate_ordinal <= 0) + raptor_parser_error(parser, "Illegal ordinal value %d in property '%s'.", predicate_ordinal, predicate_uri_string); + } + + statement->predicate = raptor_new_term_from_uri(parser->world, + t->predicate->value.uri); + + + /* Three choices for object for Turtle */ + if(t->object->type == RAPTOR_TERM_TYPE_URI) { + statement->object = raptor_new_term_from_uri(parser->world, + t->object->value.uri); + } else if(t->object->type == RAPTOR_TERM_TYPE_BLANK) { + statement->object = raptor_new_term_from_blank(parser->world, + t->object->value.blank.string); + } else { + /* RAPTOR_TERM_TYPE_LITERAL */ + RAPTOR_ASSERT(t->object->type != RAPTOR_TERM_TYPE_LITERAL, + "object type is not literal"); + statement->object = raptor_new_term_from_literal(parser->world, + t->object->value.literal.string, + t->object->value.literal.datatype, + t->object->value.literal.language); + } +} + +static void +raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *t) +{ + if(!t->subject || !t->predicate || !t->object) + return; + + if(!parser->statement_handler) + return; + + /* Generate the statement */ + (*parser->statement_handler)(parser->user_data, t); +} + +static void +raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_turtle_clone_statement(parser, t); + raptor_turtle_handle_statement(parser, 
&parser->statement); + /* clear resources */ + raptor_statement_clear(&parser->statement); +} + +static void +raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_statement* st; + raptor_turtle_parser* turtle_parser; + + raptor_turtle_clone_statement(parser, t); + st = raptor_new_statement(parser->world); + if(!st) { + return; + } + /* copy static to dynamic statement, it's a move really */ + st->subject = parser->statement.subject, parser->statement.subject = NULL; + st->predicate = parser->statement.predicate, parser->statement.predicate = NULL; + st->object = parser->statement.object, parser->statement.object = NULL; + st->graph = parser->statement.graph, parser->statement.graph = NULL; + + /* prep deferred list */ + turtle_parser = (raptor_turtle_parser*)parser->context; + if(!turtle_parser->deferred) { + turtle_parser->deferred = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); + if(!turtle_parser->deferred) { + goto free_seq; + } + } + /* append to deferred list */ + if(raptor_sequence_push(turtle_parser->deferred, st)) { + free_seq: + raptor_free_statement(st); + } +} + + + +static int +raptor_turtle_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_turtle_parser *turtle_parser; + char *ptr; + int rc; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("adding %d bytes to line buffer\n", (int)len); +#endif + + if(!len && !is_end) { + /* nothing to do */ + return 0; + } + + /* the actual buffer will contained unprocessed characters from + * the last run plus the chunk passed here */ + turtle_parser->end_of_buffer = turtle_parser->consumed + len; + if(turtle_parser->end_of_buffer > turtle_parser->buffer_length) { + /* resize */ + size_t new_buffer_length = turtle_parser->end_of_buffer; + + turtle_parser->buffer = RAPTOR_REALLOC(char*, turtle_parser->buffer, + 
new_buffer_length + 1); + + /* adjust stored length */ + turtle_parser->buffer_length = new_buffer_length; + } + if(!turtle_parser->buffer && turtle_parser->buffer_length) { + /* we tried to alloc a buffer but we failed */ + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return 1; + } + if(is_end && !turtle_parser->end_of_buffer) { + /* Nothing to do */ + return 0; + } + + /* move pointer to end of cdata buffer */ + ptr = turtle_parser->buffer + turtle_parser->consumed; + + /* now write new stuff at end of cdata buffer */ + memcpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + + /* reset processed counter */ + turtle_parser->processed = 0U; + /* unconsume */ + turtle_parser->consumed = 0U; + /* reset line numbers */ + turtle_parser->lineno = turtle_parser->lineno_last_good; + + /* let everyone know if this is the last chunk */ + turtle_parser->is_end = is_end; + if(!is_end) { + /* it's safer not to pass the very last line to the lexer + * just in case we end up with EOB-in-the-middle-of-X situations */ + size_t i = turtle_parser->end_of_buffer; + while(i > 0U && turtle_parser->buffer[--i] != '\n'); + /* either i == 0U or i points to the last \n before the end-of-buffer */ + turtle_parser->consumable = i; + } else { + /* otherwise the consumable number of bytes coincides with the EOB */ + turtle_parser->consumable = turtle_parser->end_of_buffer; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("buffer buffer now '%s' (%ld bytes)\n", + turtle_parser->buffer, turtle_parser->buffer_length); +#endif + +#ifdef TURTLE_PUSH_PARSE + rc = turtle_push_parse(rdf_parser, + turtle_parser->buffer, turtle_parser->consumable); +#else + rc = turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->consumable); +#endif + + if(turtle_parser->error_count) { + rc = 1; + } else if(!is_end) { + /* move stuff to the beginning of the buffer */ + turtle_parser->consumed = turtle_parser->end_of_buffer - turtle_parser->processed; + if(turtle_parser->consumed 
&& turtle_parser->processed) { + memmove(turtle_parser->buffer, + turtle_parser->buffer + turtle_parser->processed, + turtle_parser->consumed); + /* cancel all deferred eval's */ + if(turtle_parser->deferred) { + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } + } + } else { + /* this was the last chunk, finalise */ + if(turtle_parser->deferred) { + raptor_sequence* def = turtle_parser->deferred; + int i; + for(i = 0; i < raptor_sequence_size(def); i++) { + raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i); + + raptor_turtle_handle_statement(rdf_parser, t2); + } + } + if(rdf_parser->emitted_default_graph) { + /* for non-TRIG - end default graph after last triple */ + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + if(turtle_parser->deferred) { + /* clear resources */ + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } + } + return rc; +} + + +static int +raptor_turtle_parse_start(raptor_parser *rdf_parser) +{ + raptor_locator *locator=&rdf_parser->locator; + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + /* base URI required for Turtle */ + if(!rdf_parser->base_uri) + return 1; + + locator->line = 1; + locator->column= -1; /* No column info */ + locator->byte= -1; /* No bytes info */ + + if(turtle_parser->buffer_length) { + RAPTOR_FREE(cdata, turtle_parser->buffer); + turtle_parser->buffer = NULL; + turtle_parser->buffer_length = 0; + } + + turtle_parser->lineno = 1; + + return 0; +} + + +static int +raptor_turtle_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score= 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "ttl")) + score = 8; + if(!strcmp((const char*)suffix, "n3")) + score = 3; + } + + if(mime_type) { + if(strstr((const 
char*)mime_type, "turtle")) + score += 6; + if(strstr((const char*)mime_type, "n3")) + score += 3; + } + + /* Do this as long as N3 is not supported since it shares the same syntax */ + if(buffer && len) { +#define HAS_TURTLE_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) +/* The following could also be found with N-Triples but not with @prefix */ +#define HAS_TURTLE_RDF_URI (raptor_memstr((const char*)buffer, len, ": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL) + + if(HAS_TURTLE_PREFIX) { + score = 6; + if(HAS_TURTLE_RDF_URI) + score += 2; + } + } + + return score; +} + + +static raptor_uri* +raptor_turtle_get_graph(raptor_parser* rdf_parser) +{ + raptor_turtle_parser *turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->graph_name) + return raptor_uri_copy(turtle_parser->graph_name->value.uri); + + return NULL; +} + + +#ifdef RAPTOR_PARSER_TRIG +static int +raptor_trig_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score= 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "trig")) + score = 9; +#ifndef RAPTOR_PARSER_TURTLE + if(!strcmp((const char*)suffix, "ttl")) + score = 8; + if(!strcmp((const char*)suffix, "n3")) + score = 3; +#endif + } + + if(mime_type) { + if(strstr((const char*)mime_type, "trig")) + score = 6; +#ifndef RAPTOR_PARSER_TURTLE + if(strstr((const char*)mime_type, "turtle")) + score += 6; + if(strstr((const char*)mime_type, "n3")) + score += 3; +#endif + } + +#ifndef RAPTOR_PARSER_TURTLE + /* Do this as long as N3 is not supported since it shares the same syntax */ + if(buffer && len) { +#define HAS_TRIG_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) +/* The following could also be found with N-Triples but not with @prefix */ +#define HAS_TRIG_RDF_URI (raptor_memstr((const char*)buffer, len, 
": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL) + + if(HAS_TRIG_PREFIX) { + score = 6; + if(HAS_TRIG_RDF_URI) + score += 2; + } + } +#endif + + return score; +} +#endif + + +#ifdef RAPTOR_PARSER_TURTLE +static const char* const turtle_names[4] = { "turtle", "ntriples-plus", "n3", NULL }; + +static const char* const turtle_uri_strings[3] = { + "http://www.w3.org/ns/formats/Turtle", + "http://www.dajobe.org/2004/01/turtle/", + NULL +}; + +#define TURTLE_TYPES_COUNT 6 +static const raptor_type_q turtle_types[TURTLE_TYPES_COUNT + 1] = { + /* first one is the default */ + { "text/turtle", 11, 10}, + { "application/x-turtle", 20, 10}, + { "application/turtle", 18, 10}, + { "text/n3", 7, 3}, + { "text/rdf+n3", 11, 3}, + { "application/rdf+n3", 18, 3}, + { NULL, 0} +}; + +static int +raptor_turtle_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = turtle_names; + + factory->desc.mime_types = turtle_types; + + factory->desc.label = "Turtle Terse RDF Triple Language"; + factory->desc.uri_strings = turtle_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_turtle_parser); + + factory->init = raptor_turtle_parse_init; + factory->terminate = raptor_turtle_parse_terminate; + factory->start = raptor_turtle_parse_start; + factory->chunk = raptor_turtle_parse_chunk; + factory->recognise_syntax = raptor_turtle_parse_recognise_syntax; + factory->get_graph = raptor_turtle_get_graph; + + return rc; +} +#endif + + +#ifdef RAPTOR_PARSER_TRIG +static const char* const trig_names[2] = { "trig", NULL }; + +static const char* const trig_uri_strings[2] = { + "http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/", + NULL +}; + +#define TRIG_TYPES_COUNT 1 +static const raptor_type_q trig_types[TRIG_TYPES_COUNT + 1] = { + /* first one is the default */ + { "application/x-trig", 18, 10}, + { NULL, 0, 0} +}; + +static int +raptor_trig_parser_register_factory(raptor_parser_factory 
#ifdef RAPTOR_PARSER_TRIG
/* syntax names, NULL terminated */
static const char* const trig_names[2] = { "trig", NULL };

/* syntax URIs, NULL terminated */
static const char* const trig_uri_strings[2] = {
  "http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/",
  NULL
};

#define TRIG_TYPES_COUNT 1
/* MIME type, its string length and a third per-type value (q) */
static const raptor_type_q trig_types[TRIG_TYPES_COUNT + 1] = {
  /* first one is the default */
  { "application/x-trig", 18, 10},
  { NULL, 0, 0}
};

/*
 * raptor_trig_parser_register_factory - fill in the TriG parser factory
 * @factory: factory to populate
 *
 * Shares every callback with the Turtle factory except syntax
 * recognition.
 *
 * Return value: always 0
 */
static int
raptor_trig_parser_register_factory(raptor_parser_factory *factory)
{
  int rc = 0;

  factory->desc.names = trig_names;

  factory->desc.mime_types = trig_types;

  factory->desc.label = "TriG - Turtle with Named Graphs";
  factory->desc.uri_strings = trig_uri_strings;

  factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI;

  factory->context_length = sizeof(raptor_turtle_parser);

  factory->init = raptor_turtle_parse_init;
  factory->terminate = raptor_turtle_parse_terminate;
  factory->start = raptor_turtle_parse_start;
  factory->chunk = raptor_turtle_parse_chunk;
  factory->recognise_syntax = raptor_trig_parse_recognise_syntax;
  factory->get_graph = raptor_turtle_get_graph;

  return rc;
}
#endif


#ifdef RAPTOR_PARSER_TURTLE
/*
 * raptor_init_parser_turtle - register the Turtle parser with a world
 * @world: raptor world
 *
 * Return value: logical negation of what
 * raptor_world_register_parser_factory() returns
 */
int
raptor_init_parser_turtle(raptor_world* world)
{
  return !raptor_world_register_parser_factory(world,
                                               &raptor_turtle_parser_register_factory);
}
#endif

#ifdef RAPTOR_PARSER_TRIG
/*
 * raptor_init_parser_trig - register the TriG parser with a world
 * @world: raptor world
 *
 * Return value: logical negation of what
 * raptor_world_register_parser_factory() returns
 */
int
raptor_init_parser_trig(raptor_world* world)
{
  return !raptor_world_register_parser_factory(world,
                                               &raptor_trig_parser_register_factory);
}
#endif


#ifdef STANDALONE
#include <stdio.h>
#include <locale.h>

#define TURTLE_FILE_BUF_SIZE 2048

/* statement handler for the test driver: print to the FILE* in @user */
static void
turtle_parser_print_statement(void *user,
                              raptor_statement *statement)
{
  FILE* stream = (FILE*)user;
  raptor_statement_print(statement, stream);
  putc('\n', stream);
}



/*
 * Standalone test driver: reads at most TURTLE_FILE_BUF_SIZE-1 bytes
 * from the file named by argv[1] (or from stdin), parses them as
 * Turtle and prints every statement to stdout.
 */
int
main(int argc, char *argv[])
{
  char string[TURTLE_FILE_BUF_SIZE];
  raptor_parser rdf_parser; /* static */
  raptor_turtle_parser turtle_parser; /* static */
  raptor_locator *locator = &rdf_parser.locator;
  FILE *fh;
  const char *filename;
  size_t nobj;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2
  turtle_parser_debug = 1;
#endif

  if(argc > 1) {
    filename = argv[1];
    fh = fopen(filename, "r");
    if(!fh) {
      fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename,
              strerror(errno));
      exit(1);
    }
  } else {
    filename="<stdin>";
    fh = stdin;
  }

  memset(string, 0, TURTLE_FILE_BUF_SIZE);
  /* element size 1 so that fread() returns a byte count, and one byte
   * fewer than the buffer so the zeroed last byte always terminates
   * the string handed to strlen() below */
  nobj = fread(string, 1, TURTLE_FILE_BUF_SIZE - 1, fh);
  if(nobj < TURTLE_FILE_BUF_SIZE - 1) {
    /* short read: tell end-of-file apart from a real error */
    if(ferror(fh)) {
      fprintf(stderr, "%s: file '%s' read failed - %s\n",
              argv[0], filename, strerror(errno));
      fclose(fh);
      return(1);
    }
  }

  if(argc > 1)
    fclose(fh);

  memset(&rdf_parser, 0, sizeof(rdf_parser));
  memset(&turtle_parser, 0, sizeof(turtle_parser));

  locator->line= locator->column = -1;
  locator->file= filename;

  turtle_parser.lineno= 1;

  rdf_parser.world = raptor_new_world();
  if(!rdf_parser.world) {
    fprintf(stderr, "%s: Cannot create raptor world\n", argv[0]);
    return(1);
  }
  rdf_parser.context = &turtle_parser;
  rdf_parser.base_uri = raptor_new_uri(rdf_parser.world,
                                       (const unsigned char*)"http://example.org/fake-base-uri/");

  raptor_parser_set_statement_handler(&rdf_parser, stdout,
                                      turtle_parser_print_statement);
  raptor_turtle_parse_init(&rdf_parser, "turtle");

  turtle_parser.error_count = 0;

#ifdef TURTLE_PUSH_PARSE
  turtle_push_parse(&rdf_parser, string, strlen(string));
#else
  turtle_parse(&rdf_parser, string, strlen(string));
#endif

  raptor_turtle_parse_terminate(&rdf_parser);

  raptor_free_uri(rdf_parser.base_uri);

  raptor_free_world(rdf_parser.world);

  return (0);
}
#endif
+ + You should have received a copy of the GNU General Public License + along with this program. If not, see <https://www.gnu.org/licenses/>. */ + +/* As a special exception, you may create a larger work that contains + part or all of the Bison parser skeleton and distribute that work + under terms of your choice, so long as that work isn't itself a + parser generator using the skeleton or a modified version thereof + as a parser skeleton. Alternatively, if you modify or redistribute + the parser skeleton itself, you may (at your option) remove this + special exception, which will cause the skeleton and the resulting + Bison output files to be licensed under the GNU General Public + License without this special exception. + + This special exception was added by the Free Software Foundation in + version 2.2 of Bison. */ + +/* DO NOT RELY ON FEATURES THAT ARE NOT DOCUMENTED in the manual, + especially those whose name start with YY_ or yy_. They are + private implementation details that can be changed or removed. */ + +#ifndef YY_TURTLE_PARSER_TURTLE_PARSER_H_INCLUDED +# define YY_TURTLE_PARSER_TURTLE_PARSER_H_INCLUDED +/* Debug traces. */ +#ifndef TURTLE_PARSER_DEBUG +# if defined YYDEBUG +#if YYDEBUG +# define TURTLE_PARSER_DEBUG 1 +# else +# define TURTLE_PARSER_DEBUG 0 +# endif +# else /* ! defined YYDEBUG */ +# define TURTLE_PARSER_DEBUG 0 +# endif /* ! defined YYDEBUG */ +#endif /* ! defined TURTLE_PARSER_DEBUG */ +#if TURTLE_PARSER_DEBUG +extern int turtle_parser_debug; +#endif + +/* Token kinds. */ +#ifndef TURTLE_PARSER_TOKENTYPE +# define TURTLE_PARSER_TOKENTYPE + enum turtle_parser_tokentype + { + TURTLE_PARSER_EMPTY = -2, + TURTLE_PARSER_EOF = 0, /* "end of file" */ + TURTLE_PARSER_error = 256, /* error */ + TURTLE_PARSER_UNDEF = 257, /* "invalid token" */ + A = 258, /* "a" */ + HAT = 259, /* "^" */ + DOT = 260, /* "." 
*/ + COMMA = 261, /* "," */ + SEMICOLON = 262, /* ";" */ + LEFT_SQUARE = 263, /* "[" */ + RIGHT_SQUARE = 264, /* "]" */ + LEFT_ROUND = 265, /* "(" */ + RIGHT_ROUND = 266, /* ")" */ + LEFT_CURLY = 267, /* "{" */ + RIGHT_CURLY = 268, /* "}" */ + TRUE_TOKEN = 269, /* "true" */ + FALSE_TOKEN = 270, /* "false" */ + PREFIX = 271, /* "@prefix" */ + BASE = 272, /* "@base" */ + SPARQL_PREFIX = 273, /* "PREFIX" */ + SPARQL_BASE = 274, /* "BASE" */ + STRING_LITERAL = 275, /* "string literal" */ + IDENTIFIER = 276, /* "identifier" */ + LANGTAG = 277, /* "langtag" */ + INTEGER_LITERAL = 278, /* "integer literal" */ + FLOATING_LITERAL = 279, /* "floating point literal" */ + DECIMAL_LITERAL = 280, /* "decimal literal" */ + BLANK_LITERAL = 281, /* "blank node" */ + URI_LITERAL = 282, /* "URI literal" */ + GRAPH_NAME_LEFT_CURLY = 283, /* "Graph URI literal {" */ + QNAME_LITERAL = 284, /* "QName" */ + ERROR_TOKEN = 285 /* ERROR_TOKEN */ + }; + typedef enum turtle_parser_tokentype turtle_parser_token_kind_t; +#endif + +/* Value type. */ +#if ! defined TURTLE_PARSER_STYPE && ! 
defined TURTLE_PARSER_STYPE_IS_DECLARED +union TURTLE_PARSER_STYPE +{ +#line 145 "./turtle_parser.y" + + unsigned char *string; + raptor_term *identifier; + raptor_sequence *sequence; + raptor_uri *uri; + +#line 109 "turtle_parser.h" + +}; +typedef union TURTLE_PARSER_STYPE TURTLE_PARSER_STYPE; +# define TURTLE_PARSER_STYPE_IS_TRIVIAL 1 +# define TURTLE_PARSER_STYPE_IS_DECLARED 1 +#endif + + + + +int turtle_parser_parse (raptor_parser* rdf_parser, void* yyscanner); + + +#endif /* !YY_TURTLE_PARSER_TURTLE_PARSER_H_INCLUDED */ diff --git a/src/turtle_parser.y b/src/turtle_parser.y new file mode 100644 index 0000000..1474a3d --- /dev/null +++ b/src/turtle_parser.y @@ -0,0 +1,2128 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * turtle_parser.y - Raptor Turtle / TRIG / N3 parsers - over tokens from turtle grammar lexer + * + * Copyright (C) 2003-2013, David Beckett http://www.dajobe.org/ + * Copyright (C) 2003-2005, University of Bristol, UK http://www.bristol.ac.uk/ + * + * This package is Free Software and part of Redland http://librdf.org/ + * + * It is licensed under the following three licenses as alternatives: + * 1. GNU Lesser General Public License (LGPL) V2.1 or any newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * complete terms and further detail along with the license texts for + * the licenses in COPYING.LIB, COPYING and LICENSE-2.0.txt respectively. 
+ * + * + * Turtle is defined in http://www.dajobe.org/2004/01/turtle/ + * + * Made from a subset of the terms in + * http://www.w3.org/DesignIssues/Notation3.html + * + * TRIG is defined in http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/ + */ + +%{ +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif +#ifdef HAVE_STDLIB_H +#include <stdlib.h> +#endif + +#include "raptor2.h" +#include "raptor_internal.h" + +#include <turtle_parser.h> + +#define YY_NO_UNISTD_H 1 +#undef yylex +#include <turtle_lexer.h> + +#include <turtle_common.h> + + +/* Set RAPTOR_DEBUG to 3 for super verbose parsing - watching the shift/reduces */ +#if 0 +#undef RAPTOR_DEBUG +#define RAPTOR_DEBUG 3 +#endif + + +/* Fail with an debug error message if RAPTOR_DEBUG > 1 */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 +#define YYERROR_MSG(msg) do { fputs("** YYERROR ", RAPTOR_DEBUG_FH); fputs(msg, RAPTOR_DEBUG_FH); fputc('\n', RAPTOR_DEBUG_FH); YYERROR; } while(0) +#else +#define YYERROR_MSG(ignore) YYERROR +#endif +#define YYERR_MSG_GOTO(label,msg) do { errmsg = msg; goto label; } while(0) + +/* Slow down the grammar operation and watch it work */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 +#undef YYDEBUG +#define YYDEBUG 1 +#endif + +#ifdef RAPTOR_DEBUG +const char * turtle_token_print(raptor_world* world, int token, + TURTLE_PARSER_STYPE *lval); +#endif + + +/* the lexer does not seem to track this */ +#undef RAPTOR_TURTLE_USE_ERROR_COLUMNS + +/* set api.push-pull to "push" if this is defined */ +#undef TURTLE_PUSH_PARSE + +/* Prototypes */ +int turtle_parser_error(raptor_parser* rdf_parser, void* scanner, const char *msg); +static void turtle_parser_error_simple(void* user_data, const char *msg, ...) 
RAPTOR_PRINTF_FORMAT(2, 3); + +/* Make lex/yacc interface as small as possible */ +#undef yylex +#define yylex turtle_lexer_lex + +/* Prototypes for local functions */ +static void raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *triple); + +static void raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *triple); + +static void raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *triple); + +%} + + +/* directives */ + +%require "3.0" + +/* File prefix (-b) */ +%file-prefix "turtle_parser" + +/* Bison 2.6+ : Symbol prefix */ +%define api.prefix {turtle_parser_} +/* Bison 3.4+ : Generated header file */ +%define api.header.include {<turtle_parser.h>} + +/* Write parser header file with macros (bison -d) */ +%defines + +/* Make verbose error messages for syntax errors */ +%define parse.error verbose + +/* Write output file with verbose descriptions of parser states */ +%verbose + +/* Generate code processing locations */ + /* %locations */ + +/* Pure parser - want a reentrant parser */ +%define api.pure full + +/* Push or pull parser? */ +%define api.push-pull pull + +/* Pure parser argument: lexer - yylex() and parser - yyparse() */ +%lex-param { yyscan_t yyscanner } +%parse-param { raptor_parser* rdf_parser } { void* yyscanner } + +/* Interface between lexer and parser */ +%union { + unsigned char *string; + raptor_term *identifier; + raptor_sequence *sequence; + raptor_uri *uri; +} + + +/* others */ + +%token A "a" +%token HAT "^" +%token DOT "." 
/*
 * graph: a TriG graph block, either named ("<uri> { ... }", lexed as
 * GRAPH_NAME_LEFT_CURLY) or unnamed ("{ ... }").
 *
 * The mid-rule actions run BEFORE graphBody is parsed so the graph is
 * started before its triples are generated; the final actions end the graph.
 * In Turtle mode (turtle_parser->trig is 0) the construct is reported as an
 * error and no graph is started or ended.
 */
graph: GRAPH_NAME_LEFT_CURLY
  {
    /* action in mid-rule so this is run BEFORE the triples in graphBody */
    raptor_turtle_parser* turtle_parser;
    raptor_uri* graph_uri = $1;

    /* This action consumes the URI on every path.  Clear the stack slot so
     * the <uri> %destructor cannot free it a second time if a later syntax
     * error inside graphBody unwinds the stack. */
    $1 = NULL;

    turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
    if(!turtle_parser->trig) {
      turtle_parser_error(rdf_parser, yyscanner, "{ ... } is not allowed in Turtle");
      /* the URI is not used in Turtle mode; release it instead of leaking */
      if(graph_uri)
        raptor_free_uri(graph_uri);
    } else {
      if(turtle_parser->graph_name)
        raptor_free_term(turtle_parser->graph_name);
      turtle_parser->graph_name = raptor_new_term_from_uri(rdf_parser->world, graph_uri);
      if(graph_uri)
        raptor_free_uri(graph_uri);
      /* do not dereference graph_name when the term could not be created */
      if(!turtle_parser->graph_name)
        YYERROR;
      raptor_parser_start_graph(rdf_parser,
                                turtle_parser->graph_name->value.uri, 1);
    }
  }
  graphBody RIGHT_CURLY
{
  raptor_turtle_parser* turtle_parser;

  turtle_parser = (raptor_turtle_parser*)rdf_parser->context;

  if(turtle_parser->trig) {
    /* close the named graph opened by the mid-rule action and drop its name */
    raptor_parser_end_graph(rdf_parser,
                            turtle_parser->graph_name->value.uri, 1);
    raptor_free_term(turtle_parser->graph_name);
    turtle_parser->graph_name = NULL;
    rdf_parser->emitted_default_graph = 0;
  }
}
|
LEFT_CURLY
  {
    /* action in mid-rule so this is run BEFORE the triples in graphBody */
    raptor_turtle_parser* turtle_parser;

    turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
    if(!turtle_parser->trig)
      turtle_parser_error(rdf_parser, yyscanner, "{ ... } is not allowed in Turtle");
    else {
      /* unnamed block: start a graph with no name */
      raptor_parser_start_graph(rdf_parser, NULL, 1);
      rdf_parser->emitted_default_graph++;
    }
  }
  graphBody RIGHT_CURLY
{
  raptor_turtle_parser* turtle_parser;

  turtle_parser = (raptor_turtle_parser*)rdf_parser->context;
  if(turtle_parser->trig) {
    raptor_parser_end_graph(rdf_parser, NULL, 1);
    rdf_parser->emitted_default_graph = 0;
  }
}
;
/*
 * statement: one top-level construct - a directive, a graph block, or a
 * "triples ." production.
 *
 * For "triples ." the action works in three steps, in this order:
 *   1. pass every statement in turtle_parser->deferred to
 *      raptor_turtle_handle_statement() (presumably these were queued by
 *      raptor_turtle_defer_statement() - confirm against its definition);
 *   2. pass every statement of the triples sequence to
 *      raptor_turtle_generate_statement() and free that sequence;
 *   3. free the deferred sequence and reset the pointer.
 */
statement: directive
| graph
| triples DOT
{
  raptor_turtle_parser* ctx = (raptor_turtle_parser*)rdf_parser->context;
  raptor_sequence* pending = ctx->deferred;
  int idx;

  /* step 1: yield deferred statements, if any */
  if(pending) {
    for(idx = 0; idx < raptor_sequence_size(pending); idx++) {
      raptor_statement* queued;

      queued = (raptor_statement*)raptor_sequence_get_at(pending, idx);
      raptor_turtle_handle_statement(rdf_parser, queued);
    }
  }

  /* step 2: emit the triples of this statement, then release them */
  if($1) {
    idx = 0;
    while(idx < raptor_sequence_size($1)) {
      raptor_statement* st = (raptor_statement*)raptor_sequence_get_at($1, idx);

      raptor_turtle_generate_statement(rdf_parser, st);
      idx++;
    }
    raptor_free_sequence($1);
  }

  /* step 3: the deferred queue has been fully delivered; discard it */
  if(ctx->deferred) {
    raptor_free_sequence(ctx->deferred);
    ctx->deferred = NULL;
  }
}
;
/*
 * objectList: comma-separated objects sharing one subject and predicate.
 *
 * The value is a sequence of raptor_statement objects whose subject and
 * predicate are still NULL and whose object is the parsed term; the
 * enclosing rules fill in the rest.  The value is NULL when an object is
 * NULL.  Actions free their own right-hand-side values before YYERROR.
 */
objectList: objectList COMMA object
{
  raptor_statement *triple;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("objectList 1\n");
  if($3) {
    printf(" object=\n");
    raptor_term_print_as_ntriples($3, stdout);
    printf("\n");
  } else
    printf(" and empty object\n");
  if($1) {
    printf(" objectList=");
    raptor_sequence_print($1, stdout);
    printf("\n");
  } else
    printf(" and empty objectList\n");
#endif

  if(!$3) {
    /* The list collapses to NULL; release the statements collected so far,
     * since nothing else will reference or free them after this reduction. */
    if($1)
      raptor_free_sequence($1);
    $$ = NULL;
  } else {
    triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $3, NULL);
    if(!triple) {
      raptor_free_sequence($1);
      YYERROR;
    }
    if(raptor_sequence_push($1, triple)) {
      raptor_free_sequence($1);
      YYERROR;
    }
    $$ = $1;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" objectList is now ");
    raptor_sequence_print($$, stdout);
    printf("\n\n");
#endif
  }
}
| object
{
  raptor_statement *triple;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("objectList 2\n");
  if($1) {
    printf(" object=\n");
    raptor_term_print_as_ntriples($1, stdout);
    printf("\n");
  } else
    printf(" and empty object\n");
#endif

  if(!$1)
    $$ = NULL;
  else {
    triple = raptor_new_statement_from_nodes(rdf_parser->world, NULL, NULL, $1, NULL);
    if(!triple)
      YYERROR;
    /* the sequence owns its statements; a print handler is only installed
     * in debug builds */
#ifdef RAPTOR_DEBUG
    $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement,
                             (raptor_data_print_handler)raptor_statement_print);
#else
    $$ = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL);
#endif
    if(!$$) {
      raptor_free_statement(triple);
      YYERROR;
    }
    if(raptor_sequence_push($$, triple)) {
      raptor_free_sequence($$);
      $$ = NULL;
      YYERROR;
    }
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" objectList is now ");
    raptor_sequence_print($$, stdout);
    printf("\n\n");
#endif
  }
}
;
/*
 * predicateObjectList: "verb objectList" groups separated by ';'.
 *
 * Each alternative stamps a copy of the verb term into the predicate of
 * every statement of its objectList.  The first alternative then moves
 * those statements onto the end of the existing list ($1), which is the
 * value of the rule.  A trailing ';' returns the list unchanged.
 */
predicateObjectList: predicateObjectList SEMICOLON verb objectList
{
  int i;

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("predicateObjectList 1\n verb=");
  raptor_term_print_as_ntriples($3, stdout);
  printf("\n objectList=");
  raptor_sequence_print($4, stdout);
  printf("\n predicateObjectList=");
  raptor_sequence_print($1, stdout);
  printf("\n\n");
#endif

  if($4 == NULL) {
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" empty objectList not processed\n");
#endif
  } else if($3 && $4) {
    /* non-empty property list, handle it */
    for(i = 0; i < raptor_sequence_size($4); i++) {
      raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($4, i);
      t2->predicate = raptor_term_copy($3);
    }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" after substitution objectList=");
    raptor_sequence_print($4, stdout);
    printf("\n");
#endif
  }

  if($1 == NULL) {
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" empty predicateObjectList not copied\n\n");
#endif
  } else if($3 && $4 && $1) {
    /* move every statement from the new objectList onto the result list */
    while(raptor_sequence_size($4)) {
      raptor_statement* t2 = (raptor_statement*)raptor_sequence_unshift($4);
      if(raptor_sequence_push($1, t2)) {
        raptor_free_sequence($1);
        raptor_free_term($3);
        raptor_free_sequence($4);
        YYERROR;
      }
    }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" after appending objectList (reverse order)=");
    raptor_sequence_print($1, stdout);
    printf("\n\n");
#endif
  }

  /* $4 is never part of the result: it is either drained into $1 above or
   * unused (when $1 or $3 is NULL).  Free it on every path; previously the
   * unused cases leaked the sequence and its statements. */
  if($4)
    raptor_free_sequence($4);

  if($3)
    raptor_free_term($3);

  $$ = $1;
}
| verb objectList
{
  int i;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("predicateObjectList 2\n verb=");
  raptor_term_print_as_ntriples($1, stdout);
  if($2) {
    printf("\n objectList=");
    raptor_sequence_print($2, stdout);
    printf("\n");
  } else
    printf("\n and empty objectList\n");
#endif

  if($1 && $2) {
    for(i = 0; i < raptor_sequence_size($2); i++) {
      raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i);
      t2->predicate = raptor_term_copy($1);
    }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" after substitution objectList=");
    raptor_sequence_print($2, stdout);
    printf("\n\n");
#endif
  }

  /* every statement holds its own copy of the verb; release the original */
  if($1)
    raptor_free_term($1);

  $$ = $2;
}
| predicateObjectList SEMICOLON
{
  $$ = $1;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("predicateObjectList 5\n trailing semicolon returning existing list ");
  raptor_sequence_print($$, stdout);
  printf("\n\n");
#endif
}
;
(char*)$2 : "(default)"), raptor_uri_as_string($3)); +#endif + + if(prefix) { + size_t len = strlen((const char*)prefix); + if(prefix[len-1] == ':') { + if(len == 1) + /* declaring default namespace prefix PREFIX : ... */ + prefix = NULL; + else + prefix[len-1]='\0'; + } + } + + ns = raptor_new_namespace_from_uri(&turtle_parser->namespaces, prefix, $3, 0); + if(ns) { + raptor_namespaces_start_namespace(&turtle_parser->namespaces, ns); + raptor_parser_start_namespace(rdf_parser, ns); + } + + if($2) + RAPTOR_FREE(char*, $2); + raptor_free_uri($3); + + if(!ns) + YYERROR; +} +| SPARQL_PREFIX IDENTIFIER URI_LITERAL +{ + unsigned char *prefix = $2; + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)(rdf_parser->context); + raptor_namespace *ns; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("directive @prefix %s %s.\n",($2 ? (char*)$2 : "(default)"), raptor_uri_as_string($3)); +#endif + + if(prefix) { + size_t len = strlen((const char*)prefix); + if(prefix[len-1] == ':') { + if(len == 1) + /* declaring default namespace prefix @prefix : ... 
/*
 * literal: builds a literal raptor_term from a string, numeric or boolean
 * token.
 *
 * Token strings and datatype URIs are owned by the action: each alternative
 * frees them after raptor_new_term_from_literal() has been called, and fails
 * with YYERROR when the term (or a required datatype URI copy) cannot be
 * created.  Alternatives with an explicit datatype yield NULL when the
 * datatype URI token is NULL.
 */
literal: STRING_LITERAL LANGTAG
{
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("literal + language string=\"%s\"\n", $1);
#endif

  $$ = raptor_new_term_from_literal(rdf_parser->world, $1, NULL, $2);
  RAPTOR_FREE(char*, $1);
  RAPTOR_FREE(char*, $2);
  if(!$$)
    YYERROR;
}
| STRING_LITERAL LANGTAG HAT URI_LITERAL
{
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  /* arguments follow the labels: language first, then the string */
  printf("literal + language=\"%s\" datatype string=\"%s\" uri=\"%s\"\n", $2, $1, raptor_uri_as_string($4));
#endif

  if($4) {
    if($2) {
      raptor_parser_error(rdf_parser,
                          "Language not allowed with datatyped literal");
      RAPTOR_FREE(char*, $2);
      $2 = NULL;
    }

    $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $4, NULL);
    RAPTOR_FREE(char*, $1);
    raptor_free_uri($4);
    if(!$$)
      YYERROR;
  } else {
    /* no datatype URI: nothing is built, so release the unused tokens */
    if($1)
      RAPTOR_FREE(char*, $1);
    if($2)
      RAPTOR_FREE(char*, $2);
    $$ = NULL;
  }

}
| STRING_LITERAL LANGTAG HAT QNAME_LITERAL
{
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  /* arguments follow the labels: language first, then the string */
  printf("literal + language=\"%s\" datatype string=\"%s\" qname URI=<%s>\n", $2, $1, raptor_uri_as_string($4));
#endif

  if($4) {
    if($2) {
      raptor_parser_error(rdf_parser,
                          "Language not allowed with datatyped literal");
      RAPTOR_FREE(char*, $2);
      $2 = NULL;
    }

    $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $4, NULL);
    RAPTOR_FREE(char*, $1);
    raptor_free_uri($4);
    if(!$$)
      YYERROR;
  } else {
    /* no datatype URI: nothing is built, so release the unused tokens */
    if($1)
      RAPTOR_FREE(char*, $1);
    if($2)
      RAPTOR_FREE(char*, $2);
    $$ = NULL;
  }

}
| STRING_LITERAL HAT URI_LITERAL
{
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("literal + datatype string=\"%s\" uri=\"%s\"\n", $1, raptor_uri_as_string($3));
#endif

  if($3) {
    $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $3, NULL);
    RAPTOR_FREE(char*, $1);
    raptor_free_uri($3);
    if(!$$)
      YYERROR;
  } else {
    /* no datatype URI: release the unused string token */
    if($1)
      RAPTOR_FREE(char*, $1);
    $$ = NULL;
  }

}
| STRING_LITERAL HAT QNAME_LITERAL
{
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("literal + datatype string=\"%s\" qname URI=<%s>\n", $1, raptor_uri_as_string($3));
#endif

  if($3) {
    $$ = raptor_new_term_from_literal(rdf_parser->world, $1, $3, NULL);
    RAPTOR_FREE(char*, $1);
    raptor_free_uri($3);
    if(!$$)
      YYERROR;
  } else {
    /* no datatype URI: release the unused string token */
    if($1)
      RAPTOR_FREE(char*, $1);
    $$ = NULL;
  }
}
| STRING_LITERAL
{
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("literal string=\"%s\"\n", $1);
#endif

  $$ = raptor_new_term_from_literal(rdf_parser->world, $1, NULL, NULL);
  RAPTOR_FREE(char*, $1);
  if(!$$)
    YYERROR;
}
| INTEGER_LITERAL
{
  raptor_uri *uri;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("resource integer=%s\n", $1);
#endif
  uri = raptor_uri_copy(rdf_parser->world->xsd_integer_uri);
  /* same guard as the decimal alternative: never build an untyped literal
   * when the datatype URI could not be obtained */
  if(!uri) {
    RAPTOR_FREE(char*, $1);
    YYERROR;
  }
  $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL);
  RAPTOR_FREE(char*, $1);
  raptor_free_uri(uri);
  if(!$$)
    YYERROR;
}
| FLOATING_LITERAL
{
  raptor_uri *uri;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("resource double=%s\n", $1);
#endif
  uri = raptor_uri_copy(rdf_parser->world->xsd_double_uri);
  if(!uri) {
    RAPTOR_FREE(char*, $1);
    YYERROR;
  }
  $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL);
  RAPTOR_FREE(char*, $1);
  raptor_free_uri(uri);
  if(!$$)
    YYERROR;
}
| DECIMAL_LITERAL
{
  raptor_uri *uri;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  printf("resource decimal=%s\n", $1);
#endif
  uri = raptor_uri_copy(rdf_parser->world->xsd_decimal_uri);
  if(!uri) {
    RAPTOR_FREE(char*, $1);
    YYERROR;
  }
  $$ = raptor_new_term_from_literal(rdf_parser->world, $1, uri, NULL);
  RAPTOR_FREE(char*, $1);
  raptor_free_uri(uri);
  if(!$$)
    YYERROR;
}
| TRUE_TOKEN
{
  raptor_uri *uri;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  fputs("resource boolean true\n", stderr);
#endif
  uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri);
  if(!uri)
    YYERROR;
  $$ = raptor_new_term_from_literal(rdf_parser->world,
                                    (const unsigned char*)"true", uri, NULL);
  raptor_free_uri(uri);
  if(!$$)
    YYERROR;
}
| FALSE_TOKEN
{
  raptor_uri *uri;
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
  fputs("resource boolean false\n", stderr);
#endif
  uri = raptor_uri_copy(rdf_parser->world->xsd_boolean_uri);
  if(!uri)
    YYERROR;
  $$ = raptor_new_term_from_literal(rdf_parser->world,
                                    (const unsigned char*)"false", uri, NULL);
  raptor_free_uri(uri);
  if(!$$)
    YYERROR;
}
;
/*
 * blankNodePropertyList: "[ predicateObjectListOpt ]".
 *
 * Creates a blank-node term from a freshly generated id; that term is the
 * value of the rule.  When the bracket holds a predicate/object list, every
 * statement in it gets a copy of the new term as subject and is passed to
 * raptor_turtle_defer_statement(); the list is then freed.  On any
 * allocation failure the list is freed and the action fails with YYERROR.
 */
blankNodePropertyList: LEFT_SQUARE predicateObjectListOpt RIGHT_SQUARE
{
  int n;
  const unsigned char *bnode_id;
  raptor_term *node = NULL;

  bnode_id = raptor_world_generate_bnodeid(rdf_parser->world);
  if(bnode_id) {
    node = raptor_new_term_from_blank(rdf_parser->world, bnode_id);
    RAPTOR_FREE(char*, bnode_id);
  }

  /* either the id or the term could not be created */
  if(!node) {
    if($2)
      raptor_free_sequence($2);
    YYERROR;
  }

  $$ = node;

  if($2 != NULL) {
    /* non-empty property list, handle it */
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf("resource\n predicateObjectList=");
    raptor_sequence_print($2, stdout);
    printf("\n");
#endif

    n = 0;
    while(n < raptor_sequence_size($2)) {
      raptor_statement* st = (raptor_statement*)raptor_sequence_get_at($2, n);

      st->subject = raptor_term_copy($$);
      raptor_turtle_defer_statement(rdf_parser, st);
      n++;
    }

#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf(" after substitution objectList=");
    raptor_sequence_print($2, stdout);
    printf("\n\n");
#endif

    raptor_free_sequence($2);
  } else {
#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1
    printf("resource\n predicateObjectList=");
    raptor_term_print_as_ntriples($$, stdout);
    printf("\n");
#endif
  }

}
;
+ if(!first_identifier) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:first term"); + rest_identifier = raptor_new_term_from_uri(world, RAPTOR_RDF_rest_URI(world)); + if(!rest_identifier) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:rest term"); + + /* non-empty property list, handle it */ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("resource\n predicateObjectList="); + raptor_sequence_print($2, stdout); + printf("\n"); +#endif + + object = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world)); + if(!object) + YYERR_MSG_GOTO(err_collection, "Cannot create rdf:nil term"); + + for(i = raptor_sequence_size($2)-1; i>=0; i--) { + raptor_term* temp; + raptor_statement* t2 = (raptor_statement*)raptor_sequence_get_at($2, i); + const unsigned char *blank_id; + + blank_id = raptor_world_generate_bnodeid(rdf_parser->world); + if(!blank_id) + YYERR_MSG_GOTO(err_collection, "Cannot create bnodeid"); + + blank = raptor_new_term_from_blank(rdf_parser->world, + blank_id); + RAPTOR_FREE(char*, blank_id); + if(!blank) + YYERR_MSG_GOTO(err_collection, "Cannot create bnode"); + + t2->subject = blank; + t2->predicate = first_identifier; + /* t2->object already set to the value we want */ + raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2); + + temp = t2->object; + + t2->subject = blank; + t2->predicate = rest_identifier; + t2->object = object; + raptor_turtle_defer_statement((raptor_parser*)rdf_parser, t2); + + t2->subject = NULL; + t2->predicate = NULL; + t2->object = temp; + + raptor_free_term(object); + object = blank; + blank = NULL; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf(" after substitution objectList="); + raptor_sequence_print($2, stdout); + printf("\n\n"); +#endif + + raptor_free_sequence($2); + + raptor_free_term(first_identifier); + raptor_free_term(rest_identifier); + + $$=object; + + err_collection: + if(errmsg) { + if(blank) + raptor_free_term(blank); + + if(object) + raptor_free_term(object); + + 
if(rest_identifier) + raptor_free_term(rest_identifier); + + if(first_identifier) + raptor_free_term(first_identifier); + + raptor_free_sequence($2); + + YYERROR_MSG(errmsg); + } +} +| LEFT_ROUND RIGHT_ROUND +{ + raptor_world* world = rdf_parser->world; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("collection\n empty\n"); +#endif + + $$ = raptor_new_term_from_uri(world, RAPTOR_RDF_nil_URI(world)); + if(!$$) + YYERROR; +} +; + + +%% + + +/* Support functions */ + +/* Error handler with scanner context, during parsing */ +int +turtle_parser_error(raptor_parser* rdf_parser, void* scanner, + const char *msg) +{ + raptor_turtle_parser* turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->consumed == turtle_parser->consumable && + turtle_parser->processed < turtle_parser->consumed && + !turtle_parser->is_end) { + /* we encountered an error on or around the last byte of the buffer + * sorting it in the next run aye? */ + return 0; + } + + if(turtle_parser->error_count++) + return 0; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + raptor_log_error(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, + &rdf_parser->locator, msg); + + return 0; +} + + +/* Error handler within raptor functions and callbacks */ +static void +turtle_parser_error_simple(void* user_data, const char *msg, ...) +{ + raptor_parser* rdf_parser = (raptor_parser*)user_data; + raptor_turtle_parser* turtle_parser; + va_list args; + + va_start(args, msg); + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(turtle_parser->consumed == turtle_parser->consumable && + turtle_parser->processed < turtle_parser->consumed && + !turtle_parser->is_end) { + /* we encountered an error on or around the last byte of the buffer + * sorting it in the next run aye? 
*/ + goto tidy; + } + + if(turtle_parser->error_count++) + goto tidy; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + raptor_log_error_varargs(rdf_parser->world, RAPTOR_LOG_LEVEL_ERROR, + &rdf_parser->locator, msg, + args); + +tidy: + va_end(args); +} + + +int +turtle_syntax_error(raptor_parser *rdf_parser, const char *message, ...) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + va_list arguments; + + if(!turtle_parser) + return 1; + + if(turtle_parser->error_count++) + return 0; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + va_start(arguments, message); + + raptor_parser_log_error_varargs(((raptor_parser*)rdf_parser), + RAPTOR_LOG_LEVEL_ERROR, message, arguments); + + va_end(arguments); + + return 0; +} + + +raptor_uri* +turtle_qname_to_uri(raptor_parser *rdf_parser, unsigned char *name, size_t name_len) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!turtle_parser) + return NULL; + + rdf_parser->locator.line = turtle_parser->lineno; +#ifdef RAPTOR_TURTLE_USE_ERROR_COLUMNS + rdf_parser->locator.column = turtle_lexer_get_column(yyscanner); +#endif + + name_len = raptor_turtle_expand_qname_escapes(name, name_len, + (raptor_simple_message_handler)turtle_parser_error_simple, rdf_parser); + if(!name_len) + return NULL; + + return raptor_qname_string_to_uri(&turtle_parser->namespaces, name, name_len); +} + + + +#ifndef TURTLE_PUSH_PARSE +static int +turtle_parse(raptor_parser *rdf_parser, const char *string, size_t length) +{ + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + int rc; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + 
turtle_parser->scanner_set = 1; + +#if defined(YYDEBUG) && YYDEBUG > 0 + turtle_lexer_set_debug(1 ,&turtle_parser->scanner); + turtle_parser_debug = 1; +#endif + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + (void)turtle_lexer__scan_bytes((char *)string, (yy_size_t)length, turtle_parser->scanner); + + rc = turtle_parser_parse(rdf_parser, turtle_parser->scanner); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + + return rc; +} +#endif + + +#ifdef TURTLE_PUSH_PARSE +static int +turtle_push_parse(raptor_parser *rdf_parser, + const char *string, size_t length) +{ +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + raptor_world* world = rdf_parser->world; +#endif + raptor_turtle_parser* turtle_parser; + void *buffer; + int status; + yypstate *ps; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(!string || !*string) + return 0; + + if(turtle_lexer_lex_init(&turtle_parser->scanner)) + return 1; + turtle_parser->scanner_set = 1; + +#if defined(YYDEBUG) && YYDEBUG > 0 + turtle_lexer_set_debug(1 ,&turtle_parser->scanner); + turtle_parser_debug = 1; +#endif + + turtle_lexer_set_extra(rdf_parser, turtle_parser->scanner); + buffer = turtle_lexer__scan_bytes(string, (yy_size_t)length, turtle_parser->scanner); + + /* returns a parser instance or 0 on out of memory */ + ps = yypstate_new(); + if(!ps) + return 1; + + do { + TURTLE_PARSER_YYSTYPE lval; + int token; + + memset(&lval, 0, sizeof(TURTLE_PARSER_YYSTYPE)); + + token = turtle_lexer_lex(&lval, turtle_parser->scanner); + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + printf("token %s\n", turtle_token_print(world, token, &lval)); +#endif + + status = yypush_parse(ps, token, &lval, rdf_parser, turtle_parser->scanner); + + /* turtle_token_free(world, token, &lval); */ + + if(!token || token == EOF || token == ERROR_TOKEN) + break; + } while (status == YYPUSH_MORE); + yypstate_delete(ps); + + turtle_lexer_lex_destroy(turtle_parser->scanner); + 
turtle_parser->scanner_set = 0; + + return 0; +} +#endif + + +/** + * raptor_turtle_parse_init - Initialise the Raptor Turtle parser + * + * Return value: non 0 on failure + **/ + +static int +raptor_turtle_parse_init(raptor_parser* rdf_parser, const char *name) { + raptor_turtle_parser* turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + if(raptor_namespaces_init(rdf_parser->world, &turtle_parser->namespaces, 0)) + return 1; + + turtle_parser->trig = !strcmp(name, "trig"); + + return 0; +} + + +/* PUBLIC FUNCTIONS */ + + +/* + * raptor_turtle_parse_terminate - Free the Raptor Turtle parser + * @rdf_parser: parser object + * + **/ +static void +raptor_turtle_parse_terminate(raptor_parser *rdf_parser) { + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + raptor_namespaces_clear(&turtle_parser->namespaces); + + if(turtle_parser->scanner_set) { + turtle_lexer_lex_destroy(turtle_parser->scanner); + turtle_parser->scanner_set = 0; + } + + if(turtle_parser->buffer) + RAPTOR_FREE(cdata, turtle_parser->buffer); + + if(turtle_parser->graph_name) { + raptor_free_term(turtle_parser->graph_name); + turtle_parser->graph_name = NULL; + } +} + + +static void +raptor_turtle_clone_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)parser->context; + raptor_statement *statement = &parser->statement; + + if(!t->subject || !t->predicate || !t->object) + return; + + if(turtle_parser->trig && turtle_parser->graph_name) + statement->graph = raptor_term_copy(turtle_parser->graph_name); + + if(!parser->emitted_default_graph && !turtle_parser->graph_name) { + /* for non-TRIG - start default graph at first triple */ + raptor_parser_start_graph(parser, NULL, 0); + parser->emitted_default_graph++; + } + + /* Two choices for subject for Turtle */ + if(t->subject->type == RAPTOR_TERM_TYPE_BLANK) { + statement->subject = raptor_new_term_from_blank(parser->world, + 
t->subject->value.blank.string); + } else { + /* RAPTOR_TERM_TYPE_URI */ + RAPTOR_ASSERT(t->subject->type != RAPTOR_TERM_TYPE_URI, + "subject type is not resource"); + statement->subject = raptor_new_term_from_uri(parser->world, + t->subject->value.uri); + } + + /* Predicates are URIs but check for bad ordinals */ + if(!strncmp((const char*)raptor_uri_as_string(t->predicate->value.uri), + "http://www.w3.org/1999/02/22-rdf-syntax-ns#_", 44)) { + unsigned char* predicate_uri_string = raptor_uri_as_string(t->predicate->value.uri); + int predicate_ordinal = raptor_check_ordinal(predicate_uri_string+44); + if(predicate_ordinal <= 0) + raptor_parser_error(parser, "Illegal ordinal value %d in property '%s'.", predicate_ordinal, predicate_uri_string); + } + + statement->predicate = raptor_new_term_from_uri(parser->world, + t->predicate->value.uri); + + + /* Three choices for object for Turtle */ + if(t->object->type == RAPTOR_TERM_TYPE_URI) { + statement->object = raptor_new_term_from_uri(parser->world, + t->object->value.uri); + } else if(t->object->type == RAPTOR_TERM_TYPE_BLANK) { + statement->object = raptor_new_term_from_blank(parser->world, + t->object->value.blank.string); + } else { + /* RAPTOR_TERM_TYPE_LITERAL */ + RAPTOR_ASSERT(t->object->type != RAPTOR_TERM_TYPE_LITERAL, + "object type is not literal"); + statement->object = raptor_new_term_from_literal(parser->world, + t->object->value.literal.string, + t->object->value.literal.datatype, + t->object->value.literal.language); + } +} + +static void +raptor_turtle_handle_statement(raptor_parser *parser, raptor_statement *t) +{ + if(!t->subject || !t->predicate || !t->object) + return; + + if(!parser->statement_handler) + return; + + /* Generate the statement */ + (*parser->statement_handler)(parser->user_data, t); +} + +static void +raptor_turtle_generate_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_turtle_clone_statement(parser, t); + raptor_turtle_handle_statement(parser, 
&parser->statement); + /* clear resources */ + raptor_statement_clear(&parser->statement); +} + +static void +raptor_turtle_defer_statement(raptor_parser *parser, raptor_statement *t) +{ + raptor_statement* st; + raptor_turtle_parser* turtle_parser; + + raptor_turtle_clone_statement(parser, t); + st = raptor_new_statement(parser->world); + if(!st) { + return; + } + /* copy static to dynamic statement, it's a move really */ + st->subject = parser->statement.subject, parser->statement.subject = NULL; + st->predicate = parser->statement.predicate, parser->statement.predicate = NULL; + st->object = parser->statement.object, parser->statement.object = NULL; + st->graph = parser->statement.graph, parser->statement.graph = NULL; + + /* prep deferred list */ + turtle_parser = (raptor_turtle_parser*)parser->context; + if(!turtle_parser->deferred) { + turtle_parser->deferred = raptor_new_sequence((raptor_data_free_handler)raptor_free_statement, NULL); + if(!turtle_parser->deferred) { + goto free_seq; + } + } + /* append to deferred list */ + if(raptor_sequence_push(turtle_parser->deferred, st)) { + free_seq: + raptor_free_statement(st); + } +} + + + +static int +raptor_turtle_parse_chunk(raptor_parser* rdf_parser, + const unsigned char *s, size_t len, + int is_end) +{ + raptor_turtle_parser *turtle_parser; + char *ptr; + int rc; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG2("adding %d bytes to line buffer\n", (int)len); +#endif + + if(!len && !is_end) { + /* nothing to do */ + return 0; + } + + /* the actual buffer will contained unprocessed characters from + * the last run plus the chunk passed here */ + turtle_parser->end_of_buffer = turtle_parser->consumed + len; + if(turtle_parser->end_of_buffer > turtle_parser->buffer_length) { + /* resize */ + size_t new_buffer_length = turtle_parser->end_of_buffer; + + turtle_parser->buffer = RAPTOR_REALLOC(char*, turtle_parser->buffer, + 
new_buffer_length + 1); + + /* adjust stored length */ + turtle_parser->buffer_length = new_buffer_length; + } + if(!turtle_parser->buffer && turtle_parser->buffer_length) { + /* we tried to alloc a buffer but we failed */ + raptor_parser_fatal_error(rdf_parser, "Out of memory"); + return 1; + } + if(is_end && !turtle_parser->end_of_buffer) { + /* Nothing to do */ + return 0; + } + + /* move pointer to end of cdata buffer */ + ptr = turtle_parser->buffer + turtle_parser->consumed; + + /* now write new stuff at end of cdata buffer */ + memcpy(ptr, s, len); + ptr += len; + *ptr = '\0'; + + /* reset processed counter */ + turtle_parser->processed = 0U; + /* unconsume */ + turtle_parser->consumed = 0U; + /* reset line numbers */ + turtle_parser->lineno = turtle_parser->lineno_last_good; + + /* let everyone know if this is the last chunk */ + turtle_parser->is_end = is_end; + if(!is_end) { + /* it's safer not to pass the very last line to the lexer + * just in case we end up with EOB-in-the-middle-of-X situations */ + size_t i = turtle_parser->end_of_buffer; + while(i > 0U && turtle_parser->buffer[--i] != '\n'); + /* either i == 0U or i points to the last \n before the end-of-buffer */ + turtle_parser->consumable = i; + } else { + /* otherwise the consumable number of bytes coincides with the EOB */ + turtle_parser->consumable = turtle_parser->end_of_buffer; + } + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 1 + RAPTOR_DEBUG3("buffer buffer now '%s' (%ld bytes)\n", + turtle_parser->buffer, turtle_parser->buffer_length); +#endif + +#ifdef TURTLE_PUSH_PARSE + rc = turtle_push_parse(rdf_parser, + turtle_parser->buffer, turtle_parser->consumable); +#else + rc = turtle_parse(rdf_parser, turtle_parser->buffer, turtle_parser->consumable); +#endif + + if(turtle_parser->error_count) { + rc = 1; + } else if(!is_end) { + /* move stuff to the beginning of the buffer */ + turtle_parser->consumed = turtle_parser->end_of_buffer - turtle_parser->processed; + if(turtle_parser->consumed 
&& turtle_parser->processed) { + memmove(turtle_parser->buffer, + turtle_parser->buffer + turtle_parser->processed, + turtle_parser->consumed); + /* cancel all deferred eval's */ + if(turtle_parser->deferred) { + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } + } + } else { + /* this was the last chunk, finalise */ + if(turtle_parser->deferred) { + raptor_sequence* def = turtle_parser->deferred; + int i; + for(i = 0; i < raptor_sequence_size(def); i++) { + raptor_statement *t2 = (raptor_statement*)raptor_sequence_get_at(def, i); + + raptor_turtle_handle_statement(rdf_parser, t2); + } + } + if(rdf_parser->emitted_default_graph) { + /* for non-TRIG - end default graph after last triple */ + raptor_parser_end_graph(rdf_parser, NULL, 0); + rdf_parser->emitted_default_graph--; + } + if(turtle_parser->deferred) { + /* clear resources */ + raptor_free_sequence(turtle_parser->deferred); + turtle_parser->deferred = NULL; + } + } + return rc; +} + + +static int +raptor_turtle_parse_start(raptor_parser *rdf_parser) +{ + raptor_locator *locator=&rdf_parser->locator; + raptor_turtle_parser *turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + + /* base URI required for Turtle */ + if(!rdf_parser->base_uri) + return 1; + + locator->line = 1; + locator->column= -1; /* No column info */ + locator->byte= -1; /* No bytes info */ + + if(turtle_parser->buffer_length) { + RAPTOR_FREE(cdata, turtle_parser->buffer); + turtle_parser->buffer = NULL; + turtle_parser->buffer_length = 0; + } + + turtle_parser->lineno = 1; + + return 0; +} + + +static int +raptor_turtle_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score= 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "ttl")) + score = 8; + if(!strcmp((const char*)suffix, "n3")) + score = 3; + } + + if(mime_type) { + if(strstr((const 
char*)mime_type, "turtle")) + score += 6; + if(strstr((const char*)mime_type, "n3")) + score += 3; + } + + /* Do this as long as N3 is not supported since it shares the same syntax */ + if(buffer && len) { +#define HAS_TURTLE_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) +/* The following could also be found with N-Triples but not with @prefix */ +#define HAS_TURTLE_RDF_URI (raptor_memstr((const char*)buffer, len, ": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL) + + if(HAS_TURTLE_PREFIX) { + score = 6; + if(HAS_TURTLE_RDF_URI) + score += 2; + } + } + + return score; +} + + +static raptor_uri* +raptor_turtle_get_graph(raptor_parser* rdf_parser) +{ + raptor_turtle_parser *turtle_parser; + + turtle_parser = (raptor_turtle_parser*)rdf_parser->context; + if(turtle_parser->graph_name) + return raptor_uri_copy(turtle_parser->graph_name->value.uri); + + return NULL; +} + + +#ifdef RAPTOR_PARSER_TRIG +static int +raptor_trig_parse_recognise_syntax(raptor_parser_factory* factory, + const unsigned char *buffer, size_t len, + const unsigned char *identifier, + const unsigned char *suffix, + const char *mime_type) +{ + int score= 0; + + if(suffix) { + if(!strcmp((const char*)suffix, "trig")) + score = 9; +#ifndef RAPTOR_PARSER_TURTLE + if(!strcmp((const char*)suffix, "ttl")) + score = 8; + if(!strcmp((const char*)suffix, "n3")) + score = 3; +#endif + } + + if(mime_type) { + if(strstr((const char*)mime_type, "trig")) + score = 6; +#ifndef RAPTOR_PARSER_TURTLE + if(strstr((const char*)mime_type, "turtle")) + score += 6; + if(strstr((const char*)mime_type, "n3")) + score += 3; +#endif + } + +#ifndef RAPTOR_PARSER_TURTLE + /* Do this as long as N3 is not supported since it shares the same syntax */ + if(buffer && len) { +#define HAS_TRIG_PREFIX (raptor_memstr((const char*)buffer, len, "@prefix ") != NULL) +/* The following could also be found with N-Triples but not with @prefix */ +#define HAS_TRIG_RDF_URI (raptor_memstr((const char*)buffer, len, 
": <http://www.w3.org/1999/02/22-rdf-syntax-ns#>") != NULL) + + if(HAS_TRIG_PREFIX) { + score = 6; + if(HAS_TRIG_RDF_URI) + score += 2; + } + } +#endif + + return score; +} +#endif + + +#ifdef RAPTOR_PARSER_TURTLE +static const char* const turtle_names[4] = { "turtle", "ntriples-plus", "n3", NULL }; + +static const char* const turtle_uri_strings[3] = { + "http://www.w3.org/ns/formats/Turtle", + "http://www.dajobe.org/2004/01/turtle/", + NULL +}; + +#define TURTLE_TYPES_COUNT 6 +static const raptor_type_q turtle_types[TURTLE_TYPES_COUNT + 1] = { + /* first one is the default */ + { "text/turtle", 11, 10}, + { "application/x-turtle", 20, 10}, + { "application/turtle", 18, 10}, + { "text/n3", 7, 3}, + { "text/rdf+n3", 11, 3}, + { "application/rdf+n3", 18, 3}, + { NULL, 0} +}; + +static int +raptor_turtle_parser_register_factory(raptor_parser_factory *factory) +{ + int rc = 0; + + factory->desc.names = turtle_names; + + factory->desc.mime_types = turtle_types; + + factory->desc.label = "Turtle Terse RDF Triple Language"; + factory->desc.uri_strings = turtle_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_turtle_parser); + + factory->init = raptor_turtle_parse_init; + factory->terminate = raptor_turtle_parse_terminate; + factory->start = raptor_turtle_parse_start; + factory->chunk = raptor_turtle_parse_chunk; + factory->recognise_syntax = raptor_turtle_parse_recognise_syntax; + factory->get_graph = raptor_turtle_get_graph; + + return rc; +} +#endif + + +#ifdef RAPTOR_PARSER_TRIG +static const char* const trig_names[2] = { "trig", NULL }; + +static const char* const trig_uri_strings[2] = { + "http://www.wiwiss.fu-berlin.de/suhl/bizer/TriG/Spec/", + NULL +}; + +#define TRIG_TYPES_COUNT 1 +static const raptor_type_q trig_types[TRIG_TYPES_COUNT + 1] = { + /* first one is the default */ + { "application/x-trig", 18, 10}, + { NULL, 0, 0} +}; + +static int +raptor_trig_parser_register_factory(raptor_parser_factory 
*factory) +{ + int rc = 0; + + factory->desc.names = trig_names; + + factory->desc.mime_types = trig_types; + + factory->desc.label = "TriG - Turtle with Named Graphs"; + factory->desc.uri_strings = trig_uri_strings; + + factory->desc.flags = RAPTOR_SYNTAX_NEED_BASE_URI; + + factory->context_length = sizeof(raptor_turtle_parser); + + factory->init = raptor_turtle_parse_init; + factory->terminate = raptor_turtle_parse_terminate; + factory->start = raptor_turtle_parse_start; + factory->chunk = raptor_turtle_parse_chunk; + factory->recognise_syntax = raptor_trig_parse_recognise_syntax; + factory->get_graph = raptor_turtle_get_graph; + + return rc; +} +#endif + + +#ifdef RAPTOR_PARSER_TURTLE +int +raptor_init_parser_turtle(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_turtle_parser_register_factory); +} +#endif + +#ifdef RAPTOR_PARSER_TRIG +int +raptor_init_parser_trig(raptor_world* world) +{ + return !raptor_world_register_parser_factory(world, + &raptor_trig_parser_register_factory); +} +#endif + + +#ifdef STANDALONE +#include <stdio.h> +#include <locale.h> + +#define TURTLE_FILE_BUF_SIZE 2048 + +static void +turtle_parser_print_statement(void *user, + raptor_statement *statement) +{ + FILE* stream = (FILE*)user; + raptor_statement_print(statement, stream); + putc('\n', stream); +} + + + +int +main(int argc, char *argv[]) +{ + char string[TURTLE_FILE_BUF_SIZE]; + raptor_parser rdf_parser; /* static */ + raptor_turtle_parser turtle_parser; /* static */ + raptor_locator *locator = &rdf_parser.locator; + FILE *fh; + const char *filename; + size_t nobj; + +#if defined(RAPTOR_DEBUG) && RAPTOR_DEBUG > 2 + turtle_parser_debug = 1; +#endif + + if(argc > 1) { + filename = argv[1]; + fh = fopen(filename, "r"); + if(!fh) { + fprintf(stderr, "%s: Cannot open file %s - %s\n", argv[0], filename, + strerror(errno)); + exit(1); + } + } else { + filename="<stdin>"; + fh = stdin; + } + + memset(string, 0, TURTLE_FILE_BUF_SIZE); + nobj = 
fread(string, TURTLE_FILE_BUF_SIZE, 1, fh); + if(nobj < TURTLE_FILE_BUF_SIZE) { + if(ferror(fh)) { + fprintf(stderr, "%s: file '%s' read failed - %s\n", + argv[0], filename, strerror(errno)); + fclose(fh); + return(1); + } + } + + if(argc > 1) + fclose(fh); + + memset(&rdf_parser, 0, sizeof(rdf_parser)); + memset(&turtle_parser, 0, sizeof(turtle_parser)); + + locator->line= locator->column = -1; + locator->file= filename; + + turtle_parser.lineno= 1; + + rdf_parser.world = raptor_new_world(); + rdf_parser.context = &turtle_parser; + rdf_parser.base_uri = raptor_new_uri(rdf_parser.world, + (const unsigned char*)"http://example.org/fake-base-uri/"); + + raptor_parser_set_statement_handler(&rdf_parser, stdout, + turtle_parser_print_statement); + raptor_turtle_parse_init(&rdf_parser, "turtle"); + + turtle_parser.error_count = 0; + +#ifdef TURTLE_PUSH_PARSE + turtle_push_parse(&rdf_parser, string, strlen(string)); +#else + turtle_parse(&rdf_parser, string, strlen(string)); +#endif + + raptor_turtle_parse_terminate(&rdf_parser); + + raptor_free_uri(rdf_parser.base_uri); + + raptor_free_world(rdf_parser.world); + + return (0); +} +#endif |