diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:40:05 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 05:40:05 +0000 |
commit | 4038ab95a094b363f1748f3dcb51511a1217475d (patch) | |
tree | 7f393d66a783f91ddd263c78d681e485cf4f45ca /librdfa | |
parent | Initial commit. (diff) | |
download | raptor2-upstream.tar.xz raptor2-upstream.zip |
Adding upstream version 2.0.16. (refs: upstream/2.0.16, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | librdfa/Makefile.am | 83 | ||||
-rw-r--r-- | librdfa/Makefile.in | 747 | ||||
-rw-r--r-- | librdfa/config.h | 2 | ||||
-rw-r--r-- | librdfa/context.c | 591 | ||||
-rw-r--r-- | librdfa/curie.c | 691 | ||||
-rw-r--r-- | librdfa/iri.c | 73 | ||||
-rw-r--r-- | librdfa/language.c | 55 | ||||
-rw-r--r-- | librdfa/lists.c | 243 | ||||
-rw-r--r-- | librdfa/namespace.c | 140 | ||||
-rw-r--r-- | librdfa/rdfa.c | 1552 | ||||
-rw-r--r-- | librdfa/rdfa.h | 377 | ||||
-rw-r--r-- | librdfa/rdfa_utils.c | 543 | ||||
-rw-r--r-- | librdfa/rdfa_utils.h | 486 | ||||
-rw-r--r-- | librdfa/strtok_r.c | 52 | ||||
-rw-r--r-- | librdfa/strtok_r.h | 15 | ||||
-rw-r--r-- | librdfa/subject.c | 535 | ||||
-rw-r--r-- | librdfa/triple.c | 847 |
17 files changed, 7032 insertions, 0 deletions
diff --git a/librdfa/Makefile.am b/librdfa/Makefile.am new file mode 100644 index 0000000..7d53e16 --- /dev/null +++ b/librdfa/Makefile.am @@ -0,0 +1,83 @@ +# -*- Mode: Makefile -*- +# +# Makefile for librdfa convienience library +# + +ANALYZE = clang +ANALYZE_FLAGS = "--analyze" +# Based on COMPILE target +ANALYZE_COMMAND = $(ANALYZE) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) \ + $(ANALYZE_FLAGS) + +if LIBRDFA +noinst_LTLIBRARIES = librdfa.la +AM_CPPFLAGS = -DLIBRDFA_IN_RAPTOR -I$(srcdir) -I$(top_srcdir)/src + +librdfa_la_SOURCES = \ +context.c \ +curie.c \ +iri.c \ +language.c \ +lists.c \ +namespace.c \ +rdfa.c \ +rdfa_utils.c \ +subject.c \ +triple.c \ +rdfa.h \ +rdfa_utils.h \ +config.h + +if NEED_STRTOK_R +librdfa_la_SOURCES += \ +strtok_r.c \ +strtok_r.h +endif + + +# Do not need: +# mingw32_utils.c +# mingw32_utils.h + +else + +EXTRA_DIST = \ +context.c \ +curie.c \ +iri.c \ +language.c \ +lists.c \ +namespace.c \ +rdfa.c \ +rdfa_utils.c \ +strtok_r.c \ +subject.c \ +triple.c \ +rdfa.h \ +rdfa_utils.h \ +strtok_r.h \ +config.h + +endif + +CLEANFILES=*.plist + +if MAINTAINER_MODE +# Run Clang static analyzer over sources. +analyze: $(SOURCES) + @list='$(SOURCES)'; \ + result=0; \ + for file in $$list; do \ + if echo $$file | grep '\.c$$' >/dev/null 2>&1; then \ + $(RECHO) "Analyzing $$file"; \ + $(ANALYZE_COMMAND) $(srcdir)/$$file; \ + status=$$?; \ + if test $$status != 0; then \ + result=1; \ + fi; \ + fi; \ + done; \ + set -e; exit $$result +endif diff --git a/librdfa/Makefile.in b/librdfa/Makefile.in new file mode 100644 index 0000000..d7d6876 --- /dev/null +++ b/librdfa/Makefile.in @@ -0,0 +1,747 @@ +# Makefile.in generated by automake 1.16.5 from Makefile.am. +# @configure_input@ + +# Copyright (C) 1994-2021 Free Software Foundation, Inc. 
+ +# This Makefile.in is free software; the Free Software Foundation +# gives unlimited permission to copy and/or distribute it, +# with or without modifications, as long as this notice is preserved. + +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY, to the extent permitted by law; without +# even the implied warranty of MERCHANTABILITY or FITNESS FOR A +# PARTICULAR PURPOSE. + +@SET_MAKE@ + +# -*- Mode: Makefile -*- +# +# Makefile for librdfa convienience library +# + +VPATH = @srcdir@ +am__is_gnu_make = { \ + if test -z '$(MAKELEVEL)'; then \ + false; \ + elif test -n '$(MAKE_HOST)'; then \ + true; \ + elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \ + true; \ + else \ + false; \ + fi; \ +} +am__make_running_with_option = \ + case $${target_option-} in \ + ?) ;; \ + *) echo "am__make_running_with_option: internal error: invalid" \ + "target option '$${target_option-}' specified" >&2; \ + exit 1;; \ + esac; \ + has_opt=no; \ + sane_makeflags=$$MAKEFLAGS; \ + if $(am__is_gnu_make); then \ + sane_makeflags=$$MFLAGS; \ + else \ + case $$MAKEFLAGS in \ + *\\[\ \ ]*) \ + bs=\\; \ + sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \ + | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \ + esac; \ + fi; \ + skip_next=no; \ + strip_trailopt () \ + { \ + flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \ + }; \ + for flg in $$sane_makeflags; do \ + test $$skip_next = yes && { skip_next=no; continue; }; \ + case $$flg in \ + *=*|--*) continue;; \ + -*I) strip_trailopt 'I'; skip_next=yes;; \ + -*I?*) strip_trailopt 'I';; \ + -*O) strip_trailopt 'O'; skip_next=yes;; \ + -*O?*) strip_trailopt 'O';; \ + -*l) strip_trailopt 'l'; skip_next=yes;; \ + -*l?*) strip_trailopt 'l';; \ + -[dEDm]) skip_next=yes;; \ + -[JT]) skip_next=yes;; \ + esac; \ + case $$flg in \ + *$$target_option*) has_opt=yes; break;; \ + esac; \ + done; \ + test $$has_opt = yes +am__make_dryrun = (target_option=n; $(am__make_running_with_option)) +am__make_keepgoing = 
(target_option=k; $(am__make_running_with_option)) +pkgdatadir = $(datadir)/@PACKAGE@ +pkgincludedir = $(includedir)/@PACKAGE@ +pkglibdir = $(libdir)/@PACKAGE@ +pkglibexecdir = $(libexecdir)/@PACKAGE@ +am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd +install_sh_DATA = $(install_sh) -c -m 644 +install_sh_PROGRAM = $(install_sh) -c +install_sh_SCRIPT = $(install_sh) -c +INSTALL_HEADER = $(INSTALL_DATA) +transform = $(program_transform_name) +NORMAL_INSTALL = : +PRE_INSTALL = : +POST_INSTALL = : +NORMAL_UNINSTALL = : +PRE_UNINSTALL = : +POST_UNINSTALL = : +build_triplet = @build@ +host_triplet = @host@ +@LIBRDFA_TRUE@@NEED_STRTOK_R_TRUE@am__append_1 = \ +@LIBRDFA_TRUE@@NEED_STRTOK_R_TRUE@strtok_r.c \ +@LIBRDFA_TRUE@@NEED_STRTOK_R_TRUE@strtok_r.h + +subdir = librdfa +ACLOCAL_M4 = $(top_srcdir)/aclocal.m4 +am__aclocal_m4_deps = $(top_srcdir)/build/gtk-doc.m4 \ + $(top_srcdir)/build/libtool.m4 \ + $(top_srcdir)/build/ltoptions.m4 \ + $(top_srcdir)/build/ltsugar.m4 \ + $(top_srcdir)/build/ltversion.m4 \ + $(top_srcdir)/build/lt~obsolete.m4 $(top_srcdir)/build/pkg.m4 \ + $(top_srcdir)/configure.ac +am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \ + $(ACLOCAL_M4) +DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON) +mkinstalldirs = $(install_sh) -d +CONFIG_HEADER = $(top_builddir)/src/raptor_config.h +CONFIG_CLEAN_FILES = +CONFIG_CLEAN_VPATH_FILES = +LTLIBRARIES = $(noinst_LTLIBRARIES) +librdfa_la_LIBADD = +am__librdfa_la_SOURCES_DIST = context.c curie.c iri.c language.c \ + lists.c namespace.c rdfa.c rdfa_utils.c subject.c triple.c \ + rdfa.h rdfa_utils.h config.h strtok_r.c strtok_r.h +@LIBRDFA_TRUE@@NEED_STRTOK_R_TRUE@am__objects_1 = strtok_r.lo +@LIBRDFA_TRUE@am_librdfa_la_OBJECTS = context.lo curie.lo iri.lo \ +@LIBRDFA_TRUE@ language.lo lists.lo namespace.lo rdfa.lo \ +@LIBRDFA_TRUE@ rdfa_utils.lo subject.lo triple.lo \ +@LIBRDFA_TRUE@ $(am__objects_1) +librdfa_la_OBJECTS = $(am_librdfa_la_OBJECTS) +AM_V_lt = $(am__v_lt_@AM_V@) 
+am__v_lt_ = $(am__v_lt_@AM_DEFAULT_V@) +am__v_lt_0 = --silent +am__v_lt_1 = +@LIBRDFA_TRUE@am_librdfa_la_rpath = +AM_V_P = $(am__v_P_@AM_V@) +am__v_P_ = $(am__v_P_@AM_DEFAULT_V@) +am__v_P_0 = false +am__v_P_1 = : +AM_V_GEN = $(am__v_GEN_@AM_V@) +am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@) +am__v_GEN_0 = @echo " GEN " $@; +am__v_GEN_1 = +AM_V_at = $(am__v_at_@AM_V@) +am__v_at_ = $(am__v_at_@AM_DEFAULT_V@) +am__v_at_0 = @ +am__v_at_1 = +DEFAULT_INCLUDES = -I.@am__isrc@ -I$(top_builddir)/src +depcomp = $(SHELL) $(top_srcdir)/build/depcomp +am__maybe_remake_depfiles = depfiles +am__depfiles_remade = ./$(DEPDIR)/context.Plo ./$(DEPDIR)/curie.Plo \ + ./$(DEPDIR)/iri.Plo ./$(DEPDIR)/language.Plo \ + ./$(DEPDIR)/lists.Plo ./$(DEPDIR)/namespace.Plo \ + ./$(DEPDIR)/rdfa.Plo ./$(DEPDIR)/rdfa_utils.Plo \ + ./$(DEPDIR)/strtok_r.Plo ./$(DEPDIR)/subject.Plo \ + ./$(DEPDIR)/triple.Plo +am__mv = mv -f +COMPILE = $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) +LTCOMPILE = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) \ + $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) \ + $(AM_CFLAGS) $(CFLAGS) +AM_V_CC = $(am__v_CC_@AM_V@) +am__v_CC_ = $(am__v_CC_@AM_DEFAULT_V@) +am__v_CC_0 = @echo " CC " $@; +am__v_CC_1 = +CCLD = $(CC) +LINK = $(LIBTOOL) $(AM_V_lt) --tag=CC $(AM_LIBTOOLFLAGS) \ + $(LIBTOOLFLAGS) --mode=link $(CCLD) $(AM_CFLAGS) $(CFLAGS) \ + $(AM_LDFLAGS) $(LDFLAGS) -o $@ +AM_V_CCLD = $(am__v_CCLD_@AM_V@) +am__v_CCLD_ = $(am__v_CCLD_@AM_DEFAULT_V@) +am__v_CCLD_0 = @echo " CCLD " $@; +am__v_CCLD_1 = +SOURCES = $(librdfa_la_SOURCES) +DIST_SOURCES = $(am__librdfa_la_SOURCES_DIST) +am__can_run_installinfo = \ + case $$AM_UPDATE_INFO_DIR in \ + n|no|NO) false;; \ + *) (install-info --version) >/dev/null 2>&1;; \ + esac +am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP) +# Read a list of newline-separated strings from the standard input, +# and print each of 
them once, without duplicates. Input order is +# *not* preserved. +am__uniquify_input = $(AWK) '\ + BEGIN { nonempty = 0; } \ + { items[$$0] = 1; nonempty = 1; } \ + END { if (nonempty) { for (i in items) print i; }; } \ +' +# Make sure the list of sources is unique. This is necessary because, +# e.g., the same source file might be shared among _SOURCES variables +# for different programs/libraries. +am__define_uniq_tagged_files = \ + list='$(am__tagged_files)'; \ + unique=`for i in $$list; do \ + if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \ + done | $(am__uniquify_input)` +am__DIST_COMMON = $(srcdir)/Makefile.in $(top_srcdir)/build/depcomp +DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST) +ACLOCAL = @ACLOCAL@ +AMTAR = @AMTAR@ +AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@ +AR = @AR@ +AUTOCONF = @AUTOCONF@ +AUTOHEADER = @AUTOHEADER@ +AUTOMAKE = @AUTOMAKE@ +AWK = @AWK@ +BISON = @BISON@ +CC = @CC@ +CCDEPMODE = @CCDEPMODE@ +CFLAGS = @CFLAGS@ +CPPFLAGS = @CPPFLAGS@ +CSCOPE = @CSCOPE@ +CTAGS = @CTAGS@ +CURL_CONFIG = @CURL_CONFIG@ +CYGPATH_W = @CYGPATH_W@ +DEFS = @DEFS@ +DEPDIR = @DEPDIR@ +DLLTOOL = @DLLTOOL@ +DSYMUTIL = @DSYMUTIL@ +DUMPBIN = @DUMPBIN@ +ECHO_C = @ECHO_C@ +ECHO_N = @ECHO_N@ +ECHO_T = @ECHO_T@ +EGREP = @EGREP@ +ETAGS = @ETAGS@ +EXEEXT = @EXEEXT@ +FGREP = @FGREP@ +FILECMD = @FILECMD@ +GREP = @GREP@ +GTKDOC_CHECK = @GTKDOC_CHECK@ +GTKDOC_CHECK_PATH = @GTKDOC_CHECK_PATH@ +GTKDOC_DEPS_CFLAGS = @GTKDOC_DEPS_CFLAGS@ +GTKDOC_DEPS_LIBS = @GTKDOC_DEPS_LIBS@ +GTKDOC_MKPDF = @GTKDOC_MKPDF@ +GTKDOC_REBASE = @GTKDOC_REBASE@ +HTML_DIR = @HTML_DIR@ +ICU_CFLAGS = @ICU_CFLAGS@ +ICU_LIBS = @ICU_LIBS@ +INSTALL = @INSTALL@ +INSTALL_DATA = @INSTALL_DATA@ +INSTALL_PROGRAM = @INSTALL_PROGRAM@ +INSTALL_SCRIPT = @INSTALL_SCRIPT@ +INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@ +JING = @JING@ +LD = @LD@ +LDFLAGS = @LDFLAGS@ +LEX = @LEX@ +LEXLIB = @LEXLIB@ +LEX_OUTPUT_ROOT = @LEX_OUTPUT_ROOT@ +LIBCURL_CFLAGS = @LIBCURL_CFLAGS@ +LIBCURL_LIBS = 
@LIBCURL_LIBS@ +LIBOBJS = @LIBOBJS@ +LIBS = @LIBS@ +LIBTOOL = @LIBTOOL@ +LIBXML_CFLAGS = @LIBXML_CFLAGS@ +LIBXML_LIBS = @LIBXML_LIBS@ +LIPO = @LIPO@ +LN_S = @LN_S@ +LTLIBOBJS = @LTLIBOBJS@ +LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@ +MAINT = @MAINT@ +MAKEINFO = @MAKEINFO@ +MANIFEST_TOOL = @MANIFEST_TOOL@ +MEM = @MEM@ +MEM_LIBS = @MEM_LIBS@ +MKDIR_P = @MKDIR_P@ +NM = @NM@ +NMEDIT = @NMEDIT@ +OBJDUMP = @OBJDUMP@ +OBJEXT = @OBJEXT@ +OTOOL = @OTOOL@ +OTOOL64 = @OTOOL64@ +PACKAGE = @PACKAGE@ +PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@ +PACKAGE_NAME = @PACKAGE_NAME@ +PACKAGE_STRING = @PACKAGE_STRING@ +PACKAGE_TARNAME = @PACKAGE_TARNAME@ +PACKAGE_URL = @PACKAGE_URL@ +PACKAGE_VERSION = @PACKAGE_VERSION@ +PATH_SEPARATOR = @PATH_SEPARATOR@ +PERL = @PERL@ +PKG_CONFIG = @PKG_CONFIG@ +PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@ +PKG_CONFIG_PATH = @PKG_CONFIG_PATH@ +PKG_CONFIG_REQUIRES = @PKG_CONFIG_REQUIRES@ +RANLIB = @RANLIB@ +RAPTOR_LDFLAGS = @RAPTOR_LDFLAGS@ +RAPTOR_LIBTOOLLIBS = @RAPTOR_LIBTOOLLIBS@ +RAPTOR_LIBTOOL_VERSION = @RAPTOR_LIBTOOL_VERSION@ +RAPTOR_PARSERS = @RAPTOR_PARSERS@ +RAPTOR_SERIALIZERS = @RAPTOR_SERIALIZERS@ +RAPTOR_VERSION = @RAPTOR_VERSION@ +RAPTOR_VERSION_DECIMAL = @RAPTOR_VERSION_DECIMAL@ +RAPTOR_VERSION_MAJOR = @RAPTOR_VERSION_MAJOR@ +RAPTOR_VERSION_MINOR = @RAPTOR_VERSION_MINOR@ +RAPTOR_VERSION_RELEASE = @RAPTOR_VERSION_RELEASE@ +RAPTOR_WWW_LIBRARY = @RAPTOR_WWW_LIBRARY@ +RAPTOR_XML_PARSER = @RAPTOR_XML_PARSER@ +RECHO = @RECHO@ +RECHO_C = @RECHO_C@ +RECHO_N = @RECHO_N@ +RPM_RELEASE = @RPM_RELEASE@ +SED = @SED@ +SET_MAKE = @SET_MAKE@ +SHELL = @SHELL@ +STRIP = @STRIP@ +TAR = @TAR@ +VERSION = @VERSION@ +XML_CONFIG = @XML_CONFIG@ +XSLT_CFLAGS = @XSLT_CFLAGS@ +XSLT_CONFIG = @XSLT_CONFIG@ +XSLT_LIBS = @XSLT_LIBS@ +abs_builddir = @abs_builddir@ +abs_srcdir = @abs_srcdir@ +abs_top_builddir = @abs_top_builddir@ +abs_top_srcdir = @abs_top_srcdir@ +ac_ct_AR = @ac_ct_AR@ +ac_ct_CC = @ac_ct_CC@ +ac_ct_DUMPBIN = @ac_ct_DUMPBIN@ +am__include = @am__include@ 
+am__leading_dot = @am__leading_dot@ +am__quote = @am__quote@ +am__tar = @am__tar@ +am__untar = @am__untar@ +bindir = @bindir@ +build = @build@ +build_alias = @build_alias@ +build_cpu = @build_cpu@ +build_os = @build_os@ +build_vendor = @build_vendor@ +builddir = @builddir@ +datadir = @datadir@ +datarootdir = @datarootdir@ +docdir = @docdir@ +dvidir = @dvidir@ +exec_prefix = @exec_prefix@ +host = @host@ +host_alias = @host_alias@ +host_cpu = @host_cpu@ +host_os = @host_os@ +host_vendor = @host_vendor@ +htmldir = @htmldir@ +includedir = @includedir@ +infodir = @infodir@ +install_sh = @install_sh@ +libdir = @libdir@ +libexecdir = @libexecdir@ +localedir = @localedir@ +localstatedir = @localstatedir@ +mandir = @mandir@ +mkdir_p = @mkdir_p@ +oldincludedir = @oldincludedir@ +pdfdir = @pdfdir@ +prefix = @prefix@ +program_transform_name = @program_transform_name@ +psdir = @psdir@ +runstatedir = @runstatedir@ +sbindir = @sbindir@ +sharedstatedir = @sharedstatedir@ +srcdir = @srcdir@ +sysconfdir = @sysconfdir@ +target_alias = @target_alias@ +top_build_prefix = @top_build_prefix@ +top_builddir = @top_builddir@ +top_srcdir = @top_srcdir@ +ANALYZE = clang +ANALYZE_FLAGS = "--analyze" +# Based on COMPILE target +ANALYZE_COMMAND = $(ANALYZE) \ + $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) \ + $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) \ + $(ANALYZE_FLAGS) + +@LIBRDFA_TRUE@noinst_LTLIBRARIES = librdfa.la +@LIBRDFA_TRUE@AM_CPPFLAGS = -DLIBRDFA_IN_RAPTOR -I$(srcdir) -I$(top_srcdir)/src +@LIBRDFA_TRUE@librdfa_la_SOURCES = context.c curie.c iri.c language.c \ +@LIBRDFA_TRUE@ lists.c namespace.c rdfa.c rdfa_utils.c \ +@LIBRDFA_TRUE@ subject.c triple.c rdfa.h rdfa_utils.h config.h \ +@LIBRDFA_TRUE@ $(am__append_1) + +# Do not need: +# mingw32_utils.c +# mingw32_utils.h +@LIBRDFA_FALSE@EXTRA_DIST = \ +@LIBRDFA_FALSE@context.c \ +@LIBRDFA_FALSE@curie.c \ +@LIBRDFA_FALSE@iri.c \ +@LIBRDFA_FALSE@language.c \ +@LIBRDFA_FALSE@lists.c \ +@LIBRDFA_FALSE@namespace.c \ +@LIBRDFA_FALSE@rdfa.c 
\ +@LIBRDFA_FALSE@rdfa_utils.c \ +@LIBRDFA_FALSE@strtok_r.c \ +@LIBRDFA_FALSE@subject.c \ +@LIBRDFA_FALSE@triple.c \ +@LIBRDFA_FALSE@rdfa.h \ +@LIBRDFA_FALSE@rdfa_utils.h \ +@LIBRDFA_FALSE@strtok_r.h \ +@LIBRDFA_FALSE@config.h + +CLEANFILES = *.plist +all: all-am + +.SUFFIXES: +.SUFFIXES: .c .lo .o .obj +$(srcdir)/Makefile.in: @MAINTAINER_MODE_TRUE@ $(srcdir)/Makefile.am $(am__configure_deps) + @for dep in $?; do \ + case '$(am__configure_deps)' in \ + *$$dep*) \ + ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \ + && { if test -f $@; then exit 0; else break; fi; }; \ + exit 1;; \ + esac; \ + done; \ + echo ' cd $(top_srcdir) && $(AUTOMAKE) --gnu librdfa/Makefile'; \ + $(am__cd) $(top_srcdir) && \ + $(AUTOMAKE) --gnu librdfa/Makefile +Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status + @case '$?' in \ + *config.status*) \ + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \ + *) \ + echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \ + cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \ + esac; + +$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh + +$(top_srcdir)/configure: @MAINTAINER_MODE_TRUE@ $(am__configure_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(ACLOCAL_M4): @MAINTAINER_MODE_TRUE@ $(am__aclocal_m4_deps) + cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh +$(am__aclocal_m4_deps): + +clean-noinstLTLIBRARIES: + -test -z "$(noinst_LTLIBRARIES)" || rm -f $(noinst_LTLIBRARIES) + @list='$(noinst_LTLIBRARIES)'; \ + locs=`for p in $$list; do echo $$p; done | \ + sed 's|^[^/]*$$|.|; s|/[^/]*$$||; s|$$|/so_locations|' | \ + sort -u`; \ + test -z "$$locs" || { \ + echo rm -f $${locs}; \ + rm -f $${locs}; \ + } + +librdfa.la: $(librdfa_la_OBJECTS) $(librdfa_la_DEPENDENCIES) $(EXTRA_librdfa_la_DEPENDENCIES) + 
$(AM_V_CCLD)$(LINK) $(am_librdfa_la_rpath) $(librdfa_la_OBJECTS) $(librdfa_la_LIBADD) $(LIBS) + +mostlyclean-compile: + -rm -f *.$(OBJEXT) + +distclean-compile: + -rm -f *.tab.c + +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/context.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/curie.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/iri.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/language.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/lists.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/namespace.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdfa.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/rdfa_utils.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/strtok_r.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/subject.Plo@am__quote@ # am--include-marker +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/triple.Plo@am__quote@ # am--include-marker + +$(am__depfiles_remade): + @$(MKDIR_P) $(@D) + @echo '# dummy' >$@-t && $(am__mv) $@-t $@ + +am--depfiles: $(am__depfiles_remade) + +.c.o: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ $< + +.c.obj: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(COMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ `$(CYGPATH_W) '$<'` +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po 
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=no @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(COMPILE) -c -o $@ `$(CYGPATH_W) '$<'` + +.c.lo: +@am__fastdepCC_TRUE@ $(AM_V_CC)$(LTCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< +@am__fastdepCC_TRUE@ $(AM_V_at)$(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ $(AM_V_CC)source='$<' object='$@' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(AM_V_CC@am__nodep@)$(LTCOMPILE) -c -o $@ $< + +mostlyclean-libtool: + -rm -f *.lo + +clean-libtool: + -rm -rf .libs _libs + +ID: $(am__tagged_files) + $(am__define_uniq_tagged_files); mkid -fID $$unique +tags: tags-am +TAGS: tags + +tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + set x; \ + here=`pwd`; \ + $(am__define_uniq_tagged_files); \ + shift; \ + if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \ + test -n "$$unique" || unique=$$empty_fix; \ + if test $$# -gt 0; then \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + "$$@" $$unique; \ + else \ + $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \ + $$unique; \ + fi; \ + fi +ctags: ctags-am + +CTAGS: ctags +ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files) + $(am__define_uniq_tagged_files); \ + test -z "$(CTAGS_ARGS)$$unique" \ + || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \ + $$unique + +GTAGS: + here=`$(am__cd) $(top_builddir) && pwd` \ + && $(am__cd) $(top_srcdir) \ + && gtags -i $(GTAGS_ARGS) "$$here" +cscopelist: cscopelist-am + +cscopelist-am: $(am__tagged_files) + list='$(am__tagged_files)'; \ + case "$(srcdir)" in \ + [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \ + *) sdir=$(subdir)/$(srcdir) ;; \ + esac; \ + for i in $$list; do \ + if test -f "$$i"; then \ + echo "$(subdir)/$$i"; \ + else \ + echo "$$sdir/$$i"; \ 
+ fi; \ + done >> $(top_builddir)/cscope.files + +distclean-tags: + -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags +distdir: $(BUILT_SOURCES) + $(MAKE) $(AM_MAKEFLAGS) distdir-am + +distdir-am: $(DISTFILES) + @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \ + list='$(DISTFILES)'; \ + dist_files=`for file in $$list; do echo $$file; done | \ + sed -e "s|^$$srcdirstrip/||;t" \ + -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \ + case $$dist_files in \ + */*) $(MKDIR_P) `echo "$$dist_files" | \ + sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \ + sort -u` ;; \ + esac; \ + for file in $$dist_files; do \ + if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \ + if test -d $$d/$$file; then \ + dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \ + if test -d "$(distdir)/$$file"; then \ + find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \ + cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \ + find "$(distdir)/$$file" -type d ! 
-perm -700 -exec chmod u+rwx {} \;; \ + fi; \ + cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \ + else \ + test -f "$(distdir)/$$file" \ + || cp -p $$d/$$file "$(distdir)/$$file" \ + || exit 1; \ + fi; \ + done +check-am: all-am +check: check-am +all-am: Makefile $(LTLIBRARIES) +installdirs: +install: install-am +install-exec: install-exec-am +install-data: install-data-am +uninstall: uninstall-am + +install-am: all-am + @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am + +installcheck: installcheck-am +install-strip: + if test -z '$(STRIP)'; then \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + install; \ + else \ + $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \ + install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \ + "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \ + fi +mostlyclean-generic: + +clean-generic: + -test -z "$(CLEANFILES)" || rm -f $(CLEANFILES) + +distclean-generic: + -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES) + -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES) + +maintainer-clean-generic: + @echo "This command is intended for maintainers to use" + @echo "it deletes files that may require special tools to rebuild." 
+clean: clean-am + +clean-am: clean-generic clean-libtool clean-noinstLTLIBRARIES \ + mostlyclean-am + +distclean: distclean-am + -rm -f ./$(DEPDIR)/context.Plo + -rm -f ./$(DEPDIR)/curie.Plo + -rm -f ./$(DEPDIR)/iri.Plo + -rm -f ./$(DEPDIR)/language.Plo + -rm -f ./$(DEPDIR)/lists.Plo + -rm -f ./$(DEPDIR)/namespace.Plo + -rm -f ./$(DEPDIR)/rdfa.Plo + -rm -f ./$(DEPDIR)/rdfa_utils.Plo + -rm -f ./$(DEPDIR)/strtok_r.Plo + -rm -f ./$(DEPDIR)/subject.Plo + -rm -f ./$(DEPDIR)/triple.Plo + -rm -f Makefile +distclean-am: clean-am distclean-compile distclean-generic \ + distclean-tags + +dvi: dvi-am + +dvi-am: + +html: html-am + +html-am: + +info: info-am + +info-am: + +install-data-am: + +install-dvi: install-dvi-am + +install-dvi-am: + +install-exec-am: + +install-html: install-html-am + +install-html-am: + +install-info: install-info-am + +install-info-am: + +install-man: + +install-pdf: install-pdf-am + +install-pdf-am: + +install-ps: install-ps-am + +install-ps-am: + +installcheck-am: + +maintainer-clean: maintainer-clean-am + -rm -f ./$(DEPDIR)/context.Plo + -rm -f ./$(DEPDIR)/curie.Plo + -rm -f ./$(DEPDIR)/iri.Plo + -rm -f ./$(DEPDIR)/language.Plo + -rm -f ./$(DEPDIR)/lists.Plo + -rm -f ./$(DEPDIR)/namespace.Plo + -rm -f ./$(DEPDIR)/rdfa.Plo + -rm -f ./$(DEPDIR)/rdfa_utils.Plo + -rm -f ./$(DEPDIR)/strtok_r.Plo + -rm -f ./$(DEPDIR)/subject.Plo + -rm -f ./$(DEPDIR)/triple.Plo + -rm -f Makefile +maintainer-clean-am: distclean-am maintainer-clean-generic + +mostlyclean: mostlyclean-am + +mostlyclean-am: mostlyclean-compile mostlyclean-generic \ + mostlyclean-libtool + +pdf: pdf-am + +pdf-am: + +ps: ps-am + +ps-am: + +uninstall-am: + +.MAKE: install-am install-strip + +.PHONY: CTAGS GTAGS TAGS all all-am am--depfiles check check-am clean \ + clean-generic clean-libtool clean-noinstLTLIBRARIES \ + cscopelist-am ctags ctags-am distclean distclean-compile \ + distclean-generic distclean-libtool distclean-tags distdir dvi \ + dvi-am html html-am info info-am install 
install-am \ + install-data install-data-am install-dvi install-dvi-am \ + install-exec install-exec-am install-html install-html-am \ + install-info install-info-am install-man install-pdf \ + install-pdf-am install-ps install-ps-am install-strip \ + installcheck installcheck-am installdirs maintainer-clean \ + maintainer-clean-generic mostlyclean mostlyclean-compile \ + mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \ + tags tags-am uninstall uninstall-am + +.PRECIOUS: Makefile + + +# Run Clang static analyzer over sources. +@MAINTAINER_MODE_TRUE@analyze: $(SOURCES) +@MAINTAINER_MODE_TRUE@ @list='$(SOURCES)'; \ +@MAINTAINER_MODE_TRUE@ result=0; \ +@MAINTAINER_MODE_TRUE@ for file in $$list; do \ +@MAINTAINER_MODE_TRUE@ if echo $$file | grep '\.c$$' >/dev/null 2>&1; then \ +@MAINTAINER_MODE_TRUE@ $(RECHO) "Analyzing $$file"; \ +@MAINTAINER_MODE_TRUE@ $(ANALYZE_COMMAND) $(srcdir)/$$file; \ +@MAINTAINER_MODE_TRUE@ status=$$?; \ +@MAINTAINER_MODE_TRUE@ if test $$status != 0; then \ +@MAINTAINER_MODE_TRUE@ result=1; \ +@MAINTAINER_MODE_TRUE@ fi; \ +@MAINTAINER_MODE_TRUE@ fi; \ +@MAINTAINER_MODE_TRUE@ done; \ +@MAINTAINER_MODE_TRUE@ set -e; exit $$result + +# Tell versions [3.59,3.63) of GNU make to not export all variables. +# Otherwise a system limit (for SysV at least) may be exceeded. +.NOEXPORT: diff --git a/librdfa/config.h b/librdfa/config.h new file mode 100644 index 0000000..6eac131 --- /dev/null +++ b/librdfa/config.h @@ -0,0 +1,2 @@ +/* Fake librdfa config.h - reads configuration from Raptor's config header */ +#include <raptor_config.h> diff --git a/librdfa/context.c b/librdfa/context.c new file mode 100644 index 0000000..fca5b07 --- /dev/null +++ b/librdfa/context.c @@ -0,0 +1,591 @@ +/** + * Copyright 2008-2012 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. 
GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * The librdfa library is the Fastest RDFa Parser in the Universe. It is + * a stream parser, meaning that it takes an XML data as input and spits + * out RDF triples as it comes across them in the stream. Due to this + * processing approach, librdfa has a very, very small memory footprint. + * It is also very fast and can operate on hundreds of gigabytes of XML + * data without breaking a sweat. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <string.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +rdfacontext* rdfa_create_context(const char* base) +{ + rdfacontext* rval = NULL; + size_t base_length = strlen(base); + + /* if the base isn't specified, don't create a context */ + if(base_length > 0) + { + char* cleaned_base; + + /* malloc and init whole context to NULL */ + rval = (rdfacontext*)malloc(sizeof(rdfacontext)); + if(!rval) + return NULL; + + memset(rval, 0, sizeof(rdfacontext)); + + /* clean and initialize base */ + cleaned_base = rdfa_iri_get_base(base); + rval->base = rdfa_replace_string(rval->base, cleaned_base); + free(cleaned_base); + } + else + { +#ifdef LIBRDFA_IN_RAPTOR +#else + printf("librdfa error: Failed to create a parsing context, " + "base IRI was not specified!\n"); +#endif + } + + return rval; +} + +void rdfa_init_context(rdfacontext* context) +{ + /* assume the RDFa processing rules are RDFa 1.1 unless otherwise specified */ + context->rdfa_version = RDFA_VERSION_1_1; + + /* assume the default host language is XML1 */ + context->host_language = HOST_LANGUAGE_XML1; + + /* the [parent subject] is set to 
the [base] value; */
+   context->parent_subject = NULL;
+   if(context->base != NULL)
+   {
+      char* cleaned_base = rdfa_iri_get_base(context->base);
+      context->parent_subject =
+         rdfa_replace_string(context->parent_subject, cleaned_base);
+      free(cleaned_base);
+   }
+
+   /* the [parent object] is set to null; */
+   context->parent_object = NULL;
+
+#ifdef LIBRDFA_IN_RAPTOR
+#else
+   /* the [list of URI mappings] is cleared;
+    * (standalone builds only: this assignment is compiled out when
+    * LIBRDFA_IN_RAPTOR is defined) */
+   context->uri_mappings = rdfa_create_mapping(MAX_URI_MAPPINGS);
+#endif
+
+   /* the [list of incomplete triples] is cleared;
+    * here that means installing a fresh list from rdfa_create_list(3) */
+   context->incomplete_triples = rdfa_create_list(3);
+
+   /* the [language] is set to null. */
+   context->language = NULL;
+
+   /* set the [current object resource] to null; */
+   context->current_object_resource = NULL;
+
+   /* the list of term mappings is set to null
+    * (or a list defined in the initial context of the Host Language).
+    * In this code the field receives a fresh mapping from
+    * rdfa_create_mapping(MAX_TERM_MAPPINGS) rather than a NULL pointer. */
+   context->term_mappings = rdfa_create_mapping(MAX_TERM_MAPPINGS);
+
+   /* the list mappings start out as a fresh mapping; MAX_LIST_MAPPINGS is
+    * the argument handed to rdfa_create_mapping() (presumably the size
+    * limit of the mapping - TODO confirm against rdfa_utils) */
+   context->list_mappings = rdfa_create_mapping(MAX_LIST_MAPPINGS);
+
+   /* the local list mappings likewise start out as a fresh mapping,
+    * created with MAX_LOCAL_LIST_MAPPINGS */
+   context->local_list_mappings =
+      rdfa_create_mapping(MAX_LOCAL_LIST_MAPPINGS);
+
+   /* the default vocabulary is set to null
+    * (or an IRI defined in the initial context of the Host Language). */
+   context->default_vocabulary = NULL;
+
+   /* flag: whether or not the @inlist attribute is present on the current
+    * element; starts out cleared (0) */
+   context->inlist_present = 0;
+
+   /* flag: whether or not the @rel attribute is present on the current
+    * element; starts out cleared (0) */
+   context->rel_present = 0;
+
+   /* flag: whether or not the @rev attribute is present on the current
+    * element; starts out cleared (0) */
+   context->rev_present = 0;
+
+   /* 1. First, the local values are initialized, as follows:
+    *
+    * * the [recurse] flag is set to 'true'; */
+   context->recurse = 1;
+
+   /* * the [skip element] flag is set to 'false'; */
+   context->skip_element = 0;
+
+   /* * [new subject] is set to null; */
+   context->new_subject = NULL;
+
+   /* * [current object resource] is set to null;
+    *   (this repeats the assignment already made earlier in this
+    *   function) */
+   context->current_object_resource = NULL;
+
+   /* * the [local list of URI mappings] is set to the list of URI
+    *   mappings from the [evaluation context];
+    *   NOTE: This step is done in rdfa_create_new_element_context() */
+
+   /* FIXME: Initialize the term mappings and URI mappings based on Host Language */
+
+   /* * the [local list of incomplete triples] is set to null;
+    *   NOTE(review): the code installs the result of rdfa_create_list(3)
+    *   here, not a NULL pointer */
+   context->local_incomplete_triples = rdfa_create_list(3);
+
+   /* * the [current language] value is set to the [language] value
+    *   from the [evaluation context].
+    *   NOTE: This step is done in rdfa_create_new_element_context() */
+}
+
+#ifdef LIBRDFA_IN_RAPTOR /* DECLARE_URI_MAPPING(context, prefix, value)
+ * registers one prefix -> IRI pair. When LIBRDFA_IN_RAPTOR is defined it
+ * creates a namespace with raptor_new_namespace() on
+ * context->sax2->namespaces and starts it with
+ * raptor_namespaces_start_namespace(). Otherwise it stores the pair in
+ * context->uri_mappings through rdfa_update_mapping(). The arguments are
+ * expanded without parentheses, so pass simple expressions only. */
+#define DECLARE_URI_MAPPING(context, prefix, value) \
+do { \
+   raptor_namespace_stack* nstack = &context->sax2->namespaces; \
+   raptor_namespace* ns = raptor_new_namespace(nstack, \
+      (const unsigned char *)prefix, (const unsigned char*)value, 0); \
+   raptor_namespaces_start_namespace(nstack, ns); \
+   } while(0)
+#else
+#define DECLARE_URI_MAPPING(context, prefix, value) \
+   rdfa_update_mapping(context->uri_mappings, prefix, value, \
+      (update_mapping_value_fp)rdfa_replace_string)
+#endif
+
+void rdfa_setup_initial_context(rdfacontext* context)
+{
+#ifdef LIBRDFA_IN_RAPTOR
+#else
+   char* key = NULL;
+   void* value = NULL;
+   void** mptr = context->uri_mappings;
+#endif
+
+   /* Setup the base RDFa 1.1 prefix and term mappings */
+   if(context->rdfa_version == RDFA_VERSION_1_1)
+   {
+      /* Setup the base RDFa 1.1 prefix mappings */
+      DECLARE_URI_MAPPING(context,
+         "grddl", "http://www.w3.org/2003/g/data-view#");
+      DECLARE_URI_MAPPING(context,
+         "ma", "http://www.w3.org/ns/ma-ont#");
+      DECLARE_URI_MAPPING(context,
+
"owl", "http://www.w3.org/2002/07/owl#"); + DECLARE_URI_MAPPING(context, + "rdf", "http://www.w3.org/1999/02/22-rdf-syntax-ns#"); + DECLARE_URI_MAPPING(context, + "rdfa", "http://www.w3.org/ns/rdfa#"); + DECLARE_URI_MAPPING(context, + "rdfs", "http://www.w3.org/2000/01/rdf-schema#"); + DECLARE_URI_MAPPING(context, + "rif", "http://www.w3.org/2007/rif#"); + DECLARE_URI_MAPPING(context, + "skos", "http://www.w3.org/2004/02/skos/core#"); + DECLARE_URI_MAPPING(context, + "skosxl", "http://www.w3.org/2008/05/skos-xl#"); + DECLARE_URI_MAPPING(context, + "wdr", "http://www.w3.org/2007/05/powder#"); + DECLARE_URI_MAPPING(context, + "void", "http://rdfs.org/ns/void#"); + DECLARE_URI_MAPPING(context, + "wdrs", "http://www.w3.org/2007/05/powder-s#"); + DECLARE_URI_MAPPING(context, + "xhv", "http://www.w3.org/1999/xhtml/vocab#"); + DECLARE_URI_MAPPING(context, + "xml", "http://www.w3.org/XML/1998/namespace"); + DECLARE_URI_MAPPING(context, + "xsd", "http://www.w3.org/2001/XMLSchema#"); + DECLARE_URI_MAPPING(context, + "cc", "http://creativecommons.org/ns#"); + DECLARE_URI_MAPPING(context, + "ctag", "http://commontag.org/ns#"); + DECLARE_URI_MAPPING(context, + "dc", "http://purl.org/dc/terms/"); + DECLARE_URI_MAPPING(context, + "dcterms", "http://purl.org/dc/terms/"); + DECLARE_URI_MAPPING(context, + "foaf", "http://xmlns.com/foaf/0.1/"); + DECLARE_URI_MAPPING(context, + "gr", "http://purl.org/goodrelations/v1#"); + DECLARE_URI_MAPPING(context, + "ical", "http://www.w3.org/2002/12/cal/icaltzd#"); + DECLARE_URI_MAPPING(context, + "og", "http://ogp.me/ns#"); + DECLARE_URI_MAPPING(context, + "rev", "http://purl.org/stuff/rev#"); + DECLARE_URI_MAPPING(context, + "sioc", "http://rdfs.org/sioc/ns#"); + DECLARE_URI_MAPPING(context, + "v", "http://rdf.data-vocabulary.org/#"); + DECLARE_URI_MAPPING(context, + "vcard", "http://www.w3.org/2006/vcard/ns#"); + DECLARE_URI_MAPPING(context, + "schema", "http://schema.org/"); + + /* Setup the base RDFa 1.1 term mappings */ + 
rdfa_update_mapping(context->term_mappings, + "describedby", "http://www.w3.org/2007/05/powder-s#describedby", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "license", "http://www.w3.org/1999/xhtml/vocab#license", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "role", "http://www.w3.org/1999/xhtml/vocab#role", + (update_mapping_value_fp)rdfa_replace_string); + } + + /* Setup the term mappings for XHTML1 */ + if(context->host_language == HOST_LANGUAGE_XHTML1) + { + rdfa_update_mapping(context->term_mappings, + "alternate", "http://www.w3.org/1999/xhtml/vocab#alternate", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "appendix", "http://www.w3.org/1999/xhtml/vocab#appendix", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "cite", "http://www.w3.org/1999/xhtml/vocab#cite", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "bookmark", "http://www.w3.org/1999/xhtml/vocab#bookmark", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "contents", "http://www.w3.org/1999/xhtml/vocab#contents", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "chapter", "http://www.w3.org/1999/xhtml/vocab#chapter", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "copyright", "http://www.w3.org/1999/xhtml/vocab#copyright", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "first", "http://www.w3.org/1999/xhtml/vocab#first", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "glossary", "http://www.w3.org/1999/xhtml/vocab#glossary", + (update_mapping_value_fp)rdfa_replace_string); + 
rdfa_update_mapping(context->term_mappings, + "help", "http://www.w3.org/1999/xhtml/vocab#help", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "icon", "http://www.w3.org/1999/xhtml/vocab#icon", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "index", "http://www.w3.org/1999/xhtml/vocab#index", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "last", "http://www.w3.org/1999/xhtml/vocab#last", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "license", "http://www.w3.org/1999/xhtml/vocab#license", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "meta", "http://www.w3.org/1999/xhtml/vocab#meta", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "next", "http://www.w3.org/1999/xhtml/vocab#next", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "prev", "http://www.w3.org/1999/xhtml/vocab#prev", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "previous", "http://www.w3.org/1999/xhtml/vocab#previous", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "section", "http://www.w3.org/1999/xhtml/vocab#section", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "start", "http://www.w3.org/1999/xhtml/vocab#start", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "stylesheet", "http://www.w3.org/1999/xhtml/vocab#stylesheet", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "subsection", "http://www.w3.org/1999/xhtml/vocab#subsection", + (update_mapping_value_fp)rdfa_replace_string); + 
rdfa_update_mapping(context->term_mappings, + "top", "http://www.w3.org/1999/xhtml/vocab#top", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "up", "http://www.w3.org/1999/xhtml/vocab#up", + (update_mapping_value_fp)rdfa_replace_string); + rdfa_update_mapping(context->term_mappings, + "p3pv1", "http://www.w3.org/1999/xhtml/vocab#p3pv1", + (update_mapping_value_fp)rdfa_replace_string); + + /* From the role attribute module */ + rdfa_update_mapping(context->term_mappings, + "role", "http://www.w3.org/1999/xhtml/vocab#role", + (update_mapping_value_fp)rdfa_replace_string); + } + + /* Setup the prefix and term mappings for HTML4 and HTML5 */ + if(context->host_language == HOST_LANGUAGE_HTML) + { + /* No term or prefix mappings as of 2012-04-04 */ + } + +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor does this elsewhere */ +#else + /* Generate namespace triples for all values in the uri_mapping */ + while(*mptr != NULL) + { + rdfa_next_mapping(mptr++, &key, &value); + mptr++; + rdfa_generate_namespace_triple(context, key, value); + } +#endif +} + +/** + * Creates a new context for the current element by cloning certain + * parts of the old context on the top of the given stack. + * + * @param context_stack the context stack that is associated with this + * processing run. 
+ */ +rdfacontext* rdfa_create_new_element_context(rdfalist* context_stack) +{ + rdfacontext* parent_context = (rdfacontext*) + context_stack->items[context_stack->num_items - 1]->data; + rdfacontext* rval = rdfa_create_context(parent_context->base); + + if(!rval) + return NULL; + + /* * Otherwise, the values are: */ + + /* * the [ base ] is set to the [ base ] value of the current + * [ evaluation context ]; */ + rval->base = rdfa_replace_string(rval->base, parent_context->base); + rdfa_init_context(rval); + + /* Set the processing depth as parent + 1 */ + rval->depth = parent_context->depth + 1; + + /* copy the URI mappings */ +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor does this automatically for URIs */ +#else + rdfa_free_mapping(rval->uri_mappings, (free_mapping_value_fp)free); +#endif + rdfa_free_mapping(rval->term_mappings, (free_mapping_value_fp)free); + rdfa_free_mapping(rval->list_mappings, (free_mapping_value_fp)rdfa_free_list); + rdfa_free_mapping(rval->local_list_mappings, (free_mapping_value_fp)rdfa_free_list); +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor does this automatically for URIs */ +#else + rval->uri_mappings = + rdfa_copy_mapping((void**)parent_context->uri_mappings, + (copy_mapping_value_fp)rdfa_replace_string); +#endif + rval->term_mappings = + rdfa_copy_mapping((void**)parent_context->term_mappings, + (copy_mapping_value_fp)rdfa_replace_string); + rval->list_mappings = + rdfa_copy_mapping((void**)parent_context->local_list_mappings, + (copy_mapping_value_fp)rdfa_replace_list); + rval->local_list_mappings = + rdfa_copy_mapping((void**)parent_context->local_list_mappings, + (copy_mapping_value_fp)rdfa_replace_list); + + /* inherit the parent context's host language and RDFa processor mode */ + rval->host_language = parent_context->host_language; + rval->rdfa_version = parent_context->rdfa_version; + + /* inherit the parent context's language */ + if(parent_context->language != NULL) + { + rval->language = + rdfa_replace_string(rval->language, 
parent_context->language); + } + + /* inherit the parent context's default vocabulary */ + if(parent_context->default_vocabulary != NULL) + { + rval->default_vocabulary = rdfa_replace_string( + rval->default_vocabulary, parent_context->default_vocabulary); + } + + /* set the callbacks callback */ + rval->default_graph_triple_callback = + parent_context->default_graph_triple_callback; + rval->processor_graph_triple_callback = + parent_context->processor_graph_triple_callback; + rval->buffer_filler_callback = parent_context->buffer_filler_callback; + + /* inherit the bnode count, _: bnode name, recurse flag, and state + * of the xml_literal_namespace_insertion */ + rval->bnode_count = parent_context->bnode_count; + rval->underscore_colon_bnode_name = + rdfa_replace_string(rval->underscore_colon_bnode_name, + parent_context->underscore_colon_bnode_name); + rval->recurse = parent_context->recurse; + rval->skip_element = 0; + rval->callback_data = parent_context->callback_data; + rval->xml_literal_namespaces_defined = + parent_context->xml_literal_namespaces_defined; + rval->xml_literal_xml_lang_defined = + parent_context->xml_literal_xml_lang_defined; + +#if 0 + /* inherit the parent context's new_subject + * TODO: This is not anywhere in the syntax processing document */ + if(parent_context->new_subject != NULL) + { + rval->new_subject = rdfa_replace_string( + rval->new_subject, parent_context->new_subject); + } +#endif + + if(parent_context->skip_element == 0) + { + /* o the [ parent subject ] is set to the value of [ new subject ], + * if non-null, or the value of the [ parent subject ] of the + * current [ evaluation context ]; */ + if(parent_context->new_subject != NULL) + { + rval->parent_subject = rdfa_replace_string( + rval->parent_subject, parent_context->new_subject); + } + else + { + rval->parent_subject = rdfa_replace_string( + rval->parent_subject, parent_context->parent_subject); + } + + /* o the [ parent object ] is set to value of [ current object + * 
resource ], if non-null, or the value of [ new subject ], if + * non-null, or the value of the [ parent subject ] of the + * current [ evaluation context ]; */ + if(parent_context->current_object_resource != NULL) + { + rval->parent_object = + rdfa_replace_string( + rval->parent_object, parent_context->current_object_resource); + } + else if(parent_context->new_subject != NULL) + { + rval->parent_object = + rdfa_replace_string( + rval->parent_object, parent_context->new_subject); + } + else + { + rval->parent_object = + rdfa_replace_string( + rval->parent_object, parent_context->parent_subject); + } + + /* o the [ list of incomplete triples ] is set to the [ local list + * of incomplete triples ]; */ + rval->incomplete_triples = rdfa_replace_list( + rval->incomplete_triples, parent_context->local_incomplete_triples); + } + else + { + rval->parent_subject = rdfa_replace_string( + rval->parent_subject, parent_context->parent_subject); + rval->parent_object = rdfa_replace_string( + rval->parent_object, parent_context->parent_object); + + /* copy the incomplete triples */ + rval->incomplete_triples = rdfa_replace_list( + rval->incomplete_triples, parent_context->incomplete_triples); + + /* copy the local list of incomplete triples */ + rval->local_incomplete_triples = rdfa_replace_list( + rval->local_incomplete_triples, + parent_context->local_incomplete_triples); + } + +#ifdef LIBRDFA_IN_RAPTOR + rval->base_uri = parent_context->base_uri; + rval->sax2 = parent_context->sax2; + rval->namespace_handler = parent_context->namespace_handler; + rval->namespace_handler_user_data = parent_context->namespace_handler_user_data; +#endif + + return rval; +} + +void rdfa_free_context_stack(rdfacontext* context) +{ + /* this field is not NULL only on the rdfacontext* at the top of the stack */ + if(context->context_stack != NULL) + { + void* rval; + /* free the stack ensuring that we do not delete this context if + * it is in the list (which it may be, if parsing ended on error) */ 
+ do + { + rval = rdfa_pop_item(context->context_stack); + if(rval && rval != context) + { + rdfa_free_context((rdfacontext*)rval); + } + } + while(rval); + free(context->context_stack->items); + free(context->context_stack); + context->context_stack = NULL; + } +} + +void rdfa_free_context(rdfacontext* context) +{ + free(context->base); + free(context->default_vocabulary); + free(context->parent_subject); + free(context->parent_object); + +#ifdef LIBRDFA_IN_RAPTOR +#else + rdfa_free_mapping(context->uri_mappings, (free_mapping_value_fp)free); +#endif + + rdfa_free_mapping(context->term_mappings, (free_mapping_value_fp)free); + rdfa_free_list(context->incomplete_triples); + rdfa_free_mapping(context->list_mappings, + (free_mapping_value_fp)rdfa_free_list); + rdfa_free_mapping(context->local_list_mappings, + (free_mapping_value_fp)rdfa_free_list); + free(context->language); + free(context->underscore_colon_bnode_name); + free(context->new_subject); + free(context->current_object_resource); + free(context->about); + free(context->typed_resource); + free(context->resource); + free(context->href); + free(context->src); + free(context->content); + free(context->datatype); + rdfa_free_list(context->property); + free(context->plain_literal); + free(context->xml_literal); + + /* TODO: These should be moved into their own data structure */ + rdfa_free_list(context->local_incomplete_triples); + + rdfa_free_context_stack(context); + free(context->working_buffer); + free(context); +} diff --git a/librdfa/curie.c b/librdfa/curie.c new file mode 100644 index 0000000..98ee7e0 --- /dev/null +++ b/librdfa/curie.c @@ -0,0 +1,691 @@ +/** + * Copyright 2008 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. 
Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * The CURIE module is used to resolve all forms of CURIEs that + * XHTML+RDFa accepts. + * + * @author Manu Sporny + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif +#include <stdio.h> +#include <ctype.h> +#include "rdfa_utils.h" +#include "rdfa.h" +#include "strtok_r.h" + +/* The base XHTML vocab URL is used to resolve URIs that are reserved + * words. Any reserved listed above is appended to the URL below to + * form a complete IRI. */ +#define XHTML_VOCAB_URI "http://www.w3.org/1999/xhtml/vocab#" +#define XHTML_VOCAB_URI_SIZE 35 + +/** + * Gets the type of CURIE that is passed to it. + * + * @param uri the uri to check. + * + * @return either CURIE_TYPE_SAFE, CURIE_TYPE_URI or CURIE_TYPE_INVALID. 
+ */ +static curie_t rdfa_get_curie_type(const char* uri) +{ + curie_t rval = CURIE_TYPE_INVALID; + + if(uri != NULL) + { + size_t uri_length = strlen(uri); + + if((uri[0] == '[') && (uri[uri_length - 1] == ']')) + { + /* a safe curie starts with [ and ends with ] */ + rval = CURIE_TYPE_SAFE; + } + else if(strstr(uri, ":") != NULL) + { + /* at this point, it is unknown whether or not the CURIE is + * an IRI or an unsafe CURIE */ + rval = CURIE_TYPE_IRI_OR_UNSAFE; + } + else + { + /* if none of the above match, then the CURIE is probably a + * relative IRI */ + rval = CURIE_TYPE_IRI_OR_UNSAFE; + } + } + + return rval; +} + +char* rdfa_resolve_uri(rdfacontext* context, const char* uri) +{ + char* rval = NULL; + char* path_start = NULL; + size_t base_length = strlen(context->base); + + if(strlen(uri) < 1) + { + /* if a blank URI is given, use the base context */ + rval = rdfa_replace_string(rval, context->base); + } + else if(strstr(uri, ":") != NULL) + { + /* if a IRI is given, don't concatenate */ + rval = rdfa_replace_string(rval, uri); + } + else if(uri[0] == '#' || uri[0] == '?') + { + /* if a fragment ID or start of a query parameter is given, + * concatenate it with the base URI */ + rval = rdfa_join_string(context->base, uri); + } + else if(uri[0] == '/') + { + /* if a relative URI is given, but it starts with a '/', use the + * host part concatenated to the given URI */ + char* tmp = NULL; + char* end_index = NULL; + + /* initialize the working-set data */ + tmp = rdfa_replace_string(tmp, context->base); + end_index = strchr(tmp, '/'); + + + /* find the final '/' character after the host part of the context base. 
*/ + if(end_index != NULL) + { + end_index = strchr(end_index + 1, '/'); + + if(end_index != NULL) + { + end_index = strchr(end_index + 1, '/'); + } + } + + /* if the '/' character after the host part was found, copy the host + * part and append the given URI to the URI, otherwise, append the + * host part and the URI part as-is, ensuring that a '/' exists at the + * end of the host part. */ + if(end_index != NULL) + { + char* rval_copy; + + *end_index = '\0'; + + /* if the '/' character after the host part was found, copy the host + * part and append the given URI to the URI. */ + rval_copy = rdfa_replace_string(rval, tmp); + rval = rdfa_join_string(rval_copy, uri); + free(rval_copy); + } + else + { + /* append the host part and the URI part as-is, ensuring that a + * '/' exists at the end of the host part. */ + size_t tlen = strlen(tmp) - 1; + char* rval_copy; + + rval_copy = rdfa_replace_string(rval, tmp); + + if(rval_copy[tlen] == '/') + { + rval_copy[tlen] = '\0'; + } + rval = rdfa_join_string(rval_copy, uri); + free(rval_copy); + } + + free(tmp); + } + else + { + if((char)context->base[base_length - 1] == '/') + { + /* if the base URI already ends in /, concatenate */ + rval = rdfa_join_string(context->base, uri); + } + else + { + /* if we have a relative URI, chop off the name of the file + * and replace it with the relative pathname */ + char* end_index = strrchr(context->base, '/'); + + if(end_index != NULL) + { + char* tmpstr = NULL; + char* end_index2; + + tmpstr = rdfa_replace_string(tmpstr, context->base); + end_index2 = strrchr(tmpstr, '/'); + if(end_index2 != NULL) { + end_index2++; + *end_index2 = '\0'; + } + + rval = rdfa_join_string(tmpstr, uri); + free(tmpstr); + } + } + } + + /* It is possible that rval may be NULL here in OOM scenarios */ + if(!rval) + return NULL; + + /* Find the start of a scheme-based URL path */ + path_start = (char*)strstr(rval, "://"); + if(path_start != NULL) + { + if(strstr(path_start, "/.") != NULL) + { + path_start += 
3; + path_start = strstr(path_start, "/"); + } + else + { + path_start = NULL; + } + } + + /* remove any dot-segments that remain in the URL for URLs w/ schemes */ + if(path_start != NULL) + { + size_t rlen = strlen(rval) + 1; + size_t hlen = path_start - rval; + char* src = (char*)malloc(rlen + 4); + char* sptr = src + hlen; + char* dest = (char*)malloc(rlen + 1); + char* dptr = dest + hlen; + char* dfence = dptr; + + memset(src, 0, rlen + 4); + memcpy(src, rval, rlen); + strncpy(dest, rval, hlen); + + /* Process the path portion of the IRI */ + while(sptr[0] != '?' && sptr[0] != '\0') + { + if(sptr[0] == '.' && sptr[1] == '.' && sptr[2] == '/') + { + /* A. If the input buffer begins with a prefix of "../", + * then remove that prefix from the input buffer; otherwise, + */ + sptr += 3; + } + else if(sptr[0] == '.' && sptr[1] == '/') + { + /* A. If the input buffer begins with a prefix of "./", + * then remove that prefix from the input buffer; otherwise, + */ + sptr += 2; + } + else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '/') + { + /* B. if the input buffer begins with a prefix of "/./", + * then replace that prefix with "/" in the input buffer; + * otherwise, + */ + sptr += 2; + } + else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '\0') + { + /* B. if the input buffer begins with a prefix of "/.", + * where "." is a complete path segment, then replace that + * prefix with "/" in the input buffer; otherwise, + */ + sptr += 1; + *sptr = '/'; + } + else if(sptr[0] == '/' && sptr[1] == '.' && sptr[2] == '.' && + ((sptr[3] == '/') || (sptr[3] == '\0'))) + { + /* C. 
if the input buffer begins with a prefix of "/../", + * then replace that prefix with "/" in the input buffer and + * remove the last segment and its preceding "/" (if any) from + * the output buffer; otherwise, + */ + if(sptr[3] == '/') + { + sptr += 3; + } + else if(sptr[3] == '\0') + { + sptr += 2; + *sptr = '/'; + } + + /* remove the last segment and the preceding '/' */ + if(dptr > dfence) + { + dptr--; + if(dptr[0] == '/') + { + dptr--; + } + } + while(dptr >= dfence && dptr[0] != '/') + { + dptr--; + } + if(dptr >= dfence) + { + dptr[0] = '\0'; + } + else + { + dptr = dfence; + dptr[0] = '\0'; + } + } + else if(sptr[0] == '.' && sptr[1] == '\0') + { + /* D. if the input buffer consists only of ".", then remove + * that from the input buffer; otherwise, + */ + sptr++; + + } + else if(sptr[0] == '.' && sptr[1] == '.' && sptr[2] == '\0') + { + /* D. if the input buffer consists only of "..", then remove + * that from the input buffer; otherwise, + */ + sptr += 2; + } + else + { + /* Copy the path segment */ + do + { + *dptr++ = *sptr++; + *dptr = '\0'; + } while(sptr[0] != '/' && sptr[0] != '?' 
&& sptr[0] != '\0'); + } + } + + /* Copy the remaining query parameters */ + if(sptr[0] == '?') + { + size_t rest_len = strlen(sptr); + memcpy(dptr, sptr, rest_len + 1); + } + else + { + dptr[0] = '\0'; + } + + free(rval); + free(src); + rval = dest; + } + + return rval; +} + +char* rdfa_resolve_curie( + rdfacontext* context, const char* uri, curieparse_t mode) +{ + char* rval = NULL; + curie_t ctype = rdfa_get_curie_type(uri); + + if(!uri) + return NULL; + + if(ctype == CURIE_TYPE_INVALID) + { + rval = NULL; + } + else if((ctype == CURIE_TYPE_IRI_OR_UNSAFE) && + ((mode == CURIE_PARSE_HREF_SRC) || + (context->rdfa_version == RDFA_VERSION_1_0 && + mode == CURIE_PARSE_ABOUT_RESOURCE))) + { + /* If we are parsing something that can take either a CURIE or a + * URI, and the type is either IRI or UNSAFE, assume that it is + * an IRI */ + rval = rdfa_resolve_uri(context, uri); + } + + /* + * Check to see if the value is a term. + */ + if(ctype == CURIE_TYPE_IRI_OR_UNSAFE && mode == CURIE_PARSE_PROPERTY) + { + const char* term_iri; + term_iri = (const char*)rdfa_get_mapping(context->term_mappings, uri); + if(term_iri != NULL) + { + rval = rdfa_strdup(term_iri); + } + else if(context->default_vocabulary == NULL && strstr(uri, ":") == NULL) + { + /* Generate the processor warning if this is a missing term */ +#define FORMAT_1 "The use of the '%s' term was unrecognized by the RDFa processor because it is not a valid term for the current Host Language." 
+ +#ifdef LIBRDFA_IN_RAPTOR + raptor_parser_warning((raptor_parser*)context->callback_data, + FORMAT_1, uri); +#else + char msg[1024]; + snprintf(msg, 1024, FORMAT_1, uri); + + rdfa_processor_triples(context, RDFA_PROCESSOR_WARNING, msg); +#endif + } + } + + /* if we are processing a safe CURIE OR + * if we are parsing an unsafe CURIE that is an @type_of, + * @datatype, @property, @rel, or @rev attribute, treat the curie + * as not an IRI, but an unsafe CURIE */ + if(rval == NULL && ((ctype == CURIE_TYPE_SAFE) || + ((ctype == CURIE_TYPE_IRI_OR_UNSAFE) && + ((mode == CURIE_PARSE_INSTANCEOF_DATATYPE) || + (mode == CURIE_PARSE_PROPERTY) || + (mode == CURIE_PARSE_RELREV) || + (context->rdfa_version == RDFA_VERSION_1_1 && + mode == CURIE_PARSE_ABOUT_RESOURCE))))) + { + char* working_copy = NULL; + char* wcptr = NULL; + char* prefix = NULL; + char* curie_reference = NULL; + const char* expanded_prefix = NULL; + size_t uri_len = strlen(uri); + working_copy = (char*)malloc(uri_len + 1); + memcpy(working_copy, uri, uri_len + 1);/*rdfa_replace_string(working_copy, uri);*/ + + /* if this is a safe CURIE, chop off the beginning and the end */ + if(ctype == CURIE_TYPE_SAFE) + { + prefix = strtok_r(working_copy, "[:]", &wcptr); + if(wcptr) + curie_reference = strtok_r(NULL, "[]", &wcptr); + } + else if(ctype == CURIE_TYPE_IRI_OR_UNSAFE) + { + prefix = strtok_r(working_copy, ":", &wcptr); + if(wcptr) + curie_reference = strtok_r(NULL, "", &wcptr); + } + + /* fully resolve the prefix and get its length */ + + /* if a colon was found, but no prefix, use the XHTML vocabulary URI + * as the expanded prefix */ + if((uri[0] == ':') || (strcmp(uri, "[:]") == 0)) + { + expanded_prefix = XHTML_VOCAB_URI; + curie_reference = prefix; + prefix = NULL; + } + else if(uri[0] == ':') + { + /* FIXME: This looks like a bug - don't know why this code is + * in here. 
I think it's for the case where ":next" is + * specified, but the code's not checking that -- manu */ + expanded_prefix = context->base; + curie_reference = prefix; + prefix = NULL; + } + else if(prefix != NULL) + { + if((mode != CURIE_PARSE_PROPERTY) && + (mode != CURIE_PARSE_RELREV) && + strcmp(prefix, "_") == 0) + { + /* if the prefix specifies this as a blank node, then we + * use the blank node prefix */ + expanded_prefix = "_"; + } + else + { + /* if the prefix was defined, get it from the set of URI mappings. */ +#ifdef LIBRDFA_IN_RAPTOR + if(!strcmp(prefix, "xml")) + { + expanded_prefix = RAPTOR_GOOD_CAST(const char*, raptor_xml_namespace_uri); + } + else + { + raptor_namespace *nspace; + raptor_uri* ns_uri; + nspace = raptor_namespaces_find_namespace(&context->sax2->namespaces, + (const unsigned char*)prefix, + (int)strlen(prefix)); + if(nspace) { + ns_uri = raptor_namespace_get_uri(nspace); + if(ns_uri) + expanded_prefix = (const char*)raptor_uri_as_string(ns_uri); + } + } +#else + expanded_prefix = + rdfa_get_mapping(context->uri_mappings, prefix); + + /* Generate the processor warning if the prefix was not found */ + if(expanded_prefix == NULL && strstr(uri, ":") != NULL && + strstr(uri, "://") == NULL) + { +#define FORMAT_2 "The '%s' prefix was not found. You may want to check that it is declared before it is used, or that it is a valid prefix string." +#ifdef LIBRDFA_IN_RAPTOR + raptor_parser_warning((raptor_parser*)context->callback_data, + FORMAT_2, prefix); +#else + char msg[1024]; + snprintf(msg, 1024, FORMAT_2, prefix); + + rdfa_processor_triples(context, RDFA_PROCESSOR_WARNING, msg); +#endif + } +#endif + } + } + + if((expanded_prefix != NULL) && (curie_reference != NULL)) + { + /* if the expanded prefix and the reference exist, generate the + * full IRI. 
*/ + if(strcmp(expanded_prefix, "_") == 0) + { + rval = rdfa_join_string("_:", curie_reference); + } + else + { + rval = rdfa_join_string(expanded_prefix, curie_reference); + } + } + else if((expanded_prefix != NULL) && (expanded_prefix[0] != '_') && + (curie_reference == NULL)) + { + /* if the expanded prefix exists, but the reference is null, + * generate the CURIE because a reference-less CURIE is still + * valid */ + rval = rdfa_join_string(expanded_prefix, ""); + } + + free(working_copy); + } + + if(rval == NULL) + { + /* if we're NULL at this point, the CURIE might be the special + * unnamed bnode specified by _: */ + if((strcmp(uri, "[_:]") == 0) || (strcmp(uri, "_:") == 0)) + { + if(context->underscore_colon_bnode_name == NULL) + { + context->underscore_colon_bnode_name = rdfa_create_bnode(context); + } + rval = rdfa_replace_string(rval, context->underscore_colon_bnode_name); + } + /* if we're NULL at this point and the IRI isn't [], then this might be + * an IRI */ + else if(context->rdfa_version == RDFA_VERSION_1_1 && + (strcmp(uri, "[]") != 0)) + { + if((context->default_vocabulary != NULL) && + ((mode == CURIE_PARSE_PROPERTY) || (mode == CURIE_PARSE_RELREV) || + (mode == CURIE_PARSE_INSTANCEOF_DATATYPE)) && + (strstr(uri, ":") == NULL)) + { + rval = rdfa_join_string(context->default_vocabulary, uri); + } + else if(((mode == CURIE_PARSE_PROPERTY) || + (mode == CURIE_PARSE_ABOUT_RESOURCE) || + (mode == CURIE_PARSE_INSTANCEOF_DATATYPE)) && + (strstr(uri, "_:") == NULL) && (strstr(uri, "[_:") == NULL)) + { + rval = rdfa_resolve_uri(context, uri); + } + } + } + + /* even though a reference-only CURIE is valid, it does not + * generate a triple in XHTML+RDFa. If we're NULL at this point, + * the given value wasn't valid in XHTML+RDFa. */ + + return rval; +} + +/** + * Resolves a given uri depending on whether or not it is a fully + * qualified IRI, a CURIE, or a short-form XHTML reserved word for + * @rel or @rev as defined in the XHTML+RDFa Syntax Document. 
+ * + * @param context the current processing context. + * @param uri the URI part to process. + * + * @return the fully qualified IRI, or NULL if the conversion failed + * due to the given URI not being a short-form XHTML reserved + * word. The memory returned from this function MUST be freed. + */ +char* rdfa_resolve_relrev_curie(rdfacontext* context, const char* uri) +{ + char* rval = NULL; + const char* resource = uri; + + /* check to make sure the URI doesn't have an empty prefix */ + if(uri[0] == ':') + { + resource++; + } + + /* override reserved words if there is a default vocab defined + * NOTE: Don't have to check for RDFa 1.1 mode because vocab is only defined + * in RDFa 1.1 */ + if(context->default_vocabulary != NULL) + { + rval = rdfa_resolve_curie(context, uri, CURIE_PARSE_RELREV); + } + else if(context->host_language == HOST_LANGUAGE_XHTML1) + { + /* search all of the XHTML @rel/@rev reserved words for a + * case-insensitive match against the given URI */ + char* term = rdfa_strdup(resource); + char* ptr = NULL; + + for(ptr = term; *ptr; ptr++) + { + *ptr = RAPTOR_GOOD_CAST(char, tolower(*ptr)); + } + + rval = (char*)rdfa_get_mapping(context->term_mappings, term); + if(rval != NULL) + { + rval = rdfa_strdup(rval); + } + free(term); + } + else + { + /* Search the term mappings for a match */ + rval = (char*)rdfa_get_mapping(context->term_mappings, resource); + if(rval != NULL) + { + rval = rdfa_strdup(rval); + } + } + + /* if a search against the registered terms failed, + * attempt to resolve the value as a standard CURIE */ + if(rval == NULL) + { + rval = rdfa_resolve_curie(context, uri, CURIE_PARSE_RELREV); + } + + /* if a CURIE wasn't found, attempt to resolve the value as an IRI */ + if(rval == NULL && (context->rdfa_version == RDFA_VERSION_1_1)) + { + rval = rdfa_resolve_uri(context, uri); + } + + return rval; +} + +rdfalist* rdfa_resolve_curie_list( + rdfacontext* rdfa_context, const char* uris, curieparse_t mode) +{ + rdfalist* rval = 
rdfa_create_list(3); + char* working_uris = NULL; + char* uptr = NULL; + char* ctoken = NULL; + working_uris = rdfa_replace_string(working_uris, uris); + + /* go through each item in the list of CURIEs and resolve each */ + ctoken = strtok_r(working_uris, RDFA_WHITESPACE, &uptr); + + while(ctoken != NULL) + { + char* resolved_curie = NULL; + + if((mode == CURIE_PARSE_INSTANCEOF_DATATYPE) || + (mode == CURIE_PARSE_ABOUT_RESOURCE) || + (mode == CURIE_PARSE_PROPERTY)) + { + resolved_curie = + rdfa_resolve_curie(rdfa_context, ctoken, mode); + } + else if(mode == CURIE_PARSE_RELREV) + { + resolved_curie = + rdfa_resolve_relrev_curie(rdfa_context, ctoken); + } + + /* add the CURIE if it was a valid one */ + if(resolved_curie != NULL) + { + rdfa_add_item(rval, resolved_curie, RDFALIST_FLAG_TEXT); + free(resolved_curie); + } + + ctoken = strtok_r(NULL, RDFA_WHITESPACE, &uptr); + } + + free(working_uris); + + return rval; +} diff --git a/librdfa/iri.c b/librdfa/iri.c new file mode 100644 index 0000000..8f72d78 --- /dev/null +++ b/librdfa/iri.c @@ -0,0 +1,73 @@ +/** + * Copyright 2008 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * The iri module is used to process IRIs. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> + +#include "rdfa.h" +#include "rdfa_utils.h" + +/** + * Strips the iquery and ifragment part from an IRI. 
This leaves just the + * scheme and the ihier-part, as defined in RFC 3987. This function will + * copy the input string and return a new string that must be free()'d. + * + * @param iri the IRI that should be stripped of anything after the iquery + * and fragment, if they exist. + */ +char* rdfa_iri_get_base(const char* iri) +{ + char* rval = NULL; + const char* eindex = 0; + + /* search to see if there is iquery separator */ + eindex = strchr(iri, '?'); + if(eindex == NULL) + { + /* if there is no iquery separator, check to see if there is an + * ifragment separator */ + eindex = strchr(iri, '#'); + } + + /* check to see if the output string needs to be different from the + * input string */ + if(eindex == NULL) + { + /* there was no iquery or ifragment in the input string, so there is + * no need to reformat the string */ + rval = rdfa_strdup(iri); + } + else + { + /* the output string should be concatenated */ + unsigned int length = (unsigned int)(eindex - iri); + rval = (char*)malloc(length + 1); + rval = strncpy(rval, iri, length); + rval[length] = '\0'; + } + + return rval; +} diff --git a/librdfa/language.c b/librdfa/language.c new file mode 100644 index 0000000..daa2daf --- /dev/null +++ b/librdfa/language.c @@ -0,0 +1,55 @@ +/** + * Copyright 2008 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * The language module is used to determine and set the current language. 
+ */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +/** + * Updates the language given the value of the xml:lang attribute. + * + * @param lang the new value of the lang attribute. + */ +void rdfa_update_language(rdfacontext* context, const char* lang) +{ + /* the [current element] is parsed for any language information, + * and [language] is set in the [current evaluation context]; */ + if(lang != NULL) + { + if(strlen(lang) > 0) + { + /* if a language was specified, set it */ + context->language = rdfa_replace_string(context->language, lang); + } + else + { + /* if a blank language was specified, clear the language context */ + free(context->language); + context->language = NULL; + } + } +} diff --git a/librdfa/lists.c b/librdfa/lists.c new file mode 100644 index 0000000..d399979 --- /dev/null +++ b/librdfa/lists.c @@ -0,0 +1,243 @@ +/** + * Copyright 2012 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * Handles all triple functionality including all incomplete triple + * functionality. 
+ * + * @author Manu Sporny + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +void rdfa_establish_new_inlist_triples(rdfacontext* context, + rdfalist* predicates, const char* object, rdfresource_t object_type) +{ + int i = 0; + for(i = 0; i < (int)predicates->num_items; i++) + { + const char* predicate = (const char*)predicates->items[i]->data; + char* resolved_predicate = rdfa_resolve_relrev_curie(context, predicate); + rdftriple* triple; + /* ensure the list mapping exists */ + rdfa_create_list_mapping( + context, context->local_list_mappings, + context->new_subject, resolved_predicate); + + /* add an incomplete triple for each list mapping */ + triple = rdfa_create_triple(context->new_subject, resolved_predicate, + object, object_type, context->datatype, context->language); + rdfa_append_to_list_mapping(context->local_list_mappings, + context->new_subject, resolved_predicate, triple); + + free(resolved_predicate); + } + +#if defined(DEBUG) && DEBUG > 0 + printf("LOCAL LIST MAPPINGS: "); + rdfa_print_mapping(context->local_list_mappings, + (print_mapping_value_fp)rdfa_print_triple_list); +#endif +} + +void rdfa_save_incomplete_list_triples( + rdfacontext* context, const rdfalist* rel) +{ + unsigned int i; + for(i = 0; i < rel->num_items; i++) + { + const char* curie = (const char*)rel->items[i]->data; + char* resolved_curie = rdfa_resolve_relrev_curie(context, curie); + + /* ensure the list mapping exists */ + rdfa_create_list_mapping( + context, context->local_list_mappings, + context->new_subject, resolved_curie); + + /* get the list name */ + rdfa_add_item( + context->local_incomplete_triples, resolved_curie, + (liflag_t)(RDFALIST_FLAG_DIR_NONE | RDFALIST_FLAG_TEXT)); + + free(resolved_curie); + } + +#if defined(DEBUG) && DEBUG > 0 + printf("LOCAL INCOMPLETE TRIPLES: "); + rdfa_print_list(context->local_incomplete_triples); +#endif +} + +void 
rdfa_complete_list_triples(rdfacontext* context) +{ + /* For each IRI in the local list mapping, if the equivalent list does + * not exist in the evaluation context, indicating that the list was + * originally instantiated on the current element, use the list as follows: */ + int i; + rdfalist* list; + rdftriple* triple; + void** mptr = context->local_list_mappings; + char* key = NULL; + void** kptr = NULL; + void* value = NULL; + unsigned int list_depth = 0; + +#if defined(DEBUG) && DEBUG > 0 + printf("local_list_mappings: "); + rdfa_print_mapping(context->local_list_mappings, + (print_mapping_value_fp)rdfa_print_triple_list); +#endif + + while(*mptr != NULL) + { + kptr = mptr; + rdfa_next_mapping(mptr++, &key, &value); + list = (rdfalist*)value; + list_depth = list->user_data; + mptr++; +#if defined(DEBUG) && DEBUG > 0 + printf("LIST TRIPLES for key (%u/%u): KEY(%s)\n", + context->depth, list_depth, key); +#endif + + if((context->depth < (int)list_depth) && + (rdfa_get_list_mapping( + context->list_mappings, context->new_subject, key) == NULL) && + (strcmp(key, RDFA_MAPPING_DELETED_KEY) != 0)) + { + char* predicate = strstr(key, " ") + 1; + triple = (rdftriple*)list->items[0]->data; + if(list->num_items == 1) + { + /* Free unused list triple */ + rdfa_free_triple(triple); + + /* the list is empty, generate an empty list triple */ + triple = rdfa_create_triple(context->new_subject, predicate, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#nil", + RDF_TYPE_IRI, NULL, NULL); + context->default_graph_triple_callback( + triple, context->callback_data); + } + else + { + char* bnode = NULL; + char* subject; + char* tmp = NULL; + bnode = rdfa_replace_string(bnode, triple->subject); + for(i = 1; i < (int)list->num_items; i++) + { + char* next = NULL; + triple = (rdftriple*)list->items[i]->data; + /* Create a new 'bnode' array containing newly created bnodes, + * one for each item in the list + * For each bnode-(IRI or literal) pair from the list the + * following triple is 
generated: + * + * subject + * bnode + * predicate + * http://www.w3.org/1999/02/22-rdf-syntax-ns#first + * object + * full IRI or literal */ + triple->subject = + rdfa_replace_string(triple->subject, bnode); + triple->predicate = + rdfa_replace_string(triple->predicate, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#first"); + context->default_graph_triple_callback( + triple, context->callback_data); + + /* Free the list item */ + free(list->items[i]); + list->items[i] = NULL; + + /* For each item in the 'bnode' array the following triple is + * generated: + * + * subject + * bnode + * predicate + * http://www.w3.org/1999/02/22-rdf-syntax-ns#rest + * object + * next item in the 'bnode' array or, if that does not exist, + * http://www.w3.org/1999/02/22-rdf-syntax-ns#nil */ + if(i < (int)list->num_items - 1) + { + next = rdfa_create_bnode(context); + } + else + { + next = rdfa_strdup((char*)"http://www.w3.org/1999/02/22-rdf-syntax-ns#nil"); + } + + triple = rdfa_create_triple(bnode, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#rest", + next, RDF_TYPE_IRI, NULL, NULL); + context->default_graph_triple_callback( + triple, context->callback_data); + + /* Free the bnode, setting 'next' appropriately */ + free(bnode); + bnode = next; + } + + /* A single additional triple is generated: + * subject + * current subject + * predicate + * full IRI of the local list mapping associated with this list + * object + * first item of the 'bnode' array */ + subject = rdfa_strdup(key); + if(subject) + tmp = strstr(subject, " "); + + if(tmp) { + tmp[0] = '\0'; + triple = (rdftriple*)list->items[0]->data; + triple->subject = + rdfa_replace_string(triple->subject, subject); + triple->predicate = + rdfa_replace_string(triple->predicate, predicate); + context->default_graph_triple_callback( + triple, context->callback_data); + } + if(subject) + free(subject); + if(bnode) + free(bnode); + } + + /* Free the first list item and empty the list */ + free(list->items[0]); + list->items[0] = NULL; 
+ list->num_items = 0; + + /* clear the entry from the mapping */ + *kptr = rdfa_replace_string((char*)*kptr, RDFA_MAPPING_DELETED_KEY); + } + } +} diff --git a/librdfa/namespace.c b/librdfa/namespace.c new file mode 100644 index 0000000..06c7f00 --- /dev/null +++ b/librdfa/namespace.c @@ -0,0 +1,140 @@ +/** + * Copyright 2008-2011 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * This file implements mapping data structure memory management as + * well as updating URI mappings. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +/** + * Attempts to update the uri mappings in the given context using the + * given attribute/value pair. + * + * @param attribute the attribute, which must start with xmlns. + * @param value the value of the attribute + */ +void rdfa_update_uri_mappings( + rdfacontext* context, const char* attr, const char* value) +{ +#ifdef LIBRDFA_IN_RAPTOR + raptor_namespace_stack* nstack; + nstack = &context->sax2->namespaces; +#endif + + /* * the [current element] is parsed for [URI mappings] and these + * are added to the [list of URI mappings]. Note that a [URI + * mapping] will simply overwrite any current mapping in the list + * that has the same name; */ + + /* Mappings are provided by @xmlns. 
The value to be mapped is set + * by the XML namespace prefix, and the value to map is the value + * of the attribute -- a URI. Note that the URI is not processed + * in any way; in particular if it is a relative path it is not + * resolved against the [current base]. Authors are advised to + * follow best practice for using namespaces, which includes not + * using relative paths. */ + + if(attr == NULL) + { +#ifdef LIBRDFA_IN_RAPTOR + raptor_namespaces_start_namespace_full(nstack, + NULL, + (const unsigned char*)value, + 0); +#else + rdfa_update_mapping( + context->uri_mappings, XMLNS_DEFAULT_MAPPING, value, + (update_mapping_value_fp)rdfa_replace_string); +#endif + } + else if(strcmp(attr, "_") == 0) + { +#define FORMAT_1 "The underscore character must not be declared as a prefix " \ + "because it conflicts with the prefix for blank node identifiers. " \ + "The occurrence of this prefix declaration is being ignored." +#ifdef LIBRDFA_IN_RAPTOR + raptor_parser_warning((raptor_parser*)context->callback_data, + FORMAT_1); +#else + rdfa_processor_triples(context, + RDFA_PROCESSOR_WARNING, + FORMAT_1); +#endif + } + else if(attr[0] == ':' || attr[0] == '_' || + (attr[0] >= 'A' && attr[0] <= 'Z') || + (attr[0] >= 'a' && attr[0] <= 'z') || + ((unsigned char)attr[0] >= 0xc0 && (unsigned char)attr[0] <= 0xd6) || + ((unsigned char)attr[0] >= 0xd8 && (unsigned char)attr[0] <= 0xf6) || (unsigned char)attr[0] >= 0xf8) + { +#ifdef LIBRDFA_IN_RAPTOR + raptor_namespaces_start_namespace_full(nstack, + (const unsigned char*)attr, + (const unsigned char*)value, + 0); +#else + rdfa_generate_namespace_triple(context, attr, value); + rdfa_update_mapping(context->uri_mappings, attr, value, + (update_mapping_value_fp)rdfa_replace_string); +#endif + } + else + { + /* allowable characters for CURIEs: + * ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | + * [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | + * [#x2C00-#x2FEF] | [#x3001-#xD7FF] | 
[#xF900-#xFDCF] | [#xFDF0-#xFFFD] + * | [#x10000-#xEFFFF] + */ + + /* Generate the processor warning if this is an invalid prefix */ +#define FORMAT_2 "The declaration of the '%s' prefix is invalid " \ + "because it starts with an invalid character. Please see " \ + "http://www.w3.org/TR/REC-xml/#NT-NameStartChar for a " \ + "full explanation of valid first characters for declaring " \ + "prefixes." +#ifdef LIBRDFA_IN_RAPTOR + raptor_parser_warning((raptor_parser*)context->callback_data, + FORMAT_2, attr); +#else + char msg[1024]; + snprintf(msg, 1024, FORMAT_1); + rdfa_processor_triples(context, RDFA_PROCESSOR_WARNING, msg); +#endif + } + +#ifdef LIBRDFA_IN_RAPTOR +#else + /* print the current mapping */ + if(DEBUG) + { + printf("DEBUG: PREFIX MAPPINGS:"); + rdfa_print_mapping(context->uri_mappings, + (print_mapping_value_fp)rdfa_print_string); + } +#endif +} diff --git a/librdfa/rdfa.c b/librdfa/rdfa.c new file mode 100644 index 0000000..6dadbe0 --- /dev/null +++ b/librdfa/rdfa.c @@ -0,0 +1,1552 @@ +/** + * Copyright 2008-2011 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * The librdfa library is the Fastest RDFa Parser in the Universe. It is + * a stream parser, meaning that it takes an XML data as input and spits + * out RDF triples as it comes across them in the stream. Due to this + * processing approach, librdfa has a very, very small memory footprint. 
+ * It is also very fast and can operate on hundreds of gigabytes of XML + * data without breaking a sweat. + * + * Usage: + * + * rdfacontext* context = rdfa_create_context(BASE_URI); + * context->callback_data = your_user_data; + * rdfa_set_default_graph_triple_handler(context, &default_graph_triple); + * rdfa_set_processor_graph_triple_handler(context, &processor_graph_triple); + * rdfa_set_buffer_filler(context, &fill_buffer); + * rdfa_parse(context); + * rdfa_free_context(context); + * + * @author Manu Sporny + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#ifdef HAVE_STRINGS_H +# include <strings.h> +#endif +#include <ctype.h> +#include <libxml/SAX2.h> +#include "rdfa_utils.h" +#include "rdfa.h" +#include "strtok_r.h" + +#define READ_BUFFER_SIZE 4096 +#define RDFA_DOCTYPE_STRING_LENGTH 103 + +/** + * Read the head of the XHTML document and determines the base IRI for + * the document. + * + * @param context the current working context. + * @param working_buffer the current working buffer. + * @param wb_allocated the number of bytes that have been allocated to + * the working buffer. + * + * @return the size of the data available in the working buffer. + */ +static size_t rdfa_init_base( + rdfacontext* context, char** working_buffer, size_t* working_buffer_size, + char* temp_buffer, size_t bytes_read) +{ + char* head_end = NULL; + size_t offset = context->wb_position; + size_t needed_size = 0; + + if((offset + bytes_read) > *working_buffer_size) + { + needed_size = (offset + bytes_read) - *working_buffer_size; + } + + /* search for the end of <head>, stop if <head> was found */ + + /* extend the working buffer size */ + if(needed_size > 0) + { + size_t temp_buffer_size = sizeof(char) * READ_BUFFER_SIZE; + if((size_t)needed_size > temp_buffer_size) + temp_buffer_size += needed_size; + + *working_buffer_size += temp_buffer_size; + /* +1 for NUL at end, to allow strstr() etc. 
to work */ + *working_buffer = (char*)realloc(*working_buffer, *working_buffer_size + 1); + } + + /* append to the working buffer */ + memmove(*working_buffer + offset, temp_buffer, bytes_read); + /* ensure the buffer is a NUL-terminated string */ + *(*working_buffer + offset + bytes_read) = '\0'; + + /* Sniff the beginning of the document for any document information */ + if(strstr(*working_buffer, "-//W3C//DTD XHTML+RDFa 1.0//EN") != NULL) + { + context->host_language = HOST_LANGUAGE_XHTML1; + context->rdfa_version = RDFA_VERSION_1_0; + } + else if(strstr(*working_buffer, "-//W3C//DTD XHTML+RDFa 1.1//EN") != NULL) + { + context->host_language = HOST_LANGUAGE_XHTML1; + context->rdfa_version = RDFA_VERSION_1_1; + } + else if(strstr(*working_buffer, "<html") != NULL) + { + context->host_language = HOST_LANGUAGE_HTML; + context->rdfa_version = RDFA_VERSION_1_1; + } + else + { + context->host_language = HOST_LANGUAGE_XML1; + context->rdfa_version = RDFA_VERSION_1_1; + } + +#ifdef LIBRDFA_IN_RAPTOR + if(context->raptor_rdfa_version == 10) { + context->host_language = HOST_LANGUAGE_XHTML1; + context->rdfa_version = RDFA_VERSION_1_0; + } else if(context->raptor_rdfa_version == 11) + context->rdfa_version = RDFA_VERSION_1_1; +#endif + + /* search for the end of </head> in */ + head_end = strstr(*working_buffer, "</head>"); + if(head_end == NULL) + head_end = strstr(*working_buffer, "</HEAD>"); + + context->wb_position += bytes_read; + + if(head_end == NULL) + return bytes_read; + + /* if </head> was found, search for <base and extract the base URI */ + if(head_end != NULL) + { + char* base_start = strstr(*working_buffer, "<base "); + char* href_start = NULL; + if(base_start == NULL) + base_start = strstr(*working_buffer, "<BASE "); + if(base_start != NULL) + href_start = strstr(base_start, "href="); + + if(href_start != NULL) + { + char sep = href_start[5]; + char* uri_start = href_start + 6; + char* uri_end = strchr(uri_start, sep); + + if(uri_end != NULL) + { + 
if(*uri_start != sep) + { + size_t uri_size = uri_end - uri_start; + char* temp_uri = (char*)malloc(sizeof(char) * uri_size + 1); + char* cleaned_base; + strncpy(temp_uri, uri_start, uri_size); + temp_uri[uri_size] = '\0'; + + /* TODO: This isn't in the processing rules, should it + * be? Setting current_object_resource will make + * sure that the BASE element is inherited by all + * subcontexts. */ + cleaned_base = rdfa_iri_get_base(temp_uri); + context->current_object_resource = + rdfa_replace_string( + context->current_object_resource, cleaned_base); + + /* clean up the base context */ + context->base = + rdfa_replace_string(context->base, cleaned_base); + free(cleaned_base); + free(temp_uri); + } + } + } + } + + return bytes_read; +} + +#ifdef LIBRDFA_IN_RAPTOR +static int +raptor_nspace_compare(const void *a, const void *b) +{ + raptor_namespace* ns_a=*(raptor_namespace**)a; + raptor_namespace* ns_b=*(raptor_namespace**)b; + if(!ns_a->prefix) + return 1; + else if(!ns_b->prefix) + return -1; + else + return strcmp((const char*)ns_b->prefix, (const char*)ns_a->prefix); +} +#endif + +/** + * Handles the start_element call + */ +static void start_element(void *parser_context, const char* name, + const char* prefix, const char* URI, int nb_namespaces, + const char** namespaces, int nb_attributes, int nb_defaulted, + const char** attributes) +{ + rdfacontext* root_context = (rdfacontext*)parser_context; + rdfalist* context_stack = (rdfalist*)root_context->context_stack; + rdfacontext* context = rdfa_create_new_element_context(context_stack); + char* xml_lang = NULL; + const char* about_curie = NULL; + char* about = NULL; + const char* src_curie = NULL; + char* src = NULL; + const char* type_of_curie = NULL; + rdfalist* type_of = NULL; + const char* rel_curie = NULL; + rdfalist* rel = NULL; + const char* rev_curie = NULL; + rdfalist* rev = NULL; + const char* property_curie = NULL; + rdfalist* property = NULL; + const char* resource_curie = NULL; + char* resource = 
NULL; + const char* href_curie = NULL; + char* href = NULL; + char* content = NULL; + const char* datatype_curie = NULL; + char* datatype = NULL; + +#ifdef LIBRDFA_IN_RAPTOR + if(1) { + raptor_parser* rdf_parser = (raptor_parser*)context->callback_data; + raptor_sax2_update_document_locator(context->sax2, + &rdf_parser->locator); + } +#endif + + rdfa_push_item(context_stack, context, RDFALIST_FLAG_CONTEXT); + +#if defined(DEBUG) && DEBUG > 0 + if(1) { + int i; + + /* dump all arguments sent to this callback */ + fprintf(stdout, "DEBUG: SAX.startElementNs(%s", (char *) name); + if (prefix == NULL) + fprintf(stdout, ", NULL"); + else + fprintf(stdout, ", %s", (char *) prefix); + if (URI == NULL) + fprintf(stdout, ", NULL"); + else + fprintf(stdout, ", '%s'", (char *) URI); + fprintf(stdout, ", %d", nb_namespaces); + + /* dump all namespaces */ + if (namespaces != NULL) { + for (i = 0;i < nb_namespaces * 2;i++) { + fprintf(stdout, ", xmlns"); + if (namespaces[i] != NULL) + fprintf(stdout, ":%s", namespaces[i]); + i++; + fprintf(stdout, "='%s'", namespaces[i]); + } + } + + /* dump all attributes */ + fprintf(stdout, ", %d, %d", nb_attributes, nb_defaulted); + if (attributes != NULL) { + for (i = 0;i < nb_attributes * 5;i += 5) { + if (attributes[i + 1] != NULL) + fprintf( + stdout, ", %s:%s='", attributes[i + 1], attributes[i]); + else + fprintf(stdout, ", %s='", attributes[i]); + fprintf(stdout, "%.4s...', %d", attributes[i + 3], + (int)(attributes[i + 4] - attributes[i + 3])); + } + } + fprintf(stdout, ")\n"); + } +#endif + + /* start the XML Literal text */ + if(context->xml_literal == NULL) + { + context->xml_literal = rdfa_replace_string(context->xml_literal, "<"); + context->xml_literal_size = 1; + } + else + { + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, "<", 1); + } + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + name, strlen(name)); + + 
if(!context->xml_literal_namespaces_defined) + { + /* append namespaces to XML Literal */ +#ifdef LIBRDFA_IN_RAPTOR + raptor_namespace_stack* nstack = &context->sax2->namespaces; + raptor_namespace* ns; + raptor_namespace** ns_list = NULL; + size_t ns_size; +#else + void** umap = context->uri_mappings; +#endif + const char* umap_key = NULL; + void* umap_value = NULL; + + /* if the namespaces are not defined, then neither is the xml:lang */ + context->xml_literal_xml_lang_defined = 0; + +#ifdef LIBRDFA_IN_RAPTOR + ns_size = 0; + ns_list = raptor_namespace_stack_to_array(nstack, &ns_size); + qsort((void*)ns_list, ns_size, sizeof(raptor_namespace*), + raptor_nspace_compare); + + while(ns_size > 0) +#else + while(*umap != NULL) +#endif + { + unsigned char insert_xmlns_definition = 1; + const char* attr = NULL; + + /* get the next mapping to process */ +#ifdef LIBRDFA_IN_RAPTOR + ns=ns_list[--ns_size]; + + umap_key = (const char*)raptor_namespace_get_prefix(ns); + if(!umap_key) + umap_key=(const char*)XMLNS_DEFAULT_MAPPING; + umap_value = (char*)raptor_uri_as_string(raptor_namespace_get_uri(ns)); +#else + rdfa_next_mapping(umap++, &umap_key, &umap_value); + umap++; +#endif + + /* check to make sure that the namespace isn't already + * defined in the current element. */ + if(attributes != NULL) + { + const char** attrs = attributes; + while((*attrs != NULL) && insert_xmlns_definition) + { + attr = *attrs++; + + /* if the attribute is a umap_key, skip the definition + * of the attribute. */ + if(strcmp(attr, umap_key) == 0) + { + insert_xmlns_definition = 0; + } + } + } + + /* if the namespace isn't already defined on the element, + * copy it to the XML Literal string. 
*/ + if(insert_xmlns_definition) + { + /* append the namespace attribute to the XML Literal */ + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + " xmlns", strlen(" xmlns")); + + /* check to see if we're dumping the standard XHTML namespace or + * a user-defined XML namespace */ + if(strcmp(umap_key, XMLNS_DEFAULT_MAPPING) != 0) + { + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, ":", 1); + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + umap_key, strlen(umap_key)); + } + + /* append the namespace value */ + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, "=\"", 2); + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + (const char*)umap_value, strlen((char*)umap_value)); + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, "\"", 1); + } + + } /* end while umap not NULL */ + context->xml_literal_namespaces_defined = 1; + +#ifdef LIBRDFA_IN_RAPTOR + if(ns_list) + raptor_free_memory(ns_list); +#endif + } /* end if namespaces inserted */ + +#ifdef LIBRDFA_IN_RAPTOR + /* Raptor namespace code does this already */ +#else + /* 3. For backward compatibility, RDFa Processors should also permit the + * definition of mappings via @xmlns. In this case, the value to be mapped + * is set by the XML namespace prefix, and the value to map is the value of + * the attribute - an IRI. (Note that prefix mapping via @xmlns is + * deprecated, and may be removed in a future version of this + * specification.) When xmlns is supported, such mappings must be processed + * before processing any mappings from @prefix on the same element. 
*/ + if(namespaces != NULL) + { + int ni; + + for(ni = 0; ni < nb_namespaces * 2; ni += 2) + { + const char* ns = namespaces[ni]; + const char* value = namespaces[ni + 1]; + /* Regardless of how the mapping is declared, the value to be mapped + * must be converted to lower case, and the IRI is not processed in + * any way; in particular if it is a relative path it must not be + * resolved against the current base. */ + char* lcns = NULL; + if(ns != NULL) + { + /* convert the namespace string to lowercase */ + unsigned int i; + size_t ns_length = strlen(ns); + lcns = (char*)malloc(ns_length + 1); + for(i = 0; i <= ns_length; i++) + { + lcns[i] = tolower(ns[i]); + } + } + + /* update the URI mappings */ + rdfa_update_uri_mappings(context, lcns, value); + + if(lcns != NULL) + { + free(lcns); + } + } + } +#endif + + /* detect the RDFa version of the document, if specified */ + if(attributes != NULL) + { + int ci; + + /* search for a version attribute */ + for(ci = 0; ci < nb_attributes * 5; ci += 5) + { + const char* attr; + char* value; + size_t value_length = 0; + + attr = attributes[ci]; + value_length = attributes[ci + 4] - attributes[ci + 3] + 1; + + if(strcmp(attr, "version") == 0) + { + /* append the attribute-value pair to the XML literal */ + value = (char*)malloc(value_length + 1); + snprintf(value, value_length, "%s", attributes[ci + 3]); + if(strstr(value, "RDFa 1.0") != NULL) + { + context->rdfa_version = RDFA_VERSION_1_0; + } + else if(strstr(value, "RDFa 1.1") != NULL) + { + context->rdfa_version = RDFA_VERSION_1_1; + } + + free(value); + } + } + } + +#ifdef LIBRDFA_IN_RAPTOR + if(context->sax2) + { + /* Raptor handles xml:lang itself but not 'lang' */ + xml_lang = (char*)raptor_sax2_inscope_xml_language(context->sax2); + xml_lang = rdfa_replace_string(NULL, xml_lang); + } +#endif + + /* prepare all of the RDFa-specific attributes we are looking for. 
+ * scan all of the attributes for the RDFa-specific attributes */ + if(attributes != NULL) + { + int ci; + + if(context->rdfa_version == RDFA_VERSION_1_1) + { + /* process all vocab and prefix attributes */ + for(ci = 0; ci < nb_attributes * 5; ci += 5) + { + const char* attr; + char* value; + size_t value_length = 0; + + attr = attributes[ci]; + value_length = attributes[ci + 4] - attributes[ci + 3] + 1; + + /* append the attribute-value pair to the XML literal */ + value = (char*)malloc(value_length + 1); + snprintf(value, value_length, "%s", attributes[ci + 3]); + + /* 2. Next the current element is examined for any change to the + * default vocabulary via @vocab. */ + if(strcmp(attr, "vocab") == 0) + { + if(strlen(value) < 1) + { + /* If the value is empty, then the local default vocabulary + * must be reset to the Host Language defined default + * (if any). */ + free(context->default_vocabulary); + context->default_vocabulary = NULL; + } + else + { + char* resolved_uri; + rdftriple* triple; + + /* If @vocab is present and contains a value, the local + * default vocabulary is updated according to the + * section on CURIE and IRI Processing. */ + resolved_uri = rdfa_resolve_uri(context, value); + context->default_vocabulary = rdfa_replace_string( + context->default_vocabulary, resolved_uri); + + /* The value of @vocab is used to generate a triple */ + triple = rdfa_create_triple( + context->base, "http://www.w3.org/ns/rdfa#usesVocabulary", + resolved_uri, RDF_TYPE_IRI, NULL, NULL); + context->default_graph_triple_callback( + triple, context->callback_data); + + free(resolved_uri); + } + } + else if(strcmp(attr, "prefix") == 0) + { + /* Mappings are defined via @prefix. */ + char* working_string = NULL; + char* atprefix = NULL; + char* iri = NULL; + char* saveptr = NULL; + + working_string = rdfa_replace_string(working_string, value); + + /* Values in this attribute are evaluated from beginning to + * end (e.g., left to right in typical documents). 
*/ + atprefix = strtok_r(working_string, ":", &saveptr); + while(atprefix != NULL) + { + /* find the prefix and IRI mappings while skipping whitespace */ + while((*saveptr == ' ' || *saveptr == '\n' || + *saveptr == '\r' || *saveptr == '\t' || *saveptr == '\f' || + *saveptr == '\v') && *saveptr != '\0') + { + saveptr++; + } + iri = strtok_r(NULL, RDFA_WHITESPACE, &saveptr); + + /* update the prefix mappings */ + rdfa_update_uri_mappings(context, atprefix, iri); + + if(!saveptr) + break; + + while((*saveptr == ' ' || *saveptr == '\n' || + *saveptr == '\r' || *saveptr == '\t' || *saveptr == '\f' || + *saveptr == '\v') && *saveptr != '\0') + { + saveptr++; + } + + /* get the next prefix to process */ + atprefix = strtok_r(NULL, ":", &saveptr); + } + + free(working_string); + } + else if(strcmp(attr, "inlist") == 0) + { + context->inlist_present = 1; + } + free(value); + } + } + + /* resolve all of the other RDFa values */ + for(ci = 0; ci < nb_attributes * 5; ci += 5) + { + const char* attr; + char* value; + char* attrns; + char* literal_text; + size_t value_length = 0; + size_t literal_text_length; + + attr = attributes[ci]; + attrns = (char*)attributes[ci + 1]; + value_length = attributes[ci + 4] - attributes[ci + 3] + 1; + + /* append the attribute-value pair to the XML literal */ + value = (char*)malloc(value_length + 1); + snprintf(value, value_length, "%s", attributes[ci + 3]); + + literal_text_length = strlen(attr) + value_length + 4; + literal_text = (char*)malloc(literal_text_length + 1); + snprintf(literal_text, literal_text_length, " %s=\"%s\"", attr, value); + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + literal_text, strlen(literal_text)); + free(literal_text); + + /* if xml:lang is defined, ensure that it is not overwritten */ + if(attrns != NULL && strcmp(attrns, "xml") == 0 && + strcmp(attr, "lang") == 0) + { + context->xml_literal_xml_lang_defined = 1; + } + + /* process all of the RDFa attributes 
*/ + if(strcmp(attr, "about") == 0) + { + about_curie = value; + about = rdfa_resolve_curie( + context, about_curie, CURIE_PARSE_ABOUT_RESOURCE); + } + else if(strcmp(attr, "src") == 0) + { + src_curie = value; + src = rdfa_resolve_curie(context, src_curie, CURIE_PARSE_HREF_SRC); + } + else if(strcmp(attr, "typeof") == 0) + { + type_of_curie = value; + type_of = rdfa_resolve_curie_list( + context, type_of_curie, + CURIE_PARSE_INSTANCEOF_DATATYPE); + } + else if(strcmp(attr, "rel") == 0) + { + context->rel_present = 1; + rel_curie = value; + rel = rdfa_resolve_curie_list( + context, rel_curie, CURIE_PARSE_RELREV); + } + else if(strcmp(attr, "rev") == 0) + { + context->rev_present = 1; + rev_curie = value; + rev = rdfa_resolve_curie_list( + context, rev_curie, CURIE_PARSE_RELREV); + } + else if(strcmp(attr, "property") == 0) + { + property_curie = value; + property = + rdfa_resolve_curie_list( + context, property_curie, CURIE_PARSE_PROPERTY); + } + else if(strcmp(attr, "resource") == 0) + { + resource_curie = value; + resource = rdfa_resolve_curie( + context, resource_curie, CURIE_PARSE_ABOUT_RESOURCE); + } + else if(strcmp(attr, "href") == 0) + { + href_curie = value; + href = + rdfa_resolve_curie(context, href_curie, CURIE_PARSE_HREF_SRC); + } + else if(strcmp(attr, "content") == 0) + { + content = rdfa_replace_string(content, value); + } + else if(strcmp(attr, "datatype") == 0) + { + datatype_curie = value; + + if(strlen(datatype_curie) == 0) + { + datatype = rdfa_replace_string(datatype, ""); + } + else + { + datatype = rdfa_resolve_curie(context, datatype_curie, + CURIE_PARSE_INSTANCEOF_DATATYPE); + } + } + else if((attrns == NULL && strcmp(attr, "lang") == 0) || + (attrns != NULL && strcmp(attrns, "xml") == 0 && + strcmp(attr, "lang") == 0)) + { + xml_lang = rdfa_replace_string(xml_lang, value); + } + + free(value); + } + } + + /* The root element has an implicit @about declaration */ + if(context->depth == 1 && about == NULL && resource == NULL && + href == 
NULL && src == NULL) + { + about_curie = ""; + about = rdfa_resolve_curie( + context, about_curie, CURIE_PARSE_ABOUT_RESOURCE); + } + + /* The HEAD and BODY element in XHTML and HTML has an implicit + * about="" on it. + */ + if(about == NULL && resource == NULL && href == NULL && src == NULL && + (context->parent_subject == NULL || type_of != NULL) && + ((context->host_language == HOST_LANGUAGE_XHTML1 || + context->host_language == HOST_LANGUAGE_HTML) && + (strcasecmp(name, "head") == 0 || strcasecmp(name, "body") == 0))) + { + about_curie = ""; + about = rdfa_resolve_curie( + context, about_curie, CURIE_PARSE_ABOUT_RESOURCE); + } + + /* check to see if we should append an xml:lang to the XML Literal + * if one is defined in the context and does not exist on the + * element. */ + if((xml_lang == NULL) && (context->language != NULL) && + !context->xml_literal_xml_lang_defined) + { + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + " xml:lang=\"", strlen(" xml:lang=\"")); + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + context->language, strlen(context->language)); + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, "\"", 1); + + /* ensure that the lang isn't set in a subtree (unless it's overwritten) */ + context->xml_literal_xml_lang_defined = 1; + } + + /* close the XML Literal value */ + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, ">", 1); + + /* 3. 
The [current element] is also parsed for any language + * information, and [language] is set in the [current + * evaluation context]; */ + rdfa_update_language(context, xml_lang); + + /***************** FOR DEBUGGING PURPOSES ONLY ******************/ +#if defined(DEBUG) && DEBUG > 0 + printf("DEBUG: depth = %u\n", context->depth); + if(about != NULL) + { + printf("DEBUG: @about = %s\n", about); + } + if(src != NULL) + { + printf("DEBUG: @src = %s\n", src); + } + if(type_of != NULL) + { + printf("DEBUG: @type_of = "); + rdfa_print_list(type_of); + } + if(context->inlist_present) + { + printf("DEBUG: @inlist = true\n"); + } + if(rel != NULL) + { + printf("DEBUG: @rel = "); + rdfa_print_list(rel); + } + if(rev != NULL) + { + printf("DEBUG: @rev = "); + rdfa_print_list(rev); + } + if(property != NULL) + { + printf("DEBUG: @property = "); + rdfa_print_list(property); + } + if(resource != NULL) + { + printf("DEBUG: @resource = %s\n", resource); + } + if(href != NULL) + { + printf("DEBUG: @href = %s\n", href); + } + if(content != NULL) + { + printf("DEBUG: @content = %s\n", content); + } + if(datatype != NULL) + { + printf("DEBUG: @datatype = %s\n", datatype); + } + if(xml_lang != NULL) + { + printf("DEBUG: @xml:lang = %s\n", xml_lang); + } +#endif + + /* TODO: This isn't part of the processing model, it needs to be + * included and is a correction for the last item in step #4. */ + if((about == NULL) && (src == NULL) && (type_of == NULL) && + (rel == NULL) && (rev == NULL) && (property == NULL) && + (resource == NULL) && (href == NULL) && + (context->default_vocabulary == NULL) && (prefix == NULL)) + { + context->skip_element = 1; + } + + if((rel == NULL) && (rev == NULL)) + { + if(context->rdfa_version == RDFA_VERSION_1_0) + { + /* 4. If the [current element] contains no valid @rel or @rev + * URI, obtained according to the section on CURIE and URI + * Processing, then the next step is to establish a value for + * [new subject]. 
Any of the attributes that can carry a + * resource can set [new subject]; */ + rdfa_establish_new_1_0_subject( + context, name, about, src, resource, href, type_of); + } + else + { + rdfa_establish_new_1_1_subject( + context, name, about, src, resource, href, type_of, property, + content, datatype); + } + } + else + { + if(context->rdfa_version == RDFA_VERSION_1_0) + { + /* 5. If the [current element] does contain a valid @rel or @rev + * URI, obtained according to the section on CURIE and URI + * Processing, then the next step is to establish both a value + * for [new subject] and a value for [current object resource]: */ + rdfa_establish_new_1_0_subject_with_relrev( + context, name, about, src, resource, href, type_of); + } + else + { + rdfa_establish_new_1_1_subject_with_relrev( + context, name, about, src, resource, href, type_of); + } + } + + if(context->new_subject != NULL) + { +#if defined(DEBUG) && DEBUG > 0 + printf("DEBUG: new_subject = %s\n", context->new_subject); +#endif + + /* RDFa 1.0: 6. If in any of the previous steps a [new subject] was set + * to a non-null value, it is now used to provide a subject for + * type values; */ + /* RDFa 1.1: 7. If in any of the previous steps a typed resource was set + * to a non-null value, it is now used to provide a subject for type + * values; + */ + if(type_of != NULL) + { + rdfa_complete_type_triples(context, type_of); + } + + /* Note that none of this block is executed if there is no + * [new subject] value, i.e., [new subject] remains null. 
*/ + } + + if(context->current_object_resource != NULL) + { + /* If the element contains both the @inlist and the @rel attributes: + * the @rel may contain one or more resources, obtained according to + * the section on CURIE and IRI Processing each of which is used to + * add an entry to the list mapping as follows: + * if the local list mapping does not contain a list associated with + * the IRI, instantiate a new list and add to local list mappings + * add the current object resource to the list associated with the + * resource in the local list mapping */ + if(context->rdfa_version == RDFA_VERSION_1_1 && (rel != NULL) && + context->inlist_present) + { + rdfresource_t object_type = RDF_TYPE_IRI; + if((property != NULL) || (content != NULL)) + { + object_type = RDF_TYPE_PLAIN_LITERAL; + if(datatype != NULL) + { + object_type = RDF_TYPE_TYPED_LITERAL; + } + } + rdfa_establish_new_inlist_triples( + context, rel, context->current_object_resource, object_type); + } + + /* 7. If in any of the previous steps a [current object resource] + * was set to a non-null value, it is now used to generate triples */ + rdfa_complete_relrev_triples(context, rel, rev); + } + + if((context->current_object_resource == NULL) && + context->rdfa_version == RDFA_VERSION_1_1 && (rel != NULL) && + context->inlist_present) + { + rdfa_save_incomplete_list_triples(context, rel); + } + else if((context->current_object_resource == NULL) && + ((rel != NULL) || (rev != NULL))) + { + /* 8. If however [current object resource] was set to null, but + * there are predicates present, then they must be stored as + * [incomplete triple]s, pending the discovery of a subject that + * can be used as the object. Also, [current object resource] + * should be set to a newly created [bnode] */ + rdfa_save_incomplete_triples(context, rel, rev); + } + + /* Ensure to re-insert XML Literal namespace information from this + * point on... 
*/ + if(property != NULL) + { + context->xml_literal_namespaces_defined = 0; + } + + /* save these for processing steps #9 and #10 */ + context->about = rdfa_replace_string(context->about, about); + context->resource = rdfa_replace_string(context->resource, resource); + context->href = rdfa_replace_string(context->href, href); + context->src = rdfa_replace_string(context->src, src); + context->content = rdfa_replace_string(context->content, content); + context->datatype = rdfa_replace_string(context->datatype, datatype); + context->property = property; + + /* free the resolved CURIEs */ + free(about); + free(src); + rdfa_free_list(type_of); + rdfa_free_list(rel); + rdfa_free_list(rev); + free(xml_lang); + free(content); + free(resource); + free(href); + free(datatype); +} + +static void character_data( + void *parser_context, const xmlChar *s, int len) +{ + /*xmlParserCtxtPtr parser = (xmlParserCtxtPtr)parser_context;*/ + rdfalist* context_stack = + (rdfalist*)((rdfacontext*)parser_context)->context_stack; + rdfacontext* context = (rdfacontext*) + context_stack->items[context_stack->num_items - 1]->data; + + char *buffer = (char*)malloc(len + 1); + memset(buffer, 0, len + 1); + memcpy(buffer, s, len); + + /* append the text to the current context's plain literal */ + if(context->plain_literal == NULL) + { + context->plain_literal = + rdfa_replace_string(context->plain_literal, buffer); + context->plain_literal_size = len; + } + else + { + context->plain_literal = rdfa_n_append_string( + context->plain_literal, + &context->plain_literal_size, buffer, len); + } + + /* append the text to the current context's XML literal */ + if(context->xml_literal == NULL) + { + context->xml_literal = + rdfa_replace_string(context->xml_literal, buffer); + context->xml_literal_size = len; + } + else + { + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, buffer, len); + } + + /*printf("plain_literal: %s\n", context->plain_literal);*/ + 
/*printf("xml_literal: %s\n", context->xml_literal);*/ + + free(buffer); +} + +static void end_element(void* parser_context, const char* name, + const char* prefix,const xmlChar* URI) +{ + /*xmlParserCtxtPtr parser = (xmlParserCtxtPtr)parser_context;*/ + rdfalist* context_stack = + (rdfalist*)((rdfacontext*)parser_context)->context_stack; + rdfacontext* context = (rdfacontext*)rdfa_pop_item(context_stack); + rdfacontext* parent_context = (rdfacontext*) + context_stack->items[context_stack->num_items - 1]->data; + + /* append the text to the current context's XML literal */ + size_t name_len = strlen(name); + char* buffer = (char*)malloc(name_len + 3 + 1); + +#if defined(DEBUG) && DEBUG > 0 + printf("DEBUG: </%s>\n", name); + printf("context->local_list_mappings (start of end_element): "); + rdfa_print_mapping(context->local_list_mappings, + (print_mapping_value_fp)rdfa_print_triple_list); +#endif + + buffer[0] = '<'; + buffer[1] = '/'; + memcpy(buffer + 2, name, name_len); + buffer[name_len + 2] = '>'; + buffer[name_len + 3] = '\0'; + if(context->xml_literal == NULL) + { + context->xml_literal = + rdfa_replace_string(context->xml_literal, buffer); + context->xml_literal_size = strlen(buffer); + } + else + { + context->xml_literal = rdfa_n_append_string( + context->xml_literal, &context->xml_literal_size, + buffer, strlen(buffer)); + } + free(buffer); + + /* 9. The next step of the iteration is to establish any + * [current object literal]; */ + + /* generate the complete object literal triples */ + if(context->property != NULL) + { + /* save the current xml literal */ + char* saved_xml_literal = context->xml_literal; + char* content_start = NULL; + char* content_end = NULL; + + /* ensure to mark only the inner-content of the XML node for + * processing the object literal. 
*/ + buffer = NULL; + + if(context->xml_literal != NULL) + { + /* get the data between the first tag and the last tag */ + content_start = strchr(context->xml_literal, '>'); + content_end = strrchr(context->xml_literal, '<'); + + if((content_start != NULL) && (content_end != NULL)) + { + /* set content end to null terminator */ + context->xml_literal = ++content_start; + *content_end = '\0'; + } + } + + /* update the plain literal if the XML Literal is an empty string */ + if(context->xml_literal != NULL && strlen(context->xml_literal) == 0) + { + context->plain_literal = + rdfa_replace_string(context->plain_literal, ""); + } + + /* process data between first tag and last tag + * this needs the xml literal to be null terminated */ + if(context->rdfa_version == RDFA_VERSION_1_0) + { + rdfa_complete_object_literal_triples(context); + } + else + { + rdfa_complete_current_property_value_triples(context); + } + + if(content_end != NULL) + { + /* set content end back */ + *content_end = '<'; + } + + if(saved_xml_literal != NULL) + { + /* restore xml literal */ + context->xml_literal = saved_xml_literal; + } + } + + /*printf(context->plain_literal);*/ + + if(parent_context != NULL) { + /* append the XML literal and plain text literals to the parent + * literals */ + if(context->xml_literal != NULL) + { + if(parent_context->xml_literal == NULL) + { + parent_context->xml_literal = + rdfa_replace_string( + parent_context->xml_literal, context->xml_literal); + parent_context->xml_literal_size = context->xml_literal_size; + } + else + { + parent_context->xml_literal = + rdfa_n_append_string( + parent_context->xml_literal, + &parent_context->xml_literal_size, + context->xml_literal, context->xml_literal_size); + } + + /* if there is an XML literal, there is probably a plain literal */ + if(context->plain_literal != NULL) + { + if(parent_context->plain_literal == NULL) + { + parent_context->plain_literal = + rdfa_replace_string( + parent_context->plain_literal, 
context->plain_literal); + parent_context->plain_literal_size = + context->plain_literal_size; + } + else + { + parent_context->plain_literal = + rdfa_n_append_string( + parent_context->plain_literal, + &parent_context->plain_literal_size, + context->plain_literal, + context->plain_literal_size); + } + } + } + + /* preserve the bnode count by copying it to the parent_context */ + parent_context->bnode_count = context->bnode_count; + parent_context->underscore_colon_bnode_name = \ + rdfa_replace_string(parent_context->underscore_colon_bnode_name, + context->underscore_colon_bnode_name); + } + + /* 10. If the [ skip element ] flag is 'false', and [ new subject ] + * was set to a non-null value, then any [ incomplete triple ]s + * within the current context should be completed: */ + if((context->skip_element == 0) && (context->new_subject != NULL)) + { + rdfa_complete_incomplete_triples(context); + } + + /* 14. Once all the child elements have been traversed, list triples are + * generated, if necessary. 
*/ + if(context->rdfa_version == RDFA_VERSION_1_1 && (context->new_subject != NULL)) + { + rdfa_complete_list_triples(context); + + if(parent_context != NULL) + { + /* copy the current mapping to the parent mapping */ + rdfa_free_mapping(parent_context->local_list_mappings, + (free_mapping_value_fp)rdfa_free_list); + parent_context->local_list_mappings = rdfa_copy_mapping( + (void**)context->local_list_mappings, + (copy_mapping_value_fp)rdfa_replace_list); + +#if defined(DEBUG) && DEBUG > 0 + printf("parent_context->local_list_mappings (after copy): "); + rdfa_print_mapping(context->local_list_mappings, + (print_mapping_value_fp)rdfa_print_triple_list); +#endif + rdfa_free_mapping(context->local_list_mappings, + (free_mapping_value_fp)rdfa_free_list); + context->local_list_mappings = NULL; + } + } + + /* free the context */ + rdfa_free_context(context); + +#if defined(DEBUG) && DEBUG > 0 + printf("-------------------------------------------------------------\n"); +#endif +} + +void rdfa_set_default_graph_triple_handler( + rdfacontext* context, triple_handler_fp th) +{ + context->default_graph_triple_callback = th; +} + +void rdfa_set_processor_graph_triple_handler( + rdfacontext* context, triple_handler_fp th) +{ + context->processor_graph_triple_callback = th; +} + +void rdfa_set_buffer_filler(rdfacontext* context, buffer_filler_fp bf) +{ + context->buffer_filler_callback = bf; +} + +#ifdef LIBRDFA_IN_RAPTOR +/* Raptor reports its errors a different way */ +#else +static void rdfa_report_error(void* parser_context, char* msg, ...) 
+{ + char error[1024]; + char* eptr; + va_list args; + rdfacontext* context = (rdfacontext*)parser_context; + + /* format the error message */ + va_start(args, msg); + vsprintf(error, msg, args); + va_end(args); + + /* Remove any newlines from the libxml2 error */ + eptr = error; + while(*eptr != '\0') + { + if(*eptr == '\n') + { + *eptr = '.'; + } + eptr++; + } + + /* Generate the processor error */ + rdfa_processor_triples(context, RDFA_PROCESSOR_ERROR, error); +} +#endif + +#ifdef LIBRDFA_IN_RAPTOR + +static void raptor_rdfa_start_element(void *user_data, + raptor_xml_element *xml_element) +{ + raptor_qname* qname = raptor_xml_element_get_name(xml_element); + int nb_attributes = raptor_xml_element_get_attributes_count(xml_element); + raptor_qname** attrs = raptor_xml_element_get_attributes(xml_element); + unsigned char* localname = raptor_qname_to_counted_name(qname, NULL); + const raptor_namespace* qname_ns = raptor_qname_get_namespace(qname); + int nb_namespaces = 0; + const char** namespaces = NULL; + int nb_defaulted = 0; + char** attr = NULL; + int i; + const char* ns_name = NULL; + const char* ns_uri = NULL; + + if(nb_attributes > 0) { + /* Everything written into 'attr' is a shared pointer into + * xml_element or contained objects - qnames, namespaces, uris + * and values + */ + attr = (char**)malloc(sizeof(char*) * (1 + (nb_attributes * 5))); + for(i = 0; i < nb_attributes; i++) { + const raptor_namespace* attr_ns = attrs[i]->nspace; + char** attri = &attr[5 * i]; + /* 5 tuple: (localname, prefix, URI, value, end) */ + attri[0] = (char*)attrs[i]->local_name; + attri[1] = attr_ns ? (char*)attr_ns->prefix : NULL; + attri[2] = attr_ns ? 
(char*)raptor_uri_as_string(attr_ns->uri) : NULL; + attri[3] = (char*)attrs[i]->value; + attri[4] = attri[3] + attrs[i]->value_length; + } + attr[5 * i] = NULL; + } + +/* + * @ctx: the user data (XML parser context) + * @localname: the local name of the element + * @prefix: the element namespace prefix if available + * @URI: the element namespace name if available + * @nb_namespaces: number of namespace definitions on that node + * @namespaces: pointer to the array of prefix/URI pairs namespace definitions + * @nb_attributes: the number of attributes on that node + * @nb_defaulted: the number of defaulted attributes. The defaulted + * ones are at the end of the array + * @attributes: pointer to the array of (localname/prefix/URI/value/end) + * attribute values. + */ + if(qname_ns) { + ns_name = (const char*)raptor_namespace_get_prefix(qname_ns); + ns_uri = (const char*)raptor_uri_as_string(qname_ns->uri); + } + + start_element(user_data, (const char*)localname, + ns_name, + ns_uri, + nb_namespaces, + (const char**)namespaces, + nb_attributes, + nb_defaulted, + (const char**)attr); + if(attr) + free(attr); + raptor_free_memory(localname); +} + +static void raptor_rdfa_end_element(void *user_data, + raptor_xml_element* xml_element) +{ + raptor_qname* qname = raptor_xml_element_get_name(xml_element); + unsigned char* localname = raptor_qname_to_counted_name(qname, NULL); + const raptor_namespace* qname_ns = raptor_qname_get_namespace(qname); + + if(qname_ns) + end_element(user_data, (const char*)localname, + (const char*)qname_ns->prefix, + (const xmlChar*)raptor_uri_as_string(qname_ns->uri)); + else + end_element(user_data, (const char*)localname, NULL, NULL); + + raptor_free_memory(localname); +} + +static void raptor_rdfa_character_data(void *user_data, + raptor_xml_element* xml_element, + const unsigned char *s, int len) +{ + character_data(user_data, (const xmlChar *)s, len); +} + +static void raptor_rdfa_namespace_handler(void *user_data, + raptor_namespace* 
nspace) +{ + rdfacontext* context = (rdfacontext*)user_data; + + if(context->namespace_handler) + (*context->namespace_handler)(context->namespace_handler_user_data, + nspace); +} + +#endif + +int rdfa_parse_start(rdfacontext* context) +{ + /* create the buffers and expat parser */ + int rval = RDFA_PARSE_SUCCESS; + + context->wb_allocated = sizeof(char) * READ_BUFFER_SIZE; + /* +1 for NUL at end, to allow strstr() etc. to work + * malloc - only the first char needs to be NUL */ + context->working_buffer = (char*)malloc(context->wb_allocated + 1); + *context->working_buffer = '\0'; + context->done = 0; + context->context_stack = rdfa_create_list(32); + + /* initialize the context stack */ + rdfa_push_item(context->context_stack, context, RDFALIST_FLAG_CONTEXT); + +#ifdef LIBRDFA_IN_RAPTOR + context->sax2 = raptor_new_sax2(context->world, context->locator, + context); +#else + /* init libxml2 */ + xmlInitParser(); +#endif + + /* set up the context stack */ +#ifdef LIBRDFA_IN_RAPTOR + raptor_sax2_set_start_element_handler(context->sax2, + raptor_rdfa_start_element); + raptor_sax2_set_end_element_handler(context->sax2, + raptor_rdfa_end_element); + raptor_sax2_set_characters_handler(context->sax2, + raptor_rdfa_character_data); + raptor_sax2_set_namespace_handler(context->sax2, + raptor_rdfa_namespace_handler); +#endif + + rdfa_init_context(context); + +#ifdef LIBRDFA_IN_RAPTOR + context->base_uri = raptor_new_uri(context->sax2->world, + (const unsigned char*)context->base); + raptor_sax2_parse_start(context->sax2, context->base_uri); +#endif + + return rval; +} + +int rdfa_parse_chunk(rdfacontext* context, char* data, size_t wblen, int done) +{ +#ifdef LIBRDFA_IN_RAPTOR +#else + xmlSAXHandler handler; + xmlParserCtxtPtr parser; +#endif + + /* it is an error to call this before rdfa_parse_start() */ + if(context->done) + { + return RDFA_PARSE_FAILED; + } + + if(!context->preread) + { + /* search for the <base> tag and use the href contained therein to + * set the 
parsing context. */ + context->wb_preread = rdfa_init_base(context, + &context->working_buffer, &context->wb_allocated, data, wblen); + + /* continue looking if in first 131072 bytes of data */ + if(!context->base && context->wb_preread < (1<<17)) + return RDFA_PARSE_SUCCESS; + +#ifdef LIBRDFA_IN_RAPTOR + /* term mappings are needed before SAX2 parsing */ + rdfa_setup_initial_context(context); + + if(raptor_sax2_parse_chunk(context->sax2, + (const unsigned char*)context->working_buffer, + context->wb_position, done)) + { + return RDFA_PARSE_FAILED; + } +#else + /* create the SAX2 handler structure */ + memset(&handler, 0, sizeof(xmlSAXHandler)); + handler.initialized = XML_SAX2_MAGIC; + handler.startElementNs = (startElementNsSAX2Func)start_element; + handler.endElementNs = (endElementNsSAX2Func)end_element; + handler.characters = (charactersSAXFunc)character_data; + handler.error = (errorSAXFunc)rdfa_report_error; + + /* create a push-based parser */ + parser = xmlCreatePushParserCtxt( + &handler, context, (const char*)context->working_buffer, + context->wb_position, NULL); + + /* ensure that entity substitution is turned on by default */ + xmlSubstituteEntitiesDefault(1); + + context->parser = parser; + + rdfa_setup_initial_context(context); +#endif + + context->preread = 1; + + return RDFA_PARSE_SUCCESS; + } + + /* otherwise just parse the block passed in */ +#ifdef LIBRDFA_IN_RAPTOR + if(raptor_sax2_parse_chunk(context->sax2, + (const unsigned char*)data, wblen, done)) + { + return RDFA_PARSE_FAILED; + } +#else + if(xmlParseChunk(context->parser, data, wblen, done)) + { + return RDFA_PARSE_FAILED; + } +#endif + + return RDFA_PARSE_SUCCESS; +} + +void rdfa_parse_end(rdfacontext* context) +{ + /* free context stack */ + rdfa_free_context_stack(context); + + /* Free the expat parser and the like */ +#ifdef LIBRDFA_IN_RAPTOR + if(context->base_uri) + raptor_free_uri(context->base_uri); + raptor_free_sax2(context->sax2); + context->sax2=NULL; +#else + /* free parser 
*/ + xmlFreeParserCtxt(context->parser); + xmlCleanupParser(); +#endif +} + +char* rdfa_get_buffer(rdfacontext* context, size_t* blen) +{ + *blen = context->wb_allocated; + return context->working_buffer; +} + +int rdfa_parse_buffer(rdfacontext* context, size_t bytes) +{ + int rval; + int done; + done = (bytes == 0); + rval = rdfa_parse_chunk(context, context->working_buffer, bytes, done); + context->done = done; + return rval; +} + +int rdfa_parse(rdfacontext* context) +{ + int rval; + + rval = rdfa_parse_start(context); + if(rval != RDFA_PARSE_SUCCESS) + { + context->done = 1; + return rval; + } + + do + { + size_t wblen; + int done; + + wblen = context->buffer_filler_callback( + context->working_buffer, context->wb_allocated, + context->callback_data); + done = (wblen == 0); + + rval = rdfa_parse_chunk(context, context->working_buffer, wblen, done); + context->done=done; + } + while(!context->done && rval == RDFA_PARSE_SUCCESS); + + rdfa_parse_end(context); + + return rval; +} diff --git a/librdfa/rdfa.h b/librdfa/rdfa.h new file mode 100644 index 0000000..7c9cfb2 --- /dev/null +++ b/librdfa/rdfa.h @@ -0,0 +1,377 @@ +/** + * Copyright 2008-2010 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * You should have received a copy of the GNU Lesser General Public + * License along with librdfa. If not, see <http://www.gnu.org/licenses/>. + * + * The librdfa library is the Fastest RDFa Parser in the Universe. 
It is + * a stream parser, meaning that it takes XML data as input and spits + * out RDF triples as it comes across them in the stream. Due to this + * processing approach, librdfa has a very, very small memory footprint. + * It is also very fast and can operate on hundreds of gigabytes of XML + * data without breaking a sweat. + * + * Usage: + * + * rdfacontext* context = rdfa_create_context(base_uri); + * context->callback_data = your_user_data; + * rdfa_set_default_graph_triple_handler(context, triple_function); + * rdfa_set_buffer_filler(context, buffer_filler_function); + * rdfa_parse(context); + * rdfa_free_context(context); + * + * If you would like to get warnings/error triples from the processor graph: + * + * rdfa_set_processor_graph_triple_handler(context, triple_function); + * + * Usage if you need more control over when to fill rdfa's buffer: + * + * rdfacontext* context = rdfa_create_context(base_uri); + * context->callback_data = your_user_data; + * rdfa_set_default_graph_triple_handler(context, triple_function); + * int rval = rdfa_parse_start(context); + * if(rval == RDFA_PARSE_SUCCESS) + * { + * FILE* myfile = fopen("myfilename", "r"); + * size_t buf_len = 0; + * size_t read = 0; + * do + * { + * char* buf = rdfa_get_buffer(context, &buf_len); + * if(buf_len > 0) + * { + * // fill buffer here up to buf_len bytes from your input stream + * read = fread(buf, sizeof(char), buf_len, myfile); + * } + * + * // parse the read data + * rdfa_parse_buffer(context, read); + * } + * while(read > 0); + * fclose(myfile); + * + * rdfa_parse_end(context); + * } + * rdfa_free_context(context); + * + */ +#ifndef _LIBRDFA_RDFA_H_ +#define _LIBRDFA_RDFA_H_ +#include <stdlib.h> +#include <libxml/SAX2.h> + +/* Activate the stupid Windows DLL exporting mechanism if we're building for Windows */ +#ifdef WIN32 +#define DLLEXPORT __declspec(dllexport) +#else +#define DLLEXPORT +#endif + +#ifdef LIBRDFA_IN_RAPTOR +#include "raptor2.h" +#include "raptor_internal.h" +#endif /* LIBRDFA_IN_RAPTOR */ + +#ifdef __cplusplus 
+extern "C" +{ +#endif + +#define DEBUG 0 + +/* RDFa version numbers */ +#define RDFA_VERSION_1_0 1 +#define RDFA_VERSION_1_1 2 + +/* parse process return types */ +#define RDFA_PARSE_WARNING -2 +#define RDFA_PARSE_FAILED -1 +#define RDFA_PARSE_UNKNOWN 0 +#define RDFA_PARSE_SUCCESS 1 + +/* maximum list lengths */ +#define MAX_LOCAL_LIST_MAPPINGS 32 +#define MAX_LIST_MAPPINGS 48 +#define MAX_LIST_ITEMS 16 +#define MAX_TERM_MAPPINGS 64 +#define MAX_URI_MAPPINGS 128 +#define MAX_INCOMPLETE_TRIPLES 128 + +/* host language definitions */ +#define HOST_LANGUAGE_NONE 0 +#define HOST_LANGUAGE_XML1 1 +#define HOST_LANGUAGE_XHTML1 2 +#define HOST_LANGUAGE_HTML 3 + +/* default mapping key for xmlns */ +#define XMLNS_DEFAULT_MAPPING "XMLNS_DEFAULT" + +/* whitespace characters for RDFa Core 1.1 */ +#define RDFA_WHITESPACE " \t\n\v\f\r" + +/** + * An RDF resource type is used to denote the content of a triple's + * object value. + */ +typedef enum +{ + RDF_TYPE_NAMESPACE_PREFIX, + RDF_TYPE_IRI, + RDF_TYPE_PLAIN_LITERAL, + RDF_TYPE_XML_LITERAL, + RDF_TYPE_TYPED_LITERAL, + RDF_TYPE_UNKNOWN +} rdfresource_t; + +/** + * An RDF triple is the result of an RDFa statement that contains, at + * the very least, a subject, a predicate and an object. It is the + * smallest, complete statement one can make in RDF. + */ +typedef struct rdftriple +{ + char* subject; + char* predicate; + char* object; + rdfresource_t object_type; + char* datatype; + char* language; +} rdftriple; + +/** + * The specification for a callback that is capable of handling + * triples. Produces a triple that must be freed once the application + * is done with the object. + */ +typedef void (*triple_handler_fp)(rdftriple*, void*); + +/** + * The specification for a callback that is used to fill the input buffer + * with data to parse. + */ +typedef size_t (*buffer_filler_fp)(char*, size_t, void*); + +/** + * An RDFA list item is used to hold each datum in an rdfa list. 
It
 * contains a list of flags as well as the data for the list member.
 */
typedef struct rdfalistitem
{
   unsigned char flags; /* bitwise OR of RDFALIST_FLAG_* values */
   void* data;
} rdfalistitem;

/**
 * An RDFa list is used to store multiple text strings that have a set
 * of attributes associated with them. These can be lists of CURIEs,
 * or lists of incomplete triples. The structure grows with use, but
 * cannot be shrunk.
 */
typedef struct rdfalist
{
   rdfalistitem** items;   /* array of item pointers, max_items slots long */
   size_t num_items;       /* number of slots currently in use */
   size_t max_items;       /* number of slots currently allocated */
   unsigned int user_data; /* caller-defined value stored with the list */
} rdfalist;

/**
 * The RDFa Parser structure is responsible for keeping track of the state of
 * the current RDFa parser. Things such as the default namespace,
 * CURIE mappings, and other context-specific state are kept here.
 */
typedef struct rdfacontext
{
   unsigned char rdfa_version;
   char* base;
   char* parent_subject;
   char* parent_object;
   char* default_vocabulary;
#ifndef LIBRDFA_IN_RAPTOR
   void** uri_mappings;
#endif
   void** term_mappings;
   void** list_mappings;
   void** local_list_mappings;
   rdfalist* incomplete_triples;
   rdfalist* local_incomplete_triples;
   char* language;
   unsigned char host_language;

   triple_handler_fp default_graph_triple_callback;
   buffer_filler_fp buffer_filler_callback;
   triple_handler_fp processor_graph_triple_callback;

   unsigned char recurse;
   unsigned char skip_element;
   char* new_subject;
   char* current_object_resource;

   char* about;
   char* typed_resource;
   char* resource;
   char* href;
   char* src;
   char* content;
   char* datatype;
   rdfalist* property;
   unsigned char inlist_present;
   unsigned char rel_present;
   unsigned char rev_present;
   char* plain_literal;
   size_t plain_literal_size;
   char* xml_literal;
   size_t xml_literal_size;

   void* callback_data;

   /* parse state */
   size_t bnode_count;
   char* underscore_colon_bnode_name;
   unsigned char xml_literal_namespaces_defined;
   unsigned char xml_literal_xml_lang_defined;
   size_t wb_allocated;
   char* working_buffer;
   size_t wb_position;
#ifdef LIBRDFA_IN_RAPTOR
   raptor_world *world;
   raptor_locator *locator;
   /* a pointer (in every context) to the error_handlers structure
    * held in the raptor_parser object */
   raptor_uri* base_uri;
   raptor_sax2* sax2;
   raptor_namespace_handler namespace_handler;
   void* namespace_handler_user_data;
   int raptor_rdfa_version; /* 10 or 11 or otherwise default */
#else
   xmlParserCtxtPtr parser;
#endif
   int done;
   rdfalist* context_stack;
   size_t wb_preread;
   int preread;
   int depth;
} rdfacontext;

/**
 * Creates an initial context for RDFa.
 *
 * @param base The base URI that should be used for the parser.
 *
 * @return a pointer to the base RDFa context, or NULL if memory
 *         allocation failed.
 */
DLLEXPORT rdfacontext* rdfa_create_context(const char* base);

/**
 * Sets the default graph triple handler for the application.
 *
 * @param context the base rdfa context for the application.
 * @param th the triple handler function.
 */
DLLEXPORT void rdfa_set_default_graph_triple_handler(
   rdfacontext* context, triple_handler_fp th);

/**
 * Sets the processor graph triple handler for the application.
 *
 * @param context the base rdfa context for the application.
 * @param th the triple handler function.
 */
DLLEXPORT void rdfa_set_processor_graph_triple_handler(
   rdfacontext* context, triple_handler_fp th);

/**
 * Sets the buffer filler for the application.
 *
 * @param context the base rdfa context for the application.
 * @param bf the buffer filler function.
 */
DLLEXPORT void rdfa_set_buffer_filler(
   rdfacontext* context, buffer_filler_fp bf);

/**
 * Starts processing given the base rdfa context.
 *
 * @param context the base rdfa context.
 *
 * @return RDFA_PARSE_SUCCESS if everything went well. RDFA_PARSE_FAILED
 *         if there was a fatal error and RDFA_PARSE_WARNING if there
 *         was a non-fatal error.
+ */ +DLLEXPORT int rdfa_parse(rdfacontext* context); + +DLLEXPORT int rdfa_parse_start(rdfacontext* context); + +DLLEXPORT int rdfa_parse_chunk( + rdfacontext* context, char* data, size_t wblen, int done); + +/** + * Gets the input buffer for the given context so it can be filled with data. + * A pointer to the buffer will be returned and the maximum number of bytes + * that can be written to that buffer will be set to the blen parameter. Once + * data has been written to the buffer, rdfa_parse_buffer() should be called. + * + * @param context the base rdfa context. + * @param blen the variable to set to the buffer length. + * + * @return a pointer to the context's input buffer. + */ +DLLEXPORT char* rdfa_get_buffer(rdfacontext* context, size_t* blen); + +/** + * Informs the parser to attempt to parse more of the given context's input + * buffer. To fill the input buffer with data, call rdfa_get_buffer(). + * + * If any of the input buffer can be parsed, it will be. It is possible + * that none of the data will be parsed, in which case this function will + * still return RDFA_PARSE_SUCCESS. More data should be written to the input + * buffer using rdfa_get_buffer() as it is made available to the application. + * Once there is no more data to write, rdfa_parse_end() should be called. + * + * @param context the base rdfa context. + * @param bytes the number of bytes written to the input buffer via the last + * call to rdfa_get_buffer(), a value of 0 will indicate that there + * is no more data to parse. + * + * @return RDFA_PARSE_SUCCESS if everything went well. RDFA_PARSE_FAILED + * if there was a fatal error and RDFA_PARSE_WARNING if there + * was a non-fatal error. 
+ */ +DLLEXPORT int rdfa_parse_buffer(rdfacontext* context, size_t bytes); + +DLLEXPORT void rdfa_parse_end(rdfacontext* context); + +DLLEXPORT void rdfa_init_context(rdfacontext* context); + +DLLEXPORT char* rdfa_iri_get_base(const char* iri); + +/** + * Destroys the given rdfa context by freeing all memory associated + * with the context. + * + * @param context the rdfa context. + */ +DLLEXPORT void rdfa_free_context(rdfacontext* context); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/librdfa/rdfa_utils.c b/librdfa/rdfa_utils.c new file mode 100644 index 0000000..a2e65f2 --- /dev/null +++ b/librdfa/rdfa_utils.c @@ -0,0 +1,543 @@ +/* + * Copyright 2008-2011 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * You should have received a copy of the GNU Lesser General Public + * License along with librdfa. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "rdfa_utils.h" +#include "rdfa.h" +#include "strtok_r.h" + +#define RDFA_WHITESPACE_CHARACTERS " \a\b\t\n\v\f\r" + +char* rdfa_join_string(const char* prefix, const char* suffix) +{ + char* rval = NULL; + size_t prefix_size = strlen(prefix); + size_t suffix_size = strlen(suffix); + rval = (char*)malloc(prefix_size + suffix_size + 1); + + memcpy(rval, prefix, prefix_size); + memcpy(rval+prefix_size, suffix, suffix_size + 1); + + return rval; +} + +char* rdfa_n_append_string( + char* old_string, size_t* string_size, + const char* suffix, size_t suffix_size) +{ + char* rval = NULL; + rval = (char*)realloc(old_string, *string_size + suffix_size + 1); + memcpy(rval + *string_size, suffix, suffix_size + 1); + *string_size = *string_size + suffix_size; + return rval; +} + +char* rdfa_replace_string(char* old_string, const char* new_string) +{ + char* rval = NULL; + + if(new_string != NULL) + { + /* free the memory associated with the old string */ + free(old_string); + + /* copy the new string */ + rval = rdfa_strdup(new_string); + } + + return rval; +} + +char* rdfa_canonicalize_string(const char* str) +{ + char* rval = (char*)malloc(sizeof(char) * (strlen(str) + 2)); + char* working_string = NULL; + char* token = NULL; + char* wptr = NULL; + char* offset = rval; + + working_string = rdfa_replace_string(working_string, str); + + /* split on any whitespace character that we may find */ + token = strtok_r(working_string, RDFA_WHITESPACE_CHARACTERS, &wptr); + while(token != NULL) + { + size_t token_length = strlen(token); + memcpy(offset, token, token_length); + offset += token_length; + *offset++ = ' '; + *offset = '\0'; + + token = strtok_r(NULL, RDFA_WHITESPACE_CHARACTERS, &wptr); + } + + if(offset != rval) + { + offset--; + *offset = '\0'; + } + + free(working_string); + + return rval; +} + +rdfalist* rdfa_create_list(size_t size) +{ + 
rdfalist* rval = (rdfalist*)malloc(sizeof(rdfalist)); + + rval->max_items = size; + rval->num_items = 0; + rval->items = (rdfalistitem**)malloc( + sizeof(rdfalistitem*) * rval->max_items); + + return rval; +} + +rdfalist* rdfa_replace_list(rdfalist* old_list, rdfalist* new_list) +{ + rdfalist* rval = NULL; + + if(new_list != NULL) + { + /* free the memory associated with the old list */ + rdfa_free_list(old_list); + + /* copy the new list */ + rval = rdfa_copy_list(new_list); + } + + return rval; +} + +rdfalist* rdfa_copy_list(rdfalist* list) +{ + rdfalist* rval = NULL; + + if(list != NULL) + { + unsigned int i; + rval = rdfa_create_list(list->max_items); + + /* copy the base list variables over */ + rval->num_items = list->num_items; + rval->user_data = list->user_data; + + /* copy the data of every list member along with all of the flags + * for each list member. */ + for(i = 0; i < list->max_items; i++) + { + if(i < list->num_items) + { + rval->items[i] = (rdfalistitem*)malloc(sizeof(rdfalistitem)); + rval->items[i]->data = NULL; + rval->items[i]->flags = list->items[i]->flags; + + /* copy specific data type */ + if(list->items[i]->flags & RDFALIST_FLAG_TEXT) + { + rval->items[i]->data = rdfa_strdup((char*)list->items[i]->data); + } + else if(list->items[i]->flags & RDFALIST_FLAG_TRIPLE) + { + rdftriple* t = (rdftriple*)list->items[i]->data; + rval->items[i]->data = + rdfa_create_triple(t->subject, t->predicate, t->object, + t->object_type, t->datatype, t->language); + } + else if(list->items[i]->flags & RDFALIST_FLAG_CONTEXT) + { + /* TODO: Implement the copy for context, if it is needed. 
*/ + } + } + else + { + rval->items[i] = NULL; + } + } + } + + return rval; +} + +void rdfa_print_list(rdfalist* list) +{ + unsigned int i; + + printf("[ "); + + for(i = 0; i < list->num_items; i++) + { + if(i != 0) + { + printf(", "); + } + + puts((const char*)list->items[i]->data); + } + + printf(" ]\n"); +} + +void rdfa_print_triple_list(rdfalist* list) +{ + unsigned int i; + + if(list != NULL) + { + printf("[ "); + + for(i = 0; i < list->num_items; i++) + { + if(i != 0) + { + printf(", "); + } + + rdfa_print_triple((rdftriple*)list->items[i]->data); + } + + printf(" ]\n"); + } + else + { + printf("NULL\n"); + } +} + +void rdfa_free_list(rdfalist* list) +{ + if(list != NULL) + { + unsigned int i; + for(i = 0; i < list->num_items; i++) + { + if(list->items[i]->flags & RDFALIST_FLAG_TEXT) + { + free(list->items[i]->data); + } + else if(list->items[i]->flags & RDFALIST_FLAG_TRIPLE) + { + rdftriple* t = (rdftriple*)list->items[i]->data; + rdfa_free_triple(t); + } + + free(list->items[i]); + } + + free(list->items); + free(list); + } +} + +void rdfa_push_item(rdfalist* stack, void* data, liflag_t flags) +{ + rdfa_add_item(stack, data, flags); +} + +void* rdfa_pop_item(rdfalist* stack) +{ + void* rval = NULL; + + if(stack->num_items > 0) + { + --stack->num_items; + rval = stack->items[stack->num_items]->data; + free(stack->items[stack->num_items]); + stack->items[stack->num_items] = NULL; + } + + return rval; +} + +void rdfa_add_item(rdfalist* list, void* data, liflag_t flags) +{ + rdfalistitem* item; + + if(!list) + return; + + item = (rdfalistitem*)malloc(sizeof(rdfalistitem)); + + item->data = NULL; + + if((flags & RDFALIST_FLAG_CONTEXT) || (flags & RDFALIST_FLAG_TRIPLE)) + { + item->data = data; + } + else + { + item->data = (char*)rdfa_replace_string( + (char*)item->data, (const char*)data); + } + + item->flags = flags; + + if(list->num_items == list->max_items) + { + list->max_items = 1 + (list->max_items * 2); + list->items = (rdfalistitem**)realloc( + 
list->items, sizeof(rdfalistitem*) * list->max_items); + } + + list->items[list->num_items] = item; + ++list->num_items; +} + +void** rdfa_create_mapping(size_t elements) +{ + size_t mapping_size = sizeof(void*) * MAX_URI_MAPPINGS * 2; + void** mapping = (void**)malloc(mapping_size); + + /* only initialize the mapping if it is not null. */ + if(mapping != NULL) + { + memset(mapping, 0, mapping_size); + } + + return mapping; +} + +void rdfa_create_list_mapping( + rdfacontext* context, void** mapping, + const char* subject, const char* key) +{ + char* realkey = NULL; + size_t str_size; + rdfalist* value = NULL; + char* list_bnode; + rdftriple* triple; + + /* Attempt to find the list mapping */ + value = (rdfalist*)rdfa_get_list_mapping(mapping, subject, key); + + if(value == NULL) + { + /* create the mapping */ + value = rdfa_create_list(MAX_LIST_ITEMS); + value->user_data = context->depth; + + /* build the real key to use when updating the mapping */ + str_size = strlen(subject); + realkey = rdfa_strdup(subject); + realkey = rdfa_n_append_string(realkey, &str_size, " ", 1); + realkey = rdfa_n_append_string(realkey, &str_size, key, strlen(key)); + rdfa_update_mapping(mapping, realkey, value, + (update_mapping_value_fp)rdfa_replace_list); + free(realkey); + rdfa_free_list(value); + + /* add the first item in the list as the bnode for the list */ + list_bnode = rdfa_create_bnode(context); + triple = rdfa_create_triple( + list_bnode, list_bnode, list_bnode, RDF_TYPE_IRI, NULL, NULL); + rdfa_append_to_list_mapping(mapping, subject, key, (void*)triple); + free(list_bnode); + } +} + +void rdfa_append_to_list_mapping( + void** mapping, const char* subject, const char* key, void* value) +{ + rdfalist* list = (rdfalist*)rdfa_get_list_mapping(mapping, subject, key); + rdfa_add_item(list, value, RDFALIST_FLAG_TRIPLE); +} + +void** rdfa_copy_mapping( + void** mapping, copy_mapping_value_fp copy_mapping_value) +{ + void** rval = (void**)calloc(MAX_URI_MAPPINGS * 2, 
sizeof(void*)); + void** mptr = mapping; + void** rptr = rval; + + /* copy each element of the old mapping to the new mapping. */ + while(*mptr != NULL) + { + /* copy the key */ + *rptr = rdfa_replace_string((char*)*rptr, (const char*)*mptr); + rptr++; + mptr++; + + /* copy the value */ + *rptr = copy_mapping_value(*rptr, *mptr); + rptr++; + mptr++; + } + + return rval; +} + +void rdfa_update_mapping(void** mapping, const char* key, const void* value, + update_mapping_value_fp update_mapping_value) +{ + int found = 0; + void** mptr = mapping; + + /* search the current mapping to see if the key exists in the mapping */ + while(!found && (*mptr != NULL)) + { + if(strcmp((char*)*mptr, key) == 0) + { + mptr++; + *mptr = update_mapping_value(*mptr, value); + found = 1; + } + else + { + mptr++; + } + mptr++; + } + + /* if we made it through the entire URI mapping and the key was not + * found, create a new key-value pair. */ + if(!found) + { + *mptr = rdfa_replace_string((char*)*mptr, key); + mptr++; + *mptr = update_mapping_value(*mptr, value); + } +} + +const void* rdfa_get_mapping(void** mapping, const char* key) +{ + const void* rval = NULL; + char** mptr = (char**)mapping; + + /* search the current mapping to see if the key exists in the mapping. 
*/ + while(*mptr != NULL) + { + if(strcmp(*mptr, key) == 0) + { + mptr++; + rval = *mptr; + } + else + { + mptr++; + } + mptr++; + } + + return rval; +} + +const void* rdfa_get_list_mapping( + void** mapping, const char* subject, const char* key) +{ + void* rval; + char* realkey = NULL; + size_t str_size = strlen(subject); + + /* generate the real list mapping key and retrieve it from the mapping */ + realkey = rdfa_strdup(subject); + realkey = rdfa_n_append_string(realkey, &str_size, " ", 1); + realkey = rdfa_n_append_string(realkey, &str_size, key, strlen(key)); + rval = (void*)rdfa_get_mapping(mapping, realkey); + free(realkey); + + return (const void*)rval; +} + +void rdfa_next_mapping(void** mapping, char** key, void** value) +{ + *key = NULL; + *value = NULL; + + if(*mapping != NULL) + { + *key = *(char**)mapping++; + *value = *mapping++; + } +} + +void rdfa_print_mapping(void** mapping, print_mapping_value_fp print_value) +{ + void** mptr = mapping; + printf("{\n"); + while(*mptr != NULL) + { + char* key; + void* value; + key = (char*)*mptr++; + value = *mptr++; + + printf(" %s : ", key); + print_value(value); + + if(*mptr != NULL) + { + printf(",\n"); + } + else + { + printf("\n"); + } + } + printf("}\n"); +} + +void rdfa_print_string(const char* str) +{ + printf("%s", str); +} + +void rdfa_free_mapping(void** mapping, free_mapping_value_fp free_value) +{ + void** mptr = mapping; + + if(mapping != NULL) + { + /* free all of the memory in the mapping */ + while(*mptr != NULL) + { + free(*mptr); + mptr++; + free_value(*mptr); + mptr++; + } + + free(mapping); + } +} + +char* +rdfa_strdup(const char* s) +{ + size_t len; + char *buf; + + if(!s) + return NULL; + + len = strlen(s) + 1; + buf = (char*)malloc(len); + if(buf) + memcpy(buf, s, len); + return buf; +} diff --git a/librdfa/rdfa_utils.h b/librdfa/rdfa_utils.h new file mode 100644 index 0000000..a21ddfe --- /dev/null +++ b/librdfa/rdfa_utils.h @@ -0,0 +1,486 @@ +/** + * Copyright 2008 Digital Bazaar, Inc. 
 *
 * This file is part of librdfa.
 *
 * librdfa is Free Software, and can be licensed under any of the
 * following three licenses:
 *
 *   1. GNU Lesser General Public License (LGPL) V2.1 or any
 *      newer version
 *   2. GNU General Public License (GPL) V2 or any newer version
 *   3. Apache License, V2.0 or any newer version
 *
 * You may not use this file except in compliance with at least one of
 * the above three licenses.
 *
 * See LICENSE-* at the top of this software distribution for more
 * information regarding the details of each license.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with librdfa. If not, see <http://www.gnu.org/licenses/>.
 *
 * This file declares the common RDFa utility functions.
 */
#ifndef _RDFA_UTILS_H_
#define _RDFA_UTILS_H_
#include "rdfa.h"

#ifdef __cplusplus
extern "C"
{
#endif

/**
 * A CURIE type can be safe, unsafe, an Internationalized Resource
 * Identifier, reference-only or invalid.
 */
typedef enum
{
   CURIE_TYPE_SAFE,
   CURIE_TYPE_IRI_OR_UNSAFE,
   CURIE_TYPE_LINK_TYPE,
   CURIE_TYPE_INVALID
} curie_t;

/**
 * A CURIE parse type lets the CURIE processor know what type of CURIE
 * is being parsed so that the proper namespace resolution may occur.
 */
typedef enum
{
   CURIE_PARSE_ABOUT_RESOURCE,
   CURIE_PARSE_PROPERTY,
   CURIE_PARSE_INSTANCEOF_DATATYPE,
   CURIE_PARSE_HREF_SRC,
   CURIE_PARSE_RELREV
} curieparse_t;

/**
 * The list member flag type is used to attach attribute information
 * to list member data.
 */
typedef enum
{
   RDFALIST_FLAG_NONE = 0,
   RDFALIST_FLAG_DIR_NONE = (1 << 1),
   RDFALIST_FLAG_DIR_FORWARD = (1 << 2),
   RDFALIST_FLAG_DIR_REVERSE = (1 << 3),
   RDFALIST_FLAG_TEXT = (1 << 4),
   RDFALIST_FLAG_CONTEXT = (1 << 5),
   RDFALIST_FLAG_TRIPLE = (1 << 6),
   RDFALIST_FLAG_LAST = (1 << 7)
} liflag_t;

/*
 * RDFa processor graph reporting types
 */
#define RDFA_PROCESSOR_INFO "http://www.w3.org/ns/rdfa#Info"
#define RDFA_PROCESSOR_WARNING "http://www.w3.org/ns/rdfa#Warning"
#define RDFA_PROCESSOR_ERROR "http://www.w3.org/ns/rdfa#Error"

/* key establishing a deleted mapping entry */
#define RDFA_MAPPING_DELETED_KEY "<DELETED-KEY>"

/**
 * A function pointer that will be used to copy mapping values. It is
 * called with the destination slot's current value and the value to
 * copy, and returns the value to store.
 */
typedef void* (*copy_mapping_value_fp)(void*, void*);

/**
 * A function pointer that will be used to update mapping values. It is
 * called with the currently stored value and the new value, and returns
 * the value to store.
 */
typedef void* (*update_mapping_value_fp)(const void*, const void*);

/**
 * A function pointer that will be used to print mapping values.
 */
typedef void (*print_mapping_value_fp)(void*);

/**
 * A function pointer that will be used to free memory associated with values.
 */
typedef void (*free_mapping_value_fp)(void*);

/**
 * Initializes a mapping given the number of elements the mapping is
 * expected to hold.
 *
 * @param elements the maximum number of elements the mapping is
 *                 supposed to hold. The implementation in rdfa_utils.c
 *                 currently ignores this value and always allocates
 *                 MAX_URI_MAPPINGS * 2 slots.
 *
 * @return an initialized void**, with all of the elements set to NULL.
 */
void** rdfa_create_mapping(size_t elements);

/**
 * Adds a list to a mapping given a key to create. If no list exists yet
 * for the subject and key, one is created and a newly generated bnode
 * triple is appended to it as its first item.
 *
 * @param context the current active context.
 * @param mapping the mapping to modify.
 * @param subject the current active subject.
 * @param key the key to add to the mapping.
 *
 * There is no user_data parameter: the user_data field of a newly created
 * list is set to the depth of the given context.
 */
void rdfa_create_list_mapping(
   rdfacontext* context, void** mapping, const char* subject, const char* key);

/**
 * Adds an item to the end of the list that is associated with the given
 * key in the mapping.
 *
 * @param mapping the mapping to modify.
 * @param subject the current active subject.
 * @param key the key to use when looking up the list value.
 * @param value the value to append to the end of the list.
 */
void rdfa_append_to_list_mapping(
   void** mapping, const char* subject, const char* key, void* value);

/**
 * Gets the value for a given list mapping when presented with a subject
 * and a key. If the subject-key combo doesn't exist in the mapping,
 * NULL is returned.
 *
 * @param mapping the mapping to search.
 * @param subject the current active subject.
 * @param key the key.
 *
 * @return value the value in the mapping for the given key.
 */
const void* rdfa_get_list_mapping(
   void** mapping, const char* subject, const char* key);

/**
 * Copies the entire contents of a mapping verbatim and returns a
 * pointer to the copied mapping.
 *
 * @param mapping the mapping to copy
 * @param copy_mapping_value the function used to copy each mapping value.
 *
 * @return the copied mapping, with all of the memory newly
 *         allocated. You MUST free the returned mapping when you are
 *         done with it.
 */
void** rdfa_copy_mapping(
   void** mapping, copy_mapping_value_fp copy_mapping_value);

/**
 * Updates the given mapping when presented with a key and a value. If
 * the key doesn't exist in the mapping, it is created.
 *
 * @param mapping the mapping to update.
 * @param key the key.
 * @param value the value.
 * @param update_mapping_value a pointer to a function that is given the
 *                             old value and the new value and returns
 *                             the value to store in the mapping.
 */
void rdfa_update_mapping(void** mapping, const char* key, const void* value,
   update_mapping_value_fp update_mapping_value);

/**
 * Gets the value for a given mapping when presented with a key. If
 * the key doesn't exist in the mapping, NULL is returned.
 *
 * @param mapping the mapping to search.
 * @param key the key.
 *
 * @return value the value in the mapping for the given key.
 */
const void* rdfa_get_mapping(void** mapping, const char* key);

/**
 * Gets the key and value that the given mapping pointer refers to.
 *
 * NOTE: the mapping pointer is passed by value, so the implementation in
 * rdfa_utils.c cannot advance the caller's pointer; the caller has to
 * step to the next key/value pair itself.
 *
 * @param mapping the mapping position to read from.
 * @param key the key that will be retrieved, NULL if the mapping is
 *            blank or you are at the end of the mapping.
 * @param value the value that is associated with the key. NULL if the
 *              mapping is blank or you are at the end of the mapping.
 */
void rdfa_next_mapping(void** mapping, char** key, void** value);

/**
 * Prints the mapping to the screen in a human-readable way.
 *
 * @param mapping the mapping to print to the screen.
 * @param print_value the function pointer to use to print the mapping values.
 */
void rdfa_print_mapping(void** mapping, print_mapping_value_fp print_value);

/**
 * Frees all memory associated with a mapping.
 *
 * @param mapping the mapping to free.
 * @param free_value the function to free mapping values.
 */
void rdfa_free_mapping(void** mapping, free_mapping_value_fp free_value);

/**
 * Creates a list and initializes it to the given size.
 *
 * @param size the starting size of the list.
 *
 * @return the newly allocated, empty list. It must be released with
 *         rdfa_free_list().
 */
rdfalist* rdfa_create_list(size_t size);

/**
 * Copies the given list.
 *
 * @param list the list to copy.
 *
 * @return the copied list. You MUST free the memory associated with
 *         the returned list once you are done with it.
 */
rdfalist* rdfa_copy_list(rdfalist* list);

/**
 * Replaces the old_list by freeing the memory associated with it. A
 * copy is made of the new list and then returned.
 *
 * @param old_list the list to replace. The memory associated with this list
 *                 is freed.
 * @param new_list the new list to copy in replacement of the old list. A
 *                 deep copy is performed on the new list.
 *
 * @return the copied list. You MUST free the memory associated with
 *         the returned list once you are done with it.
 */
rdfalist* rdfa_replace_list(rdfalist* old_list, rdfalist* new_list);

/**
 * Adds an item to the end of the list.
 *
 * @param list the list to add the item to.
 * @param data the data to add to the list.
 * @param flags the flags to attach to the item.
 */
void rdfa_add_item(rdfalist* list, void* data, liflag_t flags);

/**
 * Pushes an item onto the top of a stack. This function uses a list
 * for the underlying implementation.
 *
 * @param stack the stack to add the item to.
 * @param data the data to add to the stack.
 * @param flags the flags to attach to the item.
 */
void rdfa_push_item(rdfalist* stack, void* data, liflag_t flags);

/**
 * Pops an item off of the top of a stack. This function uses a list
 * for the underlying implementation.
 *
 * @param stack the stack to pop the item off of.
 *
 * @return the item that was just popped off of the top of the
 *         stack, or NULL if the stack is empty. You MUST free the
 *         memory associated with the return value.
 */
void* rdfa_pop_item(rdfalist* stack);

/**
 * Prints the list to the screen in a human-readable way.
 *
 * @param list the list to print to the screen.
 */
void rdfa_print_list(rdfalist* list);

/**
 * Frees all memory associated with the given list.
 *
 * @param list the list to free.
 */
void rdfa_free_list(rdfalist* list);

/**
 * Replaces an old string with a new string, freeing the old memory
 * and allocating new memory for the new string.
 *
 * @param old_string the old string to free and replace.
 * @param new_string the new string to copy to the old_string's
 *                   location. If it is NULL, nothing is freed or copied.
 *
 * @return a pointer to the newly allocated string, or NULL if new_string
 *         is NULL.
 */
char* rdfa_replace_string(char* old_string, const char* new_string);

/**
 * Appends a new string to the old string, expanding the old string's
 * memory area if needed.
The old string's size must be provided and
 * will be updated to the new length.
 *
 * @param old_string the old string to reallocate if needed.
 * @param string_size the old string's length, to be updated.
 * @param suffix the string to append to the old_string.
 * @param suffix_size the number of characters of suffix to append, not
 *                    counting the terminating NUL.
 *
 * @return a pointer to the newly re-allocated string.
 */
char* rdfa_n_append_string(
   char* old_string, size_t* string_size,
   const char* suffix, size_t suffix_size);

/**
 * Joins two strings together and returns a newly allocated string
 * with both strings joined.
 *
 * @param prefix the beginning part of the string.
 * @param suffix the ending part of the string.
 *
 * @return a pointer to the newly allocated string that has both
 *         prefix and suffix in it.
 */
char* rdfa_join_string(const char* prefix, const char* suffix);

/**
 * Prints a string to stdout. This function is used by the rdfa_print_mapping
 * function.
 *
 * @param str the string to print to stdout.
 */
void rdfa_print_string(const char* str);

/**
 * Canonicalizes a given string by condensing all whitespace to single
 * spaces and stripping leading and trailing whitespace.
 *
 * @param str the string to canonicalize.
 *
 * @return a pointer to a newly allocated string that contains the
 *         canonicalized text.
 */
char* rdfa_canonicalize_string(const char* str);

/**
 * Creates a triple given the subject, predicate, object, datatype and
 * language for the triple.
 *
 * @param subject the subject for the triple.
 * @param predicate the predicate for the triple.
 * @param object the object for the triple.
 * @param object_type the type of the object, which must be an rdfresource_t.
 * @param datatype the datatype of the triple.
 * @param language the language for the triple.
 *
 * @return a newly allocated triple with all of the given
 *         information. This triple MUST be free()'d when you are done
 *         with it.
 */
rdftriple* rdfa_create_triple(const char* subject, const char* predicate,
   const char* object, rdfresource_t object_type, const char* datatype,
   const char* language);

/**
 * Prints a triple in a human-readable fashion.
 *
 * @param triple the triple to display.
 */
void rdfa_print_triple(rdftriple* triple);

/**
 * Prints a list of triples in a human readable form.
 *
 * @param list the list of triples to display.
 */
void rdfa_print_triple_list(rdfalist* list);

/**
 * Frees the memory associated with a triple.
 *
 * @param triple the triple to free.
 */
void rdfa_free_triple(rdftriple* triple);

/**
 * Resolves a given uri by appending it to the context's base parameter.
 *
 * @param context the current processing context.
 * @param uri the URI part to process.
 *
 * @return the fully qualified IRI. The memory returned from this
 *         function MUST be freed.
 */
char* rdfa_resolve_uri(rdfacontext* context, const char* uri);

/**
 * Resolves a given uri depending on whether or not it is a fully
 * qualified IRI or a CURIE.
 *
 * @param context the current processing context.
 * @param uri the URI part to process.
 * @param mode the CURIE processing mode to use when parsing the CURIE.
 *
 * @return the fully qualified IRI. The memory returned from this
 *         function MUST be freed.
 */
char* rdfa_resolve_curie(
   rdfacontext* context, const char* uri, curieparse_t mode);

/**
 * Resolves one or more CURIEs into fully qualified IRIs.
 *
 * @param rdfa_context the current processing context.
 * @param uris a list of URIs.
 * @param mode the CURIE parsing mode to use, one of
 *             CURIE_PARSE_INSTANCEOF_DATATYPE, CURIE_PARSE_RELREV, or
 *             CURIE_PARSE_PROPERTY.
 *
 * @return an RDFa list if one or more IRIs were generated, NULL if not.
 */
rdfalist* rdfa_resolve_curie_list(
   rdfacontext* rdfa_context, const char* uris, curieparse_t mode);

char* rdfa_resolve_relrev_curie(rdfacontext* context, const char* uri);

char* rdfa_resolve_property_curie(rdfacontext* context, const char* uri);

void rdfa_update_language(rdfacontext* context, const char* lang);

char* rdfa_create_bnode(rdfacontext* context);

/* All functions that rdfa.c needs. */
void rdfa_update_uri_mappings(rdfacontext* context, const char* attr, const char* value);
void rdfa_establish_new_1_0_subject(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of);
void rdfa_establish_new_1_1_subject(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of,
   const rdfalist* property, const char* content, const char* datatype);
void rdfa_establish_new_1_0_subject_with_relrev(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of);
void rdfa_establish_new_1_1_subject_with_relrev(
   rdfacontext* context, const char* name, const char* about, const char* src,
   const char* resource, const char* href, const rdfalist* type_of);
void rdfa_complete_incomplete_triples(rdfacontext* context);
void rdfa_save_incomplete_list_triples(
   rdfacontext* context, const rdfalist* rel);
void rdfa_complete_type_triples(rdfacontext* context, const rdfalist* type_of);
void rdfa_complete_relrev_triples(
   rdfacontext* context, const rdfalist* rel, const rdfalist* rev);
void rdfa_save_incomplete_triples(
   rdfacontext* context, const rdfalist* rel, const rdfalist* rev);
void rdfa_complete_object_literal_triples(rdfacontext* context);
void rdfa_complete_current_property_value_triples(rdfacontext* context);

/* Declarations needed by namespace.c */
void rdfa_generate_namespace_triple(
   rdfacontext* context, const char* prefix, const char* iri);
void rdfa_processor_triples(
   rdfacontext* context, const char* type, const char* msg);

/* Declarations needed by rdfa.c */
void rdfa_setup_initial_context(rdfacontext* context);
void rdfa_establish_new_inlist_triples(
   rdfacontext* context, rdfalist* predicates, const char* object,
   rdfresource_t object_type);
void rdfa_complete_list_triples(rdfacontext* context);
rdfacontext* rdfa_create_new_element_context(rdfalist* context_stack);
void rdfa_free_context_stack(rdfacontext* context);
char* rdfa_strdup(const char* s);

#ifdef __cplusplus
}
#endif

#endif
diff --git a/librdfa/strtok_r.c b/librdfa/strtok_r.c
new file mode 100644
index 0000000..f07f9fb
--- /dev/null
+++ b/librdfa/strtok_r.c
@@ -0,0 +1,52 @@
/* This file is in the public domain */

#ifdef HAVE_CONFIG_H
# include <config.h>
#endif

#include <string.h>
#include "strtok_r.h"

#ifdef NEED_RDFA_STRTOK_R

/*
 * Fallback tokenizer used through the strtok_r() macro in strtok_r.h when
 * NEED_RDFA_STRTOK_R is defined.
 *
 * str     - the string to tokenize on the first call, NULL to continue
 *           from the position stored in *saveptr. The string is modified:
 *           the delimiter that ends a token is overwritten with '\0'.
 * delim   - the set of delimiter characters.
 * saveptr - receives the position at which the next call resumes; it is
 *           set to NULL once the string is exhausted.
 *
 * Returns a pointer to the next token, or NULL when no token is left.
 */
char *
rdfa_strtok_r(char *str, const char *delim, char **saveptr)
{
  char *p;

  /* continue from where the previous call stopped */
  if (str == NULL)
    str = *saveptr;

  if (str == NULL)
    return NULL;

  /* skip leading delimiters */
  while (*str && strchr(delim, *str))
    str++;

  if (*str == '\0')
    {
      *saveptr = NULL;
      return NULL;
    }

  /* find the end of the token */
  p = str;
  while (*p && !strchr(delim, *p))
    p++;

  if (*p == '\0')
    *saveptr = NULL;
  else
    {
      /* terminate the token and remember where to resume */
      *p = '\0';
      p++;
      *saveptr = p;
    }

  return str;
}

#else /* !
NEED_RDFA_STRTOK_R */ + +typedef int blah; /* "ISO C forbids an empty translation unit" */ + +#endif /* NEED_RDFA_STRTOK_R */ diff --git a/librdfa/strtok_r.h b/librdfa/strtok_r.h new file mode 100644 index 0000000..6fd31ee --- /dev/null +++ b/librdfa/strtok_r.h @@ -0,0 +1,15 @@ +/* This file is in the public domain */ + +#ifndef HAVE_STRTOK_R +# define NEED_RDFA_STRTOK_R +#endif + +#if defined(WIN32) && defined(_MSC_VER) && _MSC_VER >= 1400 +# define strtok_r(s,d,p) strtok_s(s,d,p) +# undef NEED_RDFA_STRTOK_R +#endif + +#ifdef NEED_RDFA_STRTOK_R +char *rdfa_strtok_r(char *str, const char *delim, char **saveptr); +# define strtok_r(s,d,p) rdfa_strtok_r(s,d,p) +#endif diff --git a/librdfa/subject.c b/librdfa/subject.c new file mode 100644 index 0000000..f082052 --- /dev/null +++ b/librdfa/subject.c @@ -0,0 +1,535 @@ +/** + * Copyright 2008 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * This file is used to process RDFa subjects. + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +/** + * Creates a new bnode given an RDFa context. + * + * @param context the RDFa context. + * + * @return a newly allocated string containing the bnode name. This + * string MUST be memory collected. 
+ */ +char* rdfa_create_bnode(rdfacontext* context) +{ + char* rval = NULL; +#define BNODE_BUFFER_LEN 64 + char buffer[BNODE_BUFFER_LEN]; + + /* print and increment the bnode count */ + snprintf(buffer, BNODE_BUFFER_LEN, "_:bnode%i", (int)context->bnode_count++); + rval = rdfa_replace_string(rval, buffer); + + return rval; +} + +/** + * Establishes a new subject for the given context given the + * attributes on the current element. The given context's new_subject + * value is updated if a new subject is found. + * + * @param context the RDFa context. + * @param name the name of the current element that is being processed. + * @param about the full IRI for about, or NULL if there isn't one. + * @param src the full IRI for src, or NULL if there isn't one. + * @param resource the full IRI for resource, or NULL if there isn't one. + * @param href the full IRI for href, or NULL if there isn't one. + * @param type_of The list of IRIs for type_of, or NULL if there was + * no type_of specified. + */ +void rdfa_establish_new_1_0_subject( + rdfacontext* context, const char* name, const char* about, const char* src, + const char* resource, const char* href, const rdfalist* type_of) +{ + /* 4. If the [current element] contains no valid @rel or @rev + * URI, obtained according to the section on CURIE and URI + * Processing, then the next step is to establish a value for + * [new subject]. Any of the attributes that can carry a + * resource can set [new subject]; */ + + if(about != NULL) + { + /* * by using the URI from @about, if present, obtained according + * to the section on CURIE and URI Processing; */ + context->new_subject = + rdfa_replace_string(context->new_subject, about); + } + else if(src != NULL) + { + /* * otherwise, by using the URI from @src, if present, obtained + * according to the section on CURIE and URI Processing. 
*/ + context->new_subject = + rdfa_replace_string(context->new_subject, src); + } + else if(resource != NULL) + { + /* * otherwise, by using the URI from @resource, if present, + * obtained according to the section on CURIE and URI + * Processing; */ + context->new_subject = + rdfa_replace_string(context->new_subject, resource); + } + else if(href != NULL) + { + /* * otherwise, by using the URI from @href, if present, obtained + * according to the section on CURIE and URI Processing. */ + context->new_subject = + rdfa_replace_string(context->new_subject, href); + } + else if((type_of != NULL) && (type_of->num_items > 0)) + { + /* * if @type_of is present, obtained according to the + * section on CURIE and URI Processing, then [new subject] is + * set to be a newly created [bnode]; */ + char* bnode = rdfa_create_bnode(context); + context->new_subject = rdfa_replace_string(context->new_subject, bnode); + free(bnode); + } + else if(context->parent_object != NULL) + { + /* * otherwise, if [parent object] is present, [new subject] is + * set to that and the [skip element] flag is set to 'true'; */ + context->new_subject = + rdfa_replace_string(context->new_subject, context->parent_object); + + /* TODO: The skip element flag will be set even if there is a + * @property value, which is a bug, isn't it? */ + /*context->skip_element = 1;*/ + } +} + +/** + * Establishes a new subject for the given context given the + * attributes on the current element. The given context's new_subject + * value is updated if a new subject is found. + * + * @param context the RDFa context. + * @param name the name of the current element that is being processed. + * @param about the full IRI for about, or NULL if there isn't one. + * @param src the full IRI for src, or NULL if there isn't one. + * @param resource the full IRI for resource, or NULL if there isn't one. + * @param href the full IRI for href, or NULL if there isn't one. 
+ * @param type_of The list of IRIs for type_of, or NULL if there was + * no type_of specified. + * @param property a list of properties that were detected during processing. + */ +void rdfa_establish_new_1_1_subject( + rdfacontext* context, const char* name, const char* about, const char* src, + const char* resource, const char* href, const rdfalist* type_of, + const rdfalist* property, const char* content, const char* datatype) +{ + /* + * If the current element contains the @property attribute, but does not + * contain either the @content or @datatype attributes, then new subject + * is set to the resource obtained from the first match from the + * following rule: + */ + if(property != NULL && content == NULL && datatype == NULL) + { + /* by using the resource from @about, if present, obtained according to + * the section on CURIE and IRI Processing; + */ + if(about != NULL) + { + /* NOTE: this statement achieves this part of the processing rule + * as well because @about is set if depth == 1 in RDFa 1.1 in + * the calling function: otherwise, if the element is the root + * element of the document, then act as if there is an empty + * @about present, and process it according to the rule for + * @about, above; + */ + context->new_subject = + rdfa_replace_string(context->new_subject, about); + } + else if(context->parent_object != NULL) + { + /* otherwise, if parent object is present, new subject is set + * to the value of parent object. 
+ */ + context->new_subject = + rdfa_replace_string(context->new_subject, context->parent_object); + } + + /* If @typeof is present then typed resource is set to the resource + * obtained from the first match from the following rules: + */ + if(type_of != NULL) + { + if(about != NULL) + { + /* by using the resource from @about, if present, obtained + * according to the section on CURIE and IRI Processing; + * + * NOTE: about is set to the document if this is the root + * element of the document, so the following rule is also applied + * in this case: + * + * otherwise, if the element is the root element of the + * document, then act as if there is an empty @about present + * and process it according to the previous rule; + */ + context->typed_resource = + rdfa_replace_string(context->typed_resource, about); + } + else + { + if(resource != NULL) + { + /* by using the resource from @resource, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->typed_resource = + rdfa_replace_string(context->typed_resource, resource); + } + else if(href != NULL) + { + /* otherwise, by using the IRI from @href, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->typed_resource = + rdfa_replace_string(context->typed_resource, href); + } + else if(src != NULL) + { + /* otherwise, by using the IRI from @src, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->typed_resource = + rdfa_replace_string(context->typed_resource, src); + } + else + { + /* otherwise, the value of typed resource is set to a newly + * created bnode. + */ + char* bnode = rdfa_create_bnode(context); + context->typed_resource = rdfa_replace_string( + context->typed_resource, bnode); + free(bnode); + } + + /* The value of the current object resource is then set to the value + * of typed resource. 
+ */ + context->current_object_resource = rdfa_replace_string( + context->current_object_resource, context->typed_resource); + } + } + } + else + { + /* otherwise: + * If the element contains an @about, @href, @src, or @resource attribute, + * new subject is set to the resource obtained as follows: + */ + if(about != NULL || href != NULL || src != NULL || resource != NULL) + { + if(about != NULL) + { + /* by using the resource from @about, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->new_subject = + rdfa_replace_string(context->new_subject, about); + } + else if(resource != NULL) + { + /* otherwise, by using the resource from @resource, if present, + * obtained according to the section on CURIE and IRI Processing; + */ + context->new_subject = + rdfa_replace_string(context->new_subject, resource); + } + else if(href != NULL) + { + /* otherwise, by using the IRI from @href, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->new_subject = + rdfa_replace_string(context->new_subject, href); + } + else if(src != NULL) + { + /* otherwise, by using the IRI from @src, if present, obtained + * according to the section on CURIE and IRI Processing. + */ + context->new_subject = + rdfa_replace_string(context->new_subject, src); + } + } + else + { + /* otherwise, if no resource is provided by a resource attribute, + * then the first match from the following rules will apply: + */ + + /* NOTE: this step is achieved via the parent function call as @about + * is set if the current element is the root element. 
+ * + * if the element is the root element of the document, then act + * as if there is an empty @about present, and process it according + * to the rule for @about, above; + */ + if(type_of != NULL) + { + /* otherwise, if @typeof is present, then new subject is set + * to be a newly created bnode; + */ + char* bnode = rdfa_create_bnode(context); + context->new_subject = rdfa_replace_string(context->new_subject, + bnode); + free(bnode); + } + else if(context->parent_object != NULL) + { + /* otherwise, if parent object is present, new subject is set to + * the value of parent object. + */ + context->new_subject = rdfa_replace_string(context->new_subject, + context->parent_object); + + /* Additionally, if @property is not present then the skip + * element flag is set to 'true'. + */ + if(property == NULL) + { + context->skip_element = 1; + } + } + } + + if(type_of != NULL) + { + /* Finally, if @typeof is present, set the typed resource to the value + * of new subject. + */ + context->typed_resource = rdfa_replace_string(context->typed_resource, + context->new_subject); + } + } +} + +/** + * Establishes a new subject for the given context when @rel or @rev + * is present. The given context's new_subject and + * current_object_resource values are updated if a new subject is found. + * + * @param context the RDFa context. + * @param about the full IRI for about, or NULL if there isn't one. + * @param src the full IRI for src, or NULL if there isn't one. + * @param resource the full IRI for resource, or NULL if there isn't one. + * @param href the full IRI for href, or NULL if there isn't one. + * @param type_of the list of IRIs for type_of, or NULL if type_of + * wasn't specified on the current element. + */ +void rdfa_establish_new_1_0_subject_with_relrev( + rdfacontext* context, const char* name, const char* about, const char* src, + const char* resource, const char* href, const rdfalist* type_of) +{ + /* 5. 
If the [current element] does contain a valid @rel or @rev + * URI, obtained according to the section on CURIE and URI + * Processing, then the next step is to establish both a value + * for [new subject] and a value for [current object resource]: + * + * [new subject] is set to the URI obtained from the first match + * from the following rules: */ + + if(about != NULL) + { + /* * by using the URI from @about, if present, obtained + * according to the section on CURIE and URI Processing; */ + context->new_subject = + rdfa_replace_string(context->new_subject, about); + } + else if(context->rdfa_version == RDFA_VERSION_1_0 && src != NULL) + { + /* * otherwise, by using the URI from @src, if present, obtained + * according to the section on CURIE and URI Processing. */ + context->new_subject = + rdfa_replace_string(context->new_subject, src); + } + else if((type_of != NULL) && (type_of->num_items > 0)) + { + /* * if @type_of is present, obtained according to the + * section on CURIE and URI Processing, then [new subject] is + * set to be a newly created [bnode]; */ + char* bnode = rdfa_create_bnode(context); + context->new_subject = rdfa_replace_string(context->new_subject, bnode); + free(bnode); + } + else if(context->parent_object != NULL) + { + /* * otherwise, if [parent object] is present, [new subject] is + * set to that; */ + context->new_subject = + rdfa_replace_string(context->new_subject, context->parent_object); + } + + /* Then the [current object resource] is set to the URI obtained + * from the first match from the following rules: */ + if(resource != NULL) + { + /* * by using the URI from @resource, if present, obtained + * according to the section on CURIE and URI Processing; */ + context->current_object_resource = + rdfa_replace_string(context->current_object_resource, resource); + } + else if(href != NULL) + { + /* * otherwise, by using the URI from @href, if present, + * obtained according to the section on CURIE and URI Processing. 
*/ + context->current_object_resource = + rdfa_replace_string(context->current_object_resource, href); + } + else + { + /* * otherwise, null. */ + context->current_object_resource = NULL; + } + + /* Note that final value of the [current object resource] will + * either be null, or a full URI. */ +} + +/** + * Establishes a new subject for the given context when @rel or @rev + * is present. The given context's new_subject and + * current_object_resource values are updated if a new subject is found. + * + * @param context the RDFa context. + * @param about the full IRI for about, or NULL if there isn't one. + * @param src the full IRI for src, or NULL if there isn't one. + * @param resource the full IRI for resource, or NULL if there isn't one. + * @param href the full IRI for href, or NULL if there isn't one. + * @param type_of the list of IRIs for type_of, or NULL if type_of + * wasn't specified on the current element. + */ +void rdfa_establish_new_1_1_subject_with_relrev( + rdfacontext* context, const char* name, const char* about, const char* src, + const char* resource, const char* href, const rdfalist* type_of) +{ + /* If the current element does contain a @rel or @rev attribute, then + * the next step is to establish both a value for new subject and a + * value for current object resource: + */ + + /* new subject is set to the resource obtained from the first match from + * the following rules: + */ + + if(about != NULL) + { + /* by using the resource from @about, if present, obtained according + * to the section on CURIE and IRI Processing; + * + * NOTE: This will also catch the following rule due to @about being + * set in the calling function: + * + * if the element is the root element of the document then act as if + * there is an empty @about present, and process it according to the + * rule for @about, above; + */ + context->new_subject = + rdfa_replace_string(context->new_subject, about); + } + + if(type_of != NULL) + { + /* if the @typeof attribute is 
present, set typed resource to + * new subject. + */ + context->typed_resource = + rdfa_replace_string(context->typed_resource, context->new_subject); + } + + /* If no resource is provided then the first match from the following rules + * will apply: + * + */ + if(context->new_subject == NULL && context->parent_object != NULL) + { + /* otherwise, if parent object is present, new subject is set to that. + */ + context->new_subject = rdfa_replace_string( + context->new_subject, context->parent_object); + } + + /* Then the current object resource is set to the resource obtained from + * the first match from the following rules: + */ + + if(resource != NULL) + { + /* by using the resource from @resource, if present, obtained according + * to the section on CURIE and IRI Processing; + */ + context->current_object_resource = rdfa_replace_string( + context->current_object_resource, resource); + } + else if(href != NULL) + { + /* otherwise, by using the IRI from @href, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->current_object_resource = rdfa_replace_string( + context->current_object_resource, href); + } + else if(src != NULL) + { + /* otherwise, by using the IRI from @src, if present, obtained + * according to the section on CURIE and IRI Processing; + */ + context->current_object_resource = rdfa_replace_string( + context->current_object_resource, src); + } + else if(type_of != NULL && about == NULL) + { + /* otherwise, if @typeof is present and @about is not, use a + * newly created bnode. + */ + char* bnode = rdfa_create_bnode(context); + context->current_object_resource = rdfa_replace_string( + context->current_object_resource, bnode); + free(bnode); + } + + if(type_of != NULL && about == NULL) + { + /* If @typeof is present and @about is not, set typed resource to current + * object resource. 
+ */ + context->typed_resource = rdfa_replace_string( + context->typed_resource, context->current_object_resource); + } + + /* Note that final value of the current object resource will either be + * null (from initialization) or a full IRI or bnode. + */ +} diff --git a/librdfa/triple.c b/librdfa/triple.c new file mode 100644 index 0000000..be1caf2 --- /dev/null +++ b/librdfa/triple.c @@ -0,0 +1,847 @@ +/** + * Copyright 2008 Digital Bazaar, Inc. + * + * This file is part of librdfa. + * + * librdfa is Free Software, and can be licensed under any of the + * following three licenses: + * + * 1. GNU Lesser General Public License (LGPL) V2.1 or any + * newer version + * 2. GNU General Public License (GPL) V2 or any newer version + * 3. Apache License, V2.0 or any newer version + * + * You may not use this file except in compliance with at least one of + * the above three licenses. + * + * See LICENSE-* at the top of this software distribution for more + * information regarding the details of each license. + * + * Handles all triple functionality including all incomplete triple + * functionality. 
+ * + * @author Manu Sporny + */ +#ifdef HAVE_CONFIG_H +# include <config.h> +#endif + +#include <stdlib.h> +#include <string.h> +#include <stdio.h> +#include "rdfa_utils.h" +#include "rdfa.h" + +rdftriple* rdfa_create_triple(const char* subject, const char* predicate, + const char* object, rdfresource_t object_type, const char* datatype, + const char* language) +{ + rdftriple* rval = (rdftriple*)malloc(sizeof(rdftriple)); + + /* clear the memory */ + rval->subject = NULL; + rval->predicate = NULL; + rval->object = NULL; + rval->object_type = object_type; + rval->datatype = NULL; + rval->language = NULL; + +#if 0 + printf("SUBJECT : %s\n", subject); + printf("PREDICATE: %s\n", predicate); + printf("OBJECT : %s\n", object); + printf("DATATYPE : %s\n", datatype); + printf("LANG : %s\n", language); +#endif + + /* a triple needs a subject, predicate and object at minimum to be + * considered a triple. */ + if((subject != NULL) && (predicate != NULL) && (object != NULL)) + { + rval->subject = rdfa_replace_string(rval->subject, subject); + rval->predicate = rdfa_replace_string(rval->predicate, predicate); + rval->object = rdfa_replace_string(rval->object, object); + + /* if the datatype is present, set it */ + if(datatype != NULL) + { + rval->datatype = rdfa_replace_string(rval->datatype, datatype); + } + + /* if the language was specified, set it */ + if(language != NULL) + { + rval->language = rdfa_replace_string(rval->language, language); + } + } + + return rval; +} + +void rdfa_print_triple(rdftriple* triple) +{ + if(triple->object_type == RDF_TYPE_NAMESPACE_PREFIX) + { + printf("%s %s: <%s> .\n", + triple->subject, triple->predicate, triple->object); + } + else + { + if(triple->subject != NULL) + { + if((triple->subject[0] == '_') && (triple->subject[1] == ':')) + { + printf("%s\n", triple->subject); + } + else + { + printf("<%s>\n", triple->subject); + } + } + else + { + printf("INCOMPLETE\n"); + } + + if(triple->predicate != NULL) + { + printf(" <%s>\n", 
triple->predicate); + } + else + { + printf(" INCOMPLETE\n"); + } + + if(triple->object != NULL) + { + if(triple->object_type == RDF_TYPE_IRI) + { + if((triple->object[0] == '_') && (triple->object[1] == ':')) + { + printf(" %s", triple->object); + } + else + { + printf(" <%s>", triple->object); + } + } + else if(triple->object_type == RDF_TYPE_PLAIN_LITERAL) + { + printf(" \"%s\"", triple->object); + if(triple->language != NULL) + { + printf("@%s", triple->language); + } + } + else if(triple->object_type == RDF_TYPE_XML_LITERAL) + { + printf(" \"%s\"^^rdf:XMLLiteral", triple->object); + } + else if(triple->object_type == RDF_TYPE_TYPED_LITERAL) + { + if((triple->datatype != NULL) && (triple->language != NULL)) + { + printf(" \"%s\"@%s^^<%s>", + triple->object, triple->language, triple->datatype); + } + else if(triple->datatype != NULL) + { + printf(" \"%s\"^^<%s>", triple->object, triple->datatype); + } + } + else + { + printf(" <%s> <---- UNKNOWN OBJECT TYPE", triple->object); + } + + printf(" .\n"); + } + else + { + printf(" INCOMPLETE ."); + } + } +} + +void rdfa_free_triple(rdftriple* triple) +{ + free(triple->subject); + free(triple->predicate); + free(triple->object); + free(triple->datatype); + free(triple->language); + free(triple); +} + +#ifndef LIBRDFA_IN_RAPTOR +/** + * Generates a namespace prefix triple for any application that is + * interested in processing namespace changes. + * + * @param context the RDFa context. + * @param prefix the name of the prefix + * @param IRI the fully qualified IRI that the prefix maps to. 
+ */ +void rdfa_generate_namespace_triple( + rdfacontext* context, const char* prefix, const char* iri) +{ + if(context->processor_graph_triple_callback != NULL) + { + rdftriple* triple = rdfa_create_triple( + "@prefix", prefix, iri, RDF_TYPE_NAMESPACE_PREFIX, NULL, NULL); + context->processor_graph_triple_callback(triple, context->callback_data); + } +} + +/** + * Generates a set of triples that describe the location of a warning or + * error in a document. + * + * @param context the currently active context. + * @param subject the name of the subject that is associated with the triples. + */ +#if 1 /* remove when the prototype is in the header */ +void rdfa_processor_location_triples(rdfacontext* context, const char* subject); +#endif +void rdfa_processor_location_triples(rdfacontext* context, const char* subject) +{ +} + +/** + * Generates a set of triples in the processor graph including the processor's + * position in the byte stream. + * + * @param context the current active context. + * @param type the type of the message, which may be any of the RDF Classes + * defined in the RDFa Core specification: + * http://www.w3.org/TR/rdfa-core/#processor-graph-reporting + * @param msg the message associated with the processor warning. 
+ */ +void rdfa_processor_triples( + rdfacontext* context, const char* type, const char* msg) +{ + if(context->processor_graph_triple_callback != NULL) + { + char buffer[32]; + char* subject = rdfa_create_bnode(context); + char* context_subject = rdfa_create_bnode(context); + + /* generate the RDFa Processing Graph warning type triple */ + rdftriple* triple = rdfa_create_triple( + subject, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + type, RDF_TYPE_IRI, NULL, NULL); + context->processor_graph_triple_callback(triple, context->callback_data); + + /* generate the description */ + triple = rdfa_create_triple( + subject, "http://purl.org/dc/terms/description", msg, + RDF_TYPE_PLAIN_LITERAL, NULL, "en"); + context->processor_graph_triple_callback(triple, context->callback_data); + + /* generate the context triple for the error */ + triple = rdfa_create_triple( + subject, "http://www.w3.org/ns/rdfa#context", + context_subject, RDF_TYPE_IRI, NULL, NULL); + context->processor_graph_triple_callback(triple, context->callback_data); + + /* generate the type for the context triple */ + triple = rdfa_create_triple( + context_subject, "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", + "http://www.w3.org/2009/pointers#LineCharPointer", + RDF_TYPE_IRI, NULL, NULL); + context->processor_graph_triple_callback(triple, context->callback_data); + + /* generate the line number */ + snprintf(buffer, sizeof(buffer) - 1, "%d", + (int)xmlSAX2GetLineNumber(context->parser)); + triple = rdfa_create_triple( + context_subject, "http://www.w3.org/2009/pointers#lineNumber", + buffer, RDF_TYPE_TYPED_LITERAL, + "http://www.w3.org/2001/XMLSchema#positiveInteger", NULL); + context->processor_graph_triple_callback(triple, context->callback_data); + + free(context_subject); + free(subject); + } +} +#endif + +/** + * Completes all incomplete triples that are part of the current + * context by matching the new_subject with the list of incomplete + * triple predicates. 
+ * + * @param context the RDFa context. + */ +void rdfa_complete_incomplete_triples(rdfacontext* context) +{ + /* 10. If the [ skip element ] flag is 'false', and [ new subject ] + * was set to a non-null value, then any [ incomplete triple ]s + * within the current context should be completed: + * + * The [ list of incomplete triples ] from the current [ evaluation + * context ] ( not the [ local list of incomplete triples ]) will + * contain zero or more predicate URIs. This list is iterated, and + * each of the predicates is used with [ parent subject ] and + * [ new subject ] to generate a triple. Note that at each level + * there are two , lists of [ incomplete triple ]s; one for the + * current processing level (which is passed to each child element + * in the previous step), and one that was received as part of the + * [ evaluation context ]. It is the latter that is used in + * processing during this step. */ + unsigned int i; + for(i = 0; i < context->incomplete_triples->num_items; i++) + { + rdfalist* incomplete_triples = context->incomplete_triples; + rdfalistitem* incomplete_triple = incomplete_triples->items[i]; + + if(incomplete_triple->flags & RDFALIST_FLAG_DIR_NONE) + { + /* If direction is 'none', the new subject is added to the list + * from the iterated incomplete triple. 
*/ + const char* predicate = (const char*)incomplete_triple->data; + rdftriple* triple = rdfa_create_triple(context->parent_subject, + predicate, context->new_subject, RDF_TYPE_IRI, NULL, NULL); + + /* ensure the list mapping exists */ + rdfa_create_list_mapping( + context, context->local_list_mappings, + context->parent_subject, predicate); + + /* add the predicate to the list mapping */ + rdfa_append_to_list_mapping(context->local_list_mappings, + context->parent_subject, predicate, (void*)triple); + } + else if(incomplete_triple->flags & RDFALIST_FLAG_DIR_FORWARD) + { + /* If [direction] is 'forward' then the following triple is generated: + * + * subject + * [parent subject] + * predicate + * the predicate from the iterated incomplete triple + * object + * [new subject] */ + rdftriple* triple = + rdfa_create_triple(context->parent_subject, + (const char*)incomplete_triple->data, context->new_subject, + RDF_TYPE_IRI, NULL, NULL); + context->default_graph_triple_callback(triple, context->callback_data); + } + else + { + /* If [direction] is not 'forward' then this is the triple generated: + * + * subject + * [new subject] + * predicate + * the predicate from the iterated incomplete triple + * object + * [parent subject] */ + rdftriple* triple = + rdfa_create_triple(context->new_subject, + (const char*)incomplete_triple->data, context->parent_subject, + RDF_TYPE_IRI, NULL, NULL); + context->default_graph_triple_callback(triple, context->callback_data); + } + free(incomplete_triple->data); + free(incomplete_triple); + } + context->incomplete_triples->num_items = 0; +} + +void rdfa_complete_type_triples( + rdfacontext* context, const rdfalist* type_of) +{ + unsigned int i; + rdfalistitem** iptr = type_of->items; + const char* subject; + const char* type; + + if(context->rdfa_version == RDFA_VERSION_1_0) + { + /* RDFa 1.0: 6.1 One or more 'types' for the [new subject] can be set by + * using @type_of. 
If present, the attribute must contain one or + * more URIs, obtained according to the section on URI and CURIE + * Processing, each of which is used to generate a triple as follows: + * + * subject + * [new subject] + * predicate + * http://www.w3.org/1999/02/22-rdf-syntax-ns#type + * object + * full URI of 'type' + */ + subject = context->new_subject; + } + else + { + /* RDFa 1.1: 7. One or more 'types' for the typed resource can be set by + * using @typeof. If present, the attribute may contain one or more IRIs, + * obtained according to the section on CURIE and IRI Processing, each of + * which is used to generate a triple as follows: + * + * subject + * typed resource + * predicate + * http://www.w3.org/1999/02/22-rdf-syntax-ns#type + * object + * current full IRI of 'type' from typed resource + */ + subject = context->typed_resource; + } + + for(i = 0; i < type_of->num_items; i++) + { + rdfalistitem* iri = *iptr; + rdftriple* triple; + type = (const char*)iri->data; + + triple = rdfa_create_triple(subject, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#type", type, RDF_TYPE_IRI, + NULL, NULL); + + context->default_graph_triple_callback(triple, context->callback_data); + iptr++; + } +} + +void rdfa_complete_relrev_triples( + rdfacontext* context, const rdfalist* rel, const rdfalist* rev) +{ + /* 7. If in any of the previous steps a [current object resource] + * was set to a non-null value, it is now used to generate triples */ + unsigned int i; + + /* Predicates for the [current object resource] can be set by using + * one or both of the @rel and @rev attributes. 
*/ + + /* If present, @rel will contain one or more URIs, obtained + * according to the section on CURIE and URI Processing each of + * which is used to generate a triple as follows: + * + * subject + * [new subject] + * predicate + * full URI + * object + * [current object resource] */ + if(rel != NULL) + { + rdfalistitem** relptr = rel->items; + for(i = 0; i < rel->num_items; i++) + { + rdfalistitem* curie = *relptr; + + rdftriple* triple = rdfa_create_triple(context->new_subject, + (const char*)curie->data, context->current_object_resource, + RDF_TYPE_IRI, NULL, NULL); + + context->default_graph_triple_callback(triple, context->callback_data); + relptr++; + } + } + + /* If present, @rev will contain one or more URIs, obtained + * according to the section on CURIE and URI Processing each of which + * is used to generate a triple as follows: + * + * subject + * [current object resource] + * predicate + * full URI + * object + * [new subject] */ + if(rev != NULL) + { + rdfalistitem** revptr = rev->items; + for(i = 0; i < rev->num_items; i++) + { + rdfalistitem* curie = *revptr; + + rdftriple* triple = rdfa_create_triple( + context->current_object_resource, (const char*)curie->data, + context->new_subject, RDF_TYPE_IRI, NULL, NULL); + + context->default_graph_triple_callback(triple, context->callback_data); + revptr++; + } + } +} + +void rdfa_save_incomplete_triples( + rdfacontext* context, const rdfalist* rel, const rdfalist* rev) +{ + unsigned int i; + /* 8. If however [current object resource] was set to null, but + * there are predicates present, then they must be stored as + * [incomplete triple]s, pending the discovery of a subject that + * can be used as the object. 
Also, [current object resource] + * should be set to a newly created [bnode] */ + if(context->current_object_resource == NULL) + { + context->current_object_resource = rdfa_create_bnode(context); + } + + /* If present, @rel must contain one or more URIs, obtained + * according to the section on CURIE and URI Processing each of + * which is added to the [local local list of incomplete triples] + * as follows: + * + * predicate + * full URI + * direction + * forward */ + if(rel != NULL) + { + rdfalistitem** relptr = rel->items; + for(i = 0; i < rel->num_items; i++) + { + rdfalistitem* curie = *relptr; + + rdfa_add_item( + context->local_incomplete_triples, curie->data, + (liflag_t)(RDFALIST_FLAG_DIR_FORWARD | RDFALIST_FLAG_TEXT)); + + relptr++; + } + } + + /* If present, @rev must contain one or more URIs, obtained + * according to the section on CURIE and URI Processing, each of + * which is added to the [local list of incomplete triples] as follows: + * + * predicate + * full URI + * direction + * reverse */ + if(rev != NULL) + { + rdfalistitem** revptr = rev->items; + for(i = 0; i < rev->num_items; i++) + { + rdfalistitem* curie = *revptr; + + rdfa_add_item( + context->local_incomplete_triples, curie->data, + (liflag_t)(RDFALIST_FLAG_DIR_REVERSE | RDFALIST_FLAG_TEXT)); + + revptr++; + } + } +} + +void rdfa_complete_object_literal_triples(rdfacontext* context) +{ + /* 9. The next step of the iteration is to establish any + * [current object literal]; + * + * Predicates for the [current object literal] can be set by using + * @property. 
If present, a URI is obtained according to the + * section on CURIE and URI Processing, and then the actual literal + * value is obtained as follows: */ + const char* current_object_literal = NULL; + rdfresource_t type = RDF_TYPE_UNKNOWN; + + unsigned int i; + rdfalistitem** pptr; + + /* * as a [plain literal] if: + * o @content is present; + * o or all children of the [current element] are text nodes; + * o or there are no child nodes; TODO: Is this needed? + * o or the body of the [current element] does have non-text + * child nodes but @datatype is present, with an empty value. + * + * Additionally, if there is a value for [current language] then + * the value of the [plain literal] should include this language + * information, as described in [RDF-CONCEPTS]. The actual literal + * is either the value of @content (if present) or a string created + * by concatenating the text content of each of the descendant + * elements of the [current element] in document order. */ + if((context->content != NULL)) + { + current_object_literal = context->content; + type = RDF_TYPE_PLAIN_LITERAL; + } + else if(context->xml_literal && strchr(context->xml_literal, '<') == NULL) + { + current_object_literal = context->plain_literal; + type = RDF_TYPE_PLAIN_LITERAL; + } + else if(strlen(context->plain_literal) == 0) + { + current_object_literal = (const char*)""; + type = RDF_TYPE_PLAIN_LITERAL; + } + else if((context->xml_literal != NULL) && + (context->datatype != NULL) && + (strlen(context->xml_literal) > 0) && + (strcmp(context->datatype, "") == 0)) + { + current_object_literal = context->plain_literal; + type = RDF_TYPE_PLAIN_LITERAL; + } + + + /* * as an [XML literal] if: + * o the [current element] has any child nodes that are not + * simply text nodes, and @datatype is not present, or is + * present, but is set to rdf:XMLLiteral. 
+ * + * The value of the [XML literal] is a string created by + * serializing to text, all nodes that are descendants of the + * [current element], i.e., not including the element itself, and + * giving it a datatype of rdf:XMLLiteral. */ + if((context->xml_literal != NULL) && + (current_object_literal == NULL) && + (strchr(context->xml_literal, '<') != NULL) && + ((context->datatype == NULL) || + (strcmp(context->datatype, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral") == 0))) + { + current_object_literal = context->xml_literal; + type = RDF_TYPE_XML_LITERAL; + } + + /* * as a [typed literal] if: + * o @datatype is present, and does not have an empty + * value. + * + * The actual literal is either the value of @content (if present) + * or a string created by concatenating the value of all descendant + * text nodes, of the [current element] in turn. The final string + * includes the datatype URI, as described in [RDF-CONCEPTS], which + * will have been obtained according to the section on CURIE and + * URI Processing. */ + if((context->datatype != NULL) && (strlen(context->datatype) > 0)) + { + if(context->content != NULL) + { + /* Static code analyzer clang says next line is not needed; + * "Assigned value is always the same as the existing value" + */ + /* current_object_literal = context->content; */ + type = RDF_TYPE_TYPED_LITERAL; + } + else if(strcmp(context->datatype, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral") != 0) + { + current_object_literal = context->plain_literal; + type = RDF_TYPE_TYPED_LITERAL; + } + } + + /* TODO: Setting the current object literal to the plain literal in + * the case of xsd:string isn't mentioned in the syntax + * processing document. 
*/ + if((current_object_literal == NULL) && (context->datatype != NULL) && + (strcmp( + context->datatype, "http://www.w3.org/2001/XMLSchema#string") == 0)) + { + current_object_literal = context->plain_literal; + type = RDF_TYPE_TYPED_LITERAL; + } + + /* The [current object literal] is then used with each predicate to + * generate a triple as follows: + * + * subject + * [new subject] + * predicate + * full URI + * object + * [current object literal] */ + pptr = context->property->items; + for(i = 0; i < context->property->num_items; i++) + { + + rdfalistitem* curie = *pptr; + rdftriple* triple = NULL; + + triple = rdfa_create_triple(context->new_subject, + (const char*)curie->data, current_object_literal, type, + context->datatype, context->language); + + context->default_graph_triple_callback(triple, context->callback_data); + pptr++; + } + + /* TODO: Implement recurse flag being set to false + * + * Once the triple has been created, if the [datatype] of the + * [current object literal] is rdf:XMLLiteral, then the [recurse] + * flag is set to false */ + context->recurse = 0; +} + +void rdfa_complete_current_property_value_triples(rdfacontext* context) +{ + /* 11. The next step of the iteration is to establish any current property + * value; + * Predicates for the current property value can be set by using @property. + * If present, one or more resources are obtained according to the section + * on CURIE and IRI Processing, and then the actual literal value is + * obtained as follows: */ + char* current_property_value = NULL; + rdfresource_t type = RDF_TYPE_UNKNOWN; + + unsigned int i; + rdfalistitem** pptr; + + /* as a typed literal if @datatype is present, does not have an empty + * value according to the section on CURIE and IRI Processing, and is not + * set to XMLLiteral in the vocabulary + * http://www.w3.org/1999/02/22-rdf-syntax-ns#. 
*/ + if((context->datatype != NULL) && (strcmp(context->datatype, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral") != 0)) + { + /* The actual literal is either the value of @content (if present) or a + * string created by concatenating the value of all descendant text nodes, + * of the current element in turn. */ + if(context->content != NULL) + { + current_property_value = context->content; + } + else + { + current_property_value = context->plain_literal; + } + + /* The final string includes the datatype + * IRI, as described in [RDF-CONCEPTS], which will have been obtained + * according to the section on CURIE and IRI Processing. + * otherwise, as a plain literal if @datatype is present but has an + * empty value according to the section on CURIE and IRI Processing. */ + if(strlen(context->datatype) > 0) + { + type = RDF_TYPE_TYPED_LITERAL; + } + else + { + type = RDF_TYPE_PLAIN_LITERAL; + } + } + else if((context->datatype != NULL) && (strcmp(context->datatype, + "http://www.w3.org/1999/02/22-rdf-syntax-ns#XMLLiteral") == 0)) + { + /* otherwise, as an XML literal if @datatype is present and is set to + * XMLLiteral in the vocabulary + * http://www.w3.org/1999/02/22-rdf-syntax-ns#. + * The value of the XML literal is a string created by serializing to + * text, all nodes that are descendants of the current element, i.e., not + * including the element itself, and giving it a datatype of XMLLiteral + * in the vocabulary http://www.w3.org/1999/02/22-rdf-syntax-ns#. The + * format of the resulting serialized content is as defined in Exclusive + * XML Canonicalization Version [XML-EXC-C14N]. + * In order to maintain maximum portability of this literal, any children + * of the current node that are elements must have the current XML + * namespace declarations (if any) declared on the serialized element. 
+ * Since the child element node could also declare new XML namespaces, + * the RDFa Processor must be careful to merge these together when + * generating the serialized element definition. For avoidance of doubt, + * any re-declarations on the child node must take precedence over + * declarations that were active on the current node. */ + current_property_value = context->xml_literal; + type = RDF_TYPE_XML_LITERAL; + } + else if(context->content != NULL) + { + /* otherwise, as an plain literal using the value of @content if + * @content is present. */ + current_property_value = context->content; + type = RDF_TYPE_PLAIN_LITERAL; + } + else if((context->rel_present == 0) && (context->rev_present == 0) && + (context->content == NULL)) + { + /* otherwise, if the @rel, @rev, and @content attributes are not present, + * as a resource obtained from one of the following: */ + if(context->resource != NULL) + { + /* by using the resource from @resource, if present, obtained + * according to the section on CURIE and IRI Processing; */ + current_property_value = context->resource; + type = RDF_TYPE_IRI; + } + else if(context->href != NULL) + { + /* otherwise, by using the IRI from @href, if present, obtained + * according to the section on CURIE and IRI Processing; */ + current_property_value = context->href; + type = RDF_TYPE_IRI; + } + else if(context->src != NULL) + { + /* otherwise, by using the IRI from @src, if present, obtained + * according to the section on CURIE and IRI Processing. */ + current_property_value = context->src; + type = RDF_TYPE_IRI; + } + else if((context->about == NULL) && (context->typed_resource != NULL)) + { + /* otherwise, if @typeof is present and @about is not, the value of + * typed resource. */ + current_property_value = context->typed_resource; + type = RDF_TYPE_IRI; + } + else + { + /* otherwise as a plain literal. 
*/ + current_property_value = context->plain_literal; + type = RDF_TYPE_PLAIN_LITERAL; + } + } + else + { + /* otherwise as a plain literal. */ + current_property_value = context->plain_literal; + type = RDF_TYPE_PLAIN_LITERAL; + } + + /* Additionally, if there is a value for current language then the value + * of the plain literal should include this language information, as + * described in [RDF-CONCEPTS]. The actual literal is either the value + * of @content (if present) or a string created by concatenating the text + * content of each of the descendant elements of the current element in + * document order. + * + * NOTE: This happens automatically due to the way the code is setup. */ + + if(context->inlist_present) + { + /* The current property value is then used with each predicate as + * follows: + * If the element also includes the @inlist attribute, the current + * property value is added to the local list mapping as follows: + * if the local list mapping does not contain a list associated with + * the predicate IRI, instantiate a new list and add to local list + * mappings add the current property value to the list associated + * with the predicate IRI in the local list mapping */ + rdfa_establish_new_inlist_triples( + context, context->property, current_property_value, type); + } + else + { + pptr = context->property->items; + for(i = 0; i < context->property->num_items; i++) + { + /* Otherwise the current property value is used to generate a triple + * as follows: + * subject + * new subject + * predicate + * full IRI + * object + * current property value */ + rdfalistitem* curie = *pptr; + rdftriple* triple = rdfa_create_triple(context->new_subject, + (const char*)curie->data, current_property_value, type, + context->datatype, context->language); + + context->default_graph_triple_callback(triple, context->callback_data); + + pptr++; + } + } +} |