summaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
Diffstat (limited to 'include')
-rw-r--r--include/Makefile.am1
-rw-r--r--include/Makefile.in718
-rw-r--r--include/orcus/Makefile.am88
-rw-r--r--include/orcus/Makefile.in828
-rw-r--r--include/orcus/base64.hpp37
-rw-r--r--include/orcus/cell_buffer.hpp42
-rw-r--r--include/orcus/config.hpp125
-rw-r--r--include/orcus/css_document_tree.hpp100
-rw-r--r--include/orcus/css_parser.hpp883
-rw-r--r--include/orcus/css_parser_base.hpp71
-rw-r--r--include/orcus/css_selector.hpp110
-rw-r--r--include/orcus/css_types.hpp139
-rw-r--r--include/orcus/csv_parser.hpp306
-rw-r--r--include/orcus/csv_parser_base.hpp80
-rw-r--r--include/orcus/detail/Makefile.am7
-rw-r--r--include/orcus/detail/Makefile.in662
-rw-r--r--include/orcus/detail/parser_token_buffer.hpp188
-rw-r--r--include/orcus/detail/thread.hpp35
-rw-r--r--include/orcus/dom_tree.hpp134
-rw-r--r--include/orcus/env.hpp141
-rw-r--r--include/orcus/exception.hpp152
-rw-r--r--include/orcus/format_detection.hpp52
-rw-r--r--include/orcus/info.hpp22
-rw-r--r--include/orcus/interface.hpp92
-rw-r--r--include/orcus/json_document_tree.hpp504
-rw-r--r--include/orcus/json_global.hpp30
-rw-r--r--include/orcus/json_parser.hpp402
-rw-r--r--include/orcus/json_parser_base.hpp46
-rw-r--r--include/orcus/json_parser_thread.hpp104
-rw-r--r--include/orcus/json_structure_tree.hpp137
-rw-r--r--include/orcus/measurement.hpp41
-rw-r--r--include/orcus/orcus_csv.hpp41
-rw-r--r--include/orcus/orcus_gnumeric.hpp43
-rw-r--r--include/orcus/orcus_import_ods.hpp32
-rw-r--r--include/orcus/orcus_import_xlsx.hpp37
-rw-r--r--include/orcus/orcus_json.hpp73
-rw-r--r--include/orcus/orcus_ods.hpp58
-rw-r--r--include/orcus/orcus_parquet.hpp41
-rw-r--r--include/orcus/orcus_xls_xml.hpp43
-rw-r--r--include/orcus/orcus_xlsx.hpp87
-rw-r--r--include/orcus/orcus_xml.hpp155
-rw-r--r--include/orcus/parser_base.hpp155
-rw-r--r--include/orcus/parser_global.hpp153
-rw-r--r--include/orcus/sax_ns_parser.hpp374
-rw-r--r--include/orcus/sax_parser.hpp576
-rw-r--r--include/orcus/sax_parser_base.hpp207
-rw-r--r--include/orcus/sax_token_parser.hpp186
-rw-r--r--include/orcus/sax_token_parser_thread.hpp92
-rw-r--r--include/orcus/spreadsheet/Makefile.am26
-rw-r--r--include/orcus/spreadsheet/Makefile.in680
-rw-r--r--include/orcus/spreadsheet/auto_filter.hpp149
-rw-r--r--include/orcus/spreadsheet/config.hpp37
-rw-r--r--include/orcus/spreadsheet/document.hpp166
-rw-r--r--include/orcus/spreadsheet/document_types.hpp77
-rw-r--r--include/orcus/spreadsheet/export_interface.hpp60
-rw-r--r--include/orcus/spreadsheet/factory.hpp143
-rw-r--r--include/orcus/spreadsheet/import_interface.hpp1332
-rw-r--r--include/orcus/spreadsheet/import_interface_pivot.hpp351
-rw-r--r--include/orcus/spreadsheet/import_interface_styles.hpp774
-rw-r--r--include/orcus/spreadsheet/import_interface_view.hpp78
-rw-r--r--include/orcus/spreadsheet/pivot.hpp254
-rw-r--r--include/orcus/spreadsheet/shared_strings.hpp77
-rw-r--r--include/orcus/spreadsheet/sheet.hpp150
-rw-r--r--include/orcus/spreadsheet/styles.hpp268
-rw-r--r--include/orcus/spreadsheet/types.hpp751
-rw-r--r--include/orcus/spreadsheet/view.hpp65
-rw-r--r--include/orcus/spreadsheet/view_types.hpp95
-rw-r--r--include/orcus/stream.hpp188
-rw-r--r--include/orcus/string_pool.hpp99
-rw-r--r--include/orcus/threaded_json_parser.hpp185
-rw-r--r--include/orcus/threaded_sax_token_parser.hpp165
-rw-r--r--include/orcus/tokens.hpp74
-rw-r--r--include/orcus/types.hpp634
-rw-r--r--include/orcus/xml_namespace.hpp195
-rw-r--r--include/orcus/xml_structure_tree.hpp198
-rw-r--r--include/orcus/xml_writer.hpp122
-rw-r--r--include/orcus/yaml_document_tree.hpp109
-rw-r--r--include/orcus/yaml_parser.hpp691
-rw-r--r--include/orcus/yaml_parser_base.hpp195
-rw-r--r--include/orcus/zip_archive.hpp126
-rw-r--r--include/orcus/zip_archive_stream.hpp71
81 files changed, 17215 insertions, 0 deletions
diff --git a/include/Makefile.am b/include/Makefile.am
new file mode 100644
index 0000000..305a9c4
--- /dev/null
+++ b/include/Makefile.am
@@ -0,0 +1 @@
+SUBDIRS = orcus
diff --git a/include/Makefile.in b/include/Makefile.in
new file mode 100644
index 0000000..9921170
--- /dev/null
+++ b/include/Makefile.in
@@ -0,0 +1,718 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = include
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \
+ $(top_srcdir)/m4/ax_cxx_compile_stdcxx_17.m4 \
+ $(top_srcdir)/m4/boost.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/m4_ax_valgrind_check.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+ ctags-recursive dvi-recursive html-recursive info-recursive \
+ install-data-recursive install-dvi-recursive \
+ install-exec-recursive install-html-recursive \
+ install-info-recursive install-pdf-recursive \
+ install-ps-recursive install-recursive installcheck-recursive \
+ installdirs-recursive pdf-recursive ps-recursive \
+ tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+ $(RECURSIVE_TARGETS) \
+ $(RECURSIVE_CLEAN_TARGETS) \
+ $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+ distdir distdir-am
+am__extra_recursive_targets = check-valgrind-recursive \
+ check-valgrind-memcheck-recursive \
+ check-valgrind-helgrind-recursive check-valgrind-drd-recursive \
+ check-valgrind-sgcheck-recursive
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
+BOOST_DATE_TIME_LDFLAGS = @BOOST_DATE_TIME_LDFLAGS@
+BOOST_DATE_TIME_LDPATH = @BOOST_DATE_TIME_LDPATH@
+BOOST_DATE_TIME_LIBS = @BOOST_DATE_TIME_LIBS@
+BOOST_FILESYSTEM_LDFLAGS = @BOOST_FILESYSTEM_LDFLAGS@
+BOOST_FILESYSTEM_LDPATH = @BOOST_FILESYSTEM_LDPATH@
+BOOST_FILESYSTEM_LIBS = @BOOST_FILESYSTEM_LIBS@
+BOOST_IOSTREAMS_LDFLAGS = @BOOST_IOSTREAMS_LDFLAGS@
+BOOST_IOSTREAMS_LDPATH = @BOOST_IOSTREAMS_LDPATH@
+BOOST_IOSTREAMS_LIBS = @BOOST_IOSTREAMS_LIBS@
+BOOST_LDPATH = @BOOST_LDPATH@
+BOOST_PROGRAM_OPTIONS_LDFLAGS = @BOOST_PROGRAM_OPTIONS_LDFLAGS@
+BOOST_PROGRAM_OPTIONS_LDPATH = @BOOST_PROGRAM_OPTIONS_LDPATH@
+BOOST_PROGRAM_OPTIONS_LIBS = @BOOST_PROGRAM_OPTIONS_LIBS@
+BOOST_ROOT = @BOOST_ROOT@
+BOOST_SYSTEM_LDFLAGS = @BOOST_SYSTEM_LDFLAGS@
+BOOST_SYSTEM_LDPATH = @BOOST_SYSTEM_LDPATH@
+BOOST_SYSTEM_LIBS = @BOOST_SYSTEM_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CSCOPE = @CSCOPE@
+CTAGS = @CTAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ENABLE_VALGRIND_drd = @ENABLE_VALGRIND_drd@
+ENABLE_VALGRIND_helgrind = @ENABLE_VALGRIND_helgrind@
+ENABLE_VALGRIND_memcheck = @ENABLE_VALGRIND_memcheck@
+ENABLE_VALGRIND_sgcheck = @ENABLE_VALGRIND_sgcheck@
+ETAGS = @ETAGS@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+HAVE_CXX17 = @HAVE_CXX17@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+IXION_REQUIRED_API_VERSION = @IXION_REQUIRED_API_VERSION@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBIXION_CFLAGS = @LIBIXION_CFLAGS@
+LIBIXION_LIBS = @LIBIXION_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MDDS_CFLAGS = @MDDS_CFLAGS@
+MDDS_LIBS = @MDDS_LIBS@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+ORCUS_API_VERSION = @ORCUS_API_VERSION@
+ORCUS_MAJOR_VERSION = @ORCUS_MAJOR_VERSION@
+ORCUS_MICRO_VERSION = @ORCUS_MICRO_VERSION@
+ORCUS_MINOR_VERSION = @ORCUS_MINOR_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PARQUET_CFLAGS = @PARQUET_CFLAGS@
+PARQUET_LIBS = @PARQUET_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POW_LIB = @POW_LIB@
+PYTHON = @PYTHON@
+PYTHON_CFLAGS = @PYTHON_CFLAGS@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_LIBS = @PYTHON_LIBS@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALGRIND = @VALGRIND@
+VALGRIND_ENABLED = @VALGRIND_ENABLED@
+VERSION = @VERSION@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+valgrind_enabled_tools = @valgrind_enabled_tools@
+valgrind_tools = @valgrind_tools@
+SUBDIRS = orcus
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign include/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+# (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+ @fail=; \
+ if $(am__make_keepgoing); then \
+ failcom='fail=yes'; \
+ else \
+ failcom='exit 1'; \
+ fi; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+check-valgrind-local:
+check-valgrind-memcheck-local:
+check-valgrind-helgrind-local:
+check-valgrind-drd-local:
+check-valgrind-sgcheck-local:
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ $(am__make_dryrun) \
+ || test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile
+installdirs: installdirs-recursive
+installdirs-am:
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+check-valgrind: check-valgrind-recursive
+
+check-valgrind-am: check-valgrind-local
+
+check-valgrind-drd: check-valgrind-drd-recursive
+
+check-valgrind-drd-am: check-valgrind-drd-local
+
+check-valgrind-helgrind: check-valgrind-helgrind-recursive
+
+check-valgrind-helgrind-am: check-valgrind-helgrind-local
+
+check-valgrind-memcheck: check-valgrind-memcheck-recursive
+
+check-valgrind-memcheck-am: check-valgrind-memcheck-local
+
+check-valgrind-sgcheck: check-valgrind-sgcheck-recursive
+
+check-valgrind-sgcheck-am: check-valgrind-sgcheck-local
+
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am:
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am:
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
+ check-am check-valgrind-am check-valgrind-drd-am \
+ check-valgrind-drd-local check-valgrind-helgrind-am \
+ check-valgrind-helgrind-local check-valgrind-local \
+ check-valgrind-memcheck-am check-valgrind-memcheck-local \
+ check-valgrind-sgcheck-am check-valgrind-sgcheck-local clean \
+ clean-generic clean-libtool cscopelist-am ctags ctags-am \
+ distclean distclean-generic distclean-libtool distclean-tags \
+ distdir dvi dvi-am html html-am info info-am install \
+ install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ installdirs-am maintainer-clean maintainer-clean-generic \
+ mostlyclean mostlyclean-generic mostlyclean-libtool pdf pdf-am \
+ ps ps-am tags tags-am uninstall uninstall-am
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/include/orcus/Makefile.am b/include/orcus/Makefile.am
new file mode 100644
index 0000000..865e8e6
--- /dev/null
+++ b/include/orcus/Makefile.am
@@ -0,0 +1,88 @@
+SUBDIRS = detail spreadsheet
+
+liborcusdir = $(includedir)/liborcus-@ORCUS_API_VERSION@/orcus
+liborcus_HEADERS = \
+ base64.hpp \
+ cell_buffer.hpp \
+ config.hpp \
+ css_document_tree.hpp \
+ css_parser.hpp \
+ css_parser_base.hpp \
+ css_selector.hpp \
+ css_types.hpp \
+ csv_parser.hpp \
+ csv_parser_base.hpp \
+ dom_tree.hpp \
+ env.hpp \
+ exception.hpp \
+ format_detection.hpp \
+ info.hpp \
+ interface.hpp \
+ json_document_tree.hpp \
+ json_global.hpp \
+ json_parser.hpp \
+ json_parser_base.hpp \
+ json_parser_thread.hpp \
+ json_structure_tree.hpp \
+ measurement.hpp \
+ orcus_csv.hpp \
+ orcus_json.hpp \
+ orcus_xml.hpp \
+ parser_base.hpp \
+ parser_global.hpp \
+ sax_parser.hpp \
+ sax_parser_base.hpp \
+ sax_ns_parser.hpp \
+ sax_token_parser.hpp \
+ sax_token_parser_thread.hpp \
+ stream.hpp \
+ string_pool.hpp \
+ threaded_json_parser.hpp \
+ threaded_sax_token_parser.hpp \
+ tokens.hpp \
+ types.hpp \
+ xml_namespace.hpp \
+ xml_structure_tree.hpp \
+ xml_writer.hpp \
+ yaml_document_tree.hpp \
+ yaml_parser.hpp \
+ yaml_parser_base.hpp \
+ zip_archive.hpp \
+ zip_archive_stream.hpp
+
+if WITH_ODS_FILTER
+
+liborcus_HEADERS += \
+ orcus_ods.hpp \
+ orcus_import_ods.hpp
+
+endif # WITH_ODS_FILTER
+
+if WITH_XLSX_FILTER
+
+liborcus_HEADERS += \
+ orcus_xlsx.hpp \
+ orcus_import_xlsx.hpp
+
+endif # WITH_XLSX_FILTER
+
+if WITH_XLS_XML_FILTER
+
+liborcus_HEADERS += \
+ orcus_xls_xml.hpp
+
+endif # WITH_XLS_XML_FILTER
+
+if WITH_GNUMERIC_FILTER
+
+liborcus_HEADERS += \
+ orcus_gnumeric.hpp
+
+endif # WITH_GNUMERIC_FILTER
+
+if WITH_PARQUET_FILTER
+
+liborcus_HEADERS += \
+ orcus_parquet.hpp
+
+endif # WITH_PARQUET_FILTER
diff --git a/include/orcus/Makefile.in b/include/orcus/Makefile.in
new file mode 100644
index 0000000..0070b0f
--- /dev/null
+++ b/include/orcus/Makefile.in
@@ -0,0 +1,828 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@WITH_ODS_FILTER_TRUE@am__append_1 = \
+@WITH_ODS_FILTER_TRUE@ orcus_ods.hpp \
+@WITH_ODS_FILTER_TRUE@ orcus_import_ods.hpp
+
+@WITH_XLSX_FILTER_TRUE@am__append_2 = \
+@WITH_XLSX_FILTER_TRUE@ orcus_xlsx.hpp \
+@WITH_XLSX_FILTER_TRUE@ orcus_import_xlsx.hpp
+
+@WITH_XLS_XML_FILTER_TRUE@am__append_3 = \
+@WITH_XLS_XML_FILTER_TRUE@ orcus_xls_xml.hpp
+
+@WITH_GNUMERIC_FILTER_TRUE@am__append_4 = \
+@WITH_GNUMERIC_FILTER_TRUE@ orcus_gnumeric.hpp
+
+@WITH_PARQUET_FILTER_TRUE@am__append_5 = \
+@WITH_PARQUET_FILTER_TRUE@ orcus_parquet.hpp
+
+subdir = include/orcus
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \
+ $(top_srcdir)/m4/ax_cxx_compile_stdcxx_17.m4 \
+ $(top_srcdir)/m4/boost.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/m4_ax_valgrind_check.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__liborcus_HEADERS_DIST) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+RECURSIVE_TARGETS = all-recursive check-recursive cscopelist-recursive \
+ ctags-recursive dvi-recursive html-recursive info-recursive \
+ install-data-recursive install-dvi-recursive \
+ install-exec-recursive install-html-recursive \
+ install-info-recursive install-pdf-recursive \
+ install-ps-recursive install-recursive installcheck-recursive \
+ installdirs-recursive pdf-recursive ps-recursive \
+ tags-recursive uninstall-recursive
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__liborcus_HEADERS_DIST = base64.hpp cell_buffer.hpp config.hpp \
+ css_document_tree.hpp css_parser.hpp css_parser_base.hpp \
+ css_selector.hpp css_types.hpp csv_parser.hpp \
+ csv_parser_base.hpp dom_tree.hpp env.hpp exception.hpp \
+ format_detection.hpp info.hpp interface.hpp \
+ json_document_tree.hpp json_global.hpp json_parser.hpp \
+ json_parser_base.hpp json_parser_thread.hpp \
+ json_structure_tree.hpp measurement.hpp orcus_csv.hpp \
+ orcus_json.hpp orcus_xml.hpp parser_base.hpp parser_global.hpp \
+ sax_parser.hpp sax_parser_base.hpp sax_ns_parser.hpp \
+ sax_token_parser.hpp sax_token_parser_thread.hpp stream.hpp \
+ string_pool.hpp threaded_json_parser.hpp \
+ threaded_sax_token_parser.hpp tokens.hpp types.hpp \
+ xml_namespace.hpp xml_structure_tree.hpp xml_writer.hpp \
+ yaml_document_tree.hpp yaml_parser.hpp yaml_parser_base.hpp \
+ zip_archive.hpp zip_archive_stream.hpp orcus_ods.hpp \
+ orcus_import_ods.hpp orcus_xlsx.hpp orcus_import_xlsx.hpp \
+ orcus_xls_xml.hpp orcus_gnumeric.hpp orcus_parquet.hpp
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(liborcusdir)"
+HEADERS = $(liborcus_HEADERS)
+RECURSIVE_CLEAN_TARGETS = mostlyclean-recursive clean-recursive \
+ distclean-recursive maintainer-clean-recursive
+am__recursive_targets = \
+ $(RECURSIVE_TARGETS) \
+ $(RECURSIVE_CLEAN_TARGETS) \
+ $(am__extra_recursive_targets)
+AM_RECURSIVE_TARGETS = $(am__recursive_targets:-recursive=) TAGS CTAGS \
+ distdir distdir-am
+am__extra_recursive_targets = check-valgrind-recursive \
+ check-valgrind-memcheck-recursive \
+ check-valgrind-helgrind-recursive check-valgrind-drd-recursive \
+ check-valgrind-sgcheck-recursive
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+DIST_SUBDIRS = $(SUBDIRS)
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+am__relativize = \
+ dir0=`pwd`; \
+ sed_first='s,^\([^/]*\)/.*$$,\1,'; \
+ sed_rest='s,^[^/]*/*,,'; \
+ sed_last='s,^.*/\([^/]*\)$$,\1,'; \
+ sed_butlast='s,/*[^/]*$$,,'; \
+ while test -n "$$dir1"; do \
+ first=`echo "$$dir1" | sed -e "$$sed_first"`; \
+ if test "$$first" != "."; then \
+ if test "$$first" = ".."; then \
+ dir2=`echo "$$dir0" | sed -e "$$sed_last"`/"$$dir2"; \
+ dir0=`echo "$$dir0" | sed -e "$$sed_butlast"`; \
+ else \
+ first2=`echo "$$dir2" | sed -e "$$sed_first"`; \
+ if test "$$first2" = "$$first"; then \
+ dir2=`echo "$$dir2" | sed -e "$$sed_rest"`; \
+ else \
+ dir2="../$$dir2"; \
+ fi; \
+ dir0="$$dir0"/"$$first"; \
+ fi; \
+ fi; \
+ dir1=`echo "$$dir1" | sed -e "$$sed_rest"`; \
+ done; \
+ reldir="$$dir2"
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
+BOOST_DATE_TIME_LDFLAGS = @BOOST_DATE_TIME_LDFLAGS@
+BOOST_DATE_TIME_LDPATH = @BOOST_DATE_TIME_LDPATH@
+BOOST_DATE_TIME_LIBS = @BOOST_DATE_TIME_LIBS@
+BOOST_FILESYSTEM_LDFLAGS = @BOOST_FILESYSTEM_LDFLAGS@
+BOOST_FILESYSTEM_LDPATH = @BOOST_FILESYSTEM_LDPATH@
+BOOST_FILESYSTEM_LIBS = @BOOST_FILESYSTEM_LIBS@
+BOOST_IOSTREAMS_LDFLAGS = @BOOST_IOSTREAMS_LDFLAGS@
+BOOST_IOSTREAMS_LDPATH = @BOOST_IOSTREAMS_LDPATH@
+BOOST_IOSTREAMS_LIBS = @BOOST_IOSTREAMS_LIBS@
+BOOST_LDPATH = @BOOST_LDPATH@
+BOOST_PROGRAM_OPTIONS_LDFLAGS = @BOOST_PROGRAM_OPTIONS_LDFLAGS@
+BOOST_PROGRAM_OPTIONS_LDPATH = @BOOST_PROGRAM_OPTIONS_LDPATH@
+BOOST_PROGRAM_OPTIONS_LIBS = @BOOST_PROGRAM_OPTIONS_LIBS@
+BOOST_ROOT = @BOOST_ROOT@
+BOOST_SYSTEM_LDFLAGS = @BOOST_SYSTEM_LDFLAGS@
+BOOST_SYSTEM_LDPATH = @BOOST_SYSTEM_LDPATH@
+BOOST_SYSTEM_LIBS = @BOOST_SYSTEM_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CSCOPE = @CSCOPE@
+CTAGS = @CTAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ENABLE_VALGRIND_drd = @ENABLE_VALGRIND_drd@
+ENABLE_VALGRIND_helgrind = @ENABLE_VALGRIND_helgrind@
+ENABLE_VALGRIND_memcheck = @ENABLE_VALGRIND_memcheck@
+ENABLE_VALGRIND_sgcheck = @ENABLE_VALGRIND_sgcheck@
+ETAGS = @ETAGS@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+HAVE_CXX17 = @HAVE_CXX17@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+IXION_REQUIRED_API_VERSION = @IXION_REQUIRED_API_VERSION@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBIXION_CFLAGS = @LIBIXION_CFLAGS@
+LIBIXION_LIBS = @LIBIXION_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MDDS_CFLAGS = @MDDS_CFLAGS@
+MDDS_LIBS = @MDDS_LIBS@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+ORCUS_API_VERSION = @ORCUS_API_VERSION@
+ORCUS_MAJOR_VERSION = @ORCUS_MAJOR_VERSION@
+ORCUS_MICRO_VERSION = @ORCUS_MICRO_VERSION@
+ORCUS_MINOR_VERSION = @ORCUS_MINOR_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PARQUET_CFLAGS = @PARQUET_CFLAGS@
+PARQUET_LIBS = @PARQUET_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POW_LIB = @POW_LIB@
+PYTHON = @PYTHON@
+PYTHON_CFLAGS = @PYTHON_CFLAGS@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_LIBS = @PYTHON_LIBS@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALGRIND = @VALGRIND@
+VALGRIND_ENABLED = @VALGRIND_ENABLED@
+VERSION = @VERSION@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+valgrind_enabled_tools = @valgrind_enabled_tools@
+valgrind_tools = @valgrind_tools@
+SUBDIRS = detail spreadsheet
+liborcusdir = $(includedir)/liborcus-@ORCUS_API_VERSION@/orcus
+liborcus_HEADERS = base64.hpp cell_buffer.hpp config.hpp \
+ css_document_tree.hpp css_parser.hpp css_parser_base.hpp \
+ css_selector.hpp css_types.hpp csv_parser.hpp \
+ csv_parser_base.hpp dom_tree.hpp env.hpp exception.hpp \
+ format_detection.hpp info.hpp interface.hpp \
+ json_document_tree.hpp json_global.hpp json_parser.hpp \
+ json_parser_base.hpp json_parser_thread.hpp \
+ json_structure_tree.hpp measurement.hpp orcus_csv.hpp \
+ orcus_json.hpp orcus_xml.hpp parser_base.hpp parser_global.hpp \
+ sax_parser.hpp sax_parser_base.hpp sax_ns_parser.hpp \
+ sax_token_parser.hpp sax_token_parser_thread.hpp stream.hpp \
+ string_pool.hpp threaded_json_parser.hpp \
+ threaded_sax_token_parser.hpp tokens.hpp types.hpp \
+ xml_namespace.hpp xml_structure_tree.hpp xml_writer.hpp \
+ yaml_document_tree.hpp yaml_parser.hpp yaml_parser_base.hpp \
+ zip_archive.hpp zip_archive_stream.hpp $(am__append_1) \
+ $(am__append_2) $(am__append_3) $(am__append_4) \
+ $(am__append_5)
+all: all-recursive
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/orcus/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign include/orcus/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-liborcusHEADERS: $(liborcus_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(liborcus_HEADERS)'; test -n "$(liborcusdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(liborcusdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(liborcusdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(liborcusdir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(liborcusdir)" || exit $$?; \
+ done
+
+uninstall-liborcusHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(liborcus_HEADERS)'; test -n "$(liborcusdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(liborcusdir)'; $(am__uninstall_files_from_dir)
+
+# This directory's subdirectories are mostly independent; you can cd
+# into them and run 'make' without going through this Makefile.
+# To change the values of 'make' variables: instead of editing Makefiles,
+# (1) if the variable is set in 'config.status', edit 'config.status'
+# (which will cause the Makefiles to be regenerated when you run 'make');
+# (2) otherwise, pass the desired values on the 'make' command line.
+$(am__recursive_targets):
+ @fail=; \
+ if $(am__make_keepgoing); then \
+ failcom='fail=yes'; \
+ else \
+ failcom='exit 1'; \
+ fi; \
+ dot_seen=no; \
+ target=`echo $@ | sed s/-recursive//`; \
+ case "$@" in \
+ distclean-* | maintainer-clean-*) list='$(DIST_SUBDIRS)' ;; \
+ *) list='$(SUBDIRS)' ;; \
+ esac; \
+ for subdir in $$list; do \
+ echo "Making $$target in $$subdir"; \
+ if test "$$subdir" = "."; then \
+ dot_seen=yes; \
+ local_target="$$target-am"; \
+ else \
+ local_target="$$target"; \
+ fi; \
+ ($(am__cd) $$subdir && $(MAKE) $(AM_MAKEFLAGS) $$local_target) \
+ || eval $$failcom; \
+ done; \
+ if test "$$dot_seen" = "no"; then \
+ $(MAKE) $(AM_MAKEFLAGS) "$$target-am" || exit 1; \
+ fi; test -z "$$fail"
+check-valgrind-local:
+check-valgrind-memcheck-local:
+check-valgrind-helgrind-local:
+check-valgrind-drd-local:
+check-valgrind-sgcheck-local:
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-recursive
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ if ($(ETAGS) --etags-include --version) >/dev/null 2>&1; then \
+ include_option=--etags-include; \
+ empty_fix=.; \
+ else \
+ include_option=--include; \
+ empty_fix=; \
+ fi; \
+ list='$(SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ test ! -f $$subdir/TAGS || \
+ set "$$@" "$$include_option=$$here/$$subdir/TAGS"; \
+ fi; \
+ done; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-recursive
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-recursive
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+ @list='$(DIST_SUBDIRS)'; for subdir in $$list; do \
+ if test "$$subdir" = .; then :; else \
+ $(am__make_dryrun) \
+ || test -d "$(distdir)/$$subdir" \
+ || $(MKDIR_P) "$(distdir)/$$subdir" \
+ || exit 1; \
+ dir1=$$subdir; dir2="$(distdir)/$$subdir"; \
+ $(am__relativize); \
+ new_distdir=$$reldir; \
+ dir1=$$subdir; dir2="$(top_distdir)"; \
+ $(am__relativize); \
+ new_top_distdir=$$reldir; \
+ echo " (cd $$subdir && $(MAKE) $(AM_MAKEFLAGS) top_distdir="$$new_top_distdir" distdir="$$new_distdir" \\"; \
+ echo " am__remove_distdir=: am__skip_length_check=: am__skip_mode_fix=: distdir)"; \
+ ($(am__cd) $$subdir && \
+ $(MAKE) $(AM_MAKEFLAGS) \
+ top_distdir="$$new_top_distdir" \
+ distdir="$$new_distdir" \
+ am__remove_distdir=: \
+ am__skip_length_check=: \
+ am__skip_mode_fix=: \
+ distdir) \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-recursive
+all-am: Makefile $(HEADERS)
+installdirs: installdirs-recursive
+installdirs-am:
+ for dir in "$(DESTDIR)$(liborcusdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-recursive
+install-exec: install-exec-recursive
+install-data: install-data-recursive
+uninstall: uninstall-recursive
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-recursive
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+check-valgrind: check-valgrind-recursive
+
+check-valgrind-am: check-valgrind-local
+
+check-valgrind-drd: check-valgrind-drd-recursive
+
+check-valgrind-drd-am: check-valgrind-drd-local
+
+check-valgrind-helgrind: check-valgrind-helgrind-recursive
+
+check-valgrind-helgrind-am: check-valgrind-helgrind-local
+
+check-valgrind-memcheck: check-valgrind-memcheck-recursive
+
+check-valgrind-memcheck-am: check-valgrind-memcheck-local
+
+check-valgrind-sgcheck: check-valgrind-sgcheck-recursive
+
+check-valgrind-sgcheck-am: check-valgrind-sgcheck-local
+
+clean: clean-recursive
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-recursive
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-recursive
+
+dvi-am:
+
+html: html-recursive
+
+html-am:
+
+info: info-recursive
+
+info-am:
+
+install-data-am: install-liborcusHEADERS
+
+install-dvi: install-dvi-recursive
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-recursive
+
+install-html-am:
+
+install-info: install-info-recursive
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-recursive
+
+install-pdf-am:
+
+install-ps: install-ps-recursive
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-recursive
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-recursive
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-recursive
+
+pdf-am:
+
+ps: ps-recursive
+
+ps-am:
+
+uninstall-am: uninstall-liborcusHEADERS
+
+.MAKE: $(am__recursive_targets) install-am install-strip
+
+.PHONY: $(am__recursive_targets) CTAGS GTAGS TAGS all all-am check \
+ check-am check-valgrind-am check-valgrind-drd-am \
+ check-valgrind-drd-local check-valgrind-helgrind-am \
+ check-valgrind-helgrind-local check-valgrind-local \
+ check-valgrind-memcheck-am check-valgrind-memcheck-local \
+ check-valgrind-sgcheck-am check-valgrind-sgcheck-local clean \
+ clean-generic clean-libtool cscopelist-am ctags ctags-am \
+ distclean distclean-generic distclean-libtool distclean-tags \
+ distdir dvi dvi-am html html-am info info-am install \
+ install-am install-data install-data-am install-dvi \
+ install-dvi-am install-exec install-exec-am install-html \
+ install-html-am install-info install-info-am \
+ install-liborcusHEADERS install-man install-pdf install-pdf-am \
+ install-ps install-ps-am install-strip installcheck \
+ installcheck-am installdirs installdirs-am maintainer-clean \
+ maintainer-clean-generic mostlyclean mostlyclean-generic \
+ mostlyclean-libtool pdf pdf-am ps ps-am tags tags-am uninstall \
+ uninstall-am uninstall-liborcusHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/include/orcus/base64.hpp b/include/orcus/base64.hpp
new file mode 100644
index 0000000..44c7017
--- /dev/null
+++ b/include/orcus/base64.hpp
@@ -0,0 +1,37 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __ORCUS_BASE64_HPP__
+#define __ORCUS_BASE64_HPP__
+
+#include "env.hpp"
+#include <cstdint>
+#include <vector>
+#include <string>
+
+namespace orcus {
+
+/**
+ * Decode a base64-encoded character sequence into a sequence of bytes.
+ *
+ * @param base64 base64-encoded character sequence.
+ * @return decoded byte sequence.
+ */
+ORCUS_PSR_DLLPUBLIC std::vector<uint8_t> decode_from_base64(std::string_view base64);
+
+/**
+ * Encode a sequence of bytes into base64-encoded characters.
+ *
+ * @param input sequence of bytes to encode.
+ * @return base64-encoded character sequence representing the input bytes.
+ */
+ORCUS_PSR_DLLPUBLIC std::string encode_to_base64(const std::vector<uint8_t>& input);
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/cell_buffer.hpp b/include/orcus/cell_buffer.hpp
new file mode 100644
index 0000000..60df728
--- /dev/null
+++ b/include/orcus/cell_buffer.hpp
@@ -0,0 +1,42 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __ORCUS_CELL_BUFFER_HPP__
+#define __ORCUS_CELL_BUFFER_HPP__
+
+#include "env.hpp"
+
+#include <string>
+
+namespace orcus {
+
+/**
+ * Temporary cell buffer used to decode encoded cell values. This is used in
+ * the SAX, JSON and CSV parsers.
+ */
+class ORCUS_PSR_DLLPUBLIC cell_buffer
+{
+    std::string m_buffer;
+    size_t m_buf_size;
+public:
+    cell_buffer(const cell_buffer&) = delete;
+
+    cell_buffer();
+    ~cell_buffer();
+
+    void append(const char* p, size_t len);
+    void reset();
+
+    std::string_view str() const;
+
+    bool empty() const;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/config.hpp b/include/orcus/config.hpp
new file mode 100644
index 0000000..17743e6
--- /dev/null
+++ b/include/orcus/config.hpp
@@ -0,0 +1,125 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_CONFIG_HPP
+#define INCLUDED_ORCUS_CONFIG_HPP
+
+#include "orcus/env.hpp"
+#include "orcus/types.hpp"
+
+#include <string>
+#include <variant>
+
+namespace orcus {
+
+struct ORCUS_DLLPUBLIC config
+{
+    format_t input_format;
+
+    /**
+     * Configuration settings specific to the CSV format. This struct must be
+     * POD.
+     */
+    struct csv_config
+    {
+        /** Number of header rows to repeat in case of split. */
+        size_t header_row_size;
+
+        /**
+         * Whether or not to split oversized source data into multiple sheets
+         * in case it spills over.
+         */
+        bool split_to_multiple_sheets;
+    };
+
+    // TODO: add config for other formats as needed.
+    using data_type = std::variant<csv_config>;
+
+    /**
+     * Enable or disable runtime debug output to stdout or stderr.
+     */
+    bool debug;
+
+    /**
+     * Control whether or not to perform a strict check of the XML structure of
+     * a stream being parsed. When enabled, it throws an xml_structure_error
+     * exception when an incorrect XML structure is detected.
+     */
+    bool structure_check;
+
+    data_type data;
+
+    config(format_t input_format);
+};
+
+struct ORCUS_DLLPUBLIC json_config
+{
+    /**
+     * Path of the JSON file being parsed, in case the JSON string originates
+     * from a file. This parameter is required if external JSON files need to
+     * be resolved. Otherwise it's optional.
+     */
+    std::string input_path;
+
+    /**
+     * Path of the file to which output is written. Used only from the
+     * orcus-json command line tool.
+     */
+    std::string output_path;
+
+    /**
+     * Output format type. Used only from the orcus-json command line tool.
+     */
+    dump_format_t output_format;
+
+    /**
+     * Control whether or not to preserve the order of an object's child
+     * name/value pairs. By definition, a JSON object is an unordered set of
+     * name/value pairs, but in some cases preserving the original order may
+     * be desirable.
+     */
+    bool preserve_object_order;
+
+    /**
+     * Control whether or not to resolve JSON references to external files.
+     */
+    bool resolve_references;
+
+    /**
+     * When true, the document tree should allocate memory and hold copies of
+     * string values in the tree. When false, no extra memory is allocated
+     * for string values in the tree and the string values simply point to the
+     * original JSON string stream.
+     *
+     * In other words, when this option is set to false, the caller must
+     * ensure that the JSON string stream instance stays alive for the entire
+     * life cycle of the document tree.
+     */
+    bool persistent_string_values;
+
+    json_config();
+    ~json_config();
+};
+
+struct ORCUS_DLLPUBLIC yaml_config
+{
+    enum class output_format_type { none, yaml, json };
+
+    std::string input_path;
+    std::string output_path;
+
+    output_format_type output_format;
+
+    yaml_config();
+    ~yaml_config();
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/css_document_tree.hpp b/include/orcus/css_document_tree.hpp
new file mode 100644
index 0000000..abbc65a
--- /dev/null
+++ b/include/orcus/css_document_tree.hpp
@@ -0,0 +1,100 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_CSS_DOCUMENT_TREE_HPP
+#define INCLUDED_ORCUS_CSS_DOCUMENT_TREE_HPP
+
+#include "orcus/css_selector.hpp"
+#include "orcus/exception.hpp"
+
+#include <string>
+#include <memory>
+
+namespace orcus {
+
+/**
+ * Class representing CSS rules.
+ */
+class ORCUS_DLLPUBLIC css_document_tree
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+
+ class insertion_error : public general_error
+ {
+ public:
+ insertion_error(const std::string& msg);
+ };
+
+ css_document_tree(const css_document_tree&) = delete;
+
+ css_document_tree();
+ css_document_tree(css_document_tree&& other);
+ ~css_document_tree();
+
+ css_document_tree& operator=(css_document_tree&& other);
+
+ /**
+ * Load raw string stream containing CSS rules to populate the document
+ * tree.
+ *
+ * @param stream raw CSS rules.
+ */
+ void load(std::string_view stream);
+
+ /**
+ * Insert or replace properties for given selector and pseudo element
+ * flags.
+ *
+ * @param selector selector to store properties for.
+ * @param pseudo_elem pseudo element flags for the last simple selector.
+ * @param props new properties to insert.
+ */
+ void insert_properties(
+ const css_selector_t& selector,
+ css::pseudo_element_t pseudo_elem,
+ const css_properties_t& props);
+
+ /**
+ * Get properties associated with given selector and one or more pseudo
+ * elements.
+ *
+ * @param selector selector to get properties for.
+ * @param pseudo_elem pseudo element flags for the last simple selector.
+ * This value is a bitfield.
+ *
+ * @return const pointer to the property set instance, or nullptr in case
+ * there are no properties for the given selector.
+ */
+ const css_properties_t* get_properties(
+ const css_selector_t& selector, css::pseudo_element_t pseudo_elem) const;
+
+ /**
+ * Get all sets of properties associated with given selector, for all
+ * pseudo element values.
+ *
+ * @param selector selector to get properties for.
+ *
+ * @return const pointer to the map of property sets with pseudo element
+ * values as the keys, or nullptr in case there are no properties for
+ * the given selector.
+ */
+ const css_pseudo_element_properties_t*
+ get_all_properties(const css_selector_t& selector) const;
+
+ void dump() const;
+
+ void swap(css_document_tree& other) noexcept;
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/css_parser.hpp b/include/orcus/css_parser.hpp
new file mode 100644
index 0000000..93bbc14
--- /dev/null
+++ b/include/orcus/css_parser.hpp
@@ -0,0 +1,883 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_CSS_PARSER_HPP
+#define INCLUDED_ORCUS_CSS_PARSER_HPP
+
+#define ORCUS_DEBUG_CSS 0
+
+#include "parser_global.hpp"
+#include "css_parser_base.hpp"
+
+#include <cassert>
+#include <algorithm>
+
+#if ORCUS_DEBUG_CSS
+#include <iostream>
+using std::cout;
+using std::endl;
+#endif
+
+namespace orcus {
+
+/**
+ * Empty handler for CSS parser. Sub-class from it and implement necessary
+ * methods.
+ */
+class css_handler
+{
+public:
+ /**
+ * Called upon encountering an at-rule.
+ *
+ * @param name name of the at-rule.
+ */
+ void at_rule_name(std::string_view name)
+ {
+ (void)name;
+ }
+
+ /**
+ * Called upon encountering a simple selector type. A simple selector may
+ * consist of
+ *
+ * @code{.txt}
+ * <type>.<class>#<id>
+ * @endcode
+ *
+ * and this function only passes the type part of the simple selector
+ * expression.
+ *
+ * @param type simple selector type.
+ */
+ void simple_selector_type(std::string_view type)
+ {
+ (void)type;
+ }
+
+ /**
+ * Called upon encountering a simple selector class. A simple selector may
+ * consist of
+ *
+ * @code{.txt}
+ * <type>.<class>#<id>
+ * @endcode
+ *
+ * and this function only passes the class part of the simple selector
+ * expression.
+ *
+ * @param cls simple selector class.
+ */
+ void simple_selector_class(std::string_view cls)
+ {
+ (void)cls;
+ }
+
+ /**
+ * Called upon encountering a pseudo element of a simple selector. For
+ * instance, given the following CSS block:
+ *
+ * @code{.css}
+ * p::first-line {
+ * color: blue;
+ * text-transform: uppercase;
+ * }
+ * @endcode
+ *
+ * the `first-line` part is the pseudo element of the selector named `p`.
+ *
+ * @param pe pseudo element of a simple selector.
+ */
+ void simple_selector_pseudo_element(orcus::css::pseudo_element_t pe)
+ {
+ (void)pe;
+ }
+
+ /**
+ * Called upon encountering a pseudo class of a simple selector. For
+ * instance, given the following CSS block:
+ *
+ * @code{.css}
+ * button:hover {
+ * color: blue;
+ * }
+ * @endcode
+ *
+ * the `hover` part is the pseudo class of the selector named `button`.
+ *
+ * @param pc pseudo class of a simple selector.
+ */
+ void simple_selector_pseudo_class(orcus::css::pseudo_class_t pc)
+ {
+ (void)pc;
+ }
+
+ /**
+ * Called upon encountering a simple selector id. A simple selector may
+ * consist of
+ *
+ * @code{.txt}
+ * <type>.<class>#<id>
+ * @endcode
+ *
+ * and this function only passes the id part of the simple selector
+ * expression.
+ *
+ * @param id simple selector id.
+ */
+ void simple_selector_id(std::string_view id)
+ {
+ (void)id;
+ }
+
+ /**
+ * Called at the end of a simple selector expression.
+ *
+ * @todo find out the difference between a simple selector and a selector,
+ * and document it.
+ */
+ void end_simple_selector() {}
+
+ /**
+ * Called at the end of a selector expression.
+ *
+ * @todo find out the difference between a simple selector and a selector,
+ * and document it.
+ */
+ void end_selector() {}
+
+ /**
+ * Called upon encountering a combinator. A combinator is an operator that
+ * combines other selectors. Given the following CSS block:
+ *
+ * @code{.css}
+ * div > p {
+ * background-color: yellow;
+ * }
+ * @endcode
+ *
+ * the `>` is the combinator that combines the `div` and `p` selectors.
+ *
+ * @param combinator type of combinator encountered.
+ */
+ void combinator(orcus::css::combinator_t combinator)
+ {
+ (void)combinator;
+ }
+
+ /**
+ * Called at each property name.
+ *
+ * @param name property name string.
+ */
+ void property_name(std::string_view name)
+ {
+ (void)name;
+ }
+
+ /**
+ * Called at each ordinary property value string.
+ *
+ * @param value value string.
+ */
+ void value(std::string_view value)
+ {
+ (void)value;
+ }
+
+ /**
+ * Called at each RGB color value of a property.
+ *
+ * @param red value of red (0-255)
+ * @param green value of green (0-255)
+ * @param blue value of blue (0-255)
+ */
+ void rgb(uint8_t red, uint8_t green, uint8_t blue)
+ {
+ (void)red; (void)green; (void)blue;
+ }
+
+ /**
+ * Called at each RGB color value of a property with alpha transparency
+ * value.
+ *
+ * @param red value of red (0-255)
+ * @param green value of green (0-255)
+ * @param blue value of blue (0-255)
+ * @param alpha alpha transparency value
+ */
+ void rgba(uint8_t red, uint8_t green, uint8_t blue, double alpha)
+ {
+ (void)red; (void)green; (void)blue; (void)alpha;
+ }
+
+ /**
+ * Called at each HSL color value of a property.
+ *
+ * @param hue hue
+ * @param sat saturation
+ * @param light lightness
+ */
+ void hsl(uint8_t hue, uint8_t sat, uint8_t light)
+ {
+ (void)hue; (void)sat; (void)light;
+ }
+
+ /**
+ * Called at each HSL color value of a property with alpha transparency
+ * value.
+ *
+ * @param hue hue
+ * @param sat saturation
+ * @param light lightness
+ * @param alpha alpha value
+ */
+ void hsla(uint8_t hue, uint8_t sat, uint8_t light, double alpha)
+ {
+ (void)hue; (void)sat; (void)light; (void)alpha;
+ }
+
+ /**
+ * Called at each URL value of a property.
+ *
+ * @param url URL value string.
+ */
+ void url(std::string_view url)
+ {
+ (void)url;
+ }
+
+ /**
+ * Called when the parsing begins.
+ */
+ void begin_parse() {}
+
+ /**
+ * Called when the parsing ends.
+ */
+ void end_parse() {}
+
+ /**
+ * Called at the beginning of each block. An opening brace '{' marks the
+ * beginning of a block.
+ */
+ void begin_block() {}
+
+ /**
+ * Called at the end of each block. A closing brace '}' marks the end of
+ * a block.
+ */
+ void end_block() {}
+
+ /**
+ * Called at the beginning of a single property expression. Each property
+ * expression may consist of
+ *
+ * @code{.txt}
+ * <name> : <value>, ..., <value>
+ * @endcode
+ *
+ * terminated by either a `;` or `}`.
+ */
+ void begin_property() {}
+
+ /**
+ * Called at the end of a single property expression.
+ */
+ void end_property() {}
+};
+
+/**
+ * Parser for CSS documents.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ * Refer to css_handler.
+ */
+template<typename HandlerT>
+class css_parser : public css::parser_base
+{
+public:
+ typedef HandlerT handler_type;
+
+ /**
+ * @param content CSS content to parse; passed on to css::parser_base.
+ * @param hdl handler instance that receives the event callbacks. It is
+ * stored by reference, so it must stay alive while the
+ * parser is in use.
+ */
+ css_parser(std::string_view content, handler_type& hdl);
+ /**
+ * Parse the content, calling begin_parse() on the handler first and
+ * end_parse() last.
+ */
+ void parse();
+
+private:
+ // Handlers - at the time a handler is called the current position is
+ // expected to point to the first unprocessed non-blank character, and
+ // each handler must set the current position to the next unprocessed
+ // non-blank character when it finishes.
+ void rule();
+ void at_rule_name();
+ void simple_selector_name();
+ void property_name();
+ void property();
+ void quoted_value(char c);
+ void value();
+ void function_value(std::string_view v);
+ void function_rgb(bool alpha);
+ void function_hsl(bool alpha);
+ void function_url();
+ void name_sep();
+ void property_sep();
+ void block();
+
+ // Reference to the caller-owned handler that receives all callbacks.
+ handler_type& m_handler;
+};
+
+/**
+ * Construct a parser over a CSS stream.
+ *
+ * @param content CSS content to parse; forwarded to css::parser_base.
+ * @param hdl handler that receives the parser events.  Only a reference is
+ *            stored, so the handler must outlive this parser instance.
+ */
+template<typename HandlerT>
+css_parser<HandlerT>::css_parser(std::string_view content, handler_type& hdl) :
+    css::parser_base(content), m_handler(hdl)
+{
+}
+
+/**
+ * Parse the whole stream.
+ *
+ * shrink_stream() is applied first, then the handler is notified via
+ * begin_parse(), rules are parsed for as long as characters remain, and
+ * finally end_parse() is called on the handler.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::parse()
+{
+    shrink_stream();
+
+#if ORCUS_DEBUG_CSS
+    std::cout << "compressed: '";
+    for (const char* p = mp_char; p != mp_end; ++p)
+        std::cout << *p;
+    std::cout << "'" << std::endl;
+#endif
+
+    m_handler.begin_parse();
+    while (has_char())
+        rule();
+    m_handler.end_parse();
+}
+
+/**
+ * Parse rules until the stream is exhausted.
+ *
+ * Each rule has the form
+ *
+ *   <selector name> , ... , <selector name> <block>
+ *
+ * The current character decides which sub-handler is invoked.  A
+ * parse_error is raised via parse_error::throw_with() for any character
+ * that cannot start one of the recognised constructs.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::rule()
+{
+    while (has_char())
+    {
+        if (skip_comment())
+            continue;
+
+        const char c = cur_char();
+        if (is_alpha(c))
+        {
+            // A selector starting with a type name.
+            simple_selector_name();
+            continue;
+        }
+
+        switch (c)
+        {
+            case '>':
+                set_combinator(c, css::combinator_t::direct_child);
+                break;
+            case '+':
+                set_combinator(c, css::combinator_t::next_sibling);
+                break;
+            case '.':
+            case '#':
+            case '@':
+                // Class-only or id-only selector, or an at-rule name.
+                simple_selector_name();
+                break;
+            case ',':
+                name_sep();
+                break;
+            case '{':
+                reset_before_block();
+                block();
+                break;
+            default:
+                parse_error::throw_with("rule: failed to parse '", c, "'", offset());
+        }
+    }
+}
+
+/**
+ * Parse the name of an at-rule and pass it to the handler's
+ * at_rule_name() callback.
+ *
+ * The current character must be '@' on entry.  The character following
+ * '@' must be alphabetic; otherwise, or when the stream ends right after
+ * '@', a parse_error is thrown.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::at_rule_name()
+{
+    assert(has_char());
+    assert(cur_char() == '@');
+    next();
+
+    // Make sure a character is left after '@' before inspecting it.
+    if (!has_char() || !is_alpha(cur_char()))
+        throw parse_error("at_rule_name: first character of an at-rule name must be an alphabet.", offset());
+
+    const char* p = nullptr;
+    size_t len = 0;
+    identifier(p, len);
+    skip_blanks();
+
+    m_handler.at_rule_name({p, len});
+#if ORCUS_DEBUG_CSS
+    std::string foo(p, len);
+    std::cout << "at-rule name: " << foo.c_str() << std::endl;
+#endif
+}
+
+/**
+ * Parse one simple selector and report its parts to the handler.
+ *
+ * When the current character is '@' the work is delegated to
+ * at_rule_name().  Otherwise an optional type name is parsed first,
+ * followed by any number of '.class', '#id', '::pseudo-element' and
+ * ':pseudo-class' parts.  If at least one simple selector has already been
+ * counted, the pending combinator is reported to the handler first and
+ * then reset to css::combinator_t::descendant.
+ *
+ * A parse_error is raised via parse_error::throw_with() for pseudo element
+ * or pseudo class names that to_pseudo_element() / to_pseudo_class() map
+ * to zero.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::simple_selector_name()
+{
+    assert(has_char());
+    char c = cur_char();
+    if (c == '@')
+    {
+        // This is the name of an at-rule.
+        at_rule_name();
+        return;
+    }
+
+    if (m_simple_selector_count)
+    {
+#if ORCUS_DEBUG_CSS
+        std::cout << "combinator: " << m_combinator << std::endl;
+#endif
+        m_handler.combinator(m_combinator);
+        m_combinator = css::combinator_t::descendant;
+    }
+    assert(is_alpha(c) || c == '.' || c == '#');
+
+    const char* p = nullptr;
+    size_t n = 0;
+
+#if ORCUS_DEBUG_CSS
+    std::cout << "simple_selector_name: (" << m_simple_selector_count << ")";
+#endif
+
+    if (c != '.' && c != '#')
+    {
+        // The selector starts with a type name.
+        identifier(p, n);
+#if ORCUS_DEBUG_CSS
+        std::string s(p, n);
+        std::cout << " type=" << s;
+#endif
+        m_handler.simple_selector_type({p, n});
+    }
+
+    bool in_loop = true;
+    while (in_loop && has_char())
+    {
+        switch (cur_char())
+        {
+            case '.':
+            {
+                next();
+                identifier(p, n);
+                m_handler.simple_selector_class({p, n});
+#if ORCUS_DEBUG_CSS
+                std::string s(p, n);
+                std::cout << " class=" << s;
+#endif
+            }
+            break;
+            case '#':
+            {
+                next();
+                identifier(p, n);
+                m_handler.simple_selector_id({p, n});
+#if ORCUS_DEBUG_CSS
+                std::string s(p, n);
+                std::cout << " id=" << s;
+#endif
+            }
+            break;
+            case ':':
+            {
+                // This could be either a pseudo element or pseudo class.
+                next();
+                if (cur_char() == ':')
+                {
+                    // pseudo element.
+                    next();
+                    identifier(p, n);
+                    css::pseudo_element_t elem = css::to_pseudo_element({p, n});
+                    if (!elem)
+                        parse_error::throw_with(
+                            "selector_name: unknown pseudo element '", {p, n}, "'", offset());
+
+                    m_handler.simple_selector_pseudo_element(elem);
+                }
+                else
+                {
+                    // pseudo class (or pseudo element in the older version of CSS).
+                    identifier(p, n);
+                    css::pseudo_class_t pc = css::to_pseudo_class({p, n});
+                    if (!pc)
+                        parse_error::throw_with(
+                            "selector_name: unknown pseudo class '", {p, n}, "'", offset());
+
+                    m_handler.simple_selector_pseudo_class(pc);
+                }
+            }
+            break;
+            default:
+                // Anything else ends this simple selector.
+                in_loop = false;
+        }
+    }
+
+    m_handler.end_simple_selector();
+    skip_comments_and_blanks();
+
+    ++m_simple_selector_count;
+
+#if ORCUS_DEBUG_CSS
+    std::cout << std::endl;
+#endif
+}
+
+/**
+ * Parse a property name (an identifier) and pass it to the handler's
+ * property_name() callback.
+ *
+ * The first character must be alphabetic or a dot; otherwise a
+ * parse_error is raised via parse_error::throw_with().
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::property_name()
+{
+    // <identifier>
+
+    assert(has_char());
+    const char c = cur_char();
+    if (!is_alpha(c) && c != '.')
+        parse_error::throw_with(
+            "property_name: first character of a name must be an alphabet or a dot, but found '", c, "'", offset());
+
+    const char* p = nullptr;
+    size_t len = 0;
+    identifier(p, len);
+    skip_comments_and_blanks();
+
+    m_handler.property_name({p, len});
+#if ORCUS_DEBUG_CSS
+    std::string foo(p, len);
+    std::cout << "property name: " << foo.c_str() << std::endl;
+#endif
+}
+
+/**
+ * Parse a single property expression of the form
+ *
+ *   <property name> : <value> , ... , <value>
+ *
+ * begin_property() is called on the handler before the name is parsed and
+ * end_property() after the last value.  Parsing of values stops at ';',
+ * at '}' or at the end of the stream.  A parse_error is thrown when the
+ * name is not followed by ':'.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::property()
+{
+    m_handler.begin_property();
+    property_name();
+
+    // The stream may end right after the name; check before reading.
+    if (!has_char() || cur_char() != ':')
+        throw parse_error("property: ':' expected.", offset());
+    next();
+    skip_comments_and_blanks();
+
+    bool in_loop = true;
+    while (in_loop && has_char())
+    {
+        value();
+
+        // value() may have consumed the rest of the stream.
+        if (!has_char())
+            break;
+
+        switch (cur_char())
+        {
+            case ',':
+                // separated by commas.
+                next();
+                skip_comments_and_blanks();
+                break;
+            case ';':
+            case '}':
+                in_loop = false;
+                break;
+            default:
+                ;
+        }
+    }
+
+    skip_comments_and_blanks();
+    m_handler.end_property();
+}
+
+/**
+ * Parse a quoted property value and pass it to the handler's value()
+ * callback.
+ *
+ * @param c quote character that delimits the value; forwarded to literal().
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::quoted_value(char c)
+{
+    // Parse until the end quote is reached.
+    const char* p = nullptr;
+    size_t len = 0;
+    literal(p, len, c);
+
+    // NOTE(review): presumably steps over the closing quote - confirm
+    // against literal().
+    next();
+    skip_blanks();
+
+    m_handler.value({p, len});
+#if ORCUS_DEBUG_CSS
+    std::string foo(p, len);
+    std::cout << "quoted value: " << foo.c_str() << std::endl;
+#endif
+}
+
+/**
+ * Parse one property value.
+ *
+ * A value starting with a single or double quote is handled by
+ * quoted_value().  Otherwise the value is read with parse_value(); an
+ * empty result is ignored, a result immediately followed by '(' is
+ * treated as a function call and handed to function_value(), and anything
+ * else is passed to the handler's value() callback.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::value()
+{
+    assert(has_char());
+    const char c = cur_char();
+    if (c == '"' || c == '\'')
+    {
+        quoted_value(c);
+        return;
+    }
+
+    std::string_view v = parse_value();
+    if (v.empty())
+        return;
+
+    // parse_value() may have consumed the rest of the stream.
+    if (has_char() && cur_char() == '(')
+    {
+        function_value(v);
+        return;
+    }
+
+    m_handler.value(v);
+
+    skip_comments_and_blanks();
+
+#if ORCUS_DEBUG_CSS
+    std::cout << "value: " << v << std::endl;
+#endif
+}
+
+/**
+ * Parse a function-style property value such as rgb(...) or url(...).
+ *
+ * The current character must be '(' on entry.  The function name is
+ * mapped with css::to_property_function() and the arguments are parsed by
+ * the matching function_*() member.  A parse_error is raised via
+ * parse_error::throw_with() when the function is unknown or unhandled, or
+ * when the arguments are not followed by ')'.
+ *
+ * @param v name of the function, i.e. the text preceding '('.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::function_value(std::string_view v)
+{
+    assert(cur_char() == '(');
+    const css::property_function_t func = css::to_property_function(v);
+    if (func == css::property_function_t::unknown)
+        parse_error::throw_with("function_value: unknown function '", v, "'", offset());
+
+    // Move to the first character of the first argument.
+    next();
+    skip_comments_and_blanks();
+
+    switch (func)
+    {
+        case css::property_function_t::rgb:
+            function_rgb(false);
+            break;
+        case css::property_function_t::rgba:
+            function_rgb(true);
+            break;
+        case css::property_function_t::hsl:
+            function_hsl(false);
+            break;
+        case css::property_function_t::hsla:
+            function_hsl(true);
+            break;
+        case css::property_function_t::url:
+            function_url();
+            break;
+        default:
+            parse_error::throw_with("function_value: unhandled function '", v, "'", offset());
+    }
+
+    const char c = cur_char();
+    if (c != ')')
+        parse_error::throw_with("function_value: ')' expected but '", c, "' found.", offset());
+
+    next();
+    skip_comments_and_blanks();
+}
+
+/**
+ * Parse the arguments of rgb(num, num, num) or
+ * rgba(num, num, num, float) and report them through the handler's rgb()
+ * or rgba() callback.
+ *
+ * The alpha value is clamped to the range [0.0, 1.0].  A parse_error is
+ * raised via parse_error::throw_with() when an expected ',' separator is
+ * missing.
+ *
+ * @param alpha true when a fourth, alpha argument is expected.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::function_rgb(bool alpha)
+{
+    constexpr size_t n_vals = 3;
+    uint8_t vals[n_vals];
+
+    for (size_t i = 0; i < n_vals; ++i)
+    {
+        if (i > 0)
+        {
+            // Every component but the first is preceded by a comma.
+            const char c = cur_char();
+            if (c != ',')
+                parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset());
+
+            next();
+            skip_comments_and_blanks();
+        }
+
+        vals[i] = parse_uint8();
+        skip_comments_and_blanks();
+    }
+
+    if (alpha)
+    {
+        const char c = cur_char();
+        if (c != ',')
+            parse_error::throw_with("function_rgb: ',' expected but '", c, "' found.", offset());
+
+        next();
+        skip_comments_and_blanks();
+
+        double alpha_val = parse_double_or_throw();
+        alpha_val = std::clamp(alpha_val, 0.0, 1.0);
+
+        // Skip anything between the alpha value and the closing ')', the
+        // same way function_hsl() does for its alpha argument.
+        skip_comments_and_blanks();
+        m_handler.rgba(vals[0], vals[1], vals[2], alpha_val);
+    }
+    else
+        m_handler.rgb(vals[0], vals[1], vals[2]);
+
+#if ORCUS_DEBUG_CSS
+    std::cout << "rgb";
+    if (alpha)
+        std::cout << 'a';
+    std::cout << '(';
+    for (uint8_t v : vals)
+        std::cout << ' ' << (int)v;
+    std::cout << " )" << std::endl;
+#endif
+}
+
+/**
+ * Parse the arguments of hsl(num, percent, percent) or
+ * hsla(num, percent, percent, float) and report them through the
+ * handler's hsl() or hsla() callback.
+ *
+ * Hue is clamped to [0, 360], saturation and lightness to [0, 100] and
+ * alpha to [0.0, 1.0].  A parse_error is raised via
+ * parse_error::throw_with() when an expected ',' separator is missing.
+ *
+ * NOTE(review): css_handler declares the hue, saturation and lightness
+ * parameters as uint8_t, which cannot represent hue values above 255 even
+ * though hue is clamped to 360 here - confirm the intended range.
+ *
+ * @param alpha true when a fourth, alpha argument is expected.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::function_hsl(bool alpha)
+{
+    // Consume a ',' separator plus any comments and blanks that follow it.
+    auto expect_comma = [this]()
+    {
+        const char c = cur_char();
+        if (c != ',')
+            parse_error::throw_with("function_hsl: ',' expected but '", c, "' found.", offset());
+
+        next();
+        skip_comments_and_blanks();
+    };
+
+    double hue = parse_double_or_throw(); // casted to uint8_t eventually.
+    hue = std::clamp(hue, 0.0, 360.0);
+    skip_comments_and_blanks();
+
+    expect_comma();
+
+    double sat = parse_percent();
+    sat = std::clamp(sat, 0.0, 100.0);
+    skip_comments_and_blanks();
+
+    expect_comma();
+
+    double light = parse_percent();
+    light = std::clamp(light, 0.0, 100.0);
+    skip_comments_and_blanks();
+
+    if (!alpha)
+    {
+        m_handler.hsl(hue, sat, light);
+        return;
+    }
+
+    expect_comma();
+
+    double alpha_val = parse_double_or_throw();
+    alpha_val = std::clamp(alpha_val, 0.0, 1.0);
+    skip_comments_and_blanks();
+    m_handler.hsla(hue, sat, light, alpha_val);
+}
+
+/**
+ * Parse the argument of url(...) and pass it to the handler's url()
+ * callback.
+ *
+ * A value starting with a single or double quote is read with literal();
+ * an unquoted value is read with skip_to_or_blank() up to ')' or a blank.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::function_url()
+{
+    const char c = cur_char();
+
+    const char* p = nullptr;
+    size_t len = 0;
+
+    if (c == '"' || c == '\'')
+    {
+        // Quoted URL value.
+        literal(p, len, c);
+        next();
+    }
+    else
+    {
+        // Unquoted URL value.
+        skip_to_or_blank(p, len, ")");
+    }
+
+    skip_comments_and_blanks();
+    m_handler.url({p, len});
+#if ORCUS_DEBUG_CSS
+    std::cout << "url(" << std::string(p, len) << ")" << std::endl;
+#endif
+}
+
+/**
+ * Handle the ',' that separates two selectors: step over it, skip the
+ * blanks that follow and call end_selector() on the handler.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::name_sep()
+{
+    assert(cur_char() == ',');
+#if ORCUS_DEBUG_CSS
+    std::cout << "," << std::endl;
+#endif
+    next();
+    skip_blanks();
+    m_handler.end_selector();
+}
+
+/**
+ * Step over the current character - block() calls this when it is the ';'
+ * separating two properties - and skip any comments and blanks that
+ * follow.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::property_sep()
+{
+#if ORCUS_DEBUG_CSS
+    std::cout << ";" << std::endl;
+#endif
+    next();
+    skip_comments_and_blanks();
+}
+
+/**
+ * Parse a block of the form
+ *
+ *   '{' <property> ';' ... ';' <property> ';'(optional) '}'
+ *
+ * The current character must be '{' on entry.  end_selector() and
+ * begin_block() are called on the handler before the properties are
+ * parsed, and end_block() once the closing '}' is found.  A parse_error is
+ * thrown when the properties are not followed by '}', including when the
+ * stream ends first.
+ */
+template<typename HandlerT>
+void css_parser<HandlerT>::block()
+{
+    assert(cur_char() == '{');
+#if ORCUS_DEBUG_CSS
+    std::cout << "{" << std::endl;
+#endif
+    m_handler.end_selector();
+    m_handler.begin_block();
+
+    next();
+    skip_comments_and_blanks();
+
+    // parse properties.
+    while (has_char())
+    {
+        property();
+
+        // property() may have consumed the rest of the stream.
+        if (!has_char() || cur_char() != ';')
+            break;
+
+        property_sep();
+        if (has_char() && cur_char() == '}')
+            // ';' after the last property.  This is optional but allowed.
+            break;
+    }
+
+    if (!has_char() || cur_char() != '}')
+        throw parse_error("block: '}' expected.", offset());
+
+    m_handler.end_block();
+
+    next();
+    skip_comments_and_blanks();
+
+#if ORCUS_DEBUG_CSS
+    std::cout << "}" << std::endl;
+#endif
+}
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/css_parser_base.hpp b/include/orcus/css_parser_base.hpp
new file mode 100644
index 0000000..4514269
--- /dev/null
+++ b/include/orcus/css_parser_base.hpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_CSS_PARSER_BASE_HPP
+#define INCLUDED_CSS_PARSER_BASE_HPP
+
+#include "orcus/env.hpp"
+#include "orcus/css_types.hpp"
+#include "orcus/exception.hpp"
+#include "orcus/parser_base.hpp"
+
+#include <string>
+#include <exception>
+
+namespace orcus { namespace css {
+
+/**
+ * Non-template base class of css_parser. It declares the parsing
+ * primitives and the selector-related state that css_parser uses.
+ */
+class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
+{
+public:
+ parser_base(std::string_view content);
+
+protected:
+
+ /**
+ * Parse an identifier. css_parser builds a std::string_view from the
+ * returned p and len.
+ *
+ * NOTE(review): extra presumably lists additional characters allowed in
+ * the identifier - confirm against the implementation.
+ */
+ void identifier(const char*& p, size_t& len, std::string_view extra = std::string_view{});
+ uint8_t parse_uint8();
+
+ /**
+ * Parse an unquoted property value until one of non-value characters is
+ * reached.
+ *
+ * @return parsed value segment.
+ */
+ std::string_view parse_value();
+ double parse_percent();
+ double parse_double_or_throw();
+
+ void literal(const char*& p, size_t& len, char quote);
+ void skip_to(const char*& p, size_t& len, char c);
+
+ /**
+ * Skip until one of specified characters or a blank character is reached.
+ *
+ * @param p pointer to the first character of the skipped character array.
+ * @param len length of the skipped character array.
+ * @param chars one or more characters that can end the skipping.
+ */
+ void skip_to_or_blank(const char*& p, size_t& len, std::string_view chars);
+ void skip_blanks();
+ void skip_blanks_reverse();
+ void shrink_stream();
+ bool skip_comment();
+ void comment();
+ void skip_comments_and_blanks();
+ void set_combinator(char c, css::combinator_t combinator);
+ void reset_before_block();
+
+protected:
+ // Incremented by css_parser after each simple selector it parses; a
+ // non-zero value makes css_parser report a combinator before the next
+ // simple selector.
+ size_t m_simple_selector_count;
+ // Combinator reported by css_parser ahead of the next simple selector;
+ // css_parser resets it to combinator_t::descendant after reporting it.
+ combinator_t m_combinator;
+};
+
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/css_selector.hpp b/include/orcus/css_selector.hpp
new file mode 100644
index 0000000..1e41d54
--- /dev/null
+++ b/include/orcus/css_selector.hpp
@@ -0,0 +1,110 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_CSS_SELECTOR_HPP
+#define INCLUDED_ORCUS_CSS_SELECTOR_HPP
+
+#include "env.hpp"
+#include "css_types.hpp"
+
+#include <ostream>
+#include <variant>
+#include <vector>
+#include <unordered_set>
+#include <unordered_map>
+
+namespace orcus {
+
+/**
+ * A single simple selector, made up of a name, an id, a set of classes and
+ * a pseudo class value.  All strings are held as std::string_view, so this
+ * structure does not own the character data they refer to.
+ */
+struct ORCUS_DLLPUBLIC css_simple_selector_t
+{
+    using classes_type = std::unordered_set<std::string_view>;
+
+    std::string_view name;
+    std::string_view id;
+    classes_type classes;
+    css::pseudo_class_t pseudo_classes;
+
+    css_simple_selector_t();
+
+    void clear();
+    bool empty() const;
+
+    bool operator== (const css_simple_selector_t& r) const;
+    bool operator!= (const css_simple_selector_t& r) const;
+
+    /** Hash functor for css_simple_selector_t. */
+    struct hash
+    {
+        size_t operator() (const css_simple_selector_t& ss) const;
+    };
+};
+
+/**
+ * A simple selector paired with a combinator. Instances of this type are
+ * the elements of css_selector_t::chained.
+ */
+struct ORCUS_DLLPUBLIC css_chained_simple_selector_t
+{
+ css::combinator_t combinator;
+ css_simple_selector_t simple_selector;
+
+ bool operator== (const css_chained_simple_selector_t& r) const;
+
+ css_chained_simple_selector_t();
+ css_chained_simple_selector_t(const css_simple_selector_t& ss);
+ css_chained_simple_selector_t(css::combinator_t op, const css_simple_selector_t& ss);
+};
+
+/**
+ * A CSS selector: the first simple selector followed by zero or more
+ * chained simple selectors, each of which carries its own combinator.
+ */
+struct ORCUS_DLLPUBLIC css_selector_t
+{
+    using chained_type = std::vector<css_chained_simple_selector_t>;
+
+    css_simple_selector_t first;
+    chained_type chained;
+
+    void clear();
+
+    bool operator== (const css_selector_t& r) const;
+};
+
+/**
+ * Structure representing a single CSS property value.
+ */
+struct ORCUS_DLLPUBLIC css_property_value_t
+{
+ /** Storage for a string value, an RGBA color or an HSLA color. */
+ using value_type = std::variant<std::string_view, css::rgba_color_t, css::hsla_color_t>;
+
+ /** Type of the stored value. */
+ css::property_value_t type;
+ /** The stored value itself. */
+ value_type value;
+
+ css_property_value_t();
+ css_property_value_t(const css_property_value_t& r);
+
+ /**
+ * Constructor that takes a string value.
+ *
+ * @param _str string value to store. This value should point to a string
+ * buffer that's already been interned. The caller is
+ * responsible for managing the life cycle of the source string
+ * buffer.
+ */
+ css_property_value_t(std::string_view _str);
+
+ css_property_value_t& operator= (const css_property_value_t& r);
+
+ void swap(css_property_value_t& r);
+};
+
+/** Map of property names to the list of values of each property. */
+using css_properties_t = std::unordered_map<std::string_view, std::vector<css_property_value_t>>;
+/** Map of pseudo element values to their property sets. */
+using css_pseudo_element_properties_t = std::unordered_map<css::pseudo_element_t, css_properties_t>;
+
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const css_simple_selector_t& v);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const css_selector_t& v);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const css_property_value_t& v);
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/css_types.hpp b/include/orcus/css_types.hpp
new file mode 100644
index 0000000..75386ea
--- /dev/null
+++ b/include/orcus/css_types.hpp
@@ -0,0 +1,139 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_CSS_TYPES_HPP
+#define INCLUDED_ORCUS_CSS_TYPES_HPP
+
+#include "env.hpp"
+
+#include <cstdlib>
+#include <cstdint>
+#include <string>
+
+namespace orcus { namespace css {
+
+enum class combinator_t
+{
+ /// `E F` where `F` is a descendant of `E`.
+ descendant,
+ /// `E > F` where `F` is a direct child of `E`.
+ direct_child,
+ /// `E + F` where `F` is a direct sibling of `E` where `E` precedes `F`.
+ next_sibling
+};
+
+/**
+ * List of functions used as property values.
+ */
+enum class property_function_t
+{
+ unknown = 0,
+ hsl,
+ hsla,
+ rgb,
+ rgba,
+ url
+};
+
+/**
+ * Type of a property value. Used as the type of the `type` member of
+ * css_property_value_t.
+ */
+enum class property_value_t
+{
+ none = 0,
+ string,
+ hsl,
+ hsla,
+ rgb,
+ rgba,
+ url
+};
+
+/**
+ * Color expressed as red, green and blue components plus an alpha value.
+ */
+struct rgba_color_t
+{
+ uint8_t red; ///< 0 to 255
+ uint8_t green; ///< 0 to 255
+ uint8_t blue; ///< 0 to 255
+ double alpha;
+};
+
+/**
+ * Color expressed as hue, saturation and lightness components plus an
+ * alpha value.
+ *
+ * NOTE(review): css_parser clamps hue to 0-360 and saturation and
+ * lightness to 0-100 before passing them to the handler; confirm that the
+ * "0 to 255" ranges documented on the members below are what is intended.
+ */
+struct hsla_color_t
+{
+ uint8_t hue; ///< 0 to 255
+ uint8_t saturation; ///< 0 to 255
+ uint8_t lightness; ///< 0 to 255
+ double alpha;
+};
+
+using pseudo_element_t = uint16_t;
+using pseudo_class_t = uint64_t;
+
+ORCUS_PSR_DLLPUBLIC extern const pseudo_element_t pseudo_element_after;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_element_t pseudo_element_before;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_element_t pseudo_element_first_letter;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_element_t pseudo_element_first_line;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_element_t pseudo_element_selection;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_element_t pseudo_element_backdrop;
+
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_active;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_checked;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_default;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_dir;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_disabled;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_empty;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_enabled;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_first;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_first_child;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_first_of_type;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_fullscreen;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_focus;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_hover;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_indeterminate;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_in_range;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_invalid;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_lang;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_last_child;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_last_of_type;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_left;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_link;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_not;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_nth_child;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_nth_last_child;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_nth_last_of_type;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_nth_of_type;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_only_child;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_only_of_type;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_optional;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_out_of_range;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_read_only;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_read_write;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_required;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_right;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_root;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_scope;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_target;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_valid;
+ORCUS_PSR_DLLPUBLIC extern const pseudo_class_t pseudo_class_visited;
+
+/**
+ * Convert a textual representation of a pseudo element into its numerical
+ * representation.
+ */
+ORCUS_PSR_DLLPUBLIC pseudo_element_t to_pseudo_element(std::string_view s);
+
+/**
+ * Convert a textual representation of a pseudo class into its numerical
+ * representation.
+ */
+ORCUS_PSR_DLLPUBLIC pseudo_class_t to_pseudo_class(std::string_view s);
+
+ORCUS_PSR_DLLPUBLIC std::string pseudo_class_to_string(pseudo_class_t val);
+
+ORCUS_PSR_DLLPUBLIC property_function_t to_property_function(std::string_view s);
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/csv_parser.hpp b/include/orcus/csv_parser.hpp
new file mode 100644
index 0000000..5cb9598
--- /dev/null
+++ b/include/orcus/csv_parser.hpp
@@ -0,0 +1,306 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_CSV_PARSER_HPP
+#define ORCUS_CSV_PARSER_HPP
+
+#include "csv_parser_base.hpp"
+
+namespace orcus {
+
+class csv_handler
+{
+public:
+    /**
+     * Invoked by csv_parser::parse() before it processes the first row.
+     */
+    void begin_parse() {}
+
+    /**
+     * Invoked by csv_parser::parse() once the whole stream has been consumed.
+     */
+    void end_parse() {}
+
+    /**
+     * Invoked when a new row starts.
+     */
+    void begin_row() {}
+
+    /**
+     * Invoked when the current row ends.
+     */
+    void end_row() {}
+
+    /**
+     * Invoked once for each parsed cell.
+     *
+     * @param value text content of the cell.
+     * @param transient true when the content has been converted and lives in
+     *                  a temporary buffer, in which case it may no longer be
+     *                  available once this call returns.  When false, the
+     *                  content stays valid for as long as the original CSV
+     *                  stream content is valid.
+     *
+     * This default implementation ignores both arguments.
+     */
+    void cell(
+        [[maybe_unused]] std::string_view value, [[maybe_unused]] bool transient)
+    {}
+};
+
+/**
+ * Event-driven parser for CSV documents.
+ *
+ * @tparam HandlerT Handler type providing the member functions invoked as
+ *         event callbacks.  Refer to csv_handler for the expected interface.
+ */
+template<typename HandlerT>
+class csv_parser : public csv::parser_base
+{
+public:
+    using handler_type = HandlerT;
+
+    csv_parser(std::string_view content, handler_type& hdl, const csv::parser_config& config);
+    /** Parse the whole stream, reporting every event to the handler. */
+    void parse();
+
+private:
+    // Row-level and cell-level parsing steps.
+    void row();
+    void cell();
+    void quoted_cell();
+
+    // Continues a quoted cell once an escaped (doubled) qualifier is found.
+    void parse_cell_with_quote(const char* p0, size_t len0);
+
+    /**
+     * Push an unquoted cell value to the handler, trimmed when configured.
+     */
+    void push_cell_value(const char* p, size_t n);
+
+    handler_type& m_handler;
+};
+
+template<typename HandlerT> // keeps a reference to hdl; content and config go to the base class
+csv_parser<HandlerT>::csv_parser(
+    std::string_view content, handler_type& hdl, const csv::parser_config& config)
+    : csv::parser_base(content, config), m_handler(hdl) {}
+
+template<typename HandlerT>
+void csv_parser<HandlerT>::parse()
+{
+#if ORCUS_DEBUG_CSV
+    for (const char* p_dump = mp_begin; p_dump < mp_end; ++p_dump)
+        std::cout << *p_dump;
+    std::cout << std::endl;
+#endif
+    // Consume the stream one row at a time between begin_parse() and end_parse().
+    m_handler.begin_parse();
+    while (has_char())
+        row();
+    m_handler.end_parse();
+}
+
+/**
+ * Parse cells up to and including the next linefeed, or up to the end of the
+ * stream, and report them to the handler as one row.
+ */
+template<typename HandlerT>
+void csv_parser<HandlerT>::row()
+{
+    m_handler.begin_row();
+    for (;;)
+    {
+        // A leading text qualifier selects the quoted-cell path.
+        if (is_text_qualifier(cur_char()))
+            quoted_cell();
+        else
+            cell();
+
+        if (has_char())
+        {
+            const char ch = cur_char();
+            if (ch == '\n')
+            {
+                next();
+#if ORCUS_DEBUG_CSV
+                cout << "(LF)" << endl;
+#endif
+                break;
+            }
+
+            // Anything other than a linefeed must be a cell delimiter here.
+            if (!is_delim(ch))
+                throw orcus::parse_error("expected a delimiter", offset());
+
+            next();
+            if (m_config.trim_cell_value)
+                skip_blanks();
+        }
+
+        // Nothing left to read: the row ends together with the stream.
+        if (!has_char())
+            break;
+    }
+    m_handler.end_row();
+}
+
+/**
+ * Parse an unquoted cell: consume characters up to the next delimiter,
+ * linefeed or the end of the stream, then pass the span on to
+ * push_cell_value().  An empty cell is passed as a null pointer of length 0.
+ */
+template<typename HandlerT>
+void csv_parser<HandlerT>::cell()
+{
+    const char* const p_head = mp_char;
+    size_t n_chars = 0;
+    for (char ch = cur_char(); ch != '\n' && !is_delim(ch); ch = cur_char())
+    {
+        ++n_chars;
+        next();
+        if (!has_char())
+            break;
+    }
+
+    push_cell_value(n_chars ? p_head : nullptr, n_chars);
+}
+
+/**
+ * Parse a cell that begins with a text qualifier, up to the closing qualifier.
+ * Parsing moves on to parse_cell_with_quote() at the first doubled qualifier.
+ */
+template<typename HandlerT>
+void csv_parser<HandlerT>::quoted_cell()
+{
+#if ORCUS_DEBUG_CSV
+    cout << "--- quoted cell" << endl;
+#endif
+    char c = cur_char();
+    assert(is_text_qualifier(c));
+    next(); // Skip the opening quote.
+    if (!has_char())
+        return;
+
+    const char* p0 = mp_char;
+    size_t len = 0; // Number of characters scanned since p0.
+    for (; has_char(); next(), ++len)
+    {
+        c = cur_char();
+#if ORCUS_DEBUG_CSV
+        cout << "'" << c << "'" << endl;
+#endif
+        if (!is_text_qualifier(c))
+            continue;
+        if (has_next() && is_text_qualifier(peek_char()))
+        {
+            // Doubled qualifier: pass on the text so far plus the first qualifier.
+            next();
+            parse_cell_with_quote(p0, len + 1);
+            return;
+        }
+        // Closing quote.
+        m_handler.cell({p0, len}, false);
+        next();
+        skip_blanks();
+        return;
+    }
+
+    // Stream ended before the closing quote; emit only the characters actually read.
+    m_handler.cell({p0, len}, false);
+}
+
+/**
+ * Finish a quoted cell that contains a doubled text qualifier, assembling
+ * the value in the cell buffer and handing it to the handler as transient.
+ * The first len0 characters at p0 are the part already scanned.
+ */
+template<typename HandlerT>
+void csv_parser<HandlerT>::parse_cell_with_quote(const char* p0, size_t len0)
+{
+#if ORCUS_DEBUG_CSV
+    using namespace std;
+    cout << "--- parse cell with quote" << endl;
+#endif
+    assert(is_text_qualifier(cur_char()));
+
+    // Seed the temp buffer with the part scanned so far.
+    m_cell_buf.reset();
+    m_cell_buf.append(p0, len0);
+
+    // Step past the current qualifier and scan segment by segment.
+    next();
+    const char* p_seg = mp_char;
+    size_t seg_len = 0;
+    for (; has_char(); next(), ++seg_len)
+    {
+        const char ch = cur_char();
+#if ORCUS_DEBUG_CSV
+        cout << "'" << ch << "'" << endl;
+#endif
+        if (is_text_qualifier(ch))
+        {
+            // Either way the segment read so far belongs to the value.
+            m_cell_buf.append(p_seg, seg_len);
+            if (!has_next() || !is_text_qualifier(peek_char()))
+            {
+                // Closing quote: deliver the buffered value and exit normally.
+                m_handler.cell(m_cell_buf.str(), true);
+                next();
+                skip_blanks();
+                return;
+            }
+            // Doubled qualifier: start a new segment at the 2nd quote.
+            next();
+            p_seg = mp_char;
+            seg_len = 0;
+        }
+    }
+
+    // Stream ended prematurely.
+    throw parse_error("stream ended prematurely while parsing quoted cell.", offset());
+}
+
+/**
+ * Deliver an unquoted cell value to the handler as non-transient content.
+ * When the trim_cell_value option is set, leading and trailing blanks are
+ * excluded from the delivered span first.
+ *
+ * @param p first character of the value; may be null when n is 0.
+ * @param n number of characters in the value.
+ */
+template<typename HandlerT>
+void csv_parser<HandlerT>::push_cell_value(const char* p, size_t n)
+{
+    size_t remaining = n;
+    if (m_config.trim_cell_value)
+    {
+        // Advance the head past any leading blanks.
+        while (remaining && is_blank(*p))
+        {
+            ++p;
+            --remaining;
+        }
+
+        // Pull the tail back over any trailing blanks.
+        while (remaining > 1 && is_blank(p[remaining-1]))
+            --remaining;
+    }
+
+    m_handler.cell({p, remaining}, false);
+#if ORCUS_DEBUG_CSV
+    if (remaining)
+        cout << "(cell:'" << std::string(p, remaining) << "')" << endl;
+    else
+        cout << "(cell:'')" << endl;
+#endif
+}
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/csv_parser_base.hpp b/include/orcus/csv_parser_base.hpp
new file mode 100644
index 0000000..506d4e5
--- /dev/null
+++ b/include/orcus/csv_parser_base.hpp
@@ -0,0 +1,80 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef CSV_PARSER_BASE_HPP
+#define CSV_PARSER_BASE_HPP
+
+#include "env.hpp"
+#include "cell_buffer.hpp"
+#include "parser_global.hpp"
+#include "parser_base.hpp"
+
+#include <cstdlib>
+#include <cstring>
+#include <exception>
+#include <string>
+#include <cassert>
+#include <sstream>
+
+#define ORCUS_DEBUG_CSV 0
+
+#if ORCUS_DEBUG_CSV
+#include <iostream>
+using std::cout;
+using std::endl;
+#endif
+
+namespace orcus { namespace csv {
+
+/**
+ * Run-time configuration for csv_parser, held by the parser by reference.
+ */
+struct ORCUS_PSR_DLLPUBLIC parser_config
+{
+ /**
+ * One or more characters that serve as cell boundaries.
+ */
+ std::string delimiters;
+
+ /**
+ * The single character used to quote the text of a cell.
+ */
+ char text_qualifier;
+
+ /**
+ * When true, the value of each unquoted cell gets trimmed, i.e. any
+ * leading or trailing blanks are left out of the reported value.
+ */
+ bool trim_cell_value:1;
+
+ parser_config(); // defined out of line; initial values are not visible in this header
+};
+
+class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
+{
+protected:
+ const csv::parser_config& m_config; // held by reference, so the config object must outlive the parser
+ cell_buffer m_cell_buf; // scratch buffer csv_parser uses to assemble quoted cell values
+
+protected:
+ parser_base(std::string_view content, const parser_config& config);
+
+ /**
+ * Unlike the global 'is_blank', this presumably does not treat linefeed and
+ * carriage return characters as blanks (TODO confirm in the implementation).
+ */
+ bool is_blank(char c) const;
+ bool is_delim(char c) const;
+ bool is_text_qualifier(char c) const;
+
+ void skip_blanks();
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/detail/Makefile.am b/include/orcus/detail/Makefile.am
new file mode 100644
index 0000000..d1d3aee
--- /dev/null
+++ b/include/orcus/detail/Makefile.am
@@ -0,0 +1,7 @@
+
+liborcusdir = $(includedir)/liborcus-@ORCUS_API_VERSION@/orcus/detail
+
+liborcus_HEADERS = \
+ parser_token_buffer.hpp \
+ thread.hpp
+
diff --git a/include/orcus/detail/Makefile.in b/include/orcus/detail/Makefile.in
new file mode 100644
index 0000000..f58c7b1
--- /dev/null
+++ b/include/orcus/detail/Makefile.in
@@ -0,0 +1,662 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+subdir = include/orcus/detail
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \
+ $(top_srcdir)/m4/ax_cxx_compile_stdcxx_17.m4 \
+ $(top_srcdir)/m4/boost.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/m4_ax_valgrind_check.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(liborcus_HEADERS) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(liborcusdir)"
+HEADERS = $(liborcus_HEADERS)
+am__extra_recursive_targets = check-valgrind-recursive \
+ check-valgrind-memcheck-recursive \
+ check-valgrind-helgrind-recursive check-valgrind-drd-recursive \
+ check-valgrind-sgcheck-recursive
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
+BOOST_DATE_TIME_LDFLAGS = @BOOST_DATE_TIME_LDFLAGS@
+BOOST_DATE_TIME_LDPATH = @BOOST_DATE_TIME_LDPATH@
+BOOST_DATE_TIME_LIBS = @BOOST_DATE_TIME_LIBS@
+BOOST_FILESYSTEM_LDFLAGS = @BOOST_FILESYSTEM_LDFLAGS@
+BOOST_FILESYSTEM_LDPATH = @BOOST_FILESYSTEM_LDPATH@
+BOOST_FILESYSTEM_LIBS = @BOOST_FILESYSTEM_LIBS@
+BOOST_IOSTREAMS_LDFLAGS = @BOOST_IOSTREAMS_LDFLAGS@
+BOOST_IOSTREAMS_LDPATH = @BOOST_IOSTREAMS_LDPATH@
+BOOST_IOSTREAMS_LIBS = @BOOST_IOSTREAMS_LIBS@
+BOOST_LDPATH = @BOOST_LDPATH@
+BOOST_PROGRAM_OPTIONS_LDFLAGS = @BOOST_PROGRAM_OPTIONS_LDFLAGS@
+BOOST_PROGRAM_OPTIONS_LDPATH = @BOOST_PROGRAM_OPTIONS_LDPATH@
+BOOST_PROGRAM_OPTIONS_LIBS = @BOOST_PROGRAM_OPTIONS_LIBS@
+BOOST_ROOT = @BOOST_ROOT@
+BOOST_SYSTEM_LDFLAGS = @BOOST_SYSTEM_LDFLAGS@
+BOOST_SYSTEM_LDPATH = @BOOST_SYSTEM_LDPATH@
+BOOST_SYSTEM_LIBS = @BOOST_SYSTEM_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CSCOPE = @CSCOPE@
+CTAGS = @CTAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ENABLE_VALGRIND_drd = @ENABLE_VALGRIND_drd@
+ENABLE_VALGRIND_helgrind = @ENABLE_VALGRIND_helgrind@
+ENABLE_VALGRIND_memcheck = @ENABLE_VALGRIND_memcheck@
+ENABLE_VALGRIND_sgcheck = @ENABLE_VALGRIND_sgcheck@
+ETAGS = @ETAGS@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+HAVE_CXX17 = @HAVE_CXX17@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+IXION_REQUIRED_API_VERSION = @IXION_REQUIRED_API_VERSION@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBIXION_CFLAGS = @LIBIXION_CFLAGS@
+LIBIXION_LIBS = @LIBIXION_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MDDS_CFLAGS = @MDDS_CFLAGS@
+MDDS_LIBS = @MDDS_LIBS@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+ORCUS_API_VERSION = @ORCUS_API_VERSION@
+ORCUS_MAJOR_VERSION = @ORCUS_MAJOR_VERSION@
+ORCUS_MICRO_VERSION = @ORCUS_MICRO_VERSION@
+ORCUS_MINOR_VERSION = @ORCUS_MINOR_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PARQUET_CFLAGS = @PARQUET_CFLAGS@
+PARQUET_LIBS = @PARQUET_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POW_LIB = @POW_LIB@
+PYTHON = @PYTHON@
+PYTHON_CFLAGS = @PYTHON_CFLAGS@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_LIBS = @PYTHON_LIBS@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALGRIND = @VALGRIND@
+VALGRIND_ENABLED = @VALGRIND_ENABLED@
+VERSION = @VERSION@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+valgrind_enabled_tools = @valgrind_enabled_tools@
+valgrind_tools = @valgrind_tools@
+liborcusdir = $(includedir)/liborcus-@ORCUS_API_VERSION@/orcus/detail
+liborcus_HEADERS = \
+ parser_token_buffer.hpp \
+ thread.hpp
+
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/orcus/detail/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign include/orcus/detail/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-liborcusHEADERS: $(liborcus_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(liborcus_HEADERS)'; test -n "$(liborcusdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(liborcusdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(liborcusdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(liborcusdir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(liborcusdir)" || exit $$?; \
+ done
+
+uninstall-liborcusHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(liborcus_HEADERS)'; test -n "$(liborcusdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(liborcusdir)'; $(am__uninstall_files_from_dir)
+check-valgrind-local:
+check-valgrind-memcheck-local:
+check-valgrind-helgrind-local:
+check-valgrind-drd-local:
+check-valgrind-sgcheck-local:
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(liborcusdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+check-valgrind: check-valgrind-am
+
+check-valgrind-am: check-valgrind-local
+
+check-valgrind-drd: check-valgrind-drd-am
+
+check-valgrind-drd-am: check-valgrind-drd-local
+
+check-valgrind-helgrind: check-valgrind-helgrind-am
+
+check-valgrind-helgrind-am: check-valgrind-helgrind-local
+
+check-valgrind-memcheck: check-valgrind-memcheck-am
+
+check-valgrind-memcheck-am: check-valgrind-memcheck-local
+
+check-valgrind-sgcheck: check-valgrind-sgcheck-am
+
+check-valgrind-sgcheck-am: check-valgrind-sgcheck-local
+
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-liborcusHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-liborcusHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am check-valgrind-am \
+ check-valgrind-drd-am check-valgrind-drd-local \
+ check-valgrind-helgrind-am check-valgrind-helgrind-local \
+ check-valgrind-local check-valgrind-memcheck-am \
+ check-valgrind-memcheck-local check-valgrind-sgcheck-am \
+ check-valgrind-sgcheck-local clean clean-generic clean-libtool \
+ cscopelist-am ctags ctags-am distclean distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-liborcusHEADERS install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am uninstall-liborcusHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/include/orcus/detail/parser_token_buffer.hpp b/include/orcus/detail/parser_token_buffer.hpp
new file mode 100644
index 0000000..3b13bec
--- /dev/null
+++ b/include/orcus/detail/parser_token_buffer.hpp
@@ -0,0 +1,188 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_DETAIL_THREAD_PARSER_TOKEN_BUFFER_HPP
+#define INCLUDED_ORCUS_DETAIL_THREAD_PARSER_TOKEN_BUFFER_HPP
+
+#include "orcus/exception.hpp"
+
+#include <mutex>
+#include <condition_variable>
+
+namespace orcus { namespace detail { namespace thread {
+
+/**
+ * Class that manages the hand-over of parser tokens from a parser thread
+ * to a client thread in multi-threaded parsers.
+ */
+template<typename _TokensT>
+class parser_token_buffer
+{
+ enum class state_type { parsing_progress, parsing_ended, parsing_aborted };
+
+ typedef _TokensT tokens_type;
+
+ mutable std::mutex m_mtx_tokens; // taken around accesses to m_tokens and writes to m_state
+ std::condition_variable m_cv_tokens_empty; // notified by next_tokens() and abort()
+ std::condition_variable m_cv_tokens_ready; // notified by check_and_notify() and notify_and_finish()
+
+ tokens_type m_tokens; // token buffer used to hand over tokens to the client.
+
+ size_t m_token_size_threshold; // may be doubled by check_and_notify()
+ const size_t m_max_token_size;
+
+ state_type m_state;
+ /** Check, while holding the mutex, whether the client token buffer is empty. */
+ bool tokens_empty() const
+ {
+ std::lock_guard<std::mutex> lock(m_mtx_tokens);
+ return m_tokens.empty();
+ }
+
+ /**
+ * Only to be called from the parser thread. Blocks until the client has
+ * emptied the token buffer or the state leaves parsing_progress.
+ *
+ * @throws detail::parsing_aborted_error if the state is parsing_aborted
+ * after the wait.
+ */
+ void wait_until_tokens_empty()
+ {
+ std::unique_lock<std::mutex> lock(m_mtx_tokens);
+ while (!m_tokens.empty() && m_state == state_type::parsing_progress)
+ m_cv_tokens_empty.wait(lock);
+
+ if (m_state == state_type::parsing_aborted)
+ throw detail::parsing_aborted_error();
+ }
+
+public:
+ /** A min_token_size below 1 is raised to 1; throws invalid_arg_error if the result exceeds max_token_size. */
+ parser_token_buffer(size_t min_token_size, size_t max_token_size) :
+ m_token_size_threshold(std::max<size_t>(min_token_size, 1)),
+ m_max_token_size(max_token_size),
+ m_state(state_type::parsing_progress)
+ {
+ if (m_token_size_threshold > m_max_token_size)
+ throw invalid_arg_error(
+ "initial token size threshold is already larger than the max token size.");
+ }
+
+ /**
+ * Check the size of the parser token buffer, and once it has reached the
+ * current threshold, swap it with the client buffer and notify the client.
+ *
+ * Call this from the parser thread.
+ *
+ * @param parser_tokens parser token buffer.
+ */
+ void check_and_notify(tokens_type& parser_tokens)
+ {
+ if (parser_tokens.size() < m_token_size_threshold)
+ // Still below the threshold.
+ return;
+
+ if (!tokens_empty())
+ {
+ if (m_token_size_threshold < (m_max_token_size/2))
+ {
+ // The client still holds tokens. Double the threshold and
+ // continue to parse.
+ m_token_size_threshold *= 2;
+ return;
+ }
+
+ // We cannot increase the threshold any more. Wait for the
+ // client to finish.
+ wait_until_tokens_empty();
+ }
+
+ std::unique_lock<std::mutex> lock(m_mtx_tokens);
+ m_tokens.swap(parser_tokens);
+ lock.unlock(); // release the mutex before notifying
+ m_cv_tokens_ready.notify_one();
+ }
+
+ /**
+ * Swap the current parser token buffer with the client buffer, and
+ * signal the end of parsing.
+ *
+ * Call this from the parser thread.
+ *
+ * @param parser_tokens parser token buffer.
+ */
+ void notify_and_finish(tokens_type& parser_tokens)
+ {
+ // Wait until the client tokens get used up.
+ wait_until_tokens_empty();
+
+ {
+ std::lock_guard<std::mutex> lock(m_mtx_tokens);
+ m_tokens.swap(parser_tokens);
+ m_state = state_type::parsing_ended;
+ }
+ m_cv_tokens_ready.notify_one();
+ }
+ /** Drop any pending tokens, switch to the aborted state and wake one waiter on m_cv_tokens_empty. */
+ void abort()
+ {
+ {
+ std::lock_guard<std::mutex> lock(m_mtx_tokens);
+ m_tokens.clear();
+ m_state = state_type::parsing_aborted;
+ }
+ m_cv_tokens_empty.notify_one();
+ }
+
+ /**
+ * Retrieve the tokens currently in the client token buffer.
+ *
+ * Call this from the client (non-parser) thread.
+ *
+ * @param tokens place to move the tokens in the client token buffer to.
+ *
+ * @return true if the parsing is still in progress, therefore more tokens
+ * are expected, false if this is the last set of tokens.
+ */
+ bool next_tokens(tokens_type& tokens)
+ {
+ tokens.clear();
+
+ // Wait until the parser passes a new set of tokens.
+ std::unique_lock<std::mutex> lock(m_mtx_tokens);
+ while (m_tokens.empty() && m_state == state_type::parsing_progress)
+ m_cv_tokens_ready.wait(lock);
+
+ // Get the new tokens and notify the parser.
+ tokens.swap(m_tokens);
+ state_type parsing_progress = m_state; // Make a copy so that lock can be released safely.
+
+ lock.unlock();
+
+ m_cv_tokens_empty.notify_one();
+
+ return parsing_progress == state_type::parsing_progress;
+ }
+
+ /**
+ * Return the current token size threshold. Call this only after the
+ * parsing has finished; m_state is read here without taking the mutex.
+ *
+ * @return current token size threshold, or 0 while parsing is in progress.
+ */
+ size_t token_size_threshold() const
+ {
+ if (m_state == state_type::parsing_progress)
+ return 0;
+
+ return m_token_size_threshold;
+ }
+};
+
+}}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/detail/thread.hpp b/include/orcus/detail/thread.hpp
new file mode 100644
index 0000000..2d63dbd
--- /dev/null
+++ b/include/orcus/detail/thread.hpp
@@ -0,0 +1,35 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_DETAIL_THREAD_HPP
+#define INCLUDED_ORCUS_DETAIL_THREAD_HPP
+
+#include <thread>
+
+namespace orcus { namespace detail { namespace thread {
+
+/**
+ * Takes ownership of a std::thread and joins it when the guard goes out of
+ * scope.  The guard is movable but not copyable.
+ */
+class scoped_guard
+{
+    std::thread m_thread;
+public:
+    /**
+     * @param thread thread to take ownership of.
+     */
+    scoped_guard(std::thread thread) : m_thread(std::move(thread)) {}
+
+    /**
+     * Transfer the owned thread from another guard, which is left without
+     * a joinable thread.
+     */
+    scoped_guard(scoped_guard&& other) noexcept : m_thread(std::move(other.m_thread)) {}
+
+    scoped_guard(const scoped_guard&) = delete;
+    scoped_guard& operator= (const scoped_guard&) = delete;
+
+    ~scoped_guard()
+    {
+        // A moved-from guard, or one constructed from an empty std::thread,
+        // holds a non-joinable thread.  Calling join() on it throws
+        // std::system_error, which would terminate the program when it
+        // escapes from the destructor.
+        if (m_thread.joinable())
+            m_thread.join();
+    }
+};
+
+}}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/dom_tree.hpp b/include/orcus/dom_tree.hpp
new file mode 100644
index 0000000..68df0d0
--- /dev/null
+++ b/include/orcus/dom_tree.hpp
@@ -0,0 +1,134 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_DOM_TREE_HPP
+#define INCLUDED_ORCUS_DOM_TREE_HPP
+
+#include "types.hpp"
+
+#include <vector>
+#include <ostream>
+#include <memory>
+
+namespace orcus {
+
+class xmlns_context;
+
+namespace sax {
+
+struct doctype_declaration;
+
+}
+
+namespace dom {
+
+class document_tree;
+
+/**
+ * Type of a node in the DOM tree, as reported by const_node::type().
+ *
+ * NOTE(review): presumably `declaration` stands for an XML declaration node
+ * and `element` for an element node - confirm against the implementation.
+ */
+enum class node_t : uint8_t
+{
+ unset,
+ declaration,
+ element,
+};
+
+/**
+ * Name of an entity, consisting of a namespace identifier and a name string.
+ */
+struct ORCUS_DLLPUBLIC entity_name
+{
+ /** namespace identifier part of the name. */
+ xmlns_id_t ns;
+ /** name part; a non-owning view. */
+ std::string_view name;
+
+ entity_name();
+ entity_name(std::string_view _name);
+ entity_name(xmlns_id_t _ns, std::string_view _name);
+
+ bool operator== (const entity_name& other) const;
+ bool operator!= (const entity_name& other) const;
+};
+
+/**
+ * Read-only handle to a node of a dom::document_tree.  The implementation
+ * details are hidden behind the private impl structure.
+ */
+class ORCUS_DLLPUBLIC const_node
+{
+ friend class document_tree;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ // Private constructor; document_tree is a friend and can use it.
+ const_node(std::unique_ptr<impl>&& _impl);
+public:
+ const_node();
+ const_node(const const_node& other);
+ const_node(const_node&& other);
+
+ ~const_node();
+
+ /** @return type of this node. */
+ node_t type() const;
+
+ /** @return number of child nodes. */
+ size_t child_count() const;
+
+ /**
+ * @param index index of the child node to get.
+ *              NOTE(review): presumably 0-based - confirm.
+ */
+ const_node child(size_t index) const;
+
+ /** @return name of this node. */
+ entity_name name() const;
+
+ // Attribute value lookup, either by a namespace-qualified entity name or
+ // by a plain name string.
+ std::string_view attribute(const entity_name& name) const;
+ std::string_view attribute(std::string_view name) const;
+
+ /** @return number of attributes. */
+ size_t attribute_count() const;
+
+ /** @return parent node. */
+ const_node parent() const;
+
+ void swap(const_node& other);
+
+ const_node& operator= (const const_node& other);
+
+ bool operator== (const const_node& other) const;
+ bool operator!= (const const_node& other) const;
+};
+
+/**
+ * Ordinary DOM tree representing the content of an XML document.  Instances
+ * are movable but not copyable.
+ */
+class ORCUS_DLLPUBLIC document_tree
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ document_tree(const document_tree&) = delete;
+ document_tree& operator= (const document_tree&) = delete;
+
+ /**
+ * @param cxt namespace context.
+ *            NOTE(review): taken by non-const reference; presumably it
+ *            must outlive the tree - confirm.
+ */
+ document_tree(xmlns_context& cxt);
+ document_tree(document_tree&& other);
+ ~document_tree();
+
+ /**
+ * Parse a given XML stream and build the content tree.
+ *
+ * @param strm XML stream.
+ */
+ void load(std::string_view strm);
+
+ /** @return root node of the tree. */
+ dom::const_node root() const;
+
+ /**
+ * @param name name of the declaration to get.
+ *
+ * @return declaration node associated with the given name.
+ */
+ dom::const_node declaration(std::string_view name) const;
+
+ /**
+ * Swap the content with another document_tree instance.
+ *
+ * @param other the document_tree instance to swap the content with.
+ */
+ void swap(document_tree& other);
+
+ /**
+ * @return pointer to the doctype declaration.
+ *         NOTE(review): presumably null when the document has none -
+ *         confirm.
+ */
+ const sax::doctype_declaration* get_doctype() const;
+
+ /**
+ * Dump the content of the tree to an output stream.
+ *
+ * @param os output stream to write to.
+ */
+ void dump_compact(std::ostream& os) const;
+};
+
+} // namespace dom
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/env.hpp b/include/orcus/env.hpp
new file mode 100644
index 0000000..47dc153
--- /dev/null
+++ b/include/orcus/env.hpp
@@ -0,0 +1,141 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef __ORCUS_ENV_HPP__
+#define __ORCUS_ENV_HPP__
+
+// orcus
+
+// Defines ORCUS_DLLPUBLIC and ORCUS_DLLLOCAL.
+//
+// Windows / Cygwin: ORCUS_DLLPUBLIC is empty under MinGW, dllexport when
+// __ORCUS_BUILDING_DLL is defined, empty when __ORCUS_STATIC_LIB is defined,
+// and dllimport otherwise.  ORCUS_DLLLOCAL is always empty.
+//
+// Other platforms: with __GNUC__ >= 4 the macros expand to the "default" and
+// "hidden" visibility attributes respectively; otherwise both are empty.
+#if defined _WIN32 || defined __CYGWIN__
+ #if defined __MINGW32__
+ #define ORCUS_DLLPUBLIC
+ #define ORCUS_DLLLOCAL
+ #elif defined __ORCUS_BUILDING_DLL
+ #ifdef __GNUC__
+ #define ORCUS_DLLPUBLIC __attribute__ ((dllexport))
+ #else
+ #define ORCUS_DLLPUBLIC __declspec(dllexport)
+ #endif
+ #elif defined __ORCUS_STATIC_LIB
+ #define ORCUS_DLLPUBLIC
+ #else
+ #ifdef __GNUC__
+ #define ORCUS_DLLPUBLIC __attribute__ ((dllimport))
+ #else
+ #define ORCUS_DLLPUBLIC __declspec(dllimport)
+ #endif
+ #endif
+ #define ORCUS_DLLLOCAL
+#else
+ #if __GNUC__ >= 4
+ #define ORCUS_DLLPUBLIC __attribute__ ((visibility ("default")))
+ #define ORCUS_DLLLOCAL __attribute__ ((visibility ("hidden")))
+ #else
+ #define ORCUS_DLLPUBLIC
+ #define ORCUS_DLLLOCAL
+ #endif
+#endif
+
+// orcus-parser
+
+// Defines ORCUS_PSR_DLLPUBLIC and ORCUS_PSR_DLLLOCAL.
+//
+// Windows / Cygwin: ORCUS_PSR_DLLPUBLIC is empty under MinGW, dllexport when
+// __ORCUS_PSR_BUILDING_DLL is defined, empty when __ORCUS_PSR_STATIC_LIB is
+// defined, and dllimport otherwise.  ORCUS_PSR_DLLLOCAL is always empty.
+//
+// Other platforms: with __GNUC__ >= 4 the macros expand to the "default" and
+// "hidden" visibility attributes respectively; otherwise both are empty.
+#if defined _WIN32 || defined __CYGWIN__
+ #if defined __MINGW32__
+ #define ORCUS_PSR_DLLPUBLIC
+ #define ORCUS_PSR_DLLLOCAL
+ #elif defined __ORCUS_PSR_BUILDING_DLL
+ #ifdef __GNUC__
+ #define ORCUS_PSR_DLLPUBLIC __attribute__ ((dllexport))
+ #else
+ #define ORCUS_PSR_DLLPUBLIC __declspec(dllexport)
+ #endif
+ #elif defined __ORCUS_PSR_STATIC_LIB
+ #define ORCUS_PSR_DLLPUBLIC
+ #else
+ #ifdef __GNUC__
+ #define ORCUS_PSR_DLLPUBLIC __attribute__ ((dllimport))
+ #else
+ #define ORCUS_PSR_DLLPUBLIC __declspec(dllimport)
+ #endif
+ #endif
+ #define ORCUS_PSR_DLLLOCAL
+#else
+ #if __GNUC__ >= 4
+ #define ORCUS_PSR_DLLPUBLIC __attribute__ ((visibility ("default")))
+ #define ORCUS_PSR_DLLLOCAL __attribute__ ((visibility ("hidden")))
+ #else
+ #define ORCUS_PSR_DLLPUBLIC
+ #define ORCUS_PSR_DLLLOCAL
+ #endif
+#endif
+
+// orcus-spreadsheet-model
+
+// Defines ORCUS_SPM_DLLPUBLIC and ORCUS_SPM_DLLLOCAL.
+//
+// Windows / Cygwin: ORCUS_SPM_DLLPUBLIC is empty under MinGW, dllexport when
+// __ORCUS_SPM_BUILDING_DLL is defined, empty when __ORCUS_SPM_STATIC_LIB is
+// defined, and dllimport otherwise.  ORCUS_SPM_DLLLOCAL is always empty.
+//
+// Other platforms: with __GNUC__ >= 4 the macros expand to the "default" and
+// "hidden" visibility attributes respectively; otherwise both are empty.
+#if defined _WIN32 || defined __CYGWIN__
+ #if defined __MINGW32__
+ #define ORCUS_SPM_DLLPUBLIC
+ #define ORCUS_SPM_DLLLOCAL
+ #elif defined __ORCUS_SPM_BUILDING_DLL
+ #ifdef __GNUC__
+ #define ORCUS_SPM_DLLPUBLIC __attribute__ ((dllexport))
+ #else
+ #define ORCUS_SPM_DLLPUBLIC __declspec(dllexport)
+ #endif
+ #elif defined __ORCUS_SPM_STATIC_LIB
+ #define ORCUS_SPM_DLLPUBLIC
+ #else
+ #ifdef __GNUC__
+ #define ORCUS_SPM_DLLPUBLIC __attribute__ ((dllimport))
+ #else
+ #define ORCUS_SPM_DLLPUBLIC __declspec(dllimport)
+ #endif
+ #endif
+ #define ORCUS_SPM_DLLLOCAL
+#else
+ #if __GNUC__ >= 4
+ #define ORCUS_SPM_DLLPUBLIC __attribute__ ((visibility ("default")))
+ #define ORCUS_SPM_DLLLOCAL __attribute__ ((visibility ("hidden")))
+ #else
+ #define ORCUS_SPM_DLLPUBLIC
+ #define ORCUS_SPM_DLLLOCAL
+ #endif
+#endif
+
+// orcus-mso
+
+// Defines ORCUS_MSO_DLLPUBLIC and ORCUS_MSO_DLLLOCAL.
+//
+// Windows / Cygwin: ORCUS_MSO_DLLPUBLIC is empty under MinGW, dllexport when
+// __ORCUS_MSO_BUILDING_DLL is defined, empty when __ORCUS_MSO_STATIC_LIB is
+// defined, and dllimport otherwise.  ORCUS_MSO_DLLLOCAL is always empty.
+//
+// Other platforms: with __GNUC__ >= 4 the macros expand to the "default" and
+// "hidden" visibility attributes respectively; otherwise both are empty.
+#if defined _WIN32 || defined __CYGWIN__
+ #if defined __MINGW32__
+ #define ORCUS_MSO_DLLPUBLIC
+ #define ORCUS_MSO_DLLLOCAL
+ #elif defined __ORCUS_MSO_BUILDING_DLL
+ #ifdef __GNUC__
+ #define ORCUS_MSO_DLLPUBLIC __attribute__ ((dllexport))
+ #else
+ #define ORCUS_MSO_DLLPUBLIC __declspec(dllexport)
+ #endif
+ #elif defined __ORCUS_MSO_STATIC_LIB
+ #define ORCUS_MSO_DLLPUBLIC
+ #else
+ #ifdef __GNUC__
+ #define ORCUS_MSO_DLLPUBLIC __attribute__ ((dllimport))
+ #else
+ #define ORCUS_MSO_DLLPUBLIC __declspec(dllimport)
+ #endif
+ #endif
+ #define ORCUS_MSO_DLLLOCAL
+#else
+ #if __GNUC__ >= 4
+ #define ORCUS_MSO_DLLPUBLIC __attribute__ ((visibility ("default")))
+ #define ORCUS_MSO_DLLLOCAL __attribute__ ((visibility ("hidden")))
+ #else
+ #define ORCUS_MSO_DLLPUBLIC
+ #define ORCUS_MSO_DLLLOCAL
+ #endif
+#endif
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/exception.hpp b/include/orcus/exception.hpp
new file mode 100644
index 0000000..5d1aa82
--- /dev/null
+++ b/include/orcus/exception.hpp
@@ -0,0 +1,152 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_EXCEPTION_HPP
+#define INCLUDED_ORCUS_EXCEPTION_HPP
+
+#include <stdexcept>
+#include <string>
+
+#include "env.hpp"
+
+namespace orcus {
+
+/**
+ * General exception type that carries a message string.  It derives from
+ * std::exception and serves as the base class of most of the other exception
+ * types declared in this header.
+ */
+class ORCUS_PSR_DLLPUBLIC general_error : public std::exception
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ explicit general_error(std::string msg);
+
+ /**
+ * @param cls NOTE(review): presumably a class or category label that is
+ *            combined with the message - confirm in the implementation.
+ * @param msg error message.
+ */
+ explicit general_error(std::string_view cls, std::string_view msg);
+ virtual ~general_error() noexcept;
+
+ /**
+ * @return error message as a null-terminated string.
+ */
+ virtual const char* what() const noexcept;
+
+protected:
+ /**
+ * Append a string to the error message.  Accessible to derived classes
+ * only.
+ *
+ * @param s string to append.
+ */
+ void append_msg(const std::string& s);
+
+private:
+ std::string m_msg;
+};
+
+/**
+ * Exception type derived from std::invalid_argument rather than from
+ * general_error.
+ *
+ * NOTE(review): presumably thrown when an invalid argument is passed to a
+ * function - confirm against the throw sites.
+ */
+class ORCUS_PSR_DLLPUBLIC invalid_arg_error : public std::invalid_argument
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ explicit invalid_arg_error(const std::string& msg);
+ virtual ~invalid_arg_error() noexcept;
+};
+
+/**
+ * Exception type derived from general_error.
+ *
+ * NOTE(review): presumably signals a problem with the structure of an XML
+ * document - confirm against the throw sites.
+ */
+class ORCUS_PSR_DLLPUBLIC xml_structure_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ explicit xml_structure_error(std::string msg);
+ virtual ~xml_structure_error() noexcept;
+};
+
+/**
+ * Exception type derived from general_error.
+ *
+ * NOTE(review): presumably signals a problem with the structure of a JSON
+ * document - confirm against the throw sites.
+ */
+class ORCUS_PSR_DLLPUBLIC json_structure_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ explicit json_structure_error(std::string msg);
+ virtual ~json_structure_error() noexcept;
+};
+
+/**
+ * Exception type derived from general_error.
+ *
+ * NOTE(review): presumably signals an invalid mapping definition - confirm
+ * against the throw sites.
+ */
+class ORCUS_PSR_DLLPUBLIC invalid_map_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ explicit invalid_map_error(std::string msg);
+ virtual ~invalid_map_error() noexcept;
+};
+
+/**
+ * Exception type derived from general_error.
+ *
+ * NOTE(review): presumably signals an invalid or unexpected value - confirm
+ * against the throw sites.
+ */
+class ORCUS_PSR_DLLPUBLIC value_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ explicit value_error(std::string msg);
+ virtual ~value_error() noexcept;
+};
+
+/**
+ * Error indicating improper xpath syntax.
+ */
+class ORCUS_PSR_DLLPUBLIC xpath_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ xpath_error(std::string msg);
+ virtual ~xpath_error() noexcept;
+};
+
+/**
+ * This gets thrown when a public interface method is expected to return a
+ * non-null pointer to another interface but actually returns a null pointer.
+ */
+class ORCUS_PSR_DLLPUBLIC interface_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ interface_error(std::string msg);
+ virtual ~interface_error() noexcept;
+};
+
+/**
+ * Exception related to a parsing error that includes an offset in the stream
+ * where the error occurred.
+ */
+class ORCUS_PSR_DLLPUBLIC parse_error : public general_error
+{
+ std::ptrdiff_t m_offset; /// offset in the stream where the error occurred.
+
+protected:
+ /**
+ * Constructor available to derived classes only.
+ *
+ * @param cls NOTE(review): presumably a class or category label that is
+ *            combined with the message - confirm in the implementation.
+ * @param msg error message.
+ * @param offset offset in the stream where the error occurred.
+ */
+ parse_error(std::string_view cls, std::string_view msg, std::ptrdiff_t offset);
+
+public:
+ /**
+ * @param msg error message.
+ * @param offset offset in the stream where the error occurred.
+ */
+ parse_error(std::string msg, std::ptrdiff_t offset);
+
+ /**
+ * Get the offset in a stream associated with the error.
+ *
+ * @return offset in a stream where the error occurred.
+ */
+ std::ptrdiff_t offset() const;
+
+ // NOTE(review): the two throw_with() helpers presumably build a message
+ // from msg_before + the middle value + msg_after and throw a parse_error
+ // with the given offset - confirm in the implementation.
+ static void throw_with(
+ std::string_view msg_before, char c, std::string_view msg_after, std::ptrdiff_t offset);
+
+ static void throw_with(
+ std::string_view msg_before, std::string_view msg, std::string_view msg_after, std::ptrdiff_t offset);
+};
+
+/**
+ * This exception is thrown when SAX parser detects a malformed XML document.
+ * It derives from parse_error and cannot be default-constructed.
+ */
+class ORCUS_PSR_DLLPUBLIC malformed_xml_error : public parse_error
+{
+public:
+ malformed_xml_error() = delete;
+
+ /**
+ * @param msg error message.
+ * @param offset offset in the stream associated with the error.
+ */
+ malformed_xml_error(std::string_view msg, std::ptrdiff_t offset);
+ virtual ~malformed_xml_error();
+};
+
+/**
+ * Exception related to parsing of zip archive stream.
+ */
+class ORCUS_PSR_DLLPUBLIC zip_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ zip_error(std::string_view msg);
+ virtual ~zip_error();
+};
+
+namespace detail {
+
+/**
+ * Internal error used in multi-threaded parsing to signal that the parser
+ * thread has been aborted.  It derives directly from std::exception, not
+ * from general_error, and carries no message of its own.
+ */
+class ORCUS_PSR_DLLPUBLIC parsing_aborted_error : public std::exception {};
+
+}
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/format_detection.hpp b/include/orcus/format_detection.hpp
new file mode 100644
index 0000000..f4754bc
--- /dev/null
+++ b/include/orcus/format_detection.hpp
@@ -0,0 +1,52 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_FORMAT_DETECTION_HPP
+#define ORCUS_FORMAT_DETECTION_HPP
+
+#include <orcus/env.hpp>
+#include <orcus/types.hpp>
+
+#include <cstdlib>
+#include <memory>
+
+namespace orcus {
+
+namespace iface {
+
+class import_filter;
+
+}
+
+namespace spreadsheet { namespace iface {
+
+class import_factory;
+
+}}
+
+/**
+ * Detect the format of a given document stream.
+ *
+ * @param strm document stream to detect the format of.
+ *
+ * @return format type detected for the stream.
+ *         NOTE(review): the value returned for an unrecognized stream is
+ *         not visible from this header - confirm in the implementation.
+ */
+ORCUS_DLLPUBLIC format_t detect(std::string_view strm);
+
+/**
+ * Create an instance of import_filter for a specified format.
+ *
+ * @param type Format type to create an instance of import_filter of.
+ * @param factory Pointer to an import factory instance. It must not be null.
+ *
+ * @return Pointer to an instance of import_filter for specified format.
+ */
+ORCUS_DLLPUBLIC std::shared_ptr<iface::import_filter> create_filter(
+ format_t type, spreadsheet::iface::import_factory* factory);
+
+} // namespace orcus
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/info.hpp b/include/orcus/info.hpp
new file mode 100644
index 0000000..61866fa
--- /dev/null
+++ b/include/orcus/info.hpp
@@ -0,0 +1,22 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_INFO_HPP
+#define INCLUDED_ORCUS_INFO_HPP
+
+#include "orcus/env.hpp"
+
+namespace orcus {
+
+// Version number accessors.
+// NOTE(review): presumably these return the major, minor and micro
+// components of the library version - the definitions are not part of this
+// header.
+ORCUS_DLLPUBLIC int get_version_major();
+ORCUS_DLLPUBLIC int get_version_minor();
+ORCUS_DLLPUBLIC int get_version_micro();
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/interface.hpp b/include/orcus/interface.hpp
new file mode 100644
index 0000000..b08a9ee
--- /dev/null
+++ b/include/orcus/interface.hpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_INTERFACE_HPP
+#define INCLUDED_ORCUS_INTERFACE_HPP
+
+#include "orcus/env.hpp"
+#include "orcus/types.hpp"
+
+#include <string>
+#include <memory>
+
+namespace orcus {
+
+struct config;
+
+namespace iface {
+
+/**
+ * Base interface for import filters.  Concrete filters implement
+ * read_file(), read_stream() and get_name().
+ */
+class ORCUS_DLLPUBLIC import_filter
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ /**
+ * @param input format type.
+ *              NOTE(review): presumably the format of the input this
+ *              filter handles - confirm.
+ */
+ import_filter(format_t input);
+ virtual ~import_filter();
+
+ /**
+ * Read the content of a file.
+ *
+ * @param filepath path to a local file. It must be a system path.
+ */
+ virtual void read_file(std::string_view filepath) = 0;
+
+ /**
+ * Read the content of an in-memory stream.
+ *
+ * @param stream in-memory stream to read from.
+ */
+ virtual void read_stream(std::string_view stream) = 0;
+
+ /**
+ * Get the name of a filter.
+ *
+ * @return name of a filter.
+ */
+ virtual std::string_view get_name() const = 0;
+
+ /**
+ * Set the configuration object of the filter.
+ *
+ * @param v configuration object.
+ */
+ void set_config(const orcus::config& v);
+
+ /**
+ * Get the configuration object of the filter.
+ *
+ * @return reference to the configuration object.
+ */
+ const orcus::config& get_config() const;
+};
+
+/**
+ * Base interface for document content dumpers.  Both dump methods are pure
+ * virtual and const.
+ */
+class ORCUS_DLLPUBLIC document_dumper
+{
+public:
+ virtual ~document_dumper();
+
+ /**
+ * Dump the content of a document in a specified format, either into a set
+ * of multiple files, or a single file.
+ *
+ * @param format Output format type in which to dump the content.
+ * @param output Depending on the output format type, this can be either an
+ * output directory path where multiple output files get
+ * created, or an output file path where the content of the
+ * entire document gets dumped into.
+ */
+ virtual void dump(dump_format_t format, const std::string& output) const = 0;
+
+ /**
+ * Dump the content of a document in a specialized "check" format suitable
+ * for content verification.
+ *
+ * @param os output stream to write the transformed content to.
+ */
+ virtual void dump_check(std::ostream& os) const = 0;
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/json_document_tree.hpp b/include/orcus/json_document_tree.hpp
new file mode 100644
index 0000000..e558c38
--- /dev/null
+++ b/include/orcus/json_document_tree.hpp
@@ -0,0 +1,504 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_JSON_DOCUMENT_TREE_HPP
+#define INCLUDED_ORCUS_JSON_DOCUMENT_TREE_HPP
+
+#include "env.hpp"
+#include "exception.hpp"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace orcus {
+
+struct json_config;
+
+namespace json {
+
+struct json_value;
+struct document_resource;
+class document_tree;
+
+/**
+ * Exception related to JSON document tree construction.  It derives from
+ * general_error.
+ */
+class ORCUS_DLLPUBLIC document_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ document_error(const std::string& msg);
+ virtual ~document_error();
+};
+
+/**
+ * Exception that gets thrown due to ambiguity when you specify a braced
+ * list that can be interpreted either as a key-value pair inside an object
+ * or as values of an array.  It derives from document_error.
+ */
+class ORCUS_DLLPUBLIC key_value_error : public document_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ key_value_error(const std::string& msg);
+ virtual ~key_value_error();
+};
+
+/**
+ * Type of a JSON node, as reported by const_node::type().  Each enumerator
+ * has an explicitly assigned value.
+ */
+enum class node_t : uint8_t
+{
+ /** node type is not set. */
+ unset = 0,
+ /** JSON string node. A node of this type contains a string value. */
+ string = 1,
+ /** JSON number node. A node of this type contains a numeric value. */
+ number = 2,
+ /**
+ * JSON object node. A node of this type contains one or more key-value
+ * pairs.
+ */
+ object = 3,
+ /**
+ * JSON array node. A node of this type contains one or more child nodes.
+ */
+ array = 4,
+ /**
+ * JSON boolean node containing a value of 'true'.
+ */
+ boolean_true = 5,
+ /**
+ * JSON boolean node containing a value of 'false'.
+ */
+ boolean_false = 6,
+ /**
+ * JSON node containing a 'null' value.
+ */
+ null = 7,
+};
+
+namespace detail { namespace init { class node; }}
+
+class const_node;
+class document_tree;
+
+/**
+ * Iterator type returned by const_node::begin() and const_node::end().  It
+ * dereferences to a const_node and can be moved in both directions.
+ *
+ * NOTE(review): presumably it walks over the child nodes of the node it was
+ * obtained from - confirm in the implementation.
+ */
+class ORCUS_DLLPUBLIC const_node_iterator
+{
+ friend class const_node;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ // Private constructor; const_node is a friend and can use it.
+ const_node_iterator(const document_tree* doc, const const_node& v, bool begin);
+
+public:
+ const_node_iterator();
+ const_node_iterator(const const_node_iterator& other);
+ ~const_node_iterator();
+
+ const const_node& operator*() const;
+ const const_node* operator->() const;
+
+ // Pre- and post-increment.
+ const_node_iterator& operator++();
+ const_node_iterator operator++(int);
+
+ // Pre- and post-decrement.
+ const_node_iterator& operator--();
+ const_node_iterator operator--(int);
+
+ bool operator== (const const_node_iterator& other) const;
+ bool operator!= (const const_node_iterator& other) const;
+
+ const_node_iterator& operator= (const const_node_iterator& other);
+};
+
+/**
+ * Each node instance represents a JSON value stored in the document tree.
+ * It's immutable.
+ */
+class ORCUS_DLLPUBLIC const_node
+{
+ friend class document_tree;
+ friend class const_node_iterator;
+
+protected:
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ // Non-public constructors, available to derived classes and friends.
+ const_node(const document_tree* doc, json_value* jv);
+ const_node(std::unique_ptr<impl>&& p);
+public:
+ const_node() = delete;
+
+ const_node(const const_node& other);
+ const_node(const_node&& rhs);
+ ~const_node();
+
+ /**
+ * Get the type of a node.
+ *
+ * @return node type.
+ */
+ node_t type() const;
+
+ /**
+ * Get the number of child nodes if any.
+ *
+ * @return number of child nodes.
+ */
+ size_t child_count() const;
+
+ /**
+ * Get a list of keys stored in a JSON object node.
+ *
+ * @exception orcus::json::document_error if the node is not of the object
+ * type.
+ * @return a list of keys.
+ */
+ std::vector<std::string_view> keys() const;
+
+ /**
+ * Get the key by index in a JSON object node. This method works only
+ * when the <b>preserve object order</b> option is set.
+ *
+ * @param index 0-based key index.
+ *
+ * @exception orcus::json::document_error if the node is not of the object
+ * type.
+ *
+ * @exception std::out_of_range if the index is equal to or greater than
+ * the number of keys stored in the node.
+ *
+ * @return key value.
+ */
+ std::string_view key(size_t index) const;
+
+ /**
+ * Query whether or not a particular key exists in a JSON object node.
+ *
+ * @param key key value.
+ *
+ * @return true if this object node contains the specified key, otherwise
+ * false. If this node is not of a JSON object type, false is
+ * returned.
+ */
+ bool has_key(std::string_view key) const;
+ /**
+ * Get a child node by index.
+ *
+ * @param index 0-based index of a child node.
+ *
+ * @exception orcus::json::document_error if the node is not one of the
+ * object or array types.
+ *
+ * @exception std::out_of_range if the index is equal to or greater than
+ * the number of child nodes that the node has.
+ *
+ * @return child node instance.
+ */
+ const_node child(size_t index) const;
+
+ /**
+ * Get a child node by textual key value.
+ *
+ * @param key textual key value to get a child node by.
+ *
+ * @exception orcus::json::document_error if the node is not of the object
+ * type, or the node doesn't have the specified key.
+ *
+ * @return child node instance.
+ */
+ const_node child(std::string_view key) const;
+
+ /**
+ * Get the parent node.
+ *
+ * @exception orcus::json::document_error if the node doesn't have a parent
+ * node which implies that the node is a root node.
+ *
+ * @return parent node instance.
+ */
+ const_node parent() const;
+
+ /**
+ * Get the last child node.
+ *
+ * @exception orcus::json::document_error if the node is not of array type
+ * or node has no children.
+ *
+ * @return last child node instance.
+ */
+ const_node back() const;
+
+ /**
+ * Get the string value of a JSON string node.
+ *
+ * @exception orcus::json::document_error if the node is not of the string
+ * type.
+ *
+ * @return string value.
+ */
+ std::string_view string_value() const;
+
+ /**
+ * Get the numeric value of a JSON number node.
+ *
+ * @exception orcus::json::document_error if the node is not of the number
+ * type.
+ *
+ * @return numeric value.
+ */
+ double numeric_value() const;
+
+ const_node& operator=(const const_node& other);
+ const_node& operator=(const_node&& other);
+
+ /**
+ * Return an identifier of the JSON value object that the node
+ * represents. The identifier is derived directly from the memory address
+ * of the value object.
+ *
+ * @return identifier of the JSON value object.
+ */
+ uintptr_t identity() const;
+
+ // Iterator pair of type const_node_iterator.
+ // NOTE(review): presumably spans the child nodes of this node - confirm.
+ const_node_iterator begin() const;
+ const_node_iterator end() const;
+};
+
+/**
+ * Each node instance represents a JSON value stored in the document tree.
+ * This class allows mutable operations.
+ */
+class ORCUS_DLLPUBLIC node : public const_node
+{
+ friend class document_tree;
+
+ // Private constructors; document_tree is a friend and can use them.
+ node(const document_tree* doc, json_value* jv);
+ node(const_node&& rhs);
+
+public:
+ node() = delete;
+
+ node(const node& other);
+ node(node&& rhs);
+ ~node();
+
+ node& operator=(const node& other);
+
+ // Assignment from an initializer node value.
+ node& operator=(const detail::init::node& v);
+
+ // Access by key.
+ // NOTE(review): whether a missing key gets inserted or raises an error
+ // is not visible from this header - confirm in the implementation.
+ node operator[](std::string_view key);
+
+ /**
+ * Get a child node by index.
+ *
+ * @param index 0-based index of a child node.
+ *
+ * @exception orcus::json::document_error if the node is not one of the
+ * object or array types.
+ *
+ * @exception std::out_of_range if the index is equal to or greater than
+ * the number of child nodes that the node has.
+ *
+ * @return child node instance.
+ */
+ node child(size_t index);
+
+ /**
+ * Get a child node by textual key value.
+ *
+ * @param key textual key value to get a child node by.
+ *
+ * @exception orcus::json::document_error if the node is not of the object
+ * type, or the node doesn't have the specified key.
+ *
+ * @return child node instance.
+ */
+ node child(std::string_view key);
+
+ /**
+ * Get the parent node.
+ *
+ * @exception orcus::json::document_error if the node doesn't have a parent
+ * node which implies that the node is a root node.
+ *
+ * @return parent node instance.
+ */
+ node parent();
+
+ /**
+ * Get the last child node.
+ *
+ * @exception orcus::json::document_error if the node is not of array type
+ * or node has no children.
+ *
+ * @return last child node instance.
+ */
+ node back();
+
+ /**
+ * Append a new node value to the end of the array.
+ *
+ * @exception orcus::json::document_error if the node is not of array
+ * type.
+ * @param v new node value to append to the end of the array.
+ */
+ void push_back(const detail::init::node& v);
+};
+
+/**
+ * This class represents a JSON array, to be used to explicitly create an
+ * array instance during initialization.  It is movable but not copyable.
+ */
+class ORCUS_DLLPUBLIC array
+{
+ friend class detail::init::node;
+ friend class document_tree;
+
+ // Initial values held by this array.
+ std::vector<detail::init::node> m_vs;
+public:
+ array();
+ array(const array&) = delete;
+ array(array&& other);
+
+ /**
+ * @param vs list of initial values.
+ */
+ array(std::initializer_list<detail::init::node> vs);
+ ~array();
+};
+
+/**
+ * This class represents a JSON object, primarily to be used to create an
+ * empty object instance.  It is movable but not copyable and declares no
+ * data members in this header.
+ */
+class ORCUS_DLLPUBLIC object
+{
+public:
+ object();
+ object(const object&) = delete;
+ object(object&& other);
+ ~object();
+};
+
+namespace detail { namespace init {
+
+/**
+ * Node to store an initial value during document tree initialization. It's
+ * not meant to be instantiated explicitly. A value passed from the braced
+ * initialization list is implicitly converted to an instance of this class.
+ */
+class ORCUS_DLLPUBLIC node
+{
+ friend class ::orcus::json::document_tree;
+ friend class ::orcus::json::node;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ // Converting constructors.  They are intentionally non-explicit so that
+ // values in a braced initialization list convert implicitly.
+ node(double v);
+ node(int v);
+ node(bool b);
+ node(std::nullptr_t);
+ node(const char* p);
+ node(const std::string& s);
+ node(std::initializer_list<detail::init::node> vs);
+ node(json::array array);
+ node(json::object obj);
+
+ // Movable, but neither copyable nor assignable.
+ node(const node& other) = delete;
+ node(node&& other);
+ ~node();
+
+ node& operator= (node other) = delete;
+
+private:
+ // Helpers reserved for the friend classes declared above.
+ // NOTE(review): presumably they convert the stored initial value into a
+ // json_value allocated from the given document resource - confirm in the
+ // implementation.
+ node_t type() const;
+ json_value* to_json_value(document_resource& res) const;
+ void store_to_node(document_resource& res, json_value* parent) const;
+};
+
+}}
+
+/**
+ * This class stores a parsed JSON document tree structure.  It is movable
+ * but not copyable.
+ */
+class ORCUS_DLLPUBLIC document_tree
+{
+ friend class const_node;
+ friend class node;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ // Private accessor; const_node and node are friends and can use it.
+ const document_resource& get_resource() const;
+
+public:
+ document_tree();
+ document_tree(const document_tree&) = delete;
+ document_tree(document_tree&& other);
+
+ /**
+ * @param res document resource.
+ *            NOTE(review): taken by non-const reference; presumably the
+ *            tree uses it instead of an internally owned resource -
+ *            confirm.
+ */
+ document_tree(document_resource& res);
+
+ // Construct a tree directly from initial values: a braced list, an
+ // explicit array, or an explicit object.
+ document_tree(std::initializer_list<detail::init::node> vs);
+ document_tree(array vs);
+ document_tree(object obj);
+ ~document_tree();
+
+ // Assignment counterparts of the three constructors above.
+ document_tree& operator= (std::initializer_list<detail::init::node> vs);
+ document_tree& operator= (array vs);
+ document_tree& operator= (object obj);
+
+ /**
+ * Load raw string stream containing a JSON structure to populate the
+ * document tree.
+ *
+ * @param stream stream containing a JSON structure.
+ * @param config configuration object.
+ */
+ void load(std::string_view stream, const json_config& config);
+
+ /**
+ * Get the root node of the document.
+ *
+ * @return root node of the document.
+ */
+ json::const_node get_document_root() const;
+
+ /**
+ * Get the root node of the document.
+ *
+ * @return root node of the document.
+ */
+ json::node get_document_root();
+
+ /**
+ * Dump the JSON document tree to string.
+ *
+ * @return a string representation of the JSON document tree.
+ */
+ std::string dump() const;
+
+ /**
+ * Dump the JSON document tree to an XML structure.
+ *
+ * @return a string containing an XML structure representing the JSON
+ * content.
+ */
+ std::string dump_xml() const;
+
+ /**
+ * Dump the JSON document tree as YAML output.
+ *
+ * @return string containing a YAML output representing the JSON document
+ * tree structure.
+ */
+ std::string dump_yaml() const;
+
+ /**
+ * Swap the content of the document with another document instance.
+ *
+ * @param other document instance to swap the content with.
+ */
+ void swap(document_tree& other);
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/json_global.hpp b/include/orcus/json_global.hpp
new file mode 100644
index 0000000..8c3a6e7
--- /dev/null
+++ b/include/orcus/json_global.hpp
@@ -0,0 +1,30 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_JSON_GLOBAL_HPP
+#define INCLUDED_ORCUS_JSON_GLOBAL_HPP
+
+#include "orcus/env.hpp"
+
+#include <string>
+
+namespace orcus { namespace json {
+
+/**
+ * Escape an input string so that it is appropriate for JSON output.
+ *
+ * @param input string value to escape.
+ *
+ * @return escaped string value, returned as a new string.
+ */
+ORCUS_PSR_DLLPUBLIC std::string escape_string(const std::string& input);
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/json_parser.hpp b/include/orcus/json_parser.hpp
new file mode 100644
index 0000000..b021ff8
--- /dev/null
+++ b/include/orcus/json_parser.hpp
@@ -0,0 +1,402 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_JSON_PARSER_HPP
+#define INCLUDED_ORCUS_JSON_PARSER_HPP
+
+#include "json_parser_base.hpp"
+
+#include <cassert>
+#include <cmath>
+
+namespace orcus {
+
+/**
+ * Handler with do-nothing callbacks for json_parser.  Every member function
+ * here is an event callback that json_parser invokes on its handler; all
+ * bodies are empty.
+ */
+class json_handler
+{
+public:
+    /**
+     * Called when the parsing begins.
+     */
+    void begin_parse() {}
+
+    /**
+     * Called when the parsing ends.
+     */
+    void end_parse() {}
+
+    /**
+     * Called when the opening bracket '[' of an array is encountered.
+     */
+    void begin_array() {}
+
+    /**
+     * Called when the closing bracket ']' of an array is encountered.
+     */
+    void end_array() {}
+
+    /**
+     * Called when the opening curly brace '{' of an object is encountered.
+     */
+    void begin_object() {}
+
+    /**
+     * Called when a key value string of an object is encountered.
+     *
+     * @param key key value string.
+     * @param transient true if the string value is stored in a temporary
+     *                  buffer which is not guaranteed to hold the string
+     *                  value after the end of this callback. When false, the
+     *                  pointer points to somewhere in the JSON stream being
+     *                  parsed.
+     */
+    void object_key([[maybe_unused]] std::string_view key, [[maybe_unused]] bool transient) {}
+
+    /**
+     * Called when the closing curly brace '}' of an object is encountered.
+     */
+    void end_object() {}
+
+    /**
+     * Called when a boolean 'true' keyword is encountered.
+     */
+    void boolean_true() {}
+
+    /**
+     * Called when a boolean 'false' keyword is encountered.
+     */
+    void boolean_false() {}
+
+    /**
+     * Called when a 'null' keyword is encountered.
+     */
+    void null() {}
+
+    /**
+     * Called when a string value is encountered.
+     *
+     * @param val string value.
+     * @param transient true if the string value is stored in a temporary
+     *                  buffer which is not guaranteed to hold the string
+     *                  value after the end of this callback. When false, the
+     *                  pointer points to somewhere in the JSON stream being
+     *                  parsed.
+     */
+    void string([[maybe_unused]] std::string_view val, [[maybe_unused]] bool transient) {}
+
+    /**
+     * Called when a numeric value is encountered.
+     *
+     * @param val numeric value.
+     */
+    void number([[maybe_unused]] double val) {}
+};
+
+/**
+ * Parser for JSON documents.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ * Refer to json_handler.
+ */
+template<typename HandlerT>
+class json_parser : public json::parser_base
+{
+public:
+ typedef HandlerT handler_type;
+
+ /**
+ * Constructor.
+ *
+ * @param content string view of the JSON content to parse.
+ * @param hdl handler class instance. Only a reference to it is stored, so
+ * it must stay alive for as long as the parser uses it.
+ */
+ json_parser(std::string_view content, handler_type& hdl);
+
+ /**
+ * Call this method to start parsing. A parse_error is thrown when the
+ * content is empty or malformed.
+ */
+ void parse();
+
+private:
+ // Parse the top-level value, which must be either an array or an object.
+ void root_value();
+ // Parse any single value and emit the matching handler callback.
+ void value();
+ // Parse an array starting at its '['.
+ void array();
+ // Emit end_array() and move past the ']' and any following whitespace.
+ void end_array();
+ // Parse an object starting at its '{'.
+ void object();
+ // Parse a numeric value and emit number().
+ void number();
+ // Parse a quoted string value and emit string().
+ void string();
+
+private:
+ // Handler that receives the parse events; not owned by the parser.
+ handler_type& m_handler;
+};
+
+/**
+ * Hand the content over to the base parser and remember the handler by
+ * reference.
+ */
+template<typename HandlerT>
+json_parser<HandlerT>::json_parser(std::string_view content, handler_type& hdl) :
+    json::parser_base(content),
+    m_handler(hdl)
+{
+}
+
+/**
+ * Entry point of the parser.  Emits begin_parse(), parses the root value,
+ * then emits end_parse().  Throws parse_error when no content is found after
+ * leading whitespace, or when characters remain after the root value.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::parse()
+{
+    m_handler.begin_parse();
+
+    skip_ws();
+
+    // Nothing left once leading whitespace is skipped.
+    if (!has_char())
+        throw parse_error("parse: no json content could be found in file", offset());
+
+    root_value();
+
+    // The root value must be the last thing in the stream.
+    if (has_char())
+        throw parse_error("parse: unexpected trailing string segment.", offset());
+
+    m_handler.end_parse();
+}
+
+/**
+ * Parse the top-level value.  Only an array ('[') or an object ('{') is
+ * accepted here; any other leading character raises a parse_error.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::root_value()
+{
+    const char first = cur_char();
+
+    if (first == '[')
+    {
+        array();
+        return;
+    }
+
+    if (first == '{')
+    {
+        object();
+        return;
+    }
+
+    parse_error::throw_with(
+        "root_value: either '[' or '{' was expected, but '", cur_char(), "' was found.", offset());
+}
+
+/**
+ * Parse one value at the current position, choosing the sub-parser from the
+ * first character, and emit the corresponding handler callback.  A character
+ * that starts no known value raises a parse_error.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::value()
+{
+    const char first = cur_char();
+
+    // Characters accepted by is_numeric() and a leading '-' both go to number().
+    if (is_numeric(first) || first == '-')
+    {
+        number();
+        return;
+    }
+
+    if (first == '[')
+    {
+        array();
+    }
+    else if (first == '{')
+    {
+        object();
+    }
+    else if (first == 't')
+    {
+        parse_true();
+        m_handler.boolean_true();
+    }
+    else if (first == 'f')
+    {
+        parse_false();
+        m_handler.boolean_false();
+    }
+    else if (first == 'n')
+    {
+        parse_null();
+        m_handler.null();
+    }
+    else if (first == '"')
+    {
+        string();
+    }
+    else
+    {
+        parse_error::throw_with("value: failed to parse '", cur_char(), "'.", offset());
+    }
+}
+
+/**
+ * Parse an array.  The current character must be the opening '['.
+ *
+ * Emits begin_array() on the handler, parses each element through value(),
+ * and finishes through end_array(), which also moves past the closing ']'
+ * and any whitespace after it.
+ *
+ * A parse_error is thrown when the stream ends before the closing ']', when
+ * a value is followed by something other than ',' or ']', or when a ',' is
+ * not followed by another value (trailing comma).
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::array()
+{
+    assert(cur_char() == '[');
+
+    // Set once a ',' has been seen; a ']' is not acceptable until the next
+    // value has been parsed.  Mirrors require_new_key in object().
+    bool require_new_value = false;
+
+    m_handler.begin_array();
+    for (next(); has_char(); next())
+    {
+        skip_ws();
+
+        // skip_ws() may have reached the end of the stream.  Stop here so
+        // that cur_char() is never consulted once has_char() is false.
+        if (!has_char())
+            break;
+
+        if (cur_char() == ']')
+        {
+            if (require_new_value)
+            {
+                // Trailing comma separated from ']' by whitespace, e.g. "[1, ]".
+                parse_error::throw_with(
+                    "array: new value expected, but '", cur_char(), "' found.", offset());
+            }
+
+            end_array();
+            return;
+        }
+
+        require_new_value = false;
+        value();
+        skip_ws();
+
+        if (!has_char())
+        {
+            // The stream ended right after a value.  Leave the loop here;
+            // otherwise the for statement would call next() while already
+            // past the end.
+            break;
+        }
+
+        switch (cur_char())
+        {
+            case ']':
+                end_array();
+                return;
+            case ',':
+                if (peek_char() == ']')
+                {
+                    parse_error::throw_with(
+                        "array: ']' expected but '", cur_char(), "' found.", offset() );
+                }
+                require_new_value = true;
+                continue;
+            default:
+                parse_error::throw_with(
+                    "array: either ']' or ',' expected, but '", cur_char(), "' found.", offset());
+        }
+    }
+
+    throw parse_error("array: failed to parse array.", offset());
+}
+
+/**
+ * Finish an array: notify the handler first, then step over the closing
+ * ']' and skip any whitespace that follows it.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::end_array()
+{
+    m_handler.end_array();
+
+    next();
+    skip_ws();
+}
+
+/**
+ * Parse an object.  The current character must be the opening '{'.
+ *
+ * Emits begin_object() on the handler, then for each member an object_key()
+ * callback followed by the callback(s) of its value, and finally
+ * end_object().  On success the position is left past the closing '}' and
+ * any whitespace after it.
+ *
+ * A parse_error is thrown when the stream ends prematurely, when a key is
+ * not a valid quoted string, when ':' is missing after a key, when a value
+ * is followed by something other than ',' or '}', or when a ',' is followed
+ * by '}' instead of a new key.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::object()
+{
+    assert(cur_char() == '{');
+
+    // Set once a ',' has been seen; a '}' is not acceptable until the next
+    // key has been parsed.
+    bool require_new_key = false;
+    m_handler.begin_object();
+    for (next(); has_char(); next())
+    {
+        skip_ws();
+        if (!has_char())
+            throw parse_error("object: stream ended prematurely before reaching a key.", offset());
+
+        switch (cur_char())
+        {
+            case '}':
+                if (require_new_key)
+                {
+                    parse_error::throw_with(
+                        "object: new key expected, but '", cur_char(), "' found.", offset());
+                }
+                m_handler.end_object();
+                next();
+                skip_ws();
+                return;
+            case '"':
+                break;
+            default:
+                parse_error::throw_with(
+                    "object: '\"' was expected, but '", cur_char(), "' found.", offset());
+        }
+        require_new_key = false;
+
+        parse_quoted_string_state res = parse_string();
+        if (!res.str)
+        {
+            // Parsing was unsuccessful.
+            if (res.length == parse_quoted_string_state::error_no_closing_quote)
+                throw parse_error("object: stream ended prematurely before reaching the closing quote of a key.", offset());
+            else if (res.length == parse_quoted_string_state::error_illegal_escape_char)
+                parse_error::throw_with(
+                    "object: illegal escape character '", cur_char(), "' in key value.", offset());
+            else
+                throw parse_error("object: unknown error while parsing a key value.", offset());
+        }
+
+        m_handler.object_key({res.str, res.length}, res.transient);
+
+        skip_ws();
+
+        // The stream may end right after the key.  Check before looking at
+        // the current character, as is done everywhere else in this loop.
+        if (!has_char())
+            throw parse_error("object: stream ended prematurely before reaching ':'.", offset());
+
+        if (cur_char() != ':')
+            parse_error::throw_with(
+                "object: ':' was expected, but '", cur_char(), "' found.", offset());
+
+        next();
+        skip_ws();
+
+        if (!has_char())
+            throw parse_error("object: stream ended prematurely before reaching a value.", offset());
+
+        value();
+
+        skip_ws();
+        if (!has_char())
+            throw parse_error("object: stream ended prematurely before reaching either '}' or ','.", offset());
+
+        switch (cur_char())
+        {
+            case '}':
+                m_handler.end_object();
+                next();
+                skip_ws();
+                return;
+            case ',':
+                require_new_key = true;
+                continue;
+            default:
+                parse_error::throw_with(
+                    "object: either '}' or ',' expected, but '", cur_char(), "' found.", offset());
+        }
+    }
+
+    throw parse_error("object: closing '}' was never reached.", offset());
+}
+
+/**
+ * Parse a numeric value, pass it to the handler's number() callback, and
+ * skip the whitespace that follows.  The current character must satisfy
+ * is_numeric() or be '-'.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::number()
+{
+    assert(is_numeric(cur_char()) || cur_char() == '-');
+
+    m_handler.number(parse_double_or_throw());
+    skip_ws();
+}
+
+/**
+ * Parse a quoted string value and pass it to the handler's string()
+ * callback.  When parse_string() reports a failure (null str), the error
+ * code stored in the length field selects the parse_error that is thrown.
+ */
+template<typename HandlerT>
+void json_parser<HandlerT>::string()
+{
+    const parse_quoted_string_state parsed = parse_string();
+
+    if (!parsed.str)
+    {
+        // Parsing was unsuccessful.
+        if (parsed.length == parse_quoted_string_state::error_no_closing_quote)
+            throw parse_error("string: stream ended prematurely before reaching the closing quote.", offset());
+        else if (parsed.length == parse_quoted_string_state::error_illegal_escape_char)
+            parse_error::throw_with("string: illegal escape character '", cur_char(), "'.", offset());
+        else
+            throw parse_error("string: unknown error.", offset());
+    }
+
+    m_handler.string({parsed.str, parsed.length}, parsed.transient);
+}
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/json_parser_base.hpp b/include/orcus/json_parser_base.hpp
new file mode 100644
index 0000000..461808e
--- /dev/null
+++ b/include/orcus/json_parser_base.hpp
@@ -0,0 +1,46 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_JSON_PARSER_BASE_HPP
+#define INCLUDED_ORCUS_JSON_PARSER_BASE_HPP
+
+#include "parser_base.hpp"
+#include "parser_global.hpp"
+#include "exception.hpp"
+
+#include <memory>
+
+namespace orcus { namespace json {
+
+/**
+ * JSON-specific extension of the generic ::orcus::parser_base. It declares
+ * the scanning helpers used by json_parser; their implementations are not
+ * part of this header. All members are protected or private, so the class
+ * is only usable as a base class.
+ */
+class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+protected:
+
+ // Neither default-constructible nor copyable.
+ parser_base() = delete;
+ parser_base(const parser_base&) = delete;
+ parser_base& operator=(const parser_base&) = delete;
+
+ // NOTE(review): content is presumably the JSON text to parse, and as a
+ // string_view it likely has to outlive the parser -- confirm.
+ parser_base(std::string_view content);
+ ~parser_base();
+
+ // NOTE(review): presumably advances past whitespace characters -- confirm.
+ void skip_ws();
+ // json_parser calls these when the current character is 't', 'f' and 'n'
+ // respectively. NOTE(review): presumably they consume the 'true', 'false'
+ // and 'null' keywords and throw on mismatch -- confirm.
+ void parse_true();
+ void parse_false();
+ void parse_null();
+ // Returns the parsed numeric value. NOTE(review): judging by the name it
+ // throws when no number can be parsed -- confirm.
+ double parse_double_or_throw();
+
+ // Callers treat a null str in the returned state as failure, with the
+ // length field then holding an error code.
+ parse_quoted_string_state parse_string();
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/json_parser_thread.hpp b/include/orcus/json_parser_thread.hpp
new file mode 100644
index 0000000..8328ef1
--- /dev/null
+++ b/include/orcus/json_parser_thread.hpp
@@ -0,0 +1,104 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_JSON_PARSER_THREAD_HPP
+#define INCLUDED_ORCUS_JSON_PARSER_THREAD_HPP
+
+#include "env.hpp"
+#include "types.hpp"
+
+#include <memory>
+#include <vector>
+#include <ostream>
+#include <variant>
+
+namespace orcus {
+
+class string_pool;
+
+namespace json {
+
+/**
+ * Statistics returned by parser_thread::get_stats().
+ */
+struct ORCUS_PSR_DLLPUBLIC parser_stats
+{
+ // NOTE(review): presumably the token buffer size threshold in effect in
+ // the parser thread -- confirm against the implementation.
+ size_t token_buffer_size_threshold;
+};
+
+/**
+ * Type of a parse token. Apart from unknown and parse_error, each
+ * enumerator carries the name of one json_handler callback.
+ */
+enum class parse_token_t
+{
+ unknown,
+ begin_parse,
+ end_parse,
+ begin_array,
+ end_array,
+ begin_object,
+ object_key,
+ end_object,
+ boolean_true,
+ boolean_false,
+ null,
+ string,
+ number,
+ parse_error,
+};
+
+/**
+ * A single parse token: a token type plus a value that holds either a
+ * string view, a parse error value, or a double.
+ */
+struct ORCUS_PSR_DLLPUBLIC parse_token
+{
+ using value_type = std::variant<std::string_view, parse_error_value_t, double>;
+
+ parse_token_t type;
+ value_type value;
+
+ parse_token();
+ parse_token(parse_token_t _type);
+ parse_token(parse_token_t _type, std::string_view s);
+ // NOTE(review): presumably builds a parse_error token from an error
+ // message and the offset where the error occurred -- confirm.
+ parse_token(std::string_view s, std::ptrdiff_t offset);
+ // NOTE(review): presumably builds a number token -- confirm.
+ parse_token(double value);
+
+ parse_token(const parse_token& other);
+
+ // Copy-constructible, but not assignable.
+ parse_token& operator= (parse_token) = delete;
+
+ bool operator== (const parse_token& other) const;
+ bool operator!= (const parse_token& other) const;
+};
+
+// Sequence of parse tokens, as handed out by parser_thread::next_tokens().
+typedef std::vector<parse_token> parse_tokens_t;
+
+// Stream output for a token sequence; the output format is defined by the
+// implementation.
+ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const parse_tokens_t& tokens);
+
+/**
+ * Produces JSON parse tokens in batches that the caller retrieves with
+ * next_tokens(). NOTE(review): judging by the name, start() is meant to be
+ * run on a separate thread from the one calling next_tokens() -- confirm.
+ */
+class ORCUS_PSR_DLLPUBLIC parser_thread
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ // NOTE(review): p and n presumably give the start and the size of the
+ // JSON content buffer, and min_token_size / max_token_size bound the
+ // token batch size -- confirm.
+ parser_thread(const char* p, size_t n, size_t min_token_size);
+ parser_thread(const char* p, size_t n, size_t min_token_size, size_t max_token_size);
+ ~parser_thread();
+
+ // NOTE(review): presumably runs the parsing -- confirm.
+ void start();
+
+ /**
+ * Wait until new set of tokens becomes available.
+ *
+ * @param tokens new set of tokens.
+ *
+ * @return true if the parsing is still in progress (therefore more tokens
+ * to come), false if it's done i.e. this is the last token set.
+ */
+ bool next_tokens(parse_tokens_t& tokens);
+
+ // Return the parser statistics.
+ parser_stats get_stats() const;
+
+ // NOTE(review): presumably exchanges the internal string pool with the
+ // given one, so that strings referenced by the tokens stay alive in the
+ // caller's pool -- confirm.
+ void swap_string_pool(string_pool& pool);
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/json_structure_tree.hpp b/include/orcus/json_structure_tree.hpp
new file mode 100644
index 0000000..ad77f5c
--- /dev/null
+++ b/include/orcus/json_structure_tree.hpp
@@ -0,0 +1,137 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_JSON_STRUCTURE_TREE_HPP
+#define INCLUDED_ORCUS_JSON_STRUCTURE_TREE_HPP
+
+#include "orcus/env.hpp"
+#include "orcus/types.hpp"
+
+#include <ostream>
+#include <memory>
+#include <vector>
+#include <functional>
+
+namespace orcus { namespace json {
+
+/**
+ * Description of one table range, passed to the handler given to
+ * structure_tree::process_ranges().
+ */
+struct ORCUS_DLLPUBLIC table_range_t
+{
+ // NOTE(review): presumably the field paths that make up the range -- confirm.
+ std::vector<std::string> paths;
+ // NOTE(review): presumably the row group paths of the range -- confirm.
+ std::vector<std::string> row_groups;
+};
+
+/**
+ * Tree built by parse() from a JSON stream. It can be normalized, dumped,
+ * traversed through a walker, and processed for table ranges.
+ */
+class ORCUS_DLLPUBLIC structure_tree
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+
+ enum class node_type : short { unknown = 0, array = 1, object = 2, object_key = 3, value = 4 };
+
+ // Properties of a single node, as returned by walker::get_node().
+ struct node_properties
+ {
+ node_type type;
+ // NOTE(review): presumably true when the node repeats -- confirm.
+ bool repeat;
+ };
+
+ /**
+ * Cursor for traversing the nodes of a structure_tree. Instances bound
+ * to a tree are obtained through structure_tree::get_walker().
+ */
+ class ORCUS_DLLPUBLIC walker
+ {
+ friend class structure_tree;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ // Private; accessible to structure_tree through the friend declaration.
+ walker(const structure_tree::impl* parent_impl);
+ public:
+ walker();
+ walker(const walker& other);
+ ~walker();
+
+ /**
+ * Set the current position to the root node. Call get_node()
+ * afterward to get its properties.
+ */
+ void root();
+
+ /**
+ * Move down to a child node at specified position. Call
+ * child_count() to get the number of child nodes the current node
+ * has. A child node position is 0-based and must be less than the
+ * child count.
+ *
+ * @param child_pos 0-based index of the child node to move down to.
+ */
+ void descend(size_t child_pos);
+
+ /**
+ * Move up to the parent node of the current node.
+ */
+ void ascend();
+
+ /**
+ * Return the number of child nodes the current node has.
+ *
+ * @return number of child nodes of the current node.
+ */
+ size_t child_count() const;
+
+ /**
+ * Get the properties of the current node.
+ */
+ node_properties get_node() const;
+
+ /**
+ * Build one or more field paths for the current value node. For a
+ * value node that is a child of an object, you'll always get one
+ * path, whereas for a value node that is a child of an array, you may
+ * get more than one field path.
+ *
+ * @return one or more field paths built for the current value node.
+ */
+ std::vector<std::string> build_field_paths() const;
+
+ /**
+ * Build a path for the parent of the current repeating node. A row
+ * group is an anchor to which repeating nodes get anchored. It is
+ * used to determine when to increment row position during mapping.
+ *
+ * @return path for the row group of the current repeating node.
+ */
+ std::string build_row_group_path() const;
+ };
+
+ // Not copyable.
+ structure_tree(const structure_tree&) = delete;
+ structure_tree& operator= (const structure_tree&) = delete;
+
+ structure_tree();
+ ~structure_tree();
+
+ // NOTE(review): stream is presumably the JSON content to build the tree
+ // from -- confirm.
+ void parse(std::string_view stream);
+
+ /**
+ * For now, normalizing a tree just means sorting child nodes. We may add
+ * other normalization stuff later.
+ */
+ void normalize_tree();
+
+ // Write a dump of the tree to the given output stream.
+ void dump_compact(std::ostream& os) const;
+
+ // Return a walker for this tree.
+ walker get_walker() const;
+
+ // Callback type that receives each table range by rvalue reference.
+ using range_handler_type = std::function<void(table_range_t&&)>;
+
+ // NOTE(review): presumably calls rh once for each range detected in the
+ // tree -- confirm.
+ void process_ranges(range_handler_type rh) const;
+};
+
+// Stream output for structure_tree::node_type values; the text written for
+// each value is defined by the implementation.
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, structure_tree::node_type nt);
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/measurement.hpp b/include/orcus/measurement.hpp
new file mode 100644
index 0000000..7444ae0
--- /dev/null
+++ b/include/orcus/measurement.hpp
@@ -0,0 +1,41 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_MEASUREMENT_HPP
+#define ORCUS_MEASUREMENT_HPP
+
+#include "types.hpp"
+#include "env.hpp"
+
+#include <cstdlib>
+#include <string>
+
+namespace orcus {
+
+// String-to-value conversion helpers.
+// NOTE(review): p_parse_ended, when not null, presumably receives the
+// position in s where parsing stopped -- confirm.
+ORCUS_DLLPUBLIC double to_double(std::string_view s, const char** p_parse_ended = nullptr);
+ORCUS_DLLPUBLIC long to_long(std::string_view s, const char** p_parse_ended = nullptr);
+ORCUS_DLLPUBLIC bool to_bool(std::string_view s);
+
+/**
+ * Parse a string value containing a part representing a numerical value
+ * optionally followed by a part representing a unit of measurement.
+ *
+ * Examples of such string value are: "1.234in", "0.34cm" and so on.
+ *
+ * @param str original string value.
+ *
+ * @return structure containing a numerical value and a unit of measurement
+ * that the original string value represents.
+ */
+ORCUS_DLLPUBLIC length_t to_length(std::string_view str);
+
+// NOTE(review): presumably converts value from unit_from to unit_to and
+// returns the converted value -- confirm.
+ORCUS_DLLPUBLIC double convert(double value, length_unit_t unit_from, length_unit_t unit_to);
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_csv.hpp b/include/orcus/orcus_csv.hpp
new file mode 100644
index 0000000..3e34c15
--- /dev/null
+++ b/include/orcus/orcus_csv.hpp
@@ -0,0 +1,41 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_ORCUS_CSV_HPP
+#define ORCUS_ORCUS_CSV_HPP
+
+#include "interface.hpp"
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface {
+ class import_factory;
+}}
+
+/**
+ * Import filter class derived from iface::import_filter. It is constructed
+ * with an import factory and overrides read_file(), read_stream() and
+ * get_name(). NOTE(review): judging by the name it handles CSV content --
+ * confirm.
+ */
+class ORCUS_DLLPUBLIC orcus_csv : public iface::import_filter
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ // Neither default-constructible nor copyable.
+ orcus_csv() = delete;
+ orcus_csv(const orcus_csv&) = delete;
+ orcus_csv& operator=(const orcus_csv&) = delete;
+
+ // NOTE(review): factory presumably receives the imported content and is
+ // not owned by this object -- confirm.
+ orcus_csv(spreadsheet::iface::import_factory* factory);
+ ~orcus_csv();
+
+ virtual void read_file(std::string_view filepath) override;
+ virtual void read_stream(std::string_view stream) override;
+
+ virtual std::string_view get_name() const override;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_gnumeric.hpp b/include/orcus/orcus_gnumeric.hpp
new file mode 100644
index 0000000..54f74a2
--- /dev/null
+++ b/include/orcus/orcus_gnumeric.hpp
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_ORCUS_GNUMERIC_HPP
+#define ORCUS_ORCUS_GNUMERIC_HPP
+
+#include "interface.hpp"
+
+#include <memory>
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface { class import_factory; }}
+
+/**
+ * Import filter class derived from iface::import_filter. It is constructed
+ * with an import factory and overrides read_file(), read_stream() and
+ * get_name(). NOTE(review): judging by the name it handles Gnumeric
+ * documents -- confirm.
+ */
+class ORCUS_DLLPUBLIC orcus_gnumeric : public iface::import_filter
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+public:
+ // Neither default-constructible nor copyable.
+ orcus_gnumeric() = delete;
+ orcus_gnumeric(const orcus_gnumeric&) = delete;
+ orcus_gnumeric& operator=(const orcus_gnumeric&) = delete;
+
+ orcus_gnumeric(spreadsheet::iface::import_factory* factory);
+ ~orcus_gnumeric();
+
+ // NOTE(review): presumably returns true when the given byte buffer looks
+ // like content this filter can read -- confirm.
+ static bool detect(const unsigned char* blob, size_t size);
+
+ virtual void read_file(std::string_view filepath) override;
+
+ virtual void read_stream(std::string_view stream) override;
+
+ virtual std::string_view get_name() const override;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_import_ods.hpp b/include/orcus/orcus_import_ods.hpp
new file mode 100644
index 0000000..1a94d0b
--- /dev/null
+++ b/include/orcus/orcus_import_ods.hpp
@@ -0,0 +1,32 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_ORCUS_IMPORT_ODS_HPP
+#define ORCUS_ORCUS_IMPORT_ODS_HPP
+
+#include "interface.hpp"
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface {
+ class import_styles;
+}}
+
+/**
+ * Holder of a static import function. The class cannot be instantiated or
+ * copied; read_styles() is its only usable member.
+ */
+class ORCUS_DLLPUBLIC import_ods
+{
+public:
+ import_ods() = delete;
+ import_ods(const import_ods&) = delete;
+ import_ods& operator=(const import_ods&) = delete;
+
+ // NOTE(review): s is presumably the styles content to parse, and data the
+ // interface that receives the imported styles -- confirm.
+ static void read_styles(std::string_view s, spreadsheet::iface::import_styles* data);
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_import_xlsx.hpp b/include/orcus/orcus_import_xlsx.hpp
new file mode 100644
index 0000000..8523299
--- /dev/null
+++ b/include/orcus/orcus_import_xlsx.hpp
@@ -0,0 +1,37 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ORCUS_IMPORT_XLSX_HPP
+#define INCLUDED_ORCUS_ORCUS_IMPORT_XLSX_HPP
+
+#include "interface.hpp"
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface {
+ class import_table;
+ class import_reference_resolver;
+}}
+
+/**
+ * Holder of a static import function. The class cannot be instantiated or
+ * copied; read_table() is its only usable member.
+ */
+class ORCUS_DLLPUBLIC import_xlsx
+{
+public:
+ import_xlsx() = delete;
+ import_xlsx(const import_xlsx&) = delete;
+ import_xlsx& operator=(const import_xlsx&) = delete;
+
+ // NOTE(review): s is presumably the table content to parse, table the
+ // interface that receives the imported table, and resolver the interface
+ // used to resolve cell references -- confirm.
+ static void read_table(
+ std::string_view s,
+ spreadsheet::iface::import_table& table,
+ spreadsheet::iface::import_reference_resolver& resolver);
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_json.hpp b/include/orcus/orcus_json.hpp
new file mode 100644
index 0000000..7ec487f
--- /dev/null
+++ b/include/orcus/orcus_json.hpp
@@ -0,0 +1,73 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ORCUS_JSON_HPP
+#define INCLUDED_ORCUS_ORCUS_JSON_HPP
+
+#include "env.hpp"
+#include "./spreadsheet/types.hpp"
+
+#include <memory>
+#include <string_view>
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface {
+
+class import_factory;
+
+}}
+
+/**
+ * Class for reading JSON content with mapping rules into a document model
+ * through an import factory. Mapping rules can be given one by one
+ * (set_cell_link(), start_range() ... commit_range()), read from a map
+ * definition, or detected from the content.
+ */
+class ORCUS_DLLPUBLIC orcus_json
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+
+ // Not copyable.
+ orcus_json(const orcus_json&) = delete;
+ orcus_json& operator= (const orcus_json&) = delete;
+
+ orcus_json(spreadsheet::iface::import_factory* im_fact);
+ ~orcus_json();
+
+ // NOTE(review): presumably links the value at the given path to a single
+ // cell at (row, col) of the named sheet -- confirm.
+ void set_cell_link(std::string_view path, std::string_view sheet, spreadsheet::row_t row, spreadsheet::col_t col);
+
+ // NOTE(review): presumably begins the definition of a mapped range whose
+ // top-left cell is (row, col) on the named sheet; the definition is
+ // finished with commit_range() -- confirm.
+ void start_range(
+ std::string_view sheet, spreadsheet::row_t row, spreadsheet::col_t col, bool row_header);
+
+ // NOTE(review): these presumably apply to the range started by the most
+ // recent start_range() call -- confirm.
+ void append_field_link(std::string_view path, std::string_view label);
+ void set_range_row_group(std::string_view path);
+ void commit_range();
+
+ // NOTE(review): presumably appends a sheet with the given name to the
+ // document model -- confirm.
+ void append_sheet(std::string_view name);
+
+ // NOTE(review): stream is presumably the JSON content to import using the
+ // mapping rules defined so far -- confirm.
+ void read_stream(std::string_view stream);
+
+ /**
+ * Read a JSON string that contains an entire set of mapping rules.
+ *
+ * This method also inserts all necessary sheets into the document model.
+ *
+ * @param stream JSON string.
+ */
+ void read_map_definition(std::string_view stream);
+
+ /**
+ * Read a JSON string, and detect and define mapping rules for one or more
+ * ranges.
+ *
+ * @param stream JSON string.
+ */
+ void detect_map_definition(std::string_view stream);
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_ods.hpp b/include/orcus/orcus_ods.hpp
new file mode 100644
index 0000000..08eb197
--- /dev/null
+++ b/include/orcus/orcus_ods.hpp
@@ -0,0 +1,58 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ORCUS_ODS_HPP
+#define INCLUDED_ORCUS_ORCUS_ODS_HPP
+
+#include "orcus/spreadsheet/import_interface.hpp"
+#include "orcus/env.hpp"
+#include "interface.hpp"
+
+#include <memory>
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface { class import_factory; }}
+
+struct orcus_ods_impl;
+class zip_archive;
+class zip_archive_stream;
+
+/**
+ * Import filter class derived from iface::import_filter.  It is constructed
+ * with an import factory and overrides read_file(), read_stream() and
+ * get_name().  NOTE(review): judging by the name and the zip_archive
+ * helpers it handles ODS documents -- confirm.
+ */
+class ORCUS_DLLPUBLIC orcus_ods : public iface::import_filter
+{
+public:
+    // Not copyable.  Deleted explicitly, as in the sibling filter classes,
+    // instead of being declared private and left undefined; a copy attempt
+    // is now always rejected at compile time, also from inside the class.
+    orcus_ods(const orcus_ods&) = delete;
+    orcus_ods& operator= (const orcus_ods&) = delete;
+
+    orcus_ods(spreadsheet::iface::import_factory* factory);
+    ~orcus_ods();
+
+    // NOTE(review): presumably returns true when the given byte buffer
+    // looks like content this filter can read -- confirm.
+    static bool detect(const unsigned char* blob, size_t size);
+
+    virtual void read_file(std::string_view filepath) override;
+
+    virtual void read_stream(std::string_view stream) override;
+
+    virtual std::string_view get_name() const override;
+
+private:
+    // Internal helpers working on the zip archive / raw buffers.
+    static void list_content(const zip_archive& archive);
+    void read_styles(const zip_archive& archive);
+    void read_content(const zip_archive& archive);
+    void read_content_xml(const unsigned char* p, size_t size);
+
+    void read_file_impl(zip_archive_stream* stream);
+
+private:
+    // Opaque implementation state, defined outside of this header.
+    struct impl;
+    std::unique_ptr<impl> mp_impl;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_parquet.hpp b/include/orcus/orcus_parquet.hpp
new file mode 100644
index 0000000..1dccf45
--- /dev/null
+++ b/include/orcus/orcus_parquet.hpp
@@ -0,0 +1,41 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "./interface.hpp"
+#include "./spreadsheet/import_interface.hpp"
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface { class import_factory; }}
+
+/**
+ * Import filter class derived from iface::import_filter. It is constructed
+ * with an import factory and overrides read_file(), read_stream() and
+ * get_name(). NOTE(review): judging by the name it handles Parquet files --
+ * confirm.
+ */
+class ORCUS_DLLPUBLIC orcus_parquet : public iface::import_filter
+{
+public:
+ // Not copyable.
+ orcus_parquet(const orcus_parquet&) = delete;
+ orcus_parquet& operator=(const orcus_parquet&) = delete;
+
+ orcus_parquet(spreadsheet::iface::import_factory* factory);
+ ~orcus_parquet();
+
+ // NOTE(review): presumably returns true when the given byte buffer looks
+ // like content this filter can read -- confirm.
+ static bool detect(const unsigned char* blob, std::size_t size);
+
+ virtual void read_file(std::string_view filepath) override;
+
+ virtual void read_stream(std::string_view stream) override;
+
+ virtual std::string_view get_name() const override;
+
+private:
+ // Opaque implementation state, defined outside of this header.
+ class impl;
+ std::unique_ptr<impl> mp_impl;
+};
+
+}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_xls_xml.hpp b/include/orcus/orcus_xls_xml.hpp
new file mode 100644
index 0000000..4534bfc
--- /dev/null
+++ b/include/orcus/orcus_xls_xml.hpp
@@ -0,0 +1,43 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ORCUS_XLS_XML_HPP
+#define INCLUDED_ORCUS_ORCUS_XLS_XML_HPP
+
+#include "interface.hpp"
+#include <memory>
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface { class import_factory; }}
+
+struct orcus_xls_xml_impl;
+
+/**
+ * Import filter class derived from iface::import_filter. It is constructed
+ * with an import factory and overrides read_file(), read_stream() and
+ * get_name(). NOTE(review): judging by the name it handles the XML-based
+ * Excel format -- confirm.
+ */
+class ORCUS_DLLPUBLIC orcus_xls_xml : public iface::import_filter
+{
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ orcus_xls_xml(spreadsheet::iface::import_factory* factory);
+ ~orcus_xls_xml();
+
+ // Not copyable.
+ orcus_xls_xml(const orcus_xls_xml&) = delete;
+ orcus_xls_xml& operator= (const orcus_xls_xml&) = delete;
+
+ // NOTE(review): presumably returns true when the given byte buffer looks
+ // like content this filter can read -- confirm.
+ static bool detect(const unsigned char* blob, size_t size);
+
+ virtual void read_file(std::string_view filepath) override;
+ virtual void read_stream(std::string_view stream) override;
+
+ virtual std::string_view get_name() const override;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_xlsx.hpp b/include/orcus/orcus_xlsx.hpp
new file mode 100644
index 0000000..68b01c0
--- /dev/null
+++ b/include/orcus/orcus_xlsx.hpp
@@ -0,0 +1,87 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ORCUS_XLSX_HPP
+#define INCLUDED_ORCUS_ORCUS_XLSX_HPP
+
+#include "interface.hpp"
+
+#include <memory>
+
+namespace orcus {
+
+namespace spreadsheet { namespace iface { class import_factory; }}
+
+struct xlsx_rel_sheet_info;
+struct xlsx_rel_table_info;
+struct xlsx_rel_pivot_cache_info;
+struct xlsx_rel_pivot_cache_record_info;
+struct orcus_xlsx_impl;
+class xlsx_opc_handler;
+
+/**
+ * Import filter class derived from iface::import_filter. It is constructed
+ * with an import factory and overrides read_file(), read_stream() and
+ * get_name(). The private read_* members each take the directory path and
+ * file name of one part to read. NOTE(review): judging by the name it
+ * handles XLSX documents -- confirm.
+ */
+class ORCUS_DLLPUBLIC orcus_xlsx : public iface::import_filter
+{
+ // xlsx_opc_handler is granted access to the private read_* members.
+ friend class xlsx_opc_handler;
+ // Opaque implementation state, defined outside of this header.
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ orcus_xlsx(spreadsheet::iface::import_factory* factory);
+ ~orcus_xlsx();
+
+ // Not copyable.
+ orcus_xlsx(const orcus_xlsx&) = delete;
+ orcus_xlsx& operator= (const orcus_xlsx&) = delete;
+
+ // NOTE(review): presumably returns true when the given byte buffer looks
+ // like content this filter can read -- confirm.
+ static bool detect(const unsigned char* blob, size_t size);
+
+ virtual void read_file(std::string_view filepath) override;
+ virtual void read_stream(std::string_view stream) override;
+
+ virtual std::string_view get_name() const override;
+
+private:
+
+ void set_formulas_to_doc();
+
+ void read_workbook(const std::string& dir_path, const std::string& file_name);
+
+ /**
+ * Parse a sheet xml part that contains data stored in a single sheet.
+ */
+ void read_sheet(const std::string& dir_path, const std::string& file_name, xlsx_rel_sheet_info* data);
+
+ /**
+ * Parse sharedStrings.xml part that contains a list of strings referenced
+ * in the document.
+ */
+ void read_shared_strings(const std::string& dir_path, const std::string& file_name);
+
+ void read_styles(const std::string& dir_path, const std::string& file_name);
+
+ void read_table(const std::string& dir_path, const std::string& file_name, xlsx_rel_table_info* data);
+
+ void read_pivot_cache_def(
+ const std::string& dir_path, const std::string& file_name,
+ const xlsx_rel_pivot_cache_info* data);
+
+ void read_pivot_cache_rec(
+ const std::string& dir_path, const std::string& file_name,
+ const xlsx_rel_pivot_cache_record_info* data);
+
+ void read_pivot_table(const std::string& dir_path, const std::string& file_name);
+
+ void read_rev_headers(const std::string& dir_path, const std::string& file_name);
+
+ void read_rev_log(const std::string& dir_path, const std::string& file_name);
+
+ void read_drawing(const std::string& dir_path, const std::string& file_name);
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/orcus_xml.hpp b/include/orcus/orcus_xml.hpp
new file mode 100644
index 0000000..f20466f
--- /dev/null
+++ b/include/orcus/orcus_xml.hpp
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ORCUS_XML_HPP
+#define INCLUDED_ORCUS_ORCUS_XML_HPP
+
+#include "env.hpp"
+#include "spreadsheet/types.hpp"
+
+#include <ostream>
+#include <memory>
+
+namespace orcus {
+
+class xmlns_repository;
+
+namespace spreadsheet { namespace iface {
+ class import_factory;
+ class export_factory;
+}}
+
+class ORCUS_DLLPUBLIC orcus_xml
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ orcus_xml(const orcus_xml&) = delete;
+ orcus_xml& operator= (const orcus_xml&) = delete;
+
+ orcus_xml(xmlns_repository& ns_repo, spreadsheet::iface::import_factory* im_fact, spreadsheet::iface::export_factory* ex_fact);
+ ~orcus_xml();
+
+ /**
+ * Define a namespace and its alias used in a map file.
+ *
+ * @param alias alias for the namespace.
+ * @param uri namespace value.
+ * @param default_ns whether or not to use this namespace as the default
+ * namespace. When this value is set to true, the
+ * namespace being set will be applied for all elements
+ * and attributes used in the paths without explicit
+ * namespace values.
+ */
+ void set_namespace_alias(std::string_view alias, std::string_view uri, bool default_ns=false);
+
+ /**
+ * Define a mapping of a single element or attribute to a single cell
+ * location.
+ *
+ * @param xpath path to the element or attribute to link.
+ * @param sheet name of the sheet of the linked cell location.
+ * @param row row index (0-based) of the linked cell location.
+ * @param col column index (0-based) of the linked cell location.
+ */
+ void set_cell_link(std::string_view xpath, std::string_view sheet, spreadsheet::row_t row, spreadsheet::col_t col);
+
+ /**
+ * Initiate the mapping definition of a linked range. The definition will
+ * get committed when the {@link commit_range} method is called.
+ *
+ * @param sheet name of the sheet of the linked cell location.
+ * @param row row index (0-based) of the linked cell location.
+ * @param col column index (0-based) of the linked cell location.
+ */
+ void start_range(std::string_view sheet, spreadsheet::row_t row, spreadsheet::col_t col);
+
+ /**
+ * Append a field that is mapped to a specified path in the XML document
+ * to the current linked range.
+ *
+ * @param xpath path to the element or attribute to link as a field.
+ * @param label custom header label to use in lieu of the name of the
+ * linked entity.
+ */
+ void append_field_link(std::string_view xpath, std::string_view label);
+
+ /**
+ * Set the element located in the specified path as a row group in the
+ * current linked range.
+ *
+ * If the element is defined as a row-group element, the row index will
+ * increment whenever that element closes.
+ *
+ * @param xpath path to the element to use as a row group element.
+ */
+ void set_range_row_group(std::string_view xpath);
+
+ /**
+ * Commit the mapping definition of the current range.
+ */
+ void commit_range();
+
+ /**
+ * Append a new sheet to the spreadsheet document.
+ *
+ * @param name name of the sheet.
+ */
+ void append_sheet(std::string_view name);
+
+ /**
+ * Read the stream containing the source XML document.
+ *
+ * @param stream stream containing the content of the source XML document.
+ */
+ void read_stream(std::string_view stream);
+
+ /**
+ * Read an XML stream that contains an entire set of mapping rules.
+ *
+ * This method also inserts all necessary sheets into the document model.
+ *
+ * @param stream stream containing the XML string.
+ */
+ void read_map_definition(std::string_view stream);
+
+ /**
+ * Read a stream containing the source XML document, automatically detect
+ * all linkable ranges and import them one range per sheet.
+ *
+ * @param stream stream containing the source XML document.
+ */
+ void detect_map_definition(std::string_view stream);
+
+ /**
+ * Read a stream containing the source XML document, automatically detect
+ * all linkable ranges, and write a map definition file depicting the
+ * detected ranges.
+ *
+ * @param stream stream containing the source XML document.
+ * @param out output stream to write the map definition file to.
+ */
+ void write_map_definition(std::string_view stream, std::ostream& out) const;
+
+ /**
+ * Write the linked cells and ranges in the spreadsheet document as an XML
+ * document using the same map definition rules used to load the content.
+ *
+ * Note that this requires the source XML document stream, as it re-uses
+ * parts of the source stream.
+ *
+ * @param stream stream containing the source XML document.
+ * @param out output stream to write the XML document to.
+ */
+ void write(std::string_view stream, std::ostream& out) const;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/parser_base.hpp b/include/orcus/parser_base.hpp
new file mode 100644
index 0000000..b3d99a1
--- /dev/null
+++ b/include/orcus/parser_base.hpp
@@ -0,0 +1,155 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_PARSER_BASE_HPP
+#define INCLUDED_ORCUS_PARSER_BASE_HPP
+
+#include "env.hpp"
+#include "exception.hpp"
+
+#include <string>
+#include <cstdlib>
+#include <cstddef>
+#include <cassert>
+#include <functional>
+
+namespace orcus {
+
+class ORCUS_PSR_DLLPUBLIC parser_base
+{
+protected:
+ using numeric_parser_type = std::function<const char*(const char*, const char*, double&)>;
+
+ const char* const mp_begin;
+ const char* mp_char;
+ const char* mp_end;
+
+private:
+ numeric_parser_type m_func_parse_numeric;
+
+protected:
+ parser_base(const char* p, size_t n);
+
+ void set_numeric_parser(const numeric_parser_type& func)
+ {
+ m_func_parse_numeric = func;
+ }
+
+ bool has_char() const
+ {
+ assert(mp_char <= mp_end);
+ return mp_char != mp_end;
+ }
+
+ bool has_next() const
+ {
+ assert((mp_char+1) <= mp_end);
+ return (mp_char+1) != mp_end;
+ }
+
+ void next(size_t inc=1) { mp_char += inc; }
+
+ void prev(size_t dec=1);
+
+ char cur_char() const { return *mp_char; }
+
+ /**
+ * Peek a character at specified offset from the current position without
+ * advancing the current position.
+ *
+ * @note The caller <strong>must</strong> ensure that the specified offset
+ * position is a valid position. This method does not check its
+ * validity.
+ *
+ * @param offset offset from the current position to peek at.
+ *
+ * @return character at a specified offset position from the current
+ * position.
+ */
+ char peek_char(std::size_t offset=1) const;
+
+ /**
+ * Peek a segment of contiguous characters of a specified length starting
+ * from the current position.
+ *
+ * @note The caller <strong>must</strong> ensure that the specified
+ * substring segment is entirely valid. This method does not check
+ * its validity.
+ *
+ * @param length length of the segment to peek.
+ *
+ * @return segment of contiguous characters.
+ */
+ std::string_view peek_chars(std::size_t length) const;
+
+ /**
+ * Skip an optional byte order mark at the current position of the stream.
+ *
+ * Currently we only check for UTF-8 BOM.
+ */
+ void skip_bom();
+
+ void skip(std::string_view chars_to_skip);
+
+ /**
+ * Skip all characters that are 0-32 in ASCII range
+ */
+ void skip_space_and_control();
+
+ /**
+ * Parse and check next characters to see if it matches specified
+ * character sequence.
+ *
+ * @param expected sequence of characters to match against.
+ *
+ * @return true if it matches specified character sequence, false
+ * otherwise.
+ */
+ bool parse_expected(std::string_view expected);
+
+ /**
+ * Try to parse the next characters as double, or return NaN in case of
+ * failure.
+ *
+ * @return double value on success, or NaN on failure.
+ */
+ double parse_double();
+
+ /**
+ * Determine the number of characters remaining <strong>after</strong> the
+ * current character. For instance, if the current character is on the
+ * last character in the stream, this method will return 0, whereas if
+ * it's on the first character, it will return the total length - 1.
+ *
+ * @return number of characters remaining after the current character.
+ */
+ size_t remaining_size() const;
+
+ /**
+  * Determine the number of characters available from the current character
+  * to the end of the buffer. The current character is included.
+  *
+  * @return number of characters available including the current character.
+  */
+ size_t available_size() const
+ {
+     // Plain pointer subtraction: this header does not include <iterator>,
+     // which is where std::distance lives. The cast makes the
+     // signed-to-unsigned conversion explicit; has_char() asserts that
+     // mp_char never goes past mp_end.
+     return static_cast<size_t>(mp_end - mp_char);
+ }
+
+ /**
+ * Return the current offset from the beginning of the character stream.
+ *
+ * @return current offset from the beginning of the character stream.
+ */
+ std::ptrdiff_t offset() const;
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/parser_global.hpp b/include/orcus/parser_global.hpp
new file mode 100644
index 0000000..bf5971b
--- /dev/null
+++ b/include/orcus/parser_global.hpp
@@ -0,0 +1,153 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_PARSER_GLOBAL_HPP
+#define ORCUS_PARSER_GLOBAL_HPP
+
+#include "env.hpp"
+
+#include <sstream>
+
+namespace orcus {
+
+class cell_buffer;
+
+enum class string_escape_char_t
+{
+ invalid,
+ valid,
+ control_char
+};
+
+/**
+ * Stores state of string parsing. Upon successful parsing the str points
+ * to the first character of the string and the length stores the size of
+ * the string. When the parsing fails, the str value becomes nullptr and
+ * the length stores the error code.
+ */
+struct parse_quoted_string_state
+{
+ ORCUS_PSR_DLLPUBLIC static const size_t error_no_closing_quote;
+ ORCUS_PSR_DLLPUBLIC static const size_t error_illegal_escape_char;
+
+ const char* str;
+ size_t length;
+
+ /**
+ * When true, the str pointer points to the temporary buffer storage
+ * provided by the caller instead of the original character stream. The
+ * caller must allocate memory and copy the value to it before the buffer
+ * content changes if the parsed string value needs to be stored.
+ *
+ * When false, str points to a position in the original stream, and the
+ * caller doesn't need to allocate memory to store the string value as
+ * long as the original character stream is alive.
+ */
+ bool transient;
+
+ /**
+ * When true, the string contains at least one control character - a
+ * character whose value ranges between 0x00 and 0x1F.
+ */
+ bool has_control_character;
+};
+
+ORCUS_PSR_DLLPUBLIC bool is_blank(char c);
+ORCUS_PSR_DLLPUBLIC bool is_alpha(char c);
+ORCUS_PSR_DLLPUBLIC bool is_numeric(char c);
+
+/**
+ * Check if the character is one of the allowed characters. Note that you can
+ * only specify up to 16 allowed characters.
+ *
+ * @param c character to check.
+ * @param allowed string containing all allowed characters.
+ *
+ * @return true if the character is one of the allowed characters, false
+ * otherwise.
+ */
+ORCUS_PSR_DLLPUBLIC bool is_in(char c, std::string_view allowed);
+
+/**
+ * Parse a sequence of characters into a double-precision numeric value.
+ *
+ * @param p pointer to the first character to start parsing from.
+ * @param p_end pointer to the first character not allowed to parse.
+ * @param value output parameter to assign the matched value to.
+ *
+ * @return pointer to the first non-matching character.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_numeric(const char* p, const char* p_end, double& value);
+
+/**
+ * Parse a sequence of characters into an integer value.
+ *
+ * @param p pointer to the first character to start parsing from.
+ * @param p_end pointer to the first character not allowed to parse.
+ * @param value output parameter to assign the matched value to.
+ *
+ * @return pointer to the first non-matching character.
+ *
+ * @note Use of this function should be eventually replaced with
+ * std::from_chars() once it becomes available.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_integer(const char* p, const char* p_end, long& value);
+
+/**
+ * Two single-quote characters ('') represent one single-quote character.
+ */
+ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_single_quoted_string(
+ const char*& p, size_t max_length, cell_buffer& buffer);
+
+/**
+ * Starting from the opening single quote position, parse string all the way
+ * to the closing quote. Two single-quote characters ('') will be
+ * interpreted as encoded one single-quote character.
+ *
+ * @param p it should point to the opening single quote character.
+ * @param max_length maximum length to parse.
+ *
+ * @return address of the character immediately after the closing quote, or
+ * nullptr in case no closing quote is found.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_single_quote(
+ const char* p, size_t max_length);
+
+ORCUS_PSR_DLLPUBLIC parse_quoted_string_state parse_double_quoted_string(
+ const char*& p, size_t max_length, cell_buffer& buffer);
+
+/**
+ * Starting from the opening double quote position, parse string all the way
+ * to the closing quote. Two single-quote characters ('') will be
+ * interpreted as encoded one single-quote character.
+ *
+ * @param p it should point to the opening double quote character.
+ * @param max_length maximum length to parse.
+ *
+ * @return address of the character immediately after the closing quote, or
+ * nullptr in case no closing quote is found.
+ */
+ORCUS_PSR_DLLPUBLIC const char* parse_to_closing_double_quote(
+ const char* p, size_t max_length);
+
+/**
+ * Given a character that occurs immediately after the escape character '\',
+ * return what type this character is.
+ *
+ * @param c character that occurs immediately after the escape character
+ * '\'.
+ *
+ * @return enum value representing the type of escape character.
+ */
+ORCUS_PSR_DLLPUBLIC string_escape_char_t get_string_escape_char_type(char c);
+
+ORCUS_PSR_DLLPUBLIC std::string_view trim(std::string_view str);
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/sax_ns_parser.hpp b/include/orcus/sax_ns_parser.hpp
new file mode 100644
index 0000000..f888fa2
--- /dev/null
+++ b/include/orcus/sax_ns_parser.hpp
@@ -0,0 +1,374 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SAX_NS_PARSER_HPP
+#define INCLUDED_ORCUS_SAX_NS_PARSER_HPP
+
+#include "sax_parser.hpp"
+#include "xml_namespace.hpp"
+
+#include <unordered_set>
+#include <vector>
+#include <algorithm>
+
+namespace orcus {
+
+struct sax_ns_parser_element
+{
+ /** Element namespace identifier. */
+ xmlns_id_t ns;
+ /** Element namespace alias. */
+ std::string_view ns_alias;
+ /** Element name. */
+ std::string_view name;
+ /** Position of the opening brace '<'. */
+ std::ptrdiff_t begin_pos;
+ /** Position immediately after the closing brace '>'. */
+ std::ptrdiff_t end_pos;
+};
+
+struct sax_ns_parser_attribute
+{
+ /** Attribute namespace identifier. */
+ xmlns_id_t ns;
+ /** Attribute namespace alias. */
+ std::string_view ns_alias;
+ /** Attribute name. */
+ std::string_view name;
+ /** Attribute value. */
+ std::string_view value;
+ /** Whether or not the attribute value is transient. */
+ bool transient;
+};
+
+namespace sax { namespace detail {
+
+ /**
+  * Pair of a namespace string and a local name, usable as a key in an
+  * unordered container.  Both members are non-owning views, so the
+  * referenced character data must outlive the key.
+  */
+ struct entity_name
+ {
+     /** Namespace part of the name. */
+     std::string_view ns;
+     /** Local part of the name. */
+     std::string_view name;
+
+     entity_name(std::string_view _ns, std::string_view _name) :
+         ns(_ns), name(_name) {}
+
+     /** Two names are equal when both the namespace and the name match. */
+     bool operator== (const entity_name& other) const
+     {
+         return other.ns == ns && other.name == name;
+     }
+
+     /** Hash functor consistent with operator==. */
+     struct hash
+     {
+         size_t operator() (const entity_name& v) const
+         {
+             // Order-dependent combine.  A plain sum of the two hashes is
+             // symmetric, so {a, b} and {b, a} would always collide.
+             std::hash<std::string_view> hasher;
+             size_t seed = hasher(v.ns);
+             seed ^= hasher(v.name) + 0x9e3779b9 + (seed << 6) + (seed >> 2);
+             return seed;
+         }
+     };
+ };
+
+typedef std::unordered_set<std::string_view> ns_keys_type;
+typedef std::unordered_set<entity_name, entity_name::hash> entity_names_type;
+
+ /**
+  * State kept for one open element.  Move-only.
+  */
+ struct elem_scope
+ {
+     /** Namespace identifier of the element; unknown until assigned. */
+     xmlns_id_t ns = XMLNS_UNKNOWN_ID;
+     /** Name of the element. */
+     std::string_view name;
+     /** Namespace keys to pop from the context when this element closes. */
+     ns_keys_type ns_keys;
+
+     // Defaulted rather than user-provided: with the member initializer
+     // above, no field is left indeterminate after default construction.
+     elem_scope() = default;
+     elem_scope(const elem_scope&) = delete;
+     elem_scope(elem_scope&& other) = default;
+ };
+
+using elem_scopes_type = std::vector<elem_scope>;
+
+}} // namespace sax::detail
+
+class sax_ns_handler
+{
+public:
+ /**
+ * Called when a doctype declaration &lt;!DOCTYPE ... &gt; is encountered.
+ *
+ * @param dtd struct containing doctype declaration data.
+ */
+ void doctype(const orcus::sax::doctype_declaration& dtd)
+ {
+ (void)dtd;
+ }
+
+ /**
+ * Called when &lt;?... is encountered, where the '...' may be an
+ * arbitrary identifier. One common declaration is &lt;?xml which is
+ * typically given at the start of an XML stream.
+ *
+ * @param decl name of the identifier.
+ */
+ void start_declaration(std::string_view decl)
+ {
+ (void)decl;
+ }
+
+ /**
+ * Called when the closing tag (&gt;) of a &lt;?... ?&gt; is encountered.
+ *
+ * @param decl name of the identifier.
+ */
+ void end_declaration(std::string_view decl)
+ {
+ (void)decl;
+ }
+
+ /**
+ * Called at the start of each element.
+ *
+ * @param elem information of the element being parsed.
+ */
+ void start_element(const orcus::sax_ns_parser_element& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called at the end of each element.
+ *
+ * @param elem information of the element being parsed.
+ */
+ void end_element(const orcus::sax_ns_parser_element& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called when a segment of a text content is parsed. Each text content
+ * is a direct child of an element, which may have multiple child contents
+ * when the element also has child elements that are direct siblings of
+ * the text contents, or when the text contents are split by a comment.
+ *
+ * @param val value of the text content.
+ * @param transient when true, the text content has been converted and is
+ * stored in a temporary buffer due to presence of one or
+ * more encoded characters, in which case <em>the passed
+ * text value needs to be either immediately converted to
+ * a non-text value or be interned within the scope of
+ * the callback</em>.
+ */
+ void characters(std::string_view val, bool transient)
+ {
+ (void)val;
+ (void)transient;
+ }
+
+ /**
+ * Called upon parsing of an attribute of a declaration. The value of an
+ * attribute is assumed to be transient thus should be consumed within the
+ * scope of this callback.
+ *
+ * @param name name of an attribute.
+ * @param val value of an attribute.
+ *
+ * @todo Perhaps we should pass the transient flag here as well like all the
+ * other places.
+ */
+ void attribute(std::string_view name, std::string_view val)
+ {
+ (void)name;
+ (void)val;
+ }
+
+ /**
+ * Called upon parsing of an attribute of an element. Note that <em>when
+ * the attribute's transient flag is set, the attribute value is stored in
+ * a temporary buffer due to a presence of encoded characters, and must be
+ * processed within the scope of the callback</em>.
+ *
+ * @param attr struct containing attribute information.
+ */
+ void attribute(const orcus::sax_ns_parser_attribute& attr)
+ {
+ (void)attr;
+ }
+};
+
+/**
+ * SAX based XML parser with extra namespace handling.
+ *
+ * It uses an instance of xmlns_context passed by the caller to validate and
+ * convert namespace values into identifiers. The namespace identifier of
+ * each encountered element is always given even if one is not explicitly
+ * given.
+ *
+ * This parser keeps track of element scopes and detects non-matching element
+ * pairs.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ * Refer to @ref sax_ns_handler.
+ */
+template<typename HandlerT>
+class sax_ns_parser
+{
+public:
+ typedef HandlerT handler_type;
+
+ sax_ns_parser(std::string_view content, xmlns_context& ns_cxt, handler_type& handler);
+ ~sax_ns_parser() = default;
+
+ /**
+ * Start parsing the document.
+ *
+ * @exception orcus::malformed_xml_error when it encounters a
+ * non-matching closing element.
+ */
+ void parse();
+
+private:
+ /**
+ * Re-route callbacks from the internal sax_parser into sax_ns_parser
+ * callbacks.
+ */
+ class handler_wrapper
+ {
+ sax::detail::elem_scopes_type m_scopes;
+ sax::detail::ns_keys_type m_ns_keys;
+ sax::detail::entity_names_type m_attrs;
+
+ sax_ns_parser_element m_elem;
+ sax_ns_parser_attribute m_attr;
+
+ xmlns_context& m_ns_cxt;
+ handler_type& m_handler;
+
+ bool m_declaration;
+
+ public:
+ handler_wrapper(xmlns_context& ns_cxt, handler_type& handler) : m_ns_cxt(ns_cxt), m_handler(handler), m_declaration(false) {}
+
+ void doctype(const sax::doctype_declaration& dtd)
+ {
+ m_handler.doctype(dtd);
+ }
+
+ void start_declaration(std::string_view name)
+ {
+ m_declaration = true;
+ m_handler.start_declaration(name);
+ }
+
+ void end_declaration(std::string_view name)
+ {
+ m_declaration = false;
+ m_handler.end_declaration(name);
+ }
+
+ void start_element(const sax::parser_element& elem)
+ {
+     // Open a new scope for this element and resolve its namespace alias
+     // through the namespace context.
+     auto& current = m_scopes.emplace_back();
+     current.ns = m_ns_cxt.get(elem.ns);
+     current.name = elem.name;
+
+     // Hand the namespace keys collected by attribute() over to the new
+     // scope; m_ns_keys receives the scope's (empty) set in exchange.
+     m_ns_keys.swap(current.ns_keys);
+
+     // Populate the element record passed to the user handler.
+     m_elem.begin_pos = elem.begin_pos;
+     m_elem.end_pos = elem.end_pos;
+     m_elem.ns_alias = elem.ns;
+     m_elem.name = current.name;
+     m_elem.ns = current.ns;
+     m_handler.start_element(m_elem);
+
+     // Reset the duplicate-attribute tracker.
+     m_attrs.clear();
+ }
+
+ /**
+  * Close the innermost open element: verify that the closing tag matches
+  * it, notify the handler, then pop the namespaces declared in its scope.
+  *
+  * @param elem closing element as reported by the underlying parser.
+  *
+  * @exception malformed_xml_error when no element is open, or when the
+  *            closing tag does not match the innermost open element.
+  */
+ void end_element(const sax::parser_element& elem)
+ {
+     // back() on an empty vector is undefined behavior; a closing tag
+     // with nothing open is reported as a mismatch instead.
+     if (m_scopes.empty())
+         throw malformed_xml_error("mis-matching closing element.", -1);
+
+     sax::detail::elem_scope& scope = m_scopes.back();
+     if (scope.ns != m_ns_cxt.get(elem.ns) || scope.name != elem.name)
+         throw malformed_xml_error("mis-matching closing element.", -1);
+
+     m_elem.ns = scope.ns;
+     m_elem.ns_alias = elem.ns;
+     m_elem.name = scope.name;
+     m_elem.begin_pos = elem.begin_pos;
+     m_elem.end_pos = elem.end_pos;
+     m_handler.end_element(m_elem);
+
+     // Pop all namespaces declared in this scope.
+     for (const std::string_view& key : scope.ns_keys)
+         m_ns_cxt.pop(key);
+
+     m_scopes.pop_back();
+ }
+
+ void characters(std::string_view val, bool transient)
+ {
+ m_handler.characters(val, transient);
+ }
+
+ /**
+  * Process one attribute reported by the underlying parser.
+  *
+  * Attributes of a declaration are passed through without namespace
+  * handling.  For element attributes, namespace declarations (xmlns and
+  * xmlns:alias) are pushed to the namespace context and not forwarded;
+  * everything else is forwarded to the handler with its namespace
+  * resolved.
+  *
+  * @param attr attribute as reported by the underlying parser.
+  *
+  * @exception malformed_xml_error when the same attribute occurs twice in
+  *            one element.
+  */
+ void attribute(const sax::parser_attribute& attr)
+ {
+     if (m_declaration)
+     {
+         // XML declaration attribute. Pass it through to the handler without namespace.
+         m_handler.attribute(attr.name, attr.value);
+         return;
+     }
+
+     // insert() reports whether the key was newly added, so a single
+     // lookup serves as both the duplicate check and the registration.
+     if (!m_attrs.insert(sax::detail::entity_name(attr.ns, attr.name)).second)
+         throw malformed_xml_error(
+             "You can't define two attributes of the same name in the same element.", -1);
+
+     if (attr.ns.empty() && attr.name == "xmlns")
+     {
+         // Default namespace
+         m_ns_cxt.push(std::string_view{}, attr.value);
+         m_ns_keys.insert(std::string_view{});
+         return;
+     }
+
+     if (attr.ns == "xmlns")
+     {
+         // Namespace alias
+         if (!attr.name.empty())
+         {
+             m_ns_cxt.push(attr.name, attr.value);
+             m_ns_keys.insert(attr.name);
+         }
+         return;
+     }
+
+     // An attribute without a prefix does not inherit the default namespace.
+     m_attr.ns = attr.ns.empty() ? XMLNS_UNKNOWN_ID : m_ns_cxt.get(attr.ns);
+     m_attr.ns_alias = attr.ns;
+     m_attr.name = attr.name;
+     m_attr.value = attr.value;
+     m_attr.transient = attr.transient;
+     m_handler.attribute(m_attr);
+ }
+ };
+
+private:
+ handler_wrapper m_wrapper;
+ sax_parser<handler_wrapper> m_parser;
+};
+
+ /**
+  * Bind the parser to the content to parse, the namespace context and the
+  * user handler.
+  */
+ template<typename HandlerT>
+ sax_ns_parser<HandlerT>::sax_ns_parser(
+     std::string_view content, xmlns_context& ns_cxt, handler_type& handler) :
+     // m_wrapper is declared before m_parser, so it is constructed before
+     // a reference to it is handed to the inner parser.
+     m_wrapper(ns_cxt, handler),
+     m_parser(content, m_wrapper)
+ {}
+
+ /**
+  * Run the inner sax_parser; its callbacks are routed through the
+  * handler_wrapper.
+  */
+ template<typename HandlerT>
+ void sax_ns_parser<HandlerT>::parse()
+ {
+     m_parser.parse();
+ }
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/sax_parser.hpp b/include/orcus/sax_parser.hpp
new file mode 100644
index 0000000..f7283d2
--- /dev/null
+++ b/include/orcus/sax_parser.hpp
@@ -0,0 +1,576 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SAX_PARSER_HPP
+#define INCLUDED_ORCUS_SAX_PARSER_HPP
+
+#include "sax_parser_base.hpp"
+
+#include <string_view>
+
+namespace orcus {
+
+struct sax_parser_default_config
+{
+ /**
+ * An integer value representing a baseline XML version. A value of 10
+ * corresponds with version 1.0 whereas a value of 11 corresponds with
+ * version 1.1.
+ */
+ static constexpr uint8_t baseline_version = 10;
+};
+
+class sax_handler
+{
+public:
+ /**
+ * Called when a doctype declaration &lt;!DOCTYPE ... &gt; is encountered.
+ *
+ * @param dtd struct containing doctype declaration data.
+ */
+ void doctype(const orcus::sax::doctype_declaration& dtd)
+ {
+ (void)dtd;
+ }
+
+ /**
+ * Called when &lt;?... is encountered, where the '...' may be an
+ * arbitrary identifier. One common declaration is &lt;?xml which is
+ * typically given at the start of an XML stream.
+ *
+ * @param decl name of the identifier.
+ */
+ void start_declaration(std::string_view decl)
+ {
+ (void)decl;
+ }
+
+ /**
+ * Called when the closing tag (&gt;) of a &lt;?... ?&gt; is encountered.
+ *
+ * @param decl name of the identifier.
+ */
+ void end_declaration(std::string_view decl)
+ {
+ (void)decl;
+ }
+
+ /**
+ * Called at the start of each element.
+ *
+ * @param elem information of the element being parsed.
+ */
+ void start_element(const orcus::sax::parser_element& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called at the end of each element.
+ *
+ * @param elem information of the element being parsed.
+ */
+ void end_element(const orcus::sax::parser_element& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called when a segment of a text content is parsed. Each text content
+ * is a direct child of an element, which may have multiple child contents
+ * when the element also has child elements that are direct siblings of
+ * the text contents, or when the text contents are split by a comment.
+ *
+ * @param val value of the text content.
+ * @param transient when true, the text content has been converted and is
+ * stored in a temporary buffer due to presence of one or
+ * more encoded characters, in which case <em>the passed
+ * text value needs to be either immediately converted to
+ * a non-text value or be interned within the scope of
+ * the callback</em>.
+ */
+ void characters(std::string_view val, bool transient)
+ {
+ (void)val; (void)transient;
+ }
+
+ /**
+ * Called upon parsing of an attribute of an element. Note that <em>when
+ * the attribute's transient flag is set, the attribute value is stored in
+ * a temporary buffer due to presence of one or more encoded characters,
+ * and must be processed within the scope of the callback</em>.
+ *
+ * @param attr struct containing attribute information.
+ */
+ void attribute(const orcus::sax::parser_attribute& attr)
+ {
+ (void)attr;
+ }
+};
+
+/**
+ * SAX parser for XML documents.
+ *
+ * This parser is barebone in that it only parses the document and picks up
+ * all encountered elements and attributes without checking proper element
+ * pairs. The user is responsible for checking whether or not the document is
+ * well-formed in terms of element scopes.
+ *
+ * This parser additionally records the begin and end offset positions of each
+ * element.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ * Refer to @ref sax_handler.
+ * @tparam ConfigT Parser configuration.
+ */
+template<typename HandlerT, typename ConfigT = sax_parser_default_config>
+class sax_parser : public sax::parser_base
+{
+public:
+ typedef HandlerT handler_type;
+ typedef ConfigT config_type;
+
+ sax_parser(std::string_view content, handler_type& handler);
+ ~sax_parser() = default;
+
+ void parse();
+
+private:
+
+ /**
+ * Parse XML header that occurs at the beginning of every XML stream i.e.
+ * <?xml version="..." encoding="..." ?>
+ */
+ void header();
+ void body();
+ void element();
+ void element_open(std::ptrdiff_t begin_pos);
+ void element_close(std::ptrdiff_t begin_pos);
+ void special_tag();
+ void declaration(const char* name_check);
+ void cdata();
+ void doctype();
+ void characters();
+ void attribute();
+
+private:
+ handler_type& m_handler;
+};
+
+ /**
+  * Set up the parser over the given content and remember the handler that
+  * receives the callbacks.
+  */
+ template<typename HandlerT, typename ConfigT>
+ sax_parser<HandlerT,ConfigT>::sax_parser(std::string_view content, handler_type& handler) :
+     sax::parser_base(content.data(), content.size()), m_handler(handler)
+ {}
+
+ /**
+  * Parse the whole stream: header first, then the body.
+  */
+ template<typename HandlerT, typename ConfigT>
+ void sax_parser<HandlerT,ConfigT>::parse()
+ {
+     // Start from the beginning of the stream with no element open.
+     m_nest_level = 0;
+     mp_char = mp_begin;
+
+     header();
+     skip_space_and_control();
+     body();
+
+     // The buffer position is expected to be back at zero once done.
+     assert(m_buffer_pos == 0);
+ }
+
+ template<typename HandlerT, typename ConfigT>
+ void sax_parser<HandlerT,ConfigT>::header()
+ {
+     // Multi-byte encodings are not handled, so a BOM, if present, is
+     // simply skipped.
+     skip_bom();
+
+     // Leading whitespace is tolerated even though, strictly speaking,
+     // such an XML stream is invalid.
+     // TODO : Make this configurable.
+     skip_space_and_control();
+
+     const bool starts_with_tag = has_char() && cur_char() == '<';
+     if (!starts_with_tag)
+         throw malformed_xml_error("xml file must begin with '<'.", offset());
+
+     // A header declaration is optional in XML 1.0; nothing more to
+     // verify here.
+     if (config_type::baseline_version < 11)
+         return;
+
+     // XML 1.1 requires the stream to open with a header declaration.
+     if (next_char_checked() != '?')
+         throw malformed_xml_error("xml file must begin with '<?'.", offset());
+
+     declaration("xml");
+ }
+
+ /**
+  * Walk the stream after the header, dispatching tags and text content
+  * until the stream ends or the root element is closed.
+  */
+ template<typename HandlerT, typename ConfigT>
+ void sax_parser<HandlerT,ConfigT>::body()
+ {
+     while (has_char())
+     {
+         if (cur_char() != '<')
+         {
+             // Text is reported only while inside the element hierarchy;
+             // anything outside of it is skipped one character at a time.
+             if (m_nest_level)
+                 characters();
+             else
+                 next();
+             continue;
+         }
+
+         element();
+
+         // Root element closed. Stop parsing.
+         if (!m_root_elem_open)
+             return;
+     }
+ }
+
+ /**
+  * Dispatch on the character following '<' to the matching tag parser.
+  */
+ template<typename HandlerT, typename ConfigT>
+ void sax_parser<HandlerT,ConfigT>::element()
+ {
+     assert(cur_char() == '<');
+
+     // Remember where the tag starts before moving past the '<'.
+     const std::ptrdiff_t tag_begin = offset();
+     const char marker = next_char_checked();
+
+     if (marker == '/')
+         element_close(tag_begin);
+     else if (marker == '!')
+         special_tag();
+     else if (marker == '?')
+         declaration(nullptr);
+     else
+         element_open(tag_begin);
+ }
+
+ /**
+  * Parse an opening tag: the element name, then its attributes, up to the
+  * closing '>' or the self-closing '/>'.
+  *
+  * @param begin_pos offset of the opening '<' of this tag.
+  *
+  * @exception malformed_xml_error when '/' is not followed by '>'.
+  */
+ template<typename HandlerT, typename ConfigT>
+ void sax_parser<HandlerT,ConfigT>::element_open(std::ptrdiff_t begin_pos)
+ {
+     sax::parser_element elem;
+     element_name(elem, begin_pos);
+
+     while (true)
+     {
+         skip_space_and_control();
+         char c = cur_char_checked();
+         if (c == '/')
+         {
+             // Self-closing element: <element/>
+             // Advance with the checked variant, as element() does, so that
+             // a stream ending right after '/' is not read past its end.
+             // NOTE(review): next_char_checked() is defined outside this
+             // header - confirm it reports end-of-stream.
+             if (next_char_checked() != '>')
+                 throw malformed_xml_error("expected '/>' to self-close the element.", offset());
+             next();
+             elem.end_pos = offset();
+             m_handler.start_element(elem);
+             reset_buffer_pos();
+             m_handler.end_element(elem);
+             // A self-closing element at nest level zero is the root.
+             if (!m_nest_level)
+                 m_root_elem_open = false;
+ #if ORCUS_DEBUG_SAX_PARSER
+             cout << "element_open: ns='" << elem.ns << "', name='" << elem.name << "' (self-closing)" << endl;
+ #endif
+             return;
+         }
+         else if (c == '>')
+         {
+             // End of opening element: <element>
+             next();
+             elem.end_pos = offset();
+             nest_up();
+             m_handler.start_element(elem);
+             reset_buffer_pos();
+ #if ORCUS_DEBUG_SAX_PARSER
+             cout << "element_open: ns='" << elem.ns << "', name='" << elem.name << "'" << endl;
+ #endif
+             return;
+         }
+         else
+             attribute();
+     }
+ }
+
+ /**
+  * Parse a closing tag and report it to the handler.
+  *
+  * @param begin_pos offset of the opening '<' of this tag.
+  *
+  * @exception malformed_xml_error when the element name is not followed by
+  *            '>'.
+  */
+ template<typename HandlerT, typename ConfigT>
+ void sax_parser<HandlerT,ConfigT>::element_close(std::ptrdiff_t begin_pos)
+ {
+     assert(cur_char() == '/');
+     nest_down();
+     next_check();
+     sax::parser_element elem;
+     element_name(elem, begin_pos);
+
+     // Checked read, matching element_open(): the stream may end right
+     // after the element name.
+     // NOTE(review): cur_char_checked() is defined outside this header -
+     // confirm it reports end-of-stream.
+     if (cur_char_checked() != '>')
+         throw malformed_xml_error("expected '>' to close the element.", offset());
+     next();
+     elem.end_pos = offset();
+
+     m_handler.end_element(elem);
+ #if ORCUS_DEBUG_SAX_PARSER
+     cout << "element_close: ns='" << elem.ns << "', name='" << elem.name << "'" << endl;
+ #endif
+     // Back at nest level zero means the root element has just closed.
+     if (!m_nest_level)
+         m_root_elem_open = false;
+ }
+
+/**
+ * Parse a tag that starts with "<!" and dispatch it to comment(), cdata()
+ * or doctype() depending on what follows.
+ *
+ * @throws malformed_xml_error if the tag is too short, if the stream ends
+ *         in the middle of the tag opener, or if the opener is none of
+ *         "<!--", "<![CDATA[" or "<!DOCTYPE".
+ */
+template<typename HandlerT, typename ConfigT>
+void sax_parser<HandlerT,ConfigT>::special_tag()
+{
+    assert(cur_char() == '!');
+    // This can be either <![CDATA, <!--, or <!DOCTYPE.
+    size_t len = available_size();
+    if (len < 2)
+        throw malformed_xml_error("special tag too short.", offset());
+
+    // At least one more character is available here, so this unchecked read
+    // is safe.
+    switch (next_and_char())
+    {
+        case '-':
+        {
+            // Possibly comment.  Only two characters ("!-") are guaranteed to
+            // exist at this point, so the second '-' must be fetched with a
+            // bounds check.
+            if (next_char_checked() != '-')
+                throw malformed_xml_error("comment expected.", offset());
+
+            len -= 2;
+            if (len < 3)
+                throw malformed_xml_error("malformed comment.", offset());
+
+            next();
+            comment();
+        }
+        break;
+        case '[':
+        {
+            // Possibly a CDATA.
+            expects_next("CDATA[", 6);
+            if (has_char())
+                cdata();
+        }
+        break;
+        case 'D':
+        {
+            // check if this is a DOCTYPE.
+            expects_next("OCTYPE", 6);
+            skip_space_and_control();
+            if (has_char())
+                doctype();
+        }
+        break;
+        default:
+            throw malformed_xml_error("failed to parse special tag.", offset());
+    }
+}
+
+/**
+ * Parse a declaration of the form <?name attr="..." ... ?>.
+ *
+ * The handler receives start_declaration(), one attribute() call per
+ * attribute, and finally end_declaration().
+ *
+ * @param name_check when not null, the declaration name must be equal to
+ *                   this string; otherwise malformed_xml_error is thrown.
+ */
+template<typename HandlerT, typename ConfigT>
+void sax_parser<HandlerT,ConfigT>::declaration(const char* name_check)
+{
+    assert(cur_char() == '?');
+    next_check();
+
+    // The declaration name comes first.
+    std::string_view decl_name;
+    name(decl_name);
+#if ORCUS_DEBUG_SAX_PARSER
+    cout << "sax_parser::declaration: start name='" << decl_name << "'" << endl;
+#endif
+
+    if (name_check)
+    {
+        if (decl_name != name_check)
+        {
+            std::ostringstream os;
+            os << "declaration name of '" << name_check << "' was expected, but '" << decl_name << "' was found instead.";
+            throw malformed_xml_error(os.str(), offset());
+        }
+    }
+
+    m_handler.start_declaration(decl_name);
+    skip_space_and_control();
+
+    // Consume attributes until the terminating '?' shows up.
+    for (;;)
+    {
+        if (cur_char_checked() == '?')
+            break;
+
+        attribute();
+        skip_space_and_control();
+    }
+
+    const char closing = next_char_checked();
+    if (closing != '>')
+        throw malformed_xml_error("declaration must end with '?>'.", offset());
+
+    m_handler.end_declaration(decl_name);
+    reset_buffer_pos();
+    next();
+#if ORCUS_DEBUG_SAX_PARSER
+    cout << "sax_parser::declaration: end name='" << decl_name << "'" << endl;
+#endif
+}
+
+/**
+ * Parse the content of a CDATA section up to and including the terminating
+ * "]]>" and pass the content to the handler's characters() as a
+ * non-transient value.
+ *
+ * @throws malformed_xml_error if the stream ends before "]]>" is found.
+ */
+template<typename HandlerT, typename ConfigT>
+void sax_parser<HandlerT,ConfigT>::cdata()
+{
+    // No assertion on the remaining length: a truncated section is malformed
+    // input, not a programming error, and is reported by the throw below.
+    const size_t len = available_size();
+
+    // Parse until we reach ']]>'.
+    const char* p0 = mp_char;
+    size_t match = 0;
+
+    // The current character is read only while i < len, so the position one
+    // past the end of the stream is never dereferenced.
+    for (size_t i = 0; i < len; ++i, next())
+    {
+        const char c = cur_char();
+        if (c == ']')
+        {
+            // Be aware that we may encounter a series of more than two ']'
+            // characters, in which case we'll only count the last two.
+            if (match < 2)
+                ++match;
+        }
+        else if (c == '>' && match == 2)
+        {
+            // Found ']]>'.  Exclude the two ']' from the reported content.
+            size_t cdata_len = i - 2;
+            m_handler.characters(std::string_view(p0, cdata_len), false);
+            next();
+            return;
+        }
+        else
+            match = 0;
+    }
+    throw malformed_xml_error("malformed CDATA section.", offset());
+}
+
+template<typename HandlerT, typename ConfigT>
+void sax_parser<HandlerT,ConfigT>::doctype()
+{
+ // Parse the root element name first; the result is reported to the handler via doctype().
+ sax::doctype_declaration param;
+ name(param.root_element);
+ skip_space_and_control();
+
+ // Either PUBLIC or SYSTEM. Both are 6 characters long, so the length check below keeps the unchecked next_and_char() reads in bounds.
+ size_t len = available_size();
+ if (len < 6)
+ throw malformed_xml_error("DOCTYPE section too short.", offset());
+
+ param.keyword = sax::doctype_declaration::keyword_type::dtd_private; // default; only PUBLIC switches it to dtd_public
+ char c = cur_char();
+ if (c == 'P')
+ {
+ if (next_and_char() != 'U' || next_and_char() != 'B' || next_and_char() != 'L' || next_and_char() != 'I' || next_and_char() != 'C')
+ throw malformed_xml_error("malformed DOCTYPE section.", offset());
+
+ param.keyword = sax::doctype_declaration::keyword_type::dtd_public;
+ }
+ else if (c == 'S') // SYSTEM keeps the dtd_private default
+ {
+ if (next_and_char() != 'Y' || next_and_char() != 'S' || next_and_char() != 'T' || next_and_char() != 'E' || next_and_char() != 'M')
+ throw malformed_xml_error("malformed DOCTYPE section.", offset());
+ }
+
+ next_check(); // NOTE(review): when neither 'P' nor 'S' matched, this still skips one character - confirm this is intended
+ skip_space_and_control();
+
+ // Parse FPI (the first quoted value; parsed without decoding).
+ value(param.fpi, false);
+
+ has_char_throw("DOCTYPE section too short.");
+ skip_space_and_control();
+ has_char_throw("DOCTYPE section too short.");
+
+ if (cur_char() == '>')
+ {
+ // Optional URI not given. Exit with param.uri left empty.
+#if ORCUS_DEBUG_SAX_PARSER
+ cout << "sax_parser::doctype: root='" << param.root_element << "', fpi='" << param.fpi << "'" << endl;
+#endif
+ m_handler.doctype(param);
+ next();
+ return;
+ }
+
+ // Parse optional URI (the second quoted value; parsed without decoding).
+ value(param.uri, false);
+
+ has_char_throw("DOCTYPE section too short.");
+ skip_space_and_control();
+ has_char_throw("DOCTYPE section too short.");
+
+ if (cur_char() != '>')
+ throw malformed_xml_error("malformed DOCTYPE section - closing '>' expected but not found.", offset());
+
+#if ORCUS_DEBUG_SAX_PARSER
+ cout << "sax_parser::doctype: root='" << param.root_element << "', fpi='" << param.fpi << "' uri='" << param.uri << "'" << endl;
+#endif
+ m_handler.doctype(param);
+ next();
+}
+
+/**
+ * Parse a text span that runs up to the next '<' or the end of the stream.
+ *
+ * A span without '&' is passed to the handler as a non-transient view into
+ * the stream.  As soon as '&' is seen, the text read so far is copied into a
+ * cell buffer and the rest is handled by characters_with_encoded_char().
+ */
+template<typename HandlerT, typename ConfigT>
+void sax_parser<HandlerT,ConfigT>::characters()
+{
+    const char* const span_begin = mp_char;
+
+    while (has_char())
+    {
+        const char c = cur_char();
+        if (c == '<')
+            break;
+
+        if (c == '&')
+        {
+            // Text span with one or more encoded characters. Parse using cell buffer.
+            cell_buffer& decoded = get_cell_buffer();
+            decoded.reset();
+            decoded.append(span_begin, mp_char-span_begin);
+            characters_with_encoded_char(decoded);
+
+            if (!decoded.empty())
+                m_handler.characters(decoded.str(), true);
+            else
+                m_handler.characters(std::string_view{}, false);
+            return;
+        }
+
+        next();
+    }
+
+    // Nothing to report when the span is empty.
+    if (!(mp_char > span_begin))
+        return;
+
+    m_handler.characters(std::string_view(span_begin, mp_char-span_begin), false);
+}
+
+/**
+ * Parse a single attribute of the form name="value" (optionally with a
+ * namespace in the name) and pass it to the handler's attribute().
+ *
+ * @throws malformed_xml_error if the name is not followed by '=' or if the
+ *         stream ends prematurely.
+ */
+template<typename HandlerT, typename ConfigT>
+void sax_parser<HandlerT,ConfigT>::attribute()
+{
+    sax::parser_attribute attr;
+    attribute_name(attr.ns, attr.name);
+
+#if ORCUS_DEBUG_SAX_PARSER
+    cout << "sax_parser::attribute: ns='" << attr.ns << "', name='" << attr.name << "'" << endl;
+#endif
+
+    skip_space_and_control();
+
+    if (cur_char_checked() != '=')
+    {
+        std::ostringstream os;
+        os << "Attribute must begin with 'name=..'. (ns='" << attr.ns << "', name='" << attr.name << "')";
+        throw malformed_xml_error(os.str(), offset());
+    }
+
+    // Step over the '=' and any white space in front of the value.
+    next_check();
+    skip_space_and_control();
+
+    const bool in_temp_buffer = value(attr.value, true);
+    attr.transient = in_temp_buffer;
+    if (in_temp_buffer)
+    {
+        // Value is stored in a temporary buffer. Push a new buffer.
+        inc_buffer_pos();
+    }
+
+#if ORCUS_DEBUG_SAX_PARSER
+    cout << "sax_parser::attribute: value='" << attr.value << "'" << endl;
+#endif
+
+    m_handler.attribute(attr);
+}
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/sax_parser_base.hpp b/include/orcus/sax_parser_base.hpp
new file mode 100644
index 0000000..4dcfc07
--- /dev/null
+++ b/include/orcus/sax_parser_base.hpp
@@ -0,0 +1,207 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
+#define INCLUDED_ORCUS_SAX_PARSER_BASE_HPP
+
+#include "env.hpp"
+#include "cell_buffer.hpp"
+#include "parser_global.hpp"
+#include "parser_base.hpp"
+
+#include <cassert>
+#include <cstdlib>
+#include <exception>
+#include <sstream>
+#include <memory>
+
+#define ORCUS_DEBUG_SAX_PARSER 0
+
+#if ORCUS_DEBUG_SAX_PARSER
+#include <iostream>
+using std::cout;
+using std::endl;
+#endif
+
+namespace orcus { namespace sax {
+
+/**
+ * Document type declaration passed by sax_parser to its handler's doctype()
+ * call once a <!DOCTYPE ...> section has been parsed.
+ */
+struct doctype_declaration
+{
+ enum class keyword_type { dtd_public, dtd_private };
+
+ keyword_type keyword; // dtd_public when the PUBLIC keyword is used, dtd_private otherwise
+ std::string_view root_element; // name that follows "<!DOCTYPE"
+ std::string_view fpi; // first quoted value after the keyword
+ std::string_view uri; // optional second quoted value; empty when not given
+};
+
+/**
+ * Given an encoded name (such as 'quot' and 'amp'), return a single
+ * character that corresponds with the name. The name shouldn't include the
+ * leading '&' and trailing ';'.
+ *
+ * @param p pointer to the first character of the encoded name
+ * @param n length of the encoded name
+ *
+ * @return single character that corresponds with the encoded name. '\0' is
+ * returned if decoding fails.
+ */
+ORCUS_PSR_DLLPUBLIC char decode_xml_encoded_char(const char* p, size_t n);
+
+/**
+ * Given an encoded unicode value (such as #20A9), return a UTF-8 string
+ * that corresponds with the unicode value. The value shouldn't include the
+ * leading '&' and trailing ';'.
+ *
+ * @param p pointer to the first character of the encoded value
+ * @param n length of the encoded value
+ *
+ * @return string that corresponds with the encoded value. An empty string
+ * is returned if decoding fails.
+ */
+ORCUS_PSR_DLLPUBLIC std::string decode_xml_unicode_char(const char* p, size_t n);
+
+/**
+ * Element properties passed by sax_parser to its handler's start_element()
+ * and end_element() calls.
+ */
+struct parser_element
+{
+ /** Optional element namespace. It may be empty if it's not given. */
+ std::string_view ns;
+ /** Element name. */
+ std::string_view name;
+ /** Position of the opening brace '<'. */
+ std::ptrdiff_t begin_pos;
+ /** Position immediately after the closing brace '>'. */
+ std::ptrdiff_t end_pos;
+};
+
+/**
+ * Attribute properties passed by sax_parser to its handler's attribute()
+ * call. When an attribute value is "transient", it has been converted due to
+ * the presence of encoded character(s) and is stored in a temporary buffer.
+ * The handler must assume that the value will not survive after the callback
+ * function ends.
+ */
+struct parser_attribute
+{
+ /** Optional attribute namespace. It may be empty if it's not given. */
+ std::string_view ns;
+ /** Attribute name. */
+ std::string_view name;
+ /** Attribute value. */
+ std::string_view value;
+ /** Whether or not the attribute value is stored in a temporary buffer. */
+ bool transient;
+};
+
+class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ parser_base() = delete;
+ parser_base(const parser_base&) = delete;
+ parser_base& operator=(const parser_base&) = delete;
+protected:
+ size_t m_nest_level; // changed only through nest_up() and nest_down() within this class
+ size_t m_buffer_pos; // reset to 0 by reset_buffer_pos()
+ bool m_root_elem_open:1;
+
+protected:
+ parser_base(const char* content, size_t size);
+ ~parser_base();
+
+ void next_check() // advance by one character; throw if the stream is then exhausted
+ {
+ next();
+ if (!has_char())
+ throw malformed_xml_error("xml stream ended prematurely.", offset());
+ }
+
+ void nest_up() { ++m_nest_level; }
+ void nest_down() // throws instead of letting the unsigned level wrap below zero
+ {
+ if (m_nest_level == 0)
+ throw malformed_xml_error("incorrect nesting in xml stream", offset());
+
+ --m_nest_level;
+ }
+
+ void inc_buffer_pos();
+ void reset_buffer_pos() { m_buffer_pos = 0; }
+
+ void has_char_throw(const char* msg) const // throw malformed_xml_error with msg when no character is left
+ {
+ if (!has_char())
+ throw malformed_xml_error(msg, offset());
+ }
+
+ char cur_char_checked() const // current character, with an end-of-stream check
+ {
+ if (!has_char())
+ throw malformed_xml_error("xml stream ended prematurely.", offset());
+
+ return *mp_char;
+ }
+
+ char next_and_char() // advance and return the new current character; bounds-checked only when ORCUS_DEBUG_SAX_PARSER is non-zero
+ {
+ next();
+#if ORCUS_DEBUG_SAX_PARSER
+ if (mp_char >= mp_end)
+ throw malformed_xml_error("xml stream ended prematurely.", offset());
+#endif
+ return *mp_char;
+ }
+
+ char next_char_checked() // advance and return the new current character; always bounds-checked
+ {
+ next();
+ if (!has_char())
+ throw malformed_xml_error("xml stream ended prematurely.", offset());
+
+ return *mp_char;
+ }
+
+ cell_buffer& get_cell_buffer();
+
+ void comment();
+
+ void expects_next(const char* p, size_t n);
+
+ void parse_encoded_char(cell_buffer& buf);
+ void value_with_encoded_char(cell_buffer& buf, std::string_view& str, char quote_char);
+
+ /**
+ * Parse a quoted value. Note that the retrieved string may be stored in a
+ * temporary cell buffer if the decode parameter is true. Use the string
+ * immediately after this call before the buffer becomes invalid.
+ *
+ * @note This method checks for valid stream; the caller doesn't need to
+ * check for valid stream before calling this method.
+ *
+ * @return true if the value is stored in a temporary buffer, false
+ * otherwise.
+ */
+ bool value(std::string_view& str, bool decode);
+
+ void name(std::string_view& str);
+ void element_name(parser_element& elem, std::ptrdiff_t begin_pos);
+ void attribute_name(std::string_view& attr_ns, std::string_view& attr_name);
+ void characters_with_encoded_char(cell_buffer& buf);
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/sax_token_parser.hpp b/include/orcus/sax_token_parser.hpp
new file mode 100644
index 0000000..867c8b5
--- /dev/null
+++ b/include/orcus/sax_token_parser.hpp
@@ -0,0 +1,186 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SAX_TOKEN_PARSER_HPP
+#define INCLUDED_ORCUS_SAX_TOKEN_PARSER_HPP
+
+#include "sax_ns_parser.hpp"
+#include "types.hpp"
+
+#include <vector>
+#include <algorithm>
+#include <functional>
+
+namespace orcus {
+
+class tokens;
+
+class ORCUS_PSR_DLLPUBLIC sax_token_handler_wrapper_base
+{
+protected:
+ xml_declaration_t m_declaration; // handed to the handler's declaration() callback by the derived wrapper
+ xml_token_element_t m_elem; // handed to the handler's start_element()/end_element() callbacks by the derived wrapper
+ const tokens& m_tokens; // held by reference; the tokens object must outlive this wrapper
+
+ xml_token_t tokenize(std::string_view name) const;
+ void set_element(const sax_ns_parser_element& elem);
+
+public:
+ sax_token_handler_wrapper_base(const tokens& _tokens);
+
+ void attribute(std::string_view name, std::string_view val);
+ void attribute(const sax_ns_parser_attribute& attr);
+};
+
+class sax_token_handler
+{
+public:
+
+ /**
+ * Called immediately after the entire XML declaration has been parsed.
+ *
+ * @param decl struct containing the attributes of the XML declaration.
+ */
+ void declaration(const orcus::xml_declaration_t& decl)
+ {
+ (void)decl;
+ }
+
+ /**
+ * Called at the start of each element.
+ *
+ * @param elem struct containing the element's information as well as all
+ * the attributes that belong to the element.
+ */
+ void start_element(const orcus::xml_token_element_t& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called at the end of each element.
+ *
+ * @param elem struct containing the element's information as well as all
+ * the attributes that belong to the element.
+ */
+ void end_element(const orcus::xml_token_element_t& elem)
+ {
+ (void)elem;
+ }
+
+ /**
+ * Called when a segment of text content is parsed. Each text content
+ * is a direct child of an element, which may have multiple text contents
+ * when the element also has child elements that are direct siblings of
+ * the text contents, or when the text contents are split by a comment.
+ *
+ * @param val value of the text content.
+ * @param transient when true, the text content has been converted and is
+ * stored in a temporary buffer due to the presence of one or
+ * more encoded characters, in which case <em>the passed
+ * text value needs to be either immediately converted to
+ * a non-text value or be interned within the scope of
+ * the callback</em>.
+ */
+ void characters(std::string_view val, bool transient)
+ {
+ (void)val; (void)transient;
+ }
+};
+
+/**
+ * SAX parser that tokenizes element and attribute names while parsing. All
+ * pre-defined element and attribute names are translated into integral
+ * identifiers via use of @ref tokens. The user of this class needs to
+ * provide a pre-defined set of element and attribute names at construction
+ * time.
+ *
+ * This parser internally uses @ref sax_ns_parser.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ * Refer to @ref sax_token_handler.
+ */
+template<typename HandlerT>
+class sax_token_parser
+{
+public:
+ typedef HandlerT handler_type;
+
+ sax_token_parser(
+ std::string_view content, const tokens& _tokens,
+ xmlns_context& ns_cxt, handler_type& handler);
+
+ ~sax_token_parser() = default;
+
+ void parse();
+
+private:
+
+ /**
+ * Re-route callbacks from the internal sax_ns_parser into the
+ * sax_token_parser callbacks.
+ */
+ class handler_wrapper : public sax_token_handler_wrapper_base
+ {
+ handler_type& m_handler; // held by reference; the handler must outlive the parser
+
+ public:
+ handler_wrapper(const tokens& _tokens, handler_type& handler) :
+ sax_token_handler_wrapper_base(_tokens), m_handler(handler) {}
+
+ void doctype(const sax::doctype_declaration&) {} // not forwarded to the handler
+
+ void start_declaration(std::string_view) {} // not forwarded to the handler
+
+ void end_declaration(std::string_view)
+ {
+ m_handler.declaration(m_declaration);
+ m_elem.attrs.clear(); // presumably drops attributes collected while parsing the declaration - TODO confirm
+ }
+
+ void start_element(const sax_ns_parser_element& elem)
+ {
+ set_element(elem);
+ m_handler.start_element(m_elem);
+ m_elem.attrs.clear(); // attributes are cleared once the handler has been notified
+ }
+
+ void end_element(const sax_ns_parser_element& elem)
+ {
+ set_element(elem);
+ m_handler.end_element(m_elem);
+ }
+
+ void characters(std::string_view val, bool transient)
+ {
+ m_handler.characters(val, transient);
+ }
+ };
+
+private:
+ handler_wrapper m_wrapper; // declared before m_parser, which is constructed with a reference to it
+ sax_ns_parser<handler_wrapper> m_parser;
+};
+
+template<typename HandlerT>
+sax_token_parser<HandlerT>::sax_token_parser(
+ std::string_view content, const tokens& _tokens, xmlns_context& ns_cxt, handler_type& handler) :
+ m_wrapper(_tokens, handler), // wraps the user handler
+ m_parser(content, ns_cxt, m_wrapper) // the internal parser reports to the wrapper, not to the user handler directly
+{
+}
+
+template<typename HandlerT>
+void sax_token_parser<HandlerT>::parse()
+{
+ m_parser.parse(); // delegate to the internal sax_ns_parser
+}
+
+} // namespace orcus
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/sax_token_parser_thread.hpp b/include/orcus/sax_token_parser_thread.hpp
new file mode 100644
index 0000000..b364573
--- /dev/null
+++ b/include/orcus/sax_token_parser_thread.hpp
@@ -0,0 +1,92 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SAX_TOKEN_PARSER_THREAD_HPP
+#define INCLUDED_ORCUS_SAX_TOKEN_PARSER_THREAD_HPP
+
+#include "env.hpp"
+#include "types.hpp"
+
+#include <memory>
+#include <variant>
+#include <vector>
+#include <ostream>
+
+namespace orcus {
+
+class tokens;
+class xmlns_context;
+class string_pool;
+struct xml_token_element_t;
+
+namespace sax {
+
+enum class parse_token_t // kind of a parse_token; stored in parse_token::type
+{
+ unknown,
+ start_element,
+ end_element,
+ characters,
+ parse_error,
+};
+
+struct ORCUS_PSR_DLLPUBLIC parse_token
+{
+ using value_type = std::variant<std::string_view, parse_error_value_t, const xml_token_element_t*>;
+
+ parse_token_t type;
+ value_type value; // holds one of the alternatives listed in value_type
+
+ parse_token();
+ parse_token(std::string_view _characters); // presumably creates a characters token - confirm against the implementation
+ parse_token(parse_token_t _type, const xml_token_element_t* _element); // the element is referenced by pointer, not copied into the variant
+ parse_token(std::string_view msg, std::ptrdiff_t offset); // presumably creates a parse_error token - confirm against the implementation
+
+ parse_token(const parse_token& other);
+
+ parse_token& operator= (parse_token) = delete; // tokens are copy-constructible but not assignable
+
+ bool operator== (const parse_token& other) const;
+ bool operator!= (const parse_token& other) const;
+};
+
+typedef std::vector<parse_token> parse_tokens_t;
+
+ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const parse_tokens_t& tokens);
+
+class ORCUS_PSR_DLLPUBLIC parser_thread
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ parser_thread(const char* p, size_t n, const orcus::tokens& tks, xmlns_context& ns_cxt, size_t min_token_size);
+ parser_thread(const char* p, size_t n, const orcus::tokens& tks, xmlns_context& ns_cxt, size_t min_token_size, size_t max_token_size);
+ ~parser_thread();
+
+ void start();
+
+ /**
+ * Wait until a new set of tokens becomes available.
+ *
+ * @param tokens new set of tokens.
+ *
+ * @return true if the parsing is still in progress (therefore more tokens
+ * are to come), false if it's done, i.e. this is the last token set.
+ */
+ bool next_tokens(parse_tokens_t& tokens);
+
+ void swap_string_pool(string_pool& pool);
+
+ void abort();
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/Makefile.am b/include/orcus/spreadsheet/Makefile.am
new file mode 100644
index 0000000..b01bce7
--- /dev/null
+++ b/include/orcus/spreadsheet/Makefile.am
@@ -0,0 +1,26 @@
+
+liborcusdir = $(includedir)/liborcus-@ORCUS_API_VERSION@/orcus/spreadsheet
+liborcus_HEADERS = \
+ types.hpp \
+ view_types.hpp \
+ export_interface.hpp \
+ import_interface.hpp \
+ import_interface_pivot.hpp \
+ import_interface_styles.hpp \
+ import_interface_view.hpp
+
+if BUILD_SPREADSHEET_MODEL
+
+liborcus_HEADERS += \
+ auto_filter.hpp \
+ config.hpp \
+ document.hpp \
+ document_types.hpp \
+ factory.hpp \
+ pivot.hpp \
+ shared_strings.hpp \
+ sheet.hpp \
+ styles.hpp \
+ view.hpp
+
+endif
diff --git a/include/orcus/spreadsheet/Makefile.in b/include/orcus/spreadsheet/Makefile.in
new file mode 100644
index 0000000..2331067
--- /dev/null
+++ b/include/orcus/spreadsheet/Makefile.in
@@ -0,0 +1,680 @@
+# Makefile.in generated by automake 1.16.5 from Makefile.am.
+# @configure_input@
+
+# Copyright (C) 1994-2021 Free Software Foundation, Inc.
+
+# This Makefile.in is free software; the Free Software Foundation
+# gives unlimited permission to copy and/or distribute it,
+# with or without modifications, as long as this notice is preserved.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY, to the extent permitted by law; without
+# even the implied warranty of MERCHANTABILITY or FITNESS FOR A
+# PARTICULAR PURPOSE.
+
+@SET_MAKE@
+
+VPATH = @srcdir@
+am__is_gnu_make = { \
+ if test -z '$(MAKELEVEL)'; then \
+ false; \
+ elif test -n '$(MAKE_HOST)'; then \
+ true; \
+ elif test -n '$(MAKE_VERSION)' && test -n '$(CURDIR)'; then \
+ true; \
+ else \
+ false; \
+ fi; \
+}
+am__make_running_with_option = \
+ case $${target_option-} in \
+ ?) ;; \
+ *) echo "am__make_running_with_option: internal error: invalid" \
+ "target option '$${target_option-}' specified" >&2; \
+ exit 1;; \
+ esac; \
+ has_opt=no; \
+ sane_makeflags=$$MAKEFLAGS; \
+ if $(am__is_gnu_make); then \
+ sane_makeflags=$$MFLAGS; \
+ else \
+ case $$MAKEFLAGS in \
+ *\\[\ \ ]*) \
+ bs=\\; \
+ sane_makeflags=`printf '%s\n' "$$MAKEFLAGS" \
+ | sed "s/$$bs$$bs[$$bs $$bs ]*//g"`;; \
+ esac; \
+ fi; \
+ skip_next=no; \
+ strip_trailopt () \
+ { \
+ flg=`printf '%s\n' "$$flg" | sed "s/$$1.*$$//"`; \
+ }; \
+ for flg in $$sane_makeflags; do \
+ test $$skip_next = yes && { skip_next=no; continue; }; \
+ case $$flg in \
+ *=*|--*) continue;; \
+ -*I) strip_trailopt 'I'; skip_next=yes;; \
+ -*I?*) strip_trailopt 'I';; \
+ -*O) strip_trailopt 'O'; skip_next=yes;; \
+ -*O?*) strip_trailopt 'O';; \
+ -*l) strip_trailopt 'l'; skip_next=yes;; \
+ -*l?*) strip_trailopt 'l';; \
+ -[dEDm]) skip_next=yes;; \
+ -[JT]) skip_next=yes;; \
+ esac; \
+ case $$flg in \
+ *$$target_option*) has_opt=yes; break;; \
+ esac; \
+ done; \
+ test $$has_opt = yes
+am__make_dryrun = (target_option=n; $(am__make_running_with_option))
+am__make_keepgoing = (target_option=k; $(am__make_running_with_option))
+pkgdatadir = $(datadir)/@PACKAGE@
+pkgincludedir = $(includedir)/@PACKAGE@
+pkglibdir = $(libdir)/@PACKAGE@
+pkglibexecdir = $(libexecdir)/@PACKAGE@
+am__cd = CDPATH="$${ZSH_VERSION+.}$(PATH_SEPARATOR)" && cd
+install_sh_DATA = $(install_sh) -c -m 644
+install_sh_PROGRAM = $(install_sh) -c
+install_sh_SCRIPT = $(install_sh) -c
+INSTALL_HEADER = $(INSTALL_DATA)
+transform = $(program_transform_name)
+NORMAL_INSTALL = :
+PRE_INSTALL = :
+POST_INSTALL = :
+NORMAL_UNINSTALL = :
+PRE_UNINSTALL = :
+POST_UNINSTALL = :
+build_triplet = @build@
+host_triplet = @host@
+@BUILD_SPREADSHEET_MODEL_TRUE@am__append_1 = \
+@BUILD_SPREADSHEET_MODEL_TRUE@ auto_filter.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ config.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ document.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ document_types.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ factory.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ pivot.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ shared_strings.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ sheet.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ styles.hpp \
+@BUILD_SPREADSHEET_MODEL_TRUE@ view.hpp
+
+subdir = include/orcus/spreadsheet
+ACLOCAL_M4 = $(top_srcdir)/aclocal.m4
+am__aclocal_m4_deps = $(top_srcdir)/m4/ax_cxx_compile_stdcxx.m4 \
+ $(top_srcdir)/m4/ax_cxx_compile_stdcxx_17.m4 \
+ $(top_srcdir)/m4/boost.m4 $(top_srcdir)/m4/libtool.m4 \
+ $(top_srcdir)/m4/ltoptions.m4 $(top_srcdir)/m4/ltsugar.m4 \
+ $(top_srcdir)/m4/ltversion.m4 $(top_srcdir)/m4/lt~obsolete.m4 \
+ $(top_srcdir)/m4/m4_ax_valgrind_check.m4 \
+ $(top_srcdir)/configure.ac
+am__configure_deps = $(am__aclocal_m4_deps) $(CONFIGURE_DEPENDENCIES) \
+ $(ACLOCAL_M4)
+DIST_COMMON = $(srcdir)/Makefile.am $(am__liborcus_HEADERS_DIST) \
+ $(am__DIST_COMMON)
+mkinstalldirs = $(install_sh) -d
+CONFIG_HEADER = $(top_builddir)/config.h
+CONFIG_CLEAN_FILES =
+CONFIG_CLEAN_VPATH_FILES =
+AM_V_P = $(am__v_P_@AM_V@)
+am__v_P_ = $(am__v_P_@AM_DEFAULT_V@)
+am__v_P_0 = false
+am__v_P_1 = :
+AM_V_GEN = $(am__v_GEN_@AM_V@)
+am__v_GEN_ = $(am__v_GEN_@AM_DEFAULT_V@)
+am__v_GEN_0 = @echo " GEN " $@;
+am__v_GEN_1 =
+AM_V_at = $(am__v_at_@AM_V@)
+am__v_at_ = $(am__v_at_@AM_DEFAULT_V@)
+am__v_at_0 = @
+am__v_at_1 =
+SOURCES =
+DIST_SOURCES =
+am__can_run_installinfo = \
+ case $$AM_UPDATE_INFO_DIR in \
+ n|no|NO) false;; \
+ *) (install-info --version) >/dev/null 2>&1;; \
+ esac
+am__liborcus_HEADERS_DIST = types.hpp view_types.hpp \
+ export_interface.hpp import_interface.hpp \
+ import_interface_pivot.hpp import_interface_styles.hpp \
+ import_interface_view.hpp auto_filter.hpp config.hpp \
+ document.hpp document_types.hpp factory.hpp pivot.hpp \
+ shared_strings.hpp sheet.hpp styles.hpp view.hpp
+am__vpath_adj_setup = srcdirstrip=`echo "$(srcdir)" | sed 's|.|.|g'`;
+am__vpath_adj = case $$p in \
+ $(srcdir)/*) f=`echo "$$p" | sed "s|^$$srcdirstrip/||"`;; \
+ *) f=$$p;; \
+ esac;
+am__strip_dir = f=`echo $$p | sed -e 's|^.*/||'`;
+am__install_max = 40
+am__nobase_strip_setup = \
+ srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*|]/\\\\&/g'`
+am__nobase_strip = \
+ for p in $$list; do echo "$$p"; done | sed -e "s|$$srcdirstrip/||"
+am__nobase_list = $(am__nobase_strip_setup); \
+ for p in $$list; do echo "$$p $$p"; done | \
+ sed "s| $$srcdirstrip/| |;"' / .*\//!s/ .*/ ./; s,\( .*\)/[^/]*$$,\1,' | \
+ $(AWK) 'BEGIN { files["."] = "" } { files[$$2] = files[$$2] " " $$1; \
+ if (++n[$$2] == $(am__install_max)) \
+ { print $$2, files[$$2]; n[$$2] = 0; files[$$2] = "" } } \
+ END { for (dir in files) print dir, files[dir] }'
+am__base_list = \
+ sed '$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;$$!N;s/\n/ /g' | \
+ sed '$$!N;$$!N;$$!N;$$!N;s/\n/ /g'
+am__uninstall_files_from_dir = { \
+ test -z "$$files" \
+ || { test ! -d "$$dir" && test ! -f "$$dir" && test ! -r "$$dir"; } \
+ || { echo " ( cd '$$dir' && rm -f" $$files ")"; \
+ $(am__cd) "$$dir" && rm -f $$files; }; \
+ }
+am__installdirs = "$(DESTDIR)$(liborcusdir)"
+HEADERS = $(liborcus_HEADERS)
+am__extra_recursive_targets = check-valgrind-recursive \
+ check-valgrind-memcheck-recursive \
+ check-valgrind-helgrind-recursive check-valgrind-drd-recursive \
+ check-valgrind-sgcheck-recursive
+am__tagged_files = $(HEADERS) $(SOURCES) $(TAGS_FILES) $(LISP)
+# Read a list of newline-separated strings from the standard input,
+# and print each of them once, without duplicates. Input order is
+# *not* preserved.
+am__uniquify_input = $(AWK) '\
+ BEGIN { nonempty = 0; } \
+ { items[$$0] = 1; nonempty = 1; } \
+ END { if (nonempty) { for (i in items) print i; }; } \
+'
+# Make sure the list of sources is unique. This is necessary because,
+# e.g., the same source file might be shared among _SOURCES variables
+# for different programs/libraries.
+am__define_uniq_tagged_files = \
+ list='$(am__tagged_files)'; \
+ unique=`for i in $$list; do \
+ if test -f "$$i"; then echo $$i; else echo $(srcdir)/$$i; fi; \
+ done | $(am__uniquify_input)`
+am__DIST_COMMON = $(srcdir)/Makefile.in
+DISTFILES = $(DIST_COMMON) $(DIST_SOURCES) $(TEXINFOS) $(EXTRA_DIST)
+ACLOCAL = @ACLOCAL@
+AMTAR = @AMTAR@
+AM_DEFAULT_VERBOSITY = @AM_DEFAULT_VERBOSITY@
+AR = @AR@
+AS = @AS@
+AUTOCONF = @AUTOCONF@
+AUTOHEADER = @AUTOHEADER@
+AUTOMAKE = @AUTOMAKE@
+AWK = @AWK@
+BOOST_CPPFLAGS = @BOOST_CPPFLAGS@
+BOOST_DATE_TIME_LDFLAGS = @BOOST_DATE_TIME_LDFLAGS@
+BOOST_DATE_TIME_LDPATH = @BOOST_DATE_TIME_LDPATH@
+BOOST_DATE_TIME_LIBS = @BOOST_DATE_TIME_LIBS@
+BOOST_FILESYSTEM_LDFLAGS = @BOOST_FILESYSTEM_LDFLAGS@
+BOOST_FILESYSTEM_LDPATH = @BOOST_FILESYSTEM_LDPATH@
+BOOST_FILESYSTEM_LIBS = @BOOST_FILESYSTEM_LIBS@
+BOOST_IOSTREAMS_LDFLAGS = @BOOST_IOSTREAMS_LDFLAGS@
+BOOST_IOSTREAMS_LDPATH = @BOOST_IOSTREAMS_LDPATH@
+BOOST_IOSTREAMS_LIBS = @BOOST_IOSTREAMS_LIBS@
+BOOST_LDPATH = @BOOST_LDPATH@
+BOOST_PROGRAM_OPTIONS_LDFLAGS = @BOOST_PROGRAM_OPTIONS_LDFLAGS@
+BOOST_PROGRAM_OPTIONS_LDPATH = @BOOST_PROGRAM_OPTIONS_LDPATH@
+BOOST_PROGRAM_OPTIONS_LIBS = @BOOST_PROGRAM_OPTIONS_LIBS@
+BOOST_ROOT = @BOOST_ROOT@
+BOOST_SYSTEM_LDFLAGS = @BOOST_SYSTEM_LDFLAGS@
+BOOST_SYSTEM_LDPATH = @BOOST_SYSTEM_LDPATH@
+BOOST_SYSTEM_LIBS = @BOOST_SYSTEM_LIBS@
+CC = @CC@
+CCDEPMODE = @CCDEPMODE@
+CFLAGS = @CFLAGS@
+CPP = @CPP@
+CPPFLAGS = @CPPFLAGS@
+CSCOPE = @CSCOPE@
+CTAGS = @CTAGS@
+CXX = @CXX@
+CXXCPP = @CXXCPP@
+CXXDEPMODE = @CXXDEPMODE@
+CXXFLAGS = @CXXFLAGS@
+CYGPATH_W = @CYGPATH_W@
+DEFS = @DEFS@
+DEPDIR = @DEPDIR@
+DISTCHECK_CONFIGURE_FLAGS = @DISTCHECK_CONFIGURE_FLAGS@
+DLLTOOL = @DLLTOOL@
+DSYMUTIL = @DSYMUTIL@
+DUMPBIN = @DUMPBIN@
+ECHO_C = @ECHO_C@
+ECHO_N = @ECHO_N@
+ECHO_T = @ECHO_T@
+EGREP = @EGREP@
+ENABLE_VALGRIND_drd = @ENABLE_VALGRIND_drd@
+ENABLE_VALGRIND_helgrind = @ENABLE_VALGRIND_helgrind@
+ENABLE_VALGRIND_memcheck = @ENABLE_VALGRIND_memcheck@
+ENABLE_VALGRIND_sgcheck = @ENABLE_VALGRIND_sgcheck@
+ETAGS = @ETAGS@
+EXEEXT = @EXEEXT@
+FGREP = @FGREP@
+GREP = @GREP@
+HAVE_CXX17 = @HAVE_CXX17@
+INSTALL = @INSTALL@
+INSTALL_DATA = @INSTALL_DATA@
+INSTALL_PROGRAM = @INSTALL_PROGRAM@
+INSTALL_SCRIPT = @INSTALL_SCRIPT@
+INSTALL_STRIP_PROGRAM = @INSTALL_STRIP_PROGRAM@
+IXION_REQUIRED_API_VERSION = @IXION_REQUIRED_API_VERSION@
+LD = @LD@
+LDFLAGS = @LDFLAGS@
+LIBIXION_CFLAGS = @LIBIXION_CFLAGS@
+LIBIXION_LIBS = @LIBIXION_LIBS@
+LIBOBJS = @LIBOBJS@
+LIBS = @LIBS@
+LIBTOOL = @LIBTOOL@
+LIPO = @LIPO@
+LN_S = @LN_S@
+LTLIBOBJS = @LTLIBOBJS@
+LT_SYS_LIBRARY_PATH = @LT_SYS_LIBRARY_PATH@
+MAKEINFO = @MAKEINFO@
+MANIFEST_TOOL = @MANIFEST_TOOL@
+MDDS_CFLAGS = @MDDS_CFLAGS@
+MDDS_LIBS = @MDDS_LIBS@
+MKDIR_P = @MKDIR_P@
+NM = @NM@
+NMEDIT = @NMEDIT@
+OBJDUMP = @OBJDUMP@
+OBJEXT = @OBJEXT@
+ORCUS_API_VERSION = @ORCUS_API_VERSION@
+ORCUS_MAJOR_VERSION = @ORCUS_MAJOR_VERSION@
+ORCUS_MICRO_VERSION = @ORCUS_MICRO_VERSION@
+ORCUS_MINOR_VERSION = @ORCUS_MINOR_VERSION@
+OTOOL = @OTOOL@
+OTOOL64 = @OTOOL64@
+PACKAGE = @PACKAGE@
+PACKAGE_BUGREPORT = @PACKAGE_BUGREPORT@
+PACKAGE_NAME = @PACKAGE_NAME@
+PACKAGE_STRING = @PACKAGE_STRING@
+PACKAGE_TARNAME = @PACKAGE_TARNAME@
+PACKAGE_URL = @PACKAGE_URL@
+PACKAGE_VERSION = @PACKAGE_VERSION@
+PARQUET_CFLAGS = @PARQUET_CFLAGS@
+PARQUET_LIBS = @PARQUET_LIBS@
+PATH_SEPARATOR = @PATH_SEPARATOR@
+PKG_CONFIG = @PKG_CONFIG@
+PKG_CONFIG_LIBDIR = @PKG_CONFIG_LIBDIR@
+PKG_CONFIG_PATH = @PKG_CONFIG_PATH@
+POW_LIB = @POW_LIB@
+PYTHON = @PYTHON@
+PYTHON_CFLAGS = @PYTHON_CFLAGS@
+PYTHON_EXEC_PREFIX = @PYTHON_EXEC_PREFIX@
+PYTHON_LIBS = @PYTHON_LIBS@
+PYTHON_PLATFORM = @PYTHON_PLATFORM@
+PYTHON_PREFIX = @PYTHON_PREFIX@
+PYTHON_VERSION = @PYTHON_VERSION@
+RANLIB = @RANLIB@
+SED = @SED@
+SET_MAKE = @SET_MAKE@
+SHELL = @SHELL@
+STRIP = @STRIP@
+VALGRIND = @VALGRIND@
+VALGRIND_ENABLED = @VALGRIND_ENABLED@
+VERSION = @VERSION@
+ZLIB_CFLAGS = @ZLIB_CFLAGS@
+ZLIB_LIBS = @ZLIB_LIBS@
+abs_builddir = @abs_builddir@
+abs_srcdir = @abs_srcdir@
+abs_top_builddir = @abs_top_builddir@
+abs_top_srcdir = @abs_top_srcdir@
+ac_ct_AR = @ac_ct_AR@
+ac_ct_CC = @ac_ct_CC@
+ac_ct_CXX = @ac_ct_CXX@
+ac_ct_DUMPBIN = @ac_ct_DUMPBIN@
+am__include = @am__include@
+am__leading_dot = @am__leading_dot@
+am__quote = @am__quote@
+am__tar = @am__tar@
+am__untar = @am__untar@
+bindir = @bindir@
+build = @build@
+build_alias = @build_alias@
+build_cpu = @build_cpu@
+build_os = @build_os@
+build_vendor = @build_vendor@
+builddir = @builddir@
+datadir = @datadir@
+datarootdir = @datarootdir@
+docdir = @docdir@
+dvidir = @dvidir@
+exec_prefix = @exec_prefix@
+host = @host@
+host_alias = @host_alias@
+host_cpu = @host_cpu@
+host_os = @host_os@
+host_vendor = @host_vendor@
+htmldir = @htmldir@
+includedir = @includedir@
+infodir = @infodir@
+install_sh = @install_sh@
+libdir = @libdir@
+libexecdir = @libexecdir@
+localedir = @localedir@
+localstatedir = @localstatedir@
+mandir = @mandir@
+mkdir_p = @mkdir_p@
+oldincludedir = @oldincludedir@
+pdfdir = @pdfdir@
+pkgpyexecdir = @pkgpyexecdir@
+pkgpythondir = @pkgpythondir@
+prefix = @prefix@
+program_transform_name = @program_transform_name@
+psdir = @psdir@
+pyexecdir = @pyexecdir@
+pythondir = @pythondir@
+runstatedir = @runstatedir@
+sbindir = @sbindir@
+sharedstatedir = @sharedstatedir@
+srcdir = @srcdir@
+sysconfdir = @sysconfdir@
+target_alias = @target_alias@
+top_build_prefix = @top_build_prefix@
+top_builddir = @top_builddir@
+top_srcdir = @top_srcdir@
+valgrind_enabled_tools = @valgrind_enabled_tools@
+valgrind_tools = @valgrind_tools@
+liborcusdir = $(includedir)/liborcus-@ORCUS_API_VERSION@/orcus/spreadsheet
+liborcus_HEADERS = types.hpp view_types.hpp export_interface.hpp \
+ import_interface.hpp import_interface_pivot.hpp \
+ import_interface_styles.hpp import_interface_view.hpp \
+ $(am__append_1)
+all: all-am
+
+.SUFFIXES:
+$(srcdir)/Makefile.in: $(srcdir)/Makefile.am $(am__configure_deps)
+ @for dep in $?; do \
+ case '$(am__configure_deps)' in \
+ *$$dep*) \
+ ( cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh ) \
+ && { if test -f $@; then exit 0; else break; fi; }; \
+ exit 1;; \
+ esac; \
+ done; \
+ echo ' cd $(top_srcdir) && $(AUTOMAKE) --foreign include/orcus/spreadsheet/Makefile'; \
+ $(am__cd) $(top_srcdir) && \
+ $(AUTOMAKE) --foreign include/orcus/spreadsheet/Makefile
+Makefile: $(srcdir)/Makefile.in $(top_builddir)/config.status
+ @case '$?' in \
+ *config.status*) \
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh;; \
+ *) \
+ echo ' cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles)'; \
+ cd $(top_builddir) && $(SHELL) ./config.status $(subdir)/$@ $(am__maybe_remake_depfiles);; \
+ esac;
+
+$(top_builddir)/config.status: $(top_srcdir)/configure $(CONFIG_STATUS_DEPENDENCIES)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+
+$(top_srcdir)/configure: $(am__configure_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(ACLOCAL_M4): $(am__aclocal_m4_deps)
+ cd $(top_builddir) && $(MAKE) $(AM_MAKEFLAGS) am--refresh
+$(am__aclocal_m4_deps):
+
+mostlyclean-libtool:
+ -rm -f *.lo
+
+clean-libtool:
+ -rm -rf .libs _libs
+install-liborcusHEADERS: $(liborcus_HEADERS)
+ @$(NORMAL_INSTALL)
+ @list='$(liborcus_HEADERS)'; test -n "$(liborcusdir)" || list=; \
+ if test -n "$$list"; then \
+ echo " $(MKDIR_P) '$(DESTDIR)$(liborcusdir)'"; \
+ $(MKDIR_P) "$(DESTDIR)$(liborcusdir)" || exit 1; \
+ fi; \
+ for p in $$list; do \
+ if test -f "$$p"; then d=; else d="$(srcdir)/"; fi; \
+ echo "$$d$$p"; \
+ done | $(am__base_list) | \
+ while read files; do \
+ echo " $(INSTALL_HEADER) $$files '$(DESTDIR)$(liborcusdir)'"; \
+ $(INSTALL_HEADER) $$files "$(DESTDIR)$(liborcusdir)" || exit $$?; \
+ done
+
+uninstall-liborcusHEADERS:
+ @$(NORMAL_UNINSTALL)
+ @list='$(liborcus_HEADERS)'; test -n "$(liborcusdir)" || list=; \
+ files=`for p in $$list; do echo $$p; done | sed -e 's|^.*/||'`; \
+ dir='$(DESTDIR)$(liborcusdir)'; $(am__uninstall_files_from_dir)
+check-valgrind-local:
+check-valgrind-memcheck-local:
+check-valgrind-helgrind-local:
+check-valgrind-drd-local:
+check-valgrind-sgcheck-local:
+
+ID: $(am__tagged_files)
+ $(am__define_uniq_tagged_files); mkid -fID $$unique
+tags: tags-am
+TAGS: tags
+
+tags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ set x; \
+ here=`pwd`; \
+ $(am__define_uniq_tagged_files); \
+ shift; \
+ if test -z "$(ETAGS_ARGS)$$*$$unique"; then :; else \
+ test -n "$$unique" || unique=$$empty_fix; \
+ if test $$# -gt 0; then \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ "$$@" $$unique; \
+ else \
+ $(ETAGS) $(ETAGSFLAGS) $(AM_ETAGSFLAGS) $(ETAGS_ARGS) \
+ $$unique; \
+ fi; \
+ fi
+ctags: ctags-am
+
+CTAGS: ctags
+ctags-am: $(TAGS_DEPENDENCIES) $(am__tagged_files)
+ $(am__define_uniq_tagged_files); \
+ test -z "$(CTAGS_ARGS)$$unique" \
+ || $(CTAGS) $(CTAGSFLAGS) $(AM_CTAGSFLAGS) $(CTAGS_ARGS) \
+ $$unique
+
+GTAGS:
+ here=`$(am__cd) $(top_builddir) && pwd` \
+ && $(am__cd) $(top_srcdir) \
+ && gtags -i $(GTAGS_ARGS) "$$here"
+cscopelist: cscopelist-am
+
+cscopelist-am: $(am__tagged_files)
+ list='$(am__tagged_files)'; \
+ case "$(srcdir)" in \
+ [\\/]* | ?:[\\/]*) sdir="$(srcdir)" ;; \
+ *) sdir=$(subdir)/$(srcdir) ;; \
+ esac; \
+ for i in $$list; do \
+ if test -f "$$i"; then \
+ echo "$(subdir)/$$i"; \
+ else \
+ echo "$$sdir/$$i"; \
+ fi; \
+ done >> $(top_builddir)/cscope.files
+
+distclean-tags:
+ -rm -f TAGS ID GTAGS GRTAGS GSYMS GPATH tags
+distdir: $(BUILT_SOURCES)
+ $(MAKE) $(AM_MAKEFLAGS) distdir-am
+
+distdir-am: $(DISTFILES)
+ @srcdirstrip=`echo "$(srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ topsrcdirstrip=`echo "$(top_srcdir)" | sed 's/[].[^$$\\*]/\\\\&/g'`; \
+ list='$(DISTFILES)'; \
+ dist_files=`for file in $$list; do echo $$file; done | \
+ sed -e "s|^$$srcdirstrip/||;t" \
+ -e "s|^$$topsrcdirstrip/|$(top_builddir)/|;t"`; \
+ case $$dist_files in \
+ */*) $(MKDIR_P) `echo "$$dist_files" | \
+ sed '/\//!d;s|^|$(distdir)/|;s,/[^/]*$$,,' | \
+ sort -u` ;; \
+ esac; \
+ for file in $$dist_files; do \
+ if test -f $$file || test -d $$file; then d=.; else d=$(srcdir); fi; \
+ if test -d $$d/$$file; then \
+ dir=`echo "/$$file" | sed -e 's,/[^/]*$$,,'`; \
+ if test -d "$(distdir)/$$file"; then \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ if test -d $(srcdir)/$$file && test $$d != $(srcdir); then \
+ cp -fpR $(srcdir)/$$file "$(distdir)$$dir" || exit 1; \
+ find "$(distdir)/$$file" -type d ! -perm -700 -exec chmod u+rwx {} \;; \
+ fi; \
+ cp -fpR $$d/$$file "$(distdir)$$dir" || exit 1; \
+ else \
+ test -f "$(distdir)/$$file" \
+ || cp -p $$d/$$file "$(distdir)/$$file" \
+ || exit 1; \
+ fi; \
+ done
+check-am: all-am
+check: check-am
+all-am: Makefile $(HEADERS)
+installdirs:
+ for dir in "$(DESTDIR)$(liborcusdir)"; do \
+ test -z "$$dir" || $(MKDIR_P) "$$dir"; \
+ done
+install: install-am
+install-exec: install-exec-am
+install-data: install-data-am
+uninstall: uninstall-am
+
+install-am: all-am
+ @$(MAKE) $(AM_MAKEFLAGS) install-exec-am install-data-am
+
+installcheck: installcheck-am
+install-strip:
+ if test -z '$(STRIP)'; then \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ install; \
+ else \
+ $(MAKE) $(AM_MAKEFLAGS) INSTALL_PROGRAM="$(INSTALL_STRIP_PROGRAM)" \
+ install_sh_PROGRAM="$(INSTALL_STRIP_PROGRAM)" INSTALL_STRIP_FLAG=-s \
+ "INSTALL_PROGRAM_ENV=STRIPPROG='$(STRIP)'" install; \
+ fi
+mostlyclean-generic:
+
+clean-generic:
+
+distclean-generic:
+ -test -z "$(CONFIG_CLEAN_FILES)" || rm -f $(CONFIG_CLEAN_FILES)
+ -test . = "$(srcdir)" || test -z "$(CONFIG_CLEAN_VPATH_FILES)" || rm -f $(CONFIG_CLEAN_VPATH_FILES)
+
+maintainer-clean-generic:
+ @echo "This command is intended for maintainers to use"
+ @echo "it deletes files that may require special tools to rebuild."
+check-valgrind: check-valgrind-am
+
+check-valgrind-am: check-valgrind-local
+
+check-valgrind-drd: check-valgrind-drd-am
+
+check-valgrind-drd-am: check-valgrind-drd-local
+
+check-valgrind-helgrind: check-valgrind-helgrind-am
+
+check-valgrind-helgrind-am: check-valgrind-helgrind-local
+
+check-valgrind-memcheck: check-valgrind-memcheck-am
+
+check-valgrind-memcheck-am: check-valgrind-memcheck-local
+
+check-valgrind-sgcheck: check-valgrind-sgcheck-am
+
+check-valgrind-sgcheck-am: check-valgrind-sgcheck-local
+
+clean: clean-am
+
+clean-am: clean-generic clean-libtool mostlyclean-am
+
+distclean: distclean-am
+ -rm -f Makefile
+distclean-am: clean-am distclean-generic distclean-tags
+
+dvi: dvi-am
+
+dvi-am:
+
+html: html-am
+
+html-am:
+
+info: info-am
+
+info-am:
+
+install-data-am: install-liborcusHEADERS
+
+install-dvi: install-dvi-am
+
+install-dvi-am:
+
+install-exec-am:
+
+install-html: install-html-am
+
+install-html-am:
+
+install-info: install-info-am
+
+install-info-am:
+
+install-man:
+
+install-pdf: install-pdf-am
+
+install-pdf-am:
+
+install-ps: install-ps-am
+
+install-ps-am:
+
+installcheck-am:
+
+maintainer-clean: maintainer-clean-am
+ -rm -f Makefile
+maintainer-clean-am: distclean-am maintainer-clean-generic
+
+mostlyclean: mostlyclean-am
+
+mostlyclean-am: mostlyclean-generic mostlyclean-libtool
+
+pdf: pdf-am
+
+pdf-am:
+
+ps: ps-am
+
+ps-am:
+
+uninstall-am: uninstall-liborcusHEADERS
+
+.MAKE: install-am install-strip
+
+.PHONY: CTAGS GTAGS TAGS all all-am check check-am check-valgrind-am \
+ check-valgrind-drd-am check-valgrind-drd-local \
+ check-valgrind-helgrind-am check-valgrind-helgrind-local \
+ check-valgrind-local check-valgrind-memcheck-am \
+ check-valgrind-memcheck-local check-valgrind-sgcheck-am \
+ check-valgrind-sgcheck-local clean clean-generic clean-libtool \
+ cscopelist-am ctags ctags-am distclean distclean-generic \
+ distclean-libtool distclean-tags distdir dvi dvi-am html \
+ html-am info info-am install install-am install-data \
+ install-data-am install-dvi install-dvi-am install-exec \
+ install-exec-am install-html install-html-am install-info \
+ install-info-am install-liborcusHEADERS install-man \
+ install-pdf install-pdf-am install-ps install-ps-am \
+ install-strip installcheck installcheck-am installdirs \
+ maintainer-clean maintainer-clean-generic mostlyclean \
+ mostlyclean-generic mostlyclean-libtool pdf pdf-am ps ps-am \
+ tags tags-am uninstall uninstall-am uninstall-liborcusHEADERS
+
+.PRECIOUS: Makefile
+
+
+# Tell versions [3.59,3.63) of GNU make to not export all variables.
+# Otherwise a system limit (for SysV at least) may be exceeded.
+.NOEXPORT:
diff --git a/include/orcus/spreadsheet/auto_filter.hpp b/include/orcus/spreadsheet/auto_filter.hpp
new file mode 100644
index 0000000..b6f2959
--- /dev/null
+++ b/include/orcus/spreadsheet/auto_filter.hpp
@@ -0,0 +1,149 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_AUTO_FILTER_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_AUTO_FILTER_HPP
+
+#include "types.hpp"
+#include "../env.hpp"
+
+#include <map>
+#include <unordered_set>
+
+#include <ixion/address.hpp>
+
+namespace orcus { namespace spreadsheet {
+
+/**
+ * Data for a single column inside autofilter range.
+ *
+ * The only data member, @p match_values, is an unordered set of
+ * std::string_view values.  std::string_view does not own the characters it
+ * refers to, so the underlying character buffers must remain valid for as
+ * long as this structure, or any copy of it, is in use.
+ *
+ * The type declares its own default, copy and move constructors, destructor,
+ * and copy and move assignment operators, plus reset() and swap(); their
+ * definitions are not part of this header.
+ *
+ * NOTE(review): reset() presumably empties @p match_values - confirm against
+ * the implementation.
+ */
+struct ORCUS_SPM_DLLPUBLIC auto_filter_column_t
+{
+    using match_values_type = std::unordered_set<std::string_view>;
+    match_values_type match_values;
+
+    auto_filter_column_t();
+    auto_filter_column_t(const auto_filter_column_t& other);
+    auto_filter_column_t(auto_filter_column_t&& other);
+    ~auto_filter_column_t();
+
+    auto_filter_column_t& operator=(const auto_filter_column_t& other);
+    auto_filter_column_t& operator=(auto_filter_column_t&& other);
+
+    void reset();
+    void swap(auto_filter_column_t& r);
+};
+
+/**
+ * Data for a single autofilter entry.  An autofilter can belong to either a
+ * sheet or a table.
+ *
+ * It consists of @p range, an ixion::abs_range_t, and @p columns, an ordered
+ * map from a column index (col_t) to the auto_filter_column_t data stored
+ * for that column.
+ *
+ * The type is copyable and movable, and provides reset() and swap(); their
+ * definitions are not part of this header.
+ */
+struct ORCUS_SPM_DLLPUBLIC auto_filter_t
+{
+    typedef std::map<col_t, auto_filter_column_t> columns_type;
+
+    ixion::abs_range_t range;
+
+    columns_type columns;
+
+    auto_filter_t();
+    auto_filter_t(const auto_filter_t& other);
+    auto_filter_t(auto_filter_t&& other);
+    ~auto_filter_t();
+
+    auto_filter_t& operator=(const auto_filter_t& other);
+    auto_filter_t& operator=(auto_filter_t&& other);
+
+    void reset();
+    void swap(auto_filter_t& r);
+
+    /**
+     * Set column data to the specified column index.
+     *
+     * @param col column index to associate the data with.
+     * @param data column data.  It is passed by value, so the caller's
+     *             object is either copied or moved into the argument.
+     */
+    void commit_column(col_t col, auto_filter_column_t data);
+};
+
+/**
+ * Single column entry in table.
+ *
+ * Holds a numeric @p identifier, the column @p name, a
+ * @p totals_row_label and a @p totals_row_function value.  The two string
+ * members are std::string_view instances; they do not own their characters,
+ * so the referenced buffers must outlive this structure and its copies.
+ *
+ * Only copy construction and copy assignment are declared; no move
+ * operations are declared for this type.
+ */
+struct ORCUS_SPM_DLLPUBLIC table_column_t
+{
+    std::size_t identifier;
+    std::string_view name;
+    std::string_view totals_row_label;
+    totals_row_function_t totals_row_function;
+
+    table_column_t();
+    table_column_t(const table_column_t& other);
+    ~table_column_t();
+
+    table_column_t& operator=(const table_column_t& other);
+
+    void reset();
+};
+
+/**
+ * Table style information.
+ *
+ * Consists of the style @p name and four boolean flags, each declared as a
+ * 1-bit bit-field: @p show_first_column, @p show_last_column,
+ * @p show_row_stripes and @p show_column_stripes.  Because they are
+ * bit-fields, their addresses cannot be taken and non-const references
+ * cannot be bound to them.
+ *
+ * @p name is a std::string_view and does not own its characters; the
+ * referenced buffer must outlive this structure and its copies.
+ */
+struct ORCUS_SPM_DLLPUBLIC table_style_t
+{
+    std::string_view name;
+
+    bool show_first_column:1;
+    bool show_last_column:1;
+    bool show_row_stripes:1;
+    bool show_column_stripes:1;
+
+    table_style_t();
+    table_style_t(const table_style_t& other);
+    ~table_style_t();
+
+    table_style_t& operator=(const table_style_t& other);
+
+    void reset();
+};
+
+/**
+ * Single table entry.  A table is a range in a spreadsheet that represents
+ * a single set of data that can be used as a data source.
+ *
+ * Besides its @p identifier, @p name, @p display_name, @p range and
+ * @p totals_row_count, a table aggregates an autofilter (@p filter), a
+ * vector of column entries (@p columns) and style information (@p style).
+ *
+ * @p name and @p display_name are std::string_view instances and do not own
+ * their characters; the referenced buffers must outlive this structure and
+ * its copies.
+ *
+ * The type is copyable and movable; the definitions of its special member
+ * functions and of reset() are not part of this header.
+ */
+struct ORCUS_SPM_DLLPUBLIC table_t
+{
+    typedef std::vector<table_column_t> columns_type;
+
+    size_t identifier;
+
+    std::string_view name;
+    std::string_view display_name;
+
+    ixion::abs_range_t range;
+
+    size_t totals_row_count;
+
+    auto_filter_t filter;
+    columns_type columns;
+    table_style_t style;
+
+    table_t();
+    table_t(const table_t& other);
+    table_t(table_t&& other);
+    ~table_t();
+
+    table_t& operator=(const table_t& other);
+    table_t& operator=(table_t&& other);
+
+    void reset();
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/config.hpp b/include/orcus/spreadsheet/config.hpp
new file mode 100644
index 0000000..11eebfc
--- /dev/null
+++ b/include/orcus/spreadsheet/config.hpp
@@ -0,0 +1,37 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_CONFIG_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_CONFIG_HPP
+
+#include "orcus/env.hpp"
+
+#include <cstdint>
+
+namespace orcus { namespace spreadsheet {
+
+struct ORCUS_SPM_DLLPUBLIC document_config
+{
+    /**
+     * Precision to use when converting numeric values to their string
+     * representations.  A negative value indicates that the precision is not
+     * specified.
+     *
+     * The value is stored as a signed 8-bit integer.
+     *
+     * NOTE(review): the value assigned by the default constructor is not
+     * visible in this header - confirm it against the implementation.
+     */
+    int8_t output_precision;
+
+    document_config();
+    document_config(const document_config& r);
+    ~document_config();
+
+    document_config& operator= (const document_config& r);
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/document.hpp b/include/orcus/spreadsheet/document.hpp
new file mode 100644
index 0000000..4f20b6e
--- /dev/null
+++ b/include/orcus/spreadsheet/document.hpp
@@ -0,0 +1,166 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_DOCUMENT_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_DOCUMENT_HPP
+
+#include "orcus/env.hpp"
+#include "orcus/interface.hpp"
+#include "orcus/spreadsheet/types.hpp"
+
+#include <ostream>
+#include <memory>
+
+namespace ixion {
+
+class formula_name_resolver;
+class model_context;
+struct abs_address_t;
+
+}
+
+namespace orcus {
+
+class string_pool;
+struct date_time_t;
+
+namespace spreadsheet {
+
+class shared_strings;
+class styles;
+class pivot_collection;
+class sheet;
+class import_factory;
+
+struct document_config;
+struct table_t;
+
+namespace detail {
+
+struct document_impl;
+
+}
+
+/**
+ * Store spreadsheet document content.  It uses the @p model_context class
+ * from the ixion library to store raw cell values required in the computation
+ * of formula expressions.
+ *
+ * A document cannot be copied: both its copy constructor and its copy
+ * assignment operator are deleted.  Its state is held behind a private
+ * pointer to detail::document_impl, which is only forward-declared in this
+ * header.
+ *
+ * The @ref sheet and @ref import_factory classes are declared as friends and
+ * therefore have access to the private members of this class, such as
+ * finalize_import() and insert_dirty_cell().
+ */
+class ORCUS_SPM_DLLPUBLIC document : public orcus::iface::document_dumper
+{
+    friend class sheet;
+    friend class import_factory;
+
+public:
+    document(const document&) = delete;
+    document& operator= (const document&) = delete;
+
+    document(const range_size_t& sheet_size);
+    ~document();
+
+    /** See @ref iface::document_dumper. */
+    virtual void dump(dump_format_t format, const std::string& output) const override;
+
+    /** See @ref iface::document_dumper. */
+    virtual void dump_check(std::ostream& os) const override;
+
+    shared_strings& get_shared_strings();
+    const shared_strings& get_shared_strings() const;
+
+    styles& get_styles();
+    const styles& get_styles() const;
+
+    pivot_collection& get_pivot_collection();
+    const pivot_collection& get_pivot_collection() const;
+
+    sheet* append_sheet(std::string_view sheet_name);
+    sheet* get_sheet(std::string_view sheet_name);
+    const sheet* get_sheet(std::string_view sheet_name) const;
+    sheet* get_sheet(sheet_t sheet_pos);
+    const sheet* get_sheet(sheet_t sheet_pos) const;
+
+    /**
+     * Clear the document content, leaving the document empty.
+     */
+    void clear();
+
+    /**
+     * Calculate those formula cells that have been newly inserted and have
+     * not yet been calculated.
+     */
+    void recalc_formula_cells();
+
+    sheet_t get_sheet_index(std::string_view name) const;
+    std::string_view get_sheet_name(sheet_t sheet_pos) const;
+
+    /**
+     * Set a new name to a sheet.
+     *
+     * @param sheet_pos 0-based position of a sheet.
+     * @param name New name to set to a sheet.  It is passed by value.
+     */
+    void set_sheet_name(sheet_t sheet_pos, std::string name);
+
+    range_size_t get_sheet_size() const;
+    void set_sheet_size(const range_size_t& sheet_size);
+    size_t get_sheet_count() const;
+
+    void set_origin_date(int year, int month, int day);
+    date_time_t get_origin_date() const;
+
+    void set_formula_grammar(formula_grammar_t grammar);
+    formula_grammar_t get_formula_grammar() const;
+
+    const ixion::formula_name_resolver* get_formula_name_resolver(formula_ref_context_t cxt) const;
+
+    ixion::model_context& get_model_context();
+    const ixion::model_context& get_model_context() const;
+
+    const document_config& get_config() const;
+    void set_config(const document_config& cfg);
+
+    string_pool& get_string_pool();
+    const string_pool& get_string_pool() const;
+
+    /**
+     * Insert a new table object into the document.  The document takes
+     * ownership of the passed object in every case: the object is inserted
+     * only when no table of the same name already exists, and an object that
+     * is not inserted is deleted.  The caller must therefore not use or
+     * delete the pointer after this call.
+     *
+     * @param p table object to insert.
+     */
+    void insert_table(table_t* p);
+
+    /**
+     * Get a structure containing properties of a named table.
+     *
+     * @param name Name of the table.
+     *
+     * @return Pointer to the structure containing the properties of a named
+     *         table, or @p nullptr if no such table exists for the given name.
+     */
+    const table_t* get_table(std::string_view name) const;
+
+private:
+    void dump_flat(const std::string& outdir) const;
+    void dump_html(const ::std::string& outdir) const;
+    void dump_json(const ::std::string& outdir) const;
+    void dump_csv(const std::string& outdir) const;
+    void dump_debug_state(const std::string& outdir) const;
+
+    void finalize_import();
+    void insert_dirty_cell(const ixion::abs_address_t& pos);
+
+private:
+    std::unique_ptr<detail::document_impl> mp_impl;
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/document_types.hpp b/include/orcus/spreadsheet/document_types.hpp
new file mode 100644
index 0000000..b1a864f
--- /dev/null
+++ b/include/orcus/spreadsheet/document_types.hpp
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include "types.hpp"
+#include <vector>
+
+namespace orcus { namespace spreadsheet {
+
+/**
+ * Stores a color value in ARGB format.  Each color component ranges from 0 to
+ * 255 (8-bit).
+ *
+ * The components are stored in the order alpha, red, green, blue.  Besides
+ * the default constructor there is one constructor taking the three color
+ * components (red, green, blue) and one taking all four components with
+ * alpha first.
+ *
+ * NOTE(review): the alpha value used by the three-component constructor and
+ * the values set by the default constructor and reset() are not visible in
+ * this header - confirm them against the implementation.
+ */
+struct ORCUS_SPM_DLLPUBLIC color_t
+{
+    color_elem_t alpha;
+    color_elem_t red;
+    color_elem_t green;
+    color_elem_t blue;
+
+    color_t();
+    color_t(color_elem_t _red, color_elem_t _green, color_elem_t _blue);
+    color_t(color_elem_t _alpha, color_elem_t _red, color_elem_t _green, color_elem_t _blue);
+
+    void reset();
+
+    bool operator==(const color_t& other) const;
+    bool operator!=(const color_t& other) const;
+};
+
+/**
+ * Contains formatting properties of a section of a string.  This is used in
+ * the storage of rich-text strings.
+ *
+ * A section is identified by its start position (@p pos) and its length
+ * (@p size) within the string.
+ */
+struct ORCUS_SPM_DLLPUBLIC format_run
+{
+    /** Position of the section where the formatting starts. */
+    std::size_t pos;
+    /** Length of the section. */
+    std::size_t size;
+    /**
+     * Name of the font.  This is a non-owning view; the referenced
+     * characters must outlive this structure and its copies.
+     */
+    std::string_view font;
+    /** Size of the font. */
+    double font_size;
+    /** Color of the section. */
+    color_t color;
+    /** Whether or not the font is bold. */
+    bool bold:1;
+    /** Whether or not the font is italic. */
+    bool italic:1;
+
+    format_run();
+
+    /**
+     * Reset the properties to unformatted state.
+     */
+    void reset();
+
+    /**
+     * Query whether or not the section contains non-default format properties.
+     *
+     * @return @p true if it's formatted, otherwise @p false.
+     */
+    bool formatted() const;
+};
+
+/**
+ * Collection of format properties of a string.  Each element is a
+ * @ref format_run that describes one section of the string.
+ */
+using format_runs_t = std::vector<format_run>;
+
+}} // namespace orcus::spreadsheet
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/export_interface.hpp b/include/orcus/spreadsheet/export_interface.hpp
new file mode 100644
index 0000000..3c3104d
--- /dev/null
+++ b/include/orcus/spreadsheet/export_interface.hpp
@@ -0,0 +1,60 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_EXPORT_INTERFACE_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_EXPORT_INTERFACE_HPP
+
+#include "types.hpp"
+#include "../env.hpp"
+
+#include <ostream>
+
+namespace orcus { namespace spreadsheet { namespace iface {
+
+/**
+ * Interface for exporting sheet contents.
+ *
+ * This is an abstract class: both the destructor and write_string() are
+ * declared pure virtual, so a concrete exporter must derive from it and
+ * override write_string().
+ */
+class export_sheet
+{
+public:
+    ORCUS_DLLPUBLIC virtual ~export_sheet() = 0;
+
+    /**
+     * Write the content of a cell to an output stream.
+     *
+     * @param os output stream to write the cell content to.
+     * @param row 0-based row position of a cell.
+     * @param col 0-based column position of a cell.
+     */
+    virtual void write_string(std::ostream& os, orcus::spreadsheet::row_t row, orcus::spreadsheet::col_t col) const = 0;
+};
+
+/**
+ * Entry-point interface for exporting document contents.
+ *
+ * This is an abstract class: both the destructor and get_sheet() are
+ * declared pure virtual, so a concrete factory must derive from it and
+ * override get_sheet().
+ */
+class export_factory
+{
+public:
+    ORCUS_DLLPUBLIC virtual ~export_factory() = 0;
+
+    /**
+     * Obtain an interface for exporting sheet content.
+     *
+     * @param sheet_name name of the sheet to export.
+     *
+     * @return pointer to an interface for exporting sheet content.
+     *
+     * NOTE(review): the return value for an unknown sheet name is not
+     * specified here - presumably a null pointer; confirm with the
+     * implementing class.
+     */
+    virtual const export_sheet* get_sheet(std::string_view sheet_name) const = 0;
+};
+
+}}}
+
+
+
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/factory.hpp b/include/orcus/spreadsheet/factory.hpp
new file mode 100644
index 0000000..e1423fa
--- /dev/null
+++ b/include/orcus/spreadsheet/factory.hpp
@@ -0,0 +1,143 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_IMPORT_FACTORY_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_IMPORT_FACTORY_HPP
+
+#include <orcus/spreadsheet/import_interface.hpp>
+#include <orcus/spreadsheet/import_interface_styles.hpp>
+#include <orcus/spreadsheet/export_interface.hpp>
+#include <orcus/env.hpp>
+
+#include <memory>
+
+namespace orcus {
+
+class string_pool;
+
+namespace spreadsheet {
+
+class document;
+class view;
+class styles;
+
+struct ORCUS_SPM_DLLPUBLIC import_factory_config
+{
+    /**
+     * When the font cache is enabled, the import factory checks each incoming
+     * font entry against the pool of existing font entries and inserts it
+     * only when an equal entry doesn't already exist in the pool.
+     *
+     * The in-class default for this flag is @p true.
+     *
+     * @note It should not be enabled for a file format that already has
+     *       font entries normalized, such as xlsx.
+     */
+    bool enable_font_cache = true;
+
+    import_factory_config();
+    import_factory_config(const import_factory_config& other);
+    ~import_factory_config();
+
+    import_factory_config& operator=(const import_factory_config& other);
+};
+
+/**
+ * Wraps @ref document and @ref view stores.  This is to be used by the import
+ * filter to populate the document and view stores.
+ *
+ * It can be constructed either with a document alone or with a document and
+ * a view store.  Both are taken by non-const reference.
+ *
+ * NOTE(review): the referenced objects presumably need to outlive the
+ * factory - confirm against the implementation.
+ *
+ * The implementation details are hidden behind a private @p impl structure.
+ */
+class ORCUS_SPM_DLLPUBLIC import_factory : public iface::import_factory
+{
+    struct impl;
+    std::unique_ptr<impl> mp_impl;
+public:
+    import_factory(document& doc);
+    import_factory(document& doc, view& view_store);
+    virtual ~import_factory();
+
+    virtual iface::import_global_settings* get_global_settings() override;
+    virtual iface::import_shared_strings* get_shared_strings() override;
+    virtual iface::import_styles* get_styles() override;
+    virtual iface::import_named_expression* get_named_expression() override;
+    virtual iface::import_reference_resolver* get_reference_resolver(formula_ref_context_t cxt) override;
+    virtual iface::import_pivot_cache_definition* create_pivot_cache_definition(
+        orcus::spreadsheet::pivot_cache_id_t cache_id) override;
+    virtual iface::import_pivot_cache_records* create_pivot_cache_records(
+        orcus::spreadsheet::pivot_cache_id_t cache_id) override;
+    virtual iface::import_sheet* append_sheet(sheet_t sheet_index, std::string_view name) override;
+    virtual iface::import_sheet* get_sheet(std::string_view name) override;
+    virtual iface::import_sheet* get_sheet(sheet_t sheet_index) override;
+    virtual void finalize() override;
+
+    void set_config(const import_factory_config& config);
+
+    void set_default_row_size(row_t row_size);
+    void set_default_column_size(col_t col_size);
+
+    void set_character_set(character_set_t charset);
+    character_set_t get_character_set() const;
+
+    /**
+     * When this flag is set to true, those formula cells with no cached
+     * results will be re-calculated upon loading.
+     *
+     * @param b value of this flag.
+     */
+    void set_recalc_formula_cells(bool b);
+
+    void set_formula_error_policy(formula_error_policy_t policy);
+};
+
+/**
+ * Wraps @ref styles store.  This is to be used by an import styles parser to
+ * populate the styles store.
+ *
+ * It is constructed from a styles store and a string pool, both taken by
+ * non-const reference, and optionally from a shared
+ * @ref import_factory_config instance.
+ *
+ * NOTE(review): the referenced styles store and string pool presumably need
+ * to outlive this object - confirm against the implementation.
+ *
+ * The implementation details are hidden behind a private @p impl structure.
+ */
+class ORCUS_SPM_DLLPUBLIC import_styles : public iface::import_styles
+{
+    struct impl;
+    std::unique_ptr<impl> mp_impl;
+public:
+    import_styles(styles& styles_store, string_pool& sp);
+    import_styles(std::shared_ptr<import_factory_config> config, styles& styles_store, string_pool& sp);
+    virtual ~import_styles() override;
+
+    virtual iface::import_font_style* start_font_style() override;
+    virtual iface::import_fill_style* start_fill_style() override;
+    virtual iface::import_border_style* start_border_style() override;
+    virtual iface::import_cell_protection* start_cell_protection() override;
+    virtual iface::import_number_format* start_number_format() override;
+    virtual iface::import_xf* start_xf(xf_category_t cat) override;
+    virtual iface::import_cell_style* start_cell_style() override;
+
+    virtual void set_font_count(size_t n) override;
+    virtual void set_fill_count(size_t n) override;
+    virtual void set_border_count(size_t n) override;
+    virtual void set_number_format_count(size_t n) override;
+    virtual void set_xf_count(xf_category_t cat, size_t n) override;
+    virtual void set_cell_style_count(size_t n) override;
+};
+
+/**
+ * Wraps @ref document store and facilitates export of its content.
+ *
+ * The document is taken by const reference.
+ *
+ * NOTE(review): the document presumably needs to outlive this factory -
+ * confirm against the implementation.
+ *
+ * @warning It currently provides very limited functionality especially when
+ *          compared to that of the @ref import_factory.
+ */
+class ORCUS_SPM_DLLPUBLIC export_factory : public iface::export_factory
+{
+    struct impl;
+    std::unique_ptr<impl> mp_impl;
+public:
+    export_factory(const document& doc);
+    virtual ~export_factory();
+
+    virtual const iface::export_sheet* get_sheet(std::string_view sheet_name) const override;
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/import_interface.hpp b/include/orcus/spreadsheet/import_interface.hpp
new file mode 100644
index 0000000..2ba80a7
--- /dev/null
+++ b/include/orcus/spreadsheet/import_interface.hpp
@@ -0,0 +1,1332 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_SPREADSHEET_IMPORT_INTERFACE_HPP
+#define ORCUS_SPREADSHEET_IMPORT_INTERFACE_HPP
+
+#include <cstdlib>
+
+#include "types.hpp"
+#include "../types.hpp"
+#include "../env.hpp"
+
+// NB: This header must not depend on ixion, as it needs to be usable for
+// those clients that provide their own formula engine. Other headers in
+// the orcus::spreadsheet namespace may depend on ixion.
+
+namespace orcus { namespace spreadsheet { namespace iface {
+
+class import_styles;
+class import_pivot_cache_definition;
+class import_pivot_cache_records;
+class import_sheet_view;
+
+/**
+ * Interface for importing raw string values shared in string cells. String
+ * values may be either with or without formatted segments.
+ *
+ * To insert an unformatted string, simply use either append() or add()
+ * method. The string will then be immediately pushed to the pool.
+ *
+ * To insert a string with mixed formatted segments, you need to first use one
+ * or more of:
+ *
+ * @li set_segment_font()
+ * @li set_segment_bold()
+ * @li set_segment_italic()
+ * @li set_segment_font_name()
+ * @li set_segment_font_size()
+ * @li set_segment_font_color()
+ *
+ * to define the format attribute(s) of a string segment followed by a call to
+ * append_segment(). This may be repeated as many times as necessary. Then
+ * as the final step, call commit_segments() to insert the entire series of
+ * formatted segments to the pool as a single string entry. The following
+ * example demonstrates what the code may look like:
+ *
+ * @code{.cpp}
+ * import_shared_strings* iface = ...;
+ *
+ * // store a segment with specific font, size and boldness.
+ * iface->set_segment_font_name("FreeMono");
+ * iface->set_segment_font_size(14);
+ * iface->set_segment_bold(true);
+ * iface->append_segment("a bold and big segment");
+ *
+ * // store an unformatted segment.
+ * iface->append_segment(" followed by ");
+ *
+ * // store a segment with smaller, italic font.
+ * iface->set_segment_font_size(7);
+ * iface->set_segment_italic(true);
+ * iface->append_segment("a small and italic segment");
+ *
+ * iface->commit_segments(); // commit the whole formatted string to the pool.
+ * @endcode
+ */
+class ORCUS_DLLPUBLIC import_shared_strings
+{
+public:
+ virtual ~import_shared_strings();
+
+ /**
+ * Append a new string to the sequence of strings. Order of insertion
+ * determines the numerical ID value of an inserted string. Note that this
+ * method assumes that the caller knows the string being appended is not yet
+ * in the pool; it does not check on duplicated strings.
+ *
+ * @param s string to append to the pool.
+ *
+ * @return ID of the inserted string.
+ */
+ virtual size_t append(std::string_view s) = 0;
+
+ /**
+ * Similar to the append() method, it adds a new string to the string pool;
+ * however, this method checks if the string being added is already in the
+ * pool before each insertion, to avoid duplicated strings.
+ *
+ * @param s string to add to the pool.
+ *
+ * @return ID of the inserted string.
+ */
+ virtual size_t add(std::string_view s) = 0;
+
+ /**
+ * Set the index of a font to apply to the current format attributes. Refer
+ * to the import_font_style interface on how to obtain a font index. Note
+ * that a single font index is associated with multiple font-related
+ * formatting attributes, such as font name, font color, boldness and
+ * italics.
+ *
+ * @param font_index positive integer representing the font to use.
+ */
+ virtual void set_segment_font(size_t font_index) = 0;
+
+ /**
+ * Set whether or not to make the current segment bold.
+ *
+ * @param b true if it's bold, false otherwise.
+ */
+ virtual void set_segment_bold(bool b) = 0;
+
+ /**
+ * Set whether or not to make the current segment italic.
+ *
+ * @param b true if it's italic, false otherwise.
+ */
+ virtual void set_segment_italic(bool b) = 0;
+
+ /**
+ * Set the name of a font to the current segment.
+ *
+ * @param s font name.
+ */
+ virtual void set_segment_font_name(std::string_view s) = 0;
+
+ /**
+ * Set a font size to the current segment.
+ *
+ * @param point font size in points.
+ */
+ virtual void set_segment_font_size(double point) = 0;
+
+ /**
+ * Set the color of a font in ARGB format to the current segment.
+ *
+ * @param alpha alpha component value (0-255).
+ * @param red red component value (0-255).
+ * @param green green component value (0-255).
+ * @param blue blue component value (0-255).
+ */
+ virtual void set_segment_font_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Push the current string segment to the buffer. Any formatting attributes
+ * defined so far will be applied to this segment.
+ *
+ * @param s string value for the segment.
+ */
+ virtual void append_segment(std::string_view s) = 0;
+
+ /**
+ * Store the entire formatted string in the current buffer to the shared
+ * strings pool. The implementor may choose to unconditionally append the
+ * string to the pool, or choose to find an existing duplicate and reuse
+ * it instead.
+ *
+ * @return ID of the string just inserted, or the ID of an existing string
+ * with identical formatting.
+ */
+ virtual size_t commit_segments() = 0;
+};
+
+/**
+ * Interface for importing sheet properties. Sheet properties include:
+ *
+ * @li column widths and row heights,
+ * @li hidden flags for columns and rows, and
+ * @li merged cell ranges.
+ *
+ * These properties are independent of the cell contents of a sheet.
+ */
+class ORCUS_DLLPUBLIC import_sheet_properties
+{
+public:
+ virtual ~import_sheet_properties();
+
+ /**
+ * Set a column width to one or more columns.
+ *
+ * @param col 0-based position of the first column.
+ * @param col_span number of contiguous columns to apply the width to.
+ * @param width column width to apply.
+ * @param unit unit of measurement to use for the width value.
+ */
+ virtual void set_column_width(col_t col, col_t col_span, double width, orcus::length_unit_t unit) = 0;
+
+ /**
+ * Set a column hidden flag to one or more columns.
+ *
+ * @param col 0-based position of the first column.
+ * @param col_span number of contiguous columns to apply the flag to.
+ * @param hidden flag indicating whether or not the columns are hidden.
+ */
+ virtual void set_column_hidden(col_t col, col_t col_span, bool hidden) = 0;
+
+ /**
+ * Set a row height to specified row.
+ *
+ * @param row 0-based position of a row.
+ * @param height new row height value to set.
+ * @param unit unit of the new row height value.
+ *
+ * @todo Convert this to take a row span.
+ */
+ virtual void set_row_height(row_t row, double height, orcus::length_unit_t unit) = 0;
+
+ /**
+ * Set a row hidden flag to a specified row.
+ *
+ * @param row 0-based position of a row.
+ * @param hidden flag indicating whether or not the row is hidden.
+ *
+ * @todo Convert this to take a row span.
+ */
+ virtual void set_row_hidden(row_t row, bool hidden) = 0;
+
+ /**
+ * Set a merged cell range.
+ *
+ * @param range structure containing the top-left and bottom-right
+ * positions of a merged cell range.
+ */
+ virtual void set_merge_cell_range(const range_t& range) = 0;
+};
+
+/**
+ * Interface for importing named expressions or ranges.
+ *
+ * This interface has two different methods for defining named expressions:
+ *
+ * @li set_named_expression() and
+ * @li set_named_range().
+ *
+ * Generally speaking, set_named_expression() can be used to define both named
+ * expression and named range. However, the implementor may choose to apply a
+ * different syntax rule to parse an expression passed to set_named_range(),
+ * depending on the formula grammar defined via @ref
+ * import_global_settings::set_default_formula_grammar(). For instance, the
+ * OpenDocument Spreadsheet format is known to use different syntax rules
+ * between named expressions and named ranges.
+ *
+ * A named range is a special case of a named expression where the expression
+ * consists of only one single cell range token.
+ *
+ * Here is a code example of how a named expression is defined:
+ *
+ * @code{.cpp}
+ * import_named_expression* iface = ...;
+ *
+ * // set the A1 on the first sheet as its origin (optional).
+ * src_address_t origin{0, 0, 0};
+ * iface->set_base_position(origin);
+ * iface->set_named_expression("MyExpression", "SUM(A1:B10)+SUM(D1:D4)");
+ * iface->commit();
+ * @endcode
+ *
+ * Replace the above set_named_expression() call with set_named_range() if you
+ * wish to define a named range instead.
+ */
+class ORCUS_DLLPUBLIC import_named_expression
+{
+public:
+ virtual ~import_named_expression();
+
+ /**
+ * Specify an optional base position, or origin, from which to evaluate a
+ * named expression. If not specified, the implementor should use the
+ * top-left corner cell on the first sheet as its origin.
+ *
+ * @param pos cell position to be used as the origin.
+ */
+ virtual void set_base_position(const src_address_t& pos) = 0;
+
+ /**
+ * Set a named expression to the buffer.
+ *
+ * @param name name of the expression to be defined.
+ * @param expression expression to be associated with the name.
+ */
+ virtual void set_named_expression(std::string_view name, std::string_view expression) = 0;
+
+ /**
+ * Set a named range to the buffer.
+ *
+ * @param name name of the expression to be defined.
+ * @param range range to be associated with the name.
+ */
+ virtual void set_named_range(std::string_view name, std::string_view range) = 0;
+
+ /**
+ * Commit the named expression or range currently in the buffer to the
+ * document.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing data tables.
+ */
+class ORCUS_DLLPUBLIC import_data_table
+{
+public:
+ virtual ~import_data_table();
+
+ /**
+ * Set the type of a data table. A data table can either:
+ *
+ * @li be a single-variable column-oriented,
+ * @li be a single-variable row-oriented, or
+ * @li use two variables that use both column and row.
+ *
+ * @param type type of a data table.
+ */
+ virtual void set_type(data_table_type_t type) = 0;
+
+ /**
+ * Set the range of a data table.
+ *
+ * @param range range of a data table.
+ */
+ virtual void set_range(const range_t& range) = 0;
+
+ /**
+ * Set the reference of the first input cell.
+ *
+ * @param ref reference of the first input cell.
+ * @param deleted whether or not this input cell has been deleted.
+ */
+ virtual void set_first_reference(std::string_view ref, bool deleted) = 0;
+
+ /**
+ * Set the reference of the second input cell but only if the data table
+ * uses two variables.
+ *
+ * @note This method gets called only if the data table uses two variables.
+ *
+ * @param ref reference of the second input cell.
+ * @param deleted whether or not this input cell has been deleted.
+ */
+ virtual void set_second_reference(std::string_view ref, bool deleted) = 0;
+
+ /**
+ * Store the current data table data in the buffer to the backend sheet
+ * storage.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing auto filters.
+ *
+ * Importing a single auto filter would roughly follow the following flow:
+ *
+ * @code{.cpp}
+ * import_auto_filter* iface = ... ;
+ *
+ * range_t range;
+ * range.first.column = 0;
+ * range.first.row = 0;
+ * range.last.column = 3;
+ * range.last.row = 1000;
+ * iface->set_range(range); // Auto filter is applied for A1:D1001.
+ *
+ * // Column A is filtered for a value of "A".
+ * iface->set_column(0);
+ * iface->append_column_match_value("A");
+ * iface->commit_column();
+ *
+ * // Column D is filtered for values of 1 and 4.
+ * iface->set_column(3);
+ * iface->append_column_match_value("1");
+ * iface->append_column_match_value("4");
+ * iface->commit_column();
+ *
+ * // Push the autofilter data in the current buffer to the sheet store.
+ * iface->commit();
+ * @endcode
+ */
+class ORCUS_DLLPUBLIC import_auto_filter
+{
+public:
+ virtual ~import_auto_filter();
+
+ /**
+ * Specify the range where the auto filter is applied.
+ *
+ * @param range structure containing the top-left and bottom-right
+ * positions of the auto filter range.
+ */
+ virtual void set_range(const range_t& range) = 0;
+
+ /**
+ * Specify the column position of a filter. The position is relative to
+ * the first column in the auto filter range. This method gets called at
+ * the beginning of each column filter data. The implementor may initialize
+ * the column filter data buffer when this method is called.
+ *
+ * @note This column position is relative to the first column in the
+ * autofilter range.
+ *
+ * @param col 0-based column position of a filter relative to the first
+ * column of the auto filter range.
+ */
+ virtual void set_column(col_t col) = 0;
+
+ /**
+ * Append a match value to the current column filter. A single column
+ * filter may have one or more match values.
+ *
+ * @param value match value to append to the current column filter.
+ */
+ virtual void append_column_match_value(std::string_view value) = 0;
+
+ /**
+ * Commit the current column filter data to the current auto filter buffer.
+ * The implementor may clear the current column filter buffer after this
+ * call.
+ */
+ virtual void commit_column() = 0;
+
+ /**
+ * Commit current auto filter data stored in the buffer to the sheet store.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * This is an optional interface to import conditional formatting.
+ *
+ * In general, a single conditional format consists of:
+ *
+ * @li a cell range the format is applied to, and
+ * @li one or more rule entries.
+ *
+ * Each rule entry consists of:
+ *
+ * @li a type of rule,
+ * @li zero or more rule properties, and
+ * @li zero or more conditions depending on the rule type.
+ *
+ * Lastly, each condition consists of:
+ *
+ * @li a formula, value, or string,
+ * @li an optional color.
+ *
+ * The flow of the import process varies depending on the type of the
+ * conditional formatting being imported. The following is an example of
+ * importing a conditional formatting that consists of a rule that applies a
+ * format when the cell value is greater than 2:
+ *
+ * @code{.cpp}
+ * import_conditional_format* iface = ... ;
+ *
+ * iface->set_range("A2:A13");
+ * iface->set_xf_id(14); // apply differential format (dxf) whose ID is 14
+ * iface->set_type(conditional_format_t::condition); // rule entry type
+ * iface->set_operator(condition_operator_t::expression);
+ * iface->set_operator(condition_operator_t::greater);
+ *
+ * iface->set_formula("2");
+ * iface->commit_condition();
+ *
+ * iface->commit_entry();
+ *
+ * iface->commit_format();
+ * @endcode
+ *
+ * @todo Revise this API for simplification.
+ */
+class ORCUS_DLLPUBLIC import_conditional_format
+{
+public:
+ virtual ~import_conditional_format();
+
+ /**
+ * Sets the color of the current condition.
+ * only valid for type == databar or type == colorscale.
+ */
+ virtual void set_color(color_elem_t alpha, color_elem_t red,
+ color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Sets the formula, value or string of the current condition.
+ */
+ virtual void set_formula(std::string_view formula) = 0;
+
+ /**
+ * Sets the type for the formula, value or string of the current condition.
+ * Only valid for type = iconset, databar or colorscale.
+ */
+ virtual void set_condition_type(condition_type_t type) = 0;
+
+ /**
+ * Only valid for type = date.
+ */
+ virtual void set_date(condition_date_t date) = 0;
+
+ /**
+ * commits the current condition to the current entry.
+ */
+ virtual void commit_condition() = 0;
+
+ /**
+ * Name of the icons to use in the current entry.
+ * only valid for type = iconset
+ */
+ virtual void set_icon_name(std::string_view name) = 0;
+
+ /**
+ * Use a gradient for the current entry.
+ * only valid for type == databar
+ */
+ virtual void set_databar_gradient(bool gradient) = 0;
+
+ /**
+ * Position of the 0 axis in the current entry.
+ * only valid for type == databar.
+ */
+ virtual void set_databar_axis(databar_axis_t axis) = 0;
+
+ /**
+ * Databar color for positive values.
+ * only valid for type == databar.
+ */
+ virtual void set_databar_color_positive(color_elem_t alpha, color_elem_t red,
+ color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Databar color for negative values.
+ * only valid for type == databar.
+ */
+ virtual void set_databar_color_negative(color_elem_t alpha, color_elem_t red,
+ color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Sets the minimum length for a databar.
+ * only valid for type == databar.
+ */
+ virtual void set_min_databar_length(double length) = 0;
+
+ /**
+ * Sets the maximum length for a databar.
+ * only valid for type == databar.
+ */
+ virtual void set_max_databar_length(double length) = 0;
+
+ /**
+ * Set whether or not to show the value in the cell.
+ * only valid for type = databar, iconset, colorscale.
+ */
+ virtual void set_show_value(bool show) = 0;
+
+ /**
+ * Use the icons in reverse order.
+ * only valid for type == iconset.
+ */
+ virtual void set_iconset_reverse(bool reverse) = 0;
+
+ /**
+ * TODO: In OOXML the style is stored as dxf and in ODF as named style.
+ */
+ virtual void set_xf_id(size_t xf) = 0;
+
+ /**
+ * Sets the current operation used for the current entry.
+ * only valid for type == condition
+ */
+ virtual void set_operator(condition_operator_t condition_type) = 0;
+
+ virtual void set_type(conditional_format_t type) = 0;
+
+ virtual void commit_entry() = 0;
+
+ virtual void set_range(std::string_view range) = 0;
+
+ virtual void set_range(row_t row_start, col_t col_start,
+ row_t row_end, col_t col_end) = 0;
+
+ virtual void commit_format() = 0;
+};
+
+/**
+ * Interface for table. A table is a range of cells within a sheet that
+ * consists of one or more data columns with a header row that contains their
+ * labels.
+ */
+class ORCUS_DLLPUBLIC import_table
+{
+public:
+ virtual ~import_table();
+
+ /**
+ * Get an optional interface for importing auto filter data stored as part
+ * of a table.
+ *
+ * The implementor should initialize the internal state of the temporary
+ * auto filter object when this method is called.
+ *
+ * @return pointer to the auto filter interface object, or a @p nullptr if
+ * the implementor doesn't support it.
+ */
+ virtual import_auto_filter* get_auto_filter();
+
+ /**
+ * Set an integral identifier unique to the table.
+ *
+ * @param id identifier associated with the table.
+ */
+ virtual void set_identifier(size_t id) = 0;
+
+ /**
+ * Set a 2-dimensional cell range associated with the table.
+ *
+ * @param range cell range associated with the table.
+ */
+ virtual void set_range(const range_t& range) = 0;
+
+ /**
+ * Set the number of totals rows.
+ *
+ * @param row_count number of totals rows.
+ */
+ virtual void set_totals_row_count(size_t row_count) = 0;
+
+ /**
+ * Set the internal name of the table.
+ *
+ * @param name name of the table.
+ */
+ virtual void set_name(std::string_view name) = 0;
+
+ /**
+ * Set the displayed name of the table.
+ *
+ * @param name displayed name of the table.
+ */
+ virtual void set_display_name(std::string_view name) = 0;
+
+ /**
+ * Set the number of columns the table contains.
+ *
+ * @param n number of columns in the table.
+ *
+ * @note This method gets called before the column data gets imported. The
+ * implementor can use this call to initialize the buffer for storing
+ * the column data.
+ */
+ virtual void set_column_count(size_t n) = 0;
+
+ /**
+ * Set an integral identifier for a column.
+ *
+ * @param id integral identifier for a column.
+ */
+ virtual void set_column_identifier(size_t id) = 0;
+
+ /**
+ * Set a name of a column.
+ *
+ * @param name name of a column.
+ */
+ virtual void set_column_name(std::string_view name) = 0;
+
+ /**
+ * Set the totals row label for a column.
+ *
+ * @param label row label for a column.
+ */
+ virtual void set_column_totals_row_label(std::string_view label) = 0;
+
+ /**
+ * Set the totals row function for a column.
+ *
+ * @param func totals row function for a column.
+ */
+ virtual void set_column_totals_row_function(totals_row_function_t func) = 0;
+
+ /**
+ * Push and append the column data stored in the current column data buffer
+ * into the table buffer.
+ */
+ virtual void commit_column() = 0;
+
+ /**
+ * Set the name of a style to apply to the table.
+ *
+ * @param name name of a style to apply to the table.
+ */
+ virtual void set_style_name(std::string_view name) = 0;
+
+ /**
+ * Specify whether or not the first column in the table should have the
+ * style applied.
+ *
+ * @param b whether or not the first column in the table should have the
+ * style applied.
+ */
+ virtual void set_style_show_first_column(bool b) = 0;
+
+ /**
+ * Specify whether or not the last column in the table should have the style
+ * applied.
+ *
+ * @param b whether or not the last column in the table should have the
+ * style applied.
+ */
+ virtual void set_style_show_last_column(bool b) = 0;
+
+ /**
+ * Specify whether or not row stripe formatting is applied.
+ *
+ * @param b whether or not row stripe formatting is applied.
+ */
+ virtual void set_style_show_row_stripes(bool b) = 0;
+
+ /**
+ * Specify whether or not column stripe formatting is applied.
+ *
+ * @param b whether or not column stripe formatting is applied.
+ */
+ virtual void set_style_show_column_stripes(bool b) = 0;
+
+ /**
+ * Push the data stored in the table buffer into the document store.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing the properties of a single formula cell. A formula
+ * cell contains a formula expression that can be computed, and optionally a
+ * cached result of the last computation performed on the expression.
+ */
+class ORCUS_DLLPUBLIC import_formula
+{
+public:
+ virtual ~import_formula();
+
+ /**
+ * Set the position of a cell.
+ *
+ * @param row row position.
+ * @param col column position.
+ */
+ virtual void set_position(row_t row, col_t col) = 0;
+
+ /**
+ * Set formula string to a cell.
+ *
+ * @param grammar grammar to use to compile the formula string into
+ * tokens.
+ * @param formula formula expression to store.
+ */
+ virtual void set_formula(formula_grammar_t grammar, std::string_view formula) = 0;
+
+ /**
+ * Register the formula stored in a cell as a shared formula to be shared
+ * with other cells, if the cell contains a formula string.
+ *
+ * If a cell references a shared formula stored in another cell, only
+ * specify the index of that shared formula without specifying a formula
+ * string of its own. In that case, it is expected that another formula
+ * cell registers its formula string for that index.
+ *
+ * @param index shared formula index to register the formula with.
+ */
+ virtual void set_shared_formula_index(size_t index) = 0;
+
+ /**
+ * Set cached result of string type.
+ *
+ * @param value string result value.
+ */
+ virtual void set_result_string(std::string_view value) = 0;
+
+ /**
+ * Set cached result of numeric type.
+ *
+ * @param value numeric value to set as a cached result.
+ */
+ virtual void set_result_value(double value) = 0;
+
+ /**
+ * Set cached result of boolean type.
+ *
+ * @param value boolean value to set as a cached result.
+ */
+ virtual void set_result_bool(bool value) = 0;
+
+ /**
+ * Set empty value as a cached result.
+ */
+ virtual void set_result_empty() = 0;
+
+ /**
+ * Commit all the formula data to the specified cell.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing the properties of an array formula which occupies a
+ * range of cells. Cells that are part of an array formula share the same
+ * formula expression but may have different calculation results.
+ */
+class ORCUS_DLLPUBLIC import_array_formula
+{
+public:
+ virtual ~import_array_formula();
+
+ /**
+ * Set the range of an array formula.
+ *
+ * @param range range of an array formula.
+ */
+ virtual void set_range(const range_t& range) = 0;
+
+ /**
+ * Set the formula expression of an array formula.
+ *
+ * @param grammar grammar to use to compile the formula string into
+ * tokens.
+ * @param formula formula expression of an array formula.
+ */
+ virtual void set_formula(formula_grammar_t grammar, std::string_view formula) = 0;
+
+ /**
+ * Set a cached string result of a cell within the array formula range.
+ *
+ * @param row 0-based row position of a cell.
+ * @param col 0-based column position of a cell.
+ * @param value cached string value to set.
+ */
+ virtual void set_result_string(row_t row, col_t col, std::string_view value) = 0;
+
+ /**
+ * Set a cached numeric result of a cell within the array formula range.
+ *
+ * @param row 0-based row position of a cell.
+ * @param col 0-based column position of a cell.
+ * @param value cached numeric value to set.
+ */
+ virtual void set_result_value(row_t row, col_t col, double value) = 0;
+
+ /**
+ * Set a cached boolean result of a cell within the array formula range.
+ *
+ * @param row 0-based row position of a cell.
+ * @param col 0-based column position of a cell.
+ * @param value cached boolean value to set.
+ */
+ virtual void set_result_bool(row_t row, col_t col, bool value) = 0;
+
+ /**
+ * Set an empty value as a cached result to a cell within the array formula
+ * range.
+ *
+ * @param row 0-based row position of a cell.
+ * @param col 0-based column position of a cell.
+ */
+ virtual void set_result_empty(row_t row, col_t col) = 0;
+
+ /**
+ * Push the properties of an array formula currently stored in the buffer to
+ * the sheet store.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing the content and properties of a sheet.
+ */
+class ORCUS_DLLPUBLIC import_sheet
+{
+public:
+ virtual ~import_sheet();
+
+ /**
+ * Get an optional interface for importing properties that are specific to a
+ * view of a sheet.
+ *
+ * @return pointer to the interface for importing view properties, or a @p
+ * nullptr if the implementor doesn't support it.
+ */
+ virtual import_sheet_view* get_sheet_view();
+
+ /**
+ * Get an optional interface for importing sheet properties.
+ *
+ * @return pointer to the interface for importing sheet properties, or a @p
+ * nullptr if the implementor doesn't support it.
+ */
+ virtual import_sheet_properties* get_sheet_properties();
+
+ /**
+ * Get an optional interface for importing data tables. Note that the
+ * implementer may decide not to support this feature in which case this
+ * method should return a @p nullptr.
+ *
+ * The implementor should initialize the internal state of the temporary
+ * data table object when this method is called.
+ *
+ * @return pointer to the data table interface object, or a @p nullptr if
+ * the implementor doesn't support it.
+ */
+ virtual import_data_table* get_data_table();
+
+ /**
+ * Get an optional interface for importing auto filter ranges.
+ *
+ * The implementor should initialize the internal state of the temporary
+ * auto filter object when this method is called.
+ *
+ * @return pointer to the auto filter interface object, or a @p nullptr if
+ * the implementor doesn't support it.
+ */
+ virtual import_auto_filter* get_auto_filter();
+
+ /**
+ * Get an interface for importing tables.
+ *
+ * The implementor should initialize the internal state of the temporary
+ * table object when this method is called.
+ *
+ * @return pointer to the table interface object, or @p nullptr if the
+ * implementer doesn't support importing of tables.
+ */
+ virtual import_table* get_table();
+
+ /**
+ * Get an optional interface for importing conditional formats.
+ *
+ * @return pointer to the conditional format interface object, or @p nullptr
+ * if the implementer doesn't support importing conditional
+ * formats.
+ */
+ virtual import_conditional_format* get_conditional_format();
+
+ /**
+ * Get an optional interface for importing sheet-local named expressions.
+ *
+ * @return pointer to the sheet-local named expression interface, or a @p
+ * nullptr if the implementor doesn't support it.
+ */
+ virtual import_named_expression* get_named_expression();
+
+ /**
+ * Get an optional interface for importing array formulas. An array formula
+ * is a formula expression applied to a range of cells where each cell may
+ * have a different result value.
+ *
+ * @return pointer to the array formula import interface, or a @p nullptr if
+ * the implementor doesn't support it.
+ */
+ virtual import_array_formula* get_array_formula();
+
+ /**
+ * Get an optional interface for importing formula cells.
+ *
+ * @return pointer to the formula interface object, or a @p nullptr if the
+ * implementer doesn't support importing of formula cells.
+ */
+ virtual import_formula* get_formula();
+
+ /**
+ * Set raw string value to a cell and have the implementation
+ * auto-recognize its data type.
+ *
+ * @param row row ID
+ * @param col column ID
+ * @param s raw string value.
+ */
+ virtual void set_auto(row_t row, col_t col, std::string_view s) = 0;
+
+ /**
+ * Set string value to a cell.
+ *
+ * @param row row ID
+ * @param col column ID
+ * @param sindex 0-based string index in the shared string table.
+ */
+ virtual void set_string(row_t row, col_t col, string_id_t sindex) = 0;
+
+ /**
+ * Set numerical value to a cell.
+ *
+ * @param row row ID
+ * @param col column ID
+ * @param value value being assigned to the cell.
+ */
+ virtual void set_value(row_t row, col_t col, double value) = 0;
+
+ /**
+ * Set a boolean value to a cell.
+ *
+ * @param row row ID
+ * @param col column ID
+ * @param value boolean value being assigned to the cell
+ */
+ virtual void set_bool(row_t row, col_t col, bool value) = 0;
+
+ /**
+ * Set date and time value to a cell.
+ *
+ * @param row row ID
+ * @param col column ID
+ * @param year 1-based value representing year
+ * @param month 1-based value representing month, varying from 1 through
+ * 12.
+ * @param day 1-based value representing day, varying from 1 through 31.
+ * @param hour the hour of a day, ranging from 0 through 23.
+ * @param minute the minute of an hour, ranging from 0 through 59.
+ * @param second the second of a minute, ranging from 0 through 59.
+ */
+ virtual void set_date_time(
+ row_t row, col_t col,
+ int year, int month, int day, int hour, int minute, double second) = 0;
+
+ /**
+ * Set cell format to specified cell. The cell format is referred to by
+ * the xf (cell format) index in the styles table.
+ *
+ * @note This method gets called after both set_column_format() and
+ * set_row_format().
+ *
+ * @param row row ID
+ * @param col column ID
+ * @param xf_index 0-based xf (cell format) index
+ */
+ virtual void set_format(row_t row, col_t col, size_t xf_index) = 0;
+
+ /**
+ * Set cell format to specified cell range. The cell format is referred
+ * to by the xf (cell format) index in the styles table.
+ *
+ * @param row_start start row ID
+ * @param col_start start column ID
+ * @param row_end end row ID
+ * @param col_end end column ID
+ * @param xf_index 0-based xf (cell format) index
+ */
+ virtual void set_format(row_t row_start, col_t col_start,
+ row_t row_end, col_t col_end, size_t xf_index) = 0;
+
+ /**
+ * Set cell format to a specified column. The cell format is referred to by
+ * the xf (cell format) index in the styles table.
+ *
+ * @note This method gets called first before set_row_format() or
+ * set_format() variants.
+ *
+ * @param col column ID
+ * @param col_span number of contiguous columns to apply the format to. It
+ * must be at least one.
+ * @param xf_index 0-based xf (cell format) index
+ */
+ virtual void set_column_format(col_t col, col_t col_span, std::size_t xf_index) = 0;
+
+ /**
+ * Apply a cell format to the specified row. The cell format is referred to
+ * by the xf (cell format) index in the styles table.
+ *
+ * @note This method gets called after set_column_format() but before
+ * the set_format() variants.
+ *
+ * @param row row ID
+ * @param xf_index 0-based xf (cell format) index
+ */
+ virtual void set_row_format(row_t row, std::size_t xf_index) = 0;
+
+ /**
+ * Copy the value of the source cell to one or more cells located
+ * immediately below it.
+ *
+ * @param src_row row ID of the source cell
+ * @param src_col column ID of the source cell
+ * @param range_size number of cells below the source cell to copy the
+ * source cell value to. It must be at least one.
+ */
+ virtual void fill_down_cells(row_t src_row, col_t src_col, row_t range_size) = 0;
+
+ /**
+ * Get the size of the sheet.
+ *
+ * @return structure containing the number of rows and the number of
+ * columns of the sheet.
+ */
+ virtual range_size_t get_sheet_size() const = 0;
+};
+
+/**
+ * Interface for specifying global settings that may affect how the
+ * implementor should process certain values and properties.
+ *
+ * @note Every method of this interface other than the destructor is pure
+ * virtual; an implementor that provides this interface must therefore
+ * implement all of them.
+ */
+class ORCUS_DLLPUBLIC import_global_settings
+{
+public:
+ virtual ~import_global_settings();
+
+ /**
+ * Set the date that is to be represented by a value of 0. All date
+ * values should be represented relative to this date. This may affect, for
+ * instance, values imported via @ref import_sheet::set_date_time().
+ *
+ * @param year 1-based value representing year
+ * @param month 1-based value representing month, varying from 1 through
+ * 12.
+ * @param day 1-based value representing day, varying from 1 through 31.
+ */
+ virtual void set_origin_date(int year, int month, int day) = 0;
+
+ /**
+ * Set the formula grammar to be used globally when parsing formulas if the
+ * grammar is not specified. This grammar should also be used when parsing
+ * range strings associated with shared formula ranges, array formula
+ * ranges, autofilter ranges etc.
+ *
+ * Note that the import filter may specify what formula grammar to use
+ * locally when importing formula expressions for cells via @ref
+ * import_formula::set_formula(), in which case the implementor should honor
+ * that one instead.
+ *
+ * @param grammar default formula grammar to use globally unless otherwise
+ * specified.
+ */
+ virtual void set_default_formula_grammar(formula_grammar_t grammar) = 0;
+
+ /**
+ * Get the current global formula grammar. The import filter may use this
+ * method to query the current global formula grammar.
+ *
+ * @return current default formula grammar.
+ */
+ virtual formula_grammar_t get_default_formula_grammar() const = 0;
+
+ /**
+ * Set the character set to use when parsing encoded string values.
+ *
+ * @param charset character set to use when parsing encoded string values.
+ */
+ virtual void set_character_set(character_set_t charset) = 0;
+};
+
+/**
+ * This is an interface to allow the implementor to provide its own reference
+ * address parsers, for both single cell references and cell range references.
+ * The implementor may choose to provide a different parser depending on the
+ * type of formula_ref_context_t argument given to the @ref
+ * import_factory::get_reference_resolver() call.
+ */
+class ORCUS_DLLPUBLIC import_reference_resolver
+{
+public:
+ virtual ~import_reference_resolver();
+
+ /**
+ * Resolve a textual representation of a single cell address.
+ *
+ * @param address single cell address string.
+ *
+ * @return structure containing the column and row positions of the
+ * address.
+ *
+ * @exception orcus::invalid_arg_error the string is not a valid
+ * single cell address.
+ */
+ virtual src_address_t resolve_address(std::string_view address) = 0;
+
+ /**
+ * Resolve a textual representation of a range address. Note that a
+ * string representing a valid single cell address should be considered a
+ * valid range address.
+ *
+ * @param range range address string.
+ *
+ * @return structure containing the start and end positions of the range
+ * address.
+ *
+ * @exception orcus::invalid_arg_error the string is not a valid range
+ * address.
+ */
+ virtual src_range_t resolve_range(std::string_view range) = 0;
+};
+
+/**
+ * This interface is the entry point for the import filter code to instantiate
+ * other, more specialized interfaces. The life cycles of any specialized
+ * interfaces returned from this interface shall be managed by the implementor
+ * of this interface.
+ *
+ * The implementor of this interface may wrap a backend document store that
+ * needs to be populated.
+ *
+ * @note The methods documented as returning an "optional interface" are
+ * declared as non-pure virtual functions, so overriding them is not
+ * required. append_sheet(), both get_sheet() overloads and finalize()
+ * are pure virtual and must be implemented.
+ */
+class ORCUS_DLLPUBLIC import_factory
+{
+public:
+ virtual ~import_factory();
+
+ /**
+ * Obtain an optional interface for global settings, which the import filter
+ * uses to specify global filter settings that may affect how certain values
+ * and properties should be processed. The implementor can use this
+ * interface to decide how to process relevant values and properties.
+ *
+ * @return pointer to the global settings interface, or a @p nullptr if the
+ * implementor doesn't support it.
+ */
+ virtual import_global_settings* get_global_settings();
+
+ /**
+ * Obtain an optional interface for importing shared strings for string
+ * cells. Implementing this interface is required in order to import string
+ * cell values.
+ *
+ * @return pointer to the shared strings interface, or a @p nullptr if the
+ * implementor doesn't support it.
+ */
+ virtual import_shared_strings* get_shared_strings();
+
+ /**
+ * Obtain an optional interface for importing global named expressions.
+ *
+ * Note that @ref import_sheet also provides the same interface, but its
+ * interface is for importing sheet-local named expressions.
+ *
+ * @return pointer to the global named expression interface, or a @p nullptr
+ * if the implementor doesn't support it.
+ */
+ virtual import_named_expression* get_named_expression();
+
+ /**
+ * Obtain an optional interface for importing styles used to add formatting
+ * properties to cell values.
+ *
+ * @return pointer to the styles interface, or a @p nullptr if the
+ * implementor doesn't support it.
+ */
+ virtual import_styles* get_styles();
+
+ /**
+ * Obtain an optional interface for resolving cell and cell-range references
+ * from string values.
+ *
+ * @param cxt context in which the formula expression containing the
+ * references to be resolved occurs.
+ *
+ * @return pointer to the reference resolver interface, or a @p nullptr if
+ * the implementor doesn't support it.
+ */
+ virtual import_reference_resolver* get_reference_resolver(formula_ref_context_t cxt);
+
+ /**
+ * Obtain an optional interface for pivot cache definition import for a
+ * specified cache ID. In case a pivot cache already exists for the passed
+ * ID, the implementor should overwrite the existing cache with a brand-new
+ * cache instance.
+ *
+ * @param cache_id numeric ID associated with the pivot cache.
+ *
+ * @return pointer to the pivot cache interface, or a @p nullptr if the
+ * implementor doesn't support pivot cache import.
+ */
+ virtual import_pivot_cache_definition* create_pivot_cache_definition(
+ pivot_cache_id_t cache_id);
+
+ /**
+ * Obtain an optional interface for pivot cache records import for a
+ * specified cache ID.
+ *
+ * @param cache_id numeric ID associated with the pivot cache.
+ *
+ * @return pointer to the pivot cache records interface, or a @p nullptr if
+ * the implementor doesn't support pivot cache import.
+ */
+ virtual import_pivot_cache_records* create_pivot_cache_records(
+ pivot_cache_id_t cache_id);
+
+ /**
+ * Append a sheet with a specified sheet position index and name and return
+ * an interface for importing its content. The implementor can use a call
+ * to this method as a signal to create and append a new sheet instance to
+ * the document store.
+ *
+ * @param sheet_index position index of the sheet to be appended. It is
+ * 0-based i.e. the first sheet to be appended will
+ * have an index value of 0.
+ * @param name sheet name.
+ *
+ * @return pointer to the sheet instance, or a @p nullptr if the implementor
+ * doesn't support it. Note, however, that if the implementor
+ * doesn't support this interface, no cell values will get imported.
+ */
+ virtual import_sheet* append_sheet(sheet_t sheet_index, std::string_view name) = 0;
+
+ /**
+ * Get a sheet instance by name. The import filter may use this method to
+ * get access to an existing sheet after it has been created.
+ *
+ * @param name sheet name.
+ *
+ * @return pointer to the sheet instance whose name matches the name
+ * passed to this method. It returns a @p nullptr if no sheet
+ * instance exists by the specified name.
+ */
+ virtual import_sheet* get_sheet(std::string_view name) = 0;
+
+ /**
+ * Retrieve a sheet instance by a specified numerical sheet index.
+ *
+ * @param sheet_index sheet index.
+ *
+ * @return pointer to the sheet instance, or a @p nullptr if no sheet
+ * instance exists at the specified sheet index.
+ */
+ virtual import_sheet* get_sheet(sheet_t sheet_index) = 0;
+
+ /**
+ * The import filter calls this method after completing its import, to give
+ * the implementor a chance to perform post-processing.
+ */
+ virtual void finalize() = 0;
+};
+
+}}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/import_interface_pivot.hpp b/include/orcus/spreadsheet/import_interface_pivot.hpp
new file mode 100644
index 0000000..275ed44
--- /dev/null
+++ b/include/orcus/spreadsheet/import_interface_pivot.hpp
@@ -0,0 +1,351 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_IMPORT_INTERFACE_PIVOT_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_IMPORT_INTERFACE_PIVOT_HPP
+
+#include <cstdlib>
+
+#include "types.hpp"
+#include "../types.hpp"
+#include "../env.hpp"
+
+// NB: This header must not depend on ixion, as it needs to be usable for
+// those clients that provide their own formula engine. Other headers in
+// the orcus::spreadsheet namespace may depend on ixion.
+
+namespace orcus { namespace spreadsheet { namespace iface {
+
+class import_pivot_cache_field_group;
+
+/**
+ * Interface for importing pivot cache definitions.
+ */
+class ORCUS_DLLPUBLIC import_pivot_cache_definition
+{
+public:
+ virtual ~import_pivot_cache_definition();
+
+ /**
+ * Specify that the source data of this pivot cache is located on a local
+ * worksheet.
+ *
+ * @param ref range string specifying the source range.
+ * @param sheet_name name of the worksheet where the source data is located.
+ *
+ * @todo use the ref resolver to resolve the range.
+ */
+ virtual void set_worksheet_source(std::string_view ref, std::string_view sheet_name) = 0;
+
+ /**
+ * Specify that the source data of this pivot cache is associated with a
+ * table. This overload takes only a table name, as opposed to the
+ * two-argument overload which takes a range string and a sheet name.
+ *
+ * @param table_name name of the table.
+ */
+ virtual void set_worksheet_source(std::string_view table_name) = 0;
+
+ /**
+ * Set the total number of fields present in this pivot cache.
+ *
+ * @param n total number of fields in this pivot cache.
+ */
+ virtual void set_field_count(size_t n) = 0;
+
+ /**
+ * Set the name of the field in the current field buffer.
+ *
+ * @param name field name.
+ */
+ virtual void set_field_name(std::string_view name) = 0;
+
+ /**
+ * Set the lowest value of the field in the current field buffer.
+ *
+ * @param v lowest value of the field.
+ */
+ virtual void set_field_min_value(double v) = 0;
+
+ /**
+ * Set the highest value of the field in the current field buffer.
+ *
+ * @param v highest value of the field.
+ */
+ virtual void set_field_max_value(double v) = 0;
+
+ /**
+ * Set the lowest date value of the field in the current field buffer.
+ *
+ * @param dt lowest date value of the field.
+ */
+ virtual void set_field_min_date(const date_time_t& dt) = 0;
+
+ /**
+ * Set the highest date value of the field in the current field buffer.
+ *
+ * @param dt highest date value of the field.
+ */
+ virtual void set_field_max_date(const date_time_t& dt) = 0;
+
+ /**
+ * Mark the current field as a group field and initiate its import.
+ *
+ * The implementor should create an internal storage to prepare for the
+ * importing of field group data when this method gets called.
+ *
+ * @param base_index 0-based index of the field this group field uses as its
+ * base.
+ *
+ * @return interface for importing group field data, or a @p nullptr if the
+ * implementor doesn't support it.
+ */
+ virtual import_pivot_cache_field_group* start_field_group(size_t base_index) = 0;
+
+ /**
+ * Commit the field in the current field buffer to the pivot cache model.
+ */
+ virtual void commit_field() = 0;
+
+ /**
+ * Set a string value to the current field item buffer.
+ *
+ * @param value string value.
+ */
+ virtual void set_field_item_string(std::string_view value) = 0;
+
+ /**
+ * Set a numeric value to the current field item buffer.
+ *
+ * @param v numeric value.
+ */
+ virtual void set_field_item_numeric(double v) = 0;
+
+ /**
+ * Set a date-time value to the current field item buffer.
+ *
+ * @param dt date-time value.
+ */
+ virtual void set_field_item_date_time(const date_time_t& dt) = 0;
+
+ /**
+ * Set an error value to the current field item buffer.
+ *
+ * @param ev error value.
+ */
+ virtual void set_field_item_error(error_value_t ev) = 0;
+
+ /**
+ * Commit the field item in the current field item buffer to the current
+ * field model.
+ */
+ virtual void commit_field_item() = 0;
+
+ /**
+ * Commit the current pivot cache model to the document model.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing group field settings in a pivot cache definition.
+ */
+class ORCUS_DLLPUBLIC import_pivot_cache_field_group
+{
+public:
+ virtual ~import_pivot_cache_field_group();
+
+ /**
+ * Establish a link from a base item to a group item.
+ *
+ * The index of the corresponding base item is inferred from the order of
+ * this method being called; the first call to this method implies a base
+ * item index of 0, the second call implies an index of 1, and so on.
+ *
+ * This method is called only for a non-range group field, i.e. a group
+ * field where parent-to-child item relationships are manually defined.
+ *
+ * @param group_item_index 0-based index for the group item.
+ */
+ virtual void link_base_to_group_items(size_t group_item_index) = 0;
+
+ /**
+ * Set an individual field item value that is of string type to the
+ * current internal buffer.
+ *
+ * This method can be called either for a range group field or a non-range
+ * one.
+ *
+ * @param value field item value.
+ */
+ virtual void set_field_item_string(std::string_view value) = 0;
+
+ /**
+ * Set an individual field item value that is of numeric type to the
+ * current internal buffer.
+ *
+ * This method can be called either for a range group field or a non-range
+ * one.
+ *
+ * @param v field item value.
+ */
+ virtual void set_field_item_numeric(double v) = 0;
+
+ /**
+ * Commit the current internal field item buffer to the group.
+ */
+ virtual void commit_field_item() = 0;
+
+ /**
+ * Set the range grouping type.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param group_by type of range grouping.
+ */
+ virtual void set_range_grouping_type(pivot_cache_group_by_t group_by) = 0;
+
+ /**
+ * Set whether the current range group field has an automatic start
+ * position.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param b whether or not the current range group field has an automatic
+ * start position.
+ */
+ virtual void set_range_auto_start(bool b) = 0;
+
+ /**
+ * Set whether the current range group field has an automatic end
+ * position.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param b whether or not the current range group field has an automatic
+ * end position.
+ */
+ virtual void set_range_auto_end(bool b) = 0;
+
+ /**
+ * Set the start number of the current range group field.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param v start number of the current range group field.
+ */
+ virtual void set_range_start_number(double v) = 0;
+
+ /**
+ * Set the end number of the current range group field.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param v end number of the current range group field.
+ */
+ virtual void set_range_end_number(double v) = 0;
+
+ /**
+ * Set the start date of the current range group field.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param dt start date of the current range group field.
+ */
+ virtual void set_range_start_date(const date_time_t& dt) = 0;
+
+ /**
+ * Set the end date of the current range group field.
+ *
+ * The current group field implicitly becomes a range group field when
+ * this method is called.
+ *
+ * @param dt end date of the current range group field.
+ */
+ virtual void set_range_end_date(const date_time_t& dt) = 0;
+
+ /**
+ * Set the interval of the current range group field. If the current
+ * range is a date range, the value represents the number of days.
+ *
+ * @param v interval of the current range group field.
+ */
+ virtual void set_range_interval(double v) = 0;
+
+ /**
+ * Commit the current field group data to the parent field.
+ */
+ virtual void commit() = 0;
+};
+
+/**
+ * Interface for importing pivot cache records.
+ */
+class ORCUS_DLLPUBLIC import_pivot_cache_records
+{
+public:
+ virtual ~import_pivot_cache_records();
+
+ /**
+ * Set the number of records included in the pivot cache records.
+ *
+ * @note This method gets called before the very first record gets imported.
+ * The implementor can use this call as an opportunity to initialize
+ * any internal buffers used to store the imported records.
+ *
+ * @param n number of records included in the pivot cache records.
+ */
+ virtual void set_record_count(size_t n) = 0;
+
+ /**
+ * Append to the current record buffer a numeric value as a column value.
+ *
+ * @param v numeric value to append to the current record buffer as a column
+ * value.
+ */
+ virtual void append_record_value_numeric(double v) = 0;
+
+ /**
+ * Append to the current record buffer a character value as a column value.
+ *
+ * @param s character value to append to the current record buffer as a
+ * column value.
+ */
+ virtual void append_record_value_character(std::string_view s) = 0;
+
+ /**
+ * Append to the current record buffer a column value referenced by an index
+ * into the shared items table of a pivot cache field. The corresponding
+ * field in the pivot cache definition should provide the shared items table
+ * that this index references.
+ *
+ * @param index index into the shared items table of a pivot cache field.
+ */
+ virtual void append_record_value_shared_item(size_t index) = 0;
+
+ /**
+ * Commit the record in the current record buffer.
+ *
+ * The implementor can clear the buffer afterward.
+ */
+ virtual void commit_record() = 0;
+
+ /**
+ * Commit the entire set of records to the document store.
+ */
+ virtual void commit() = 0;
+};
+
+}}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/import_interface_styles.hpp b/include/orcus/spreadsheet/import_interface_styles.hpp
new file mode 100644
index 0000000..6ad94a8
--- /dev/null
+++ b/include/orcus/spreadsheet/import_interface_styles.hpp
@@ -0,0 +1,774 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#pragma once
+
+#include <cstdlib>
+
+#include "types.hpp"
+#include "../types.hpp"
+#include "../env.hpp"
+
+// NB: This header must not depend on ixion, as it needs to be usable for
+// those clients that provide their own formula engine. Other headers in
+// the orcus::spreadsheet namespace may depend on ixion.
+
+namespace orcus { namespace spreadsheet { namespace iface {
+
+class import_font_style;
+class import_fill_style;
+class import_border_style;
+class import_cell_protection;
+class import_number_format;
+class import_xf;
+class import_cell_style;
+
+/**
+ * Interface for importing styles. This one acts as an entry point and
+ * provides other interfaces for the style categories.
+ *
+ * The styles are to be stored in a <a
+ * href="https://en.wikipedia.org/wiki/Flyweight_pattern">flyweight</a>
+ * fashion where each style category maintains an array of stored style
+ * items, which are referenced by their indices. Each time a style
+ * item is pushed through the interface, it returns an index representing the
+ * item. The indices are to be assigned sequentially starting with 0 in each
+ * style category, and <em>the default style must get an index of 0</em>.
+ * Because of this, the import filter imports the default styles first before
+ * importing other non-default styles.
+ *
+ * The abbreviation @p xf stands for cell format, and is used throughout the
+ * styles API. Similarly, @p dxf stands for differential cell format, and
+ * stores partial format properties that are to be applied on top of the base
+ * format properties.
+ *
+ * @note The implementor of this interface @em must implement all interfaces
+ * for all the style categories that this interface returns.
+ */
+class ORCUS_DLLPUBLIC import_styles
+{
+public:
+ virtual ~import_styles();
+
+ /**
+ * Signal the start of the import of font style attributes, and return a
+ * pointer to the interface instance for importing the attributes.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @return pointer to the interface instance for importing font style
+ * attributes.
+ */
+ virtual import_font_style* start_font_style() = 0;
+
+ /**
+ * Signal the start of the import of fill style attributes, and return a
+ * pointer to the interface instance for importing the attributes.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @return pointer to the interface instance for importing fill style
+ * attributes.
+ */
+ virtual import_fill_style* start_fill_style() = 0;
+
+ /**
+ * Signal the start of the import of border style attributes, and return a
+ * pointer to the interface instance for importing the attributes.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @return pointer to the interface instance for importing border style
+ * attributes.
+ */
+ virtual import_border_style* start_border_style() = 0;
+
+ /**
+ * Signal the start of the import of cell protection attributes, and return
+ * a pointer to the interface instance for importing the attributes.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @return pointer to the interface instance for importing cell protection
+ * attributes.
+ */
+ virtual import_cell_protection* start_cell_protection() = 0;
+
+ /**
+ * Signal the start of the import of number format attributes, and return a
+ * pointer to the interface instance for importing the attributes.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @return pointer to the interface instance for importing number format
+ * attributes.
+ */
+ virtual import_number_format* start_number_format() = 0;
+
+ /**
+ * Signal the start of the import of cell format (xf) indices that each
+ * reference different format attributes in their respective pools, and
+ * return a pointer to the interface instance for importing the indices.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @param cat cell format category of the cell format (xf) to be imported.
+ *
+ * @return pointer to the interface instance for importing cell format (xf)
+ * indices.
+ */
+ virtual import_xf* start_xf(xf_category_t cat) = 0;
+
+ /**
+ * Signal the start of the import of named cell style information, and
+ * return a pointer to the interface instance for importing the information.
+ *
+ * @note The import_styles implementor <i>must</i> return a non-null
+ * pointer.
+ *
+ * @return pointer to the interface instance for importing named cell style
+ * information.
+ */
+ virtual import_cell_style* start_cell_style() = 0;
+
+ /**
+ * Set the total number of font styles. This may be called before importing
+ * any of the font styles. This will give the implementor a chance to
+ * allocate storage. Note that it may not always be called.
+ *
+ * @param n number of font styles.
+ */
+ virtual void set_font_count(size_t n) = 0;
+
+ /**
+ * Set the total number of fill styles. This may be called before importing
+ * any of the fill styles. This will give the implementor a chance to
+ * allocate storage. Note that it may not always be called.
+ *
+ * @param n number of fill styles.
+ */
+ virtual void set_fill_count(size_t n) = 0;
+
+ /**
+ * Set the total number of border styles. This may be called before
+ * importing any of the border styles. This will give the implementor a
+ * chance to allocate storage. Note that it may not always be called.
+ *
+ * @param n number of border styles.
+ */
+ virtual void set_border_count(size_t n) = 0;
+
+ /**
+ * Set the total number of number format styles. This may be called before
+ * importing any of the number format styles. This will give the implementor
+ * a chance to allocate storage. Note that it may not always be called.
+ *
+ * @param n number of number format styles.
+ */
+ virtual void set_number_format_count(size_t n) = 0;
+
+ /**
+ * Set the total number of cell format styles for a specified cell format
+ * category. This may be called before importing any of the cell format
+ * styles for the specified category. This will give the implementor a
+ * chance to allocate storage. Note that it may not always be called.
+ *
+ * @param cat cell format category.
+ * @param n number of cell format styles for the specified cell format
+ * category.
+ */
+ virtual void set_xf_count(xf_category_t cat, size_t n) = 0;
+
+ /**
+ * Set the total number of named cell styles. This may be called before
+ * importing any cell styles to give the implementor a chance to allocate
+ * storage. Note that it may not always be called.
+ *
+ * @param n number of named cell styles.
+ */
+ virtual void set_cell_style_count(size_t n) = 0;
+};
+
+/**
+ * Interface for importing font style items. The following font style
+ * properties store different values for western, asian and complex scripts:
+ *
+ * @li font name
+ * @li font size
+ * @li font weight (normal or bold)
+ * @li font style (normal or italic)
+ */
+class ORCUS_DLLPUBLIC import_font_style
+{
+public:
+ virtual ~import_font_style();
+
+ /**
+ * Set the font weight to either normal or bold, for western script.
+ *
+ * @param b whether the font has normal (false) or bold weight (true).
+ */
+ virtual void set_bold(bool b) = 0;
+
+ /**
+ * Set the font weight to either normal or bold, for asian script.
+ *
+ * @param b whether the font has normal (false) or bold weight (true).
+ */
+ virtual void set_bold_asian(bool b) = 0;
+
+ /**
+ * Set the font weight to either normal or bold, for complex script.
+ *
+ * @param b whether the font has normal (false) or bold weight (true).
+ */
+ virtual void set_bold_complex(bool b) = 0;
+
+ /**
+ * Set the font style to either normal or italic, for western script.
+ *
+ * @param b whether the font has normal (false) or italic style (true).
+ */
+ virtual void set_italic(bool b) = 0;
+
+ /**
+ * Set the font style to either normal or italic, for asian script.
+ *
+ * @param b whether the font has normal (false) or italic style (true).
+ */
+ virtual void set_italic_asian(bool b) = 0;
+
+ /**
+ * Set the font style to either normal or italic, for complex script.
+ *
+ * @param b whether the font has normal (false) or italic style (true).
+ */
+ virtual void set_italic_complex(bool b) = 0;
+
+ /**
+ * Set the name of a font, for western script.
+ *
+ * @param s font name.
+ */
+ virtual void set_name(std::string_view s) = 0;
+
+ /**
+ * Set the name of a font, for asian script.
+ *
+ * @param s font name.
+ */
+ virtual void set_name_asian(std::string_view s) = 0;
+
+ /**
+ * Set the name of a font, for complex script.
+ *
+ * @param s font name.
+ */
+ virtual void set_name_complex(std::string_view s) = 0;
+
+ /**
+ * Set the size of a font in points, for western script.
+ *
+ * @param point font size in points.
+ */
+ virtual void set_size(double point) = 0;
+
+ /**
+ * Set the size of a font in points, for asian script.
+ *
+ * @param point font size in points.
+ */
+ virtual void set_size_asian(double point) = 0;
+
+ /**
+ * Set the size of a font in points, for complex script.
+ *
+ * @param point font size in points.
+ */
+ virtual void set_size_complex(double point) = 0;
+
+ /**
+ * Set the underline type of a font.
+ *
+ * @param e underline type of a font.
+ */
+ virtual void set_underline(underline_t e) = 0;
+
+ /**
+ * Set the width of the underline of a font.
+ *
+ * @param e width of the underline of a font.
+ */
+ virtual void set_underline_width(underline_width_t e) = 0;
+
+ /**
+ * Set whether the underline of a font is continuous over the gaps, or
+ * skips the gaps.
+ *
+ * @param e whether the underline of a font is continuous over the gaps or
+ * skips the gaps.
+ */
+ virtual void set_underline_mode(underline_mode_t e) = 0;
+
+ /**
+ * Set whether the underline of a font consists of a single line, or a
+ * double line.
+ *
+ * @param e whether the underline of a font consists of a single line, or a
+ * double line.
+ *
+ * @todo Look into merging this with set_underline().
+ */
+ virtual void set_underline_type(underline_type_t e) = 0;
+
+ /**
+ * Specify the color of an underline in ARGB format.
+ *
+ * @param alpha alpha component of the color.
+ * @param red red component of the color.
+ * @param green green component of the color.
+ * @param blue blue component of the color.
+ *
+ * @note If this value is not explicitly set, the font color should be used.
+ */
+ virtual void set_underline_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Specify the color of a font in ARGB format.
+ *
+ * @param alpha alpha component of the color.
+ * @param red red component of the color.
+ * @param green green component of the color.
+ * @param blue blue component of the color.
+ */
+ virtual void set_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Set the strikethrough style of a font.
+ *
+ * @param s strikethrough style of a font.
+ */
+ virtual void set_strikethrough_style(strikethrough_style_t s) = 0;
+
+ /**
+ * Set whether the strikethrough of a font consists of a single line or a
+ * double line.
+ *
+ * @param s whether the strikethrough of a font consists of a single line or
+ * a double line.
+ */
+ virtual void set_strikethrough_type(strikethrough_type_t s) = 0;
+
+ /**
+ * Set the width of the strikethrough of a font.
+ *
+ * @param s the width of the strikethrough of a font.
+ */
+ virtual void set_strikethrough_width(strikethrough_width_t s) = 0;
+
+ /**
+ * Set the text to use as a strikethrough.
+ *
+ * @param s text to use as a strikethrough.
+ */
+ virtual void set_strikethrough_text(strikethrough_text_t s) = 0;
+
+ /**
+ * Commit the font style in the current buffer.
+ *
+ * @return index of the committed font style, to be passed on to the
+ * import_xf::set_font() method as its argument.
+ */
+ virtual std::size_t commit() = 0;
+};
+
+/**
+ * Interface for importing fill style items.
+ */
+class ORCUS_DLLPUBLIC import_fill_style
+{
+public:
+ virtual ~import_fill_style();
+
+ /**
+ * Set the type of fill pattern.
+ *
+ * @param fp fill pattern type.
+ */
+ virtual void set_pattern_type(fill_pattern_t fp) = 0;
+
+ /**
+ * Set the foreground color of a fill. <i>Note that for a solid fill
+ * type, the foreground color will be used.</i>
+ *
+ * @param alpha alpha component ranging from 0 (fully transparent) to 255
+ * (fully opaque).
+ * @param red red component ranging from 0 to 255.
+ * @param green green component ranging from 0 to 255.
+ * @param blue blue component ranging from 0 to 255.
+ */
+ virtual void set_fg_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Set the background color of a fill. <i>Note that this color will
+ * be ignored for a solid fill type.</i>
+ *
+ * @param alpha alpha component ranging from 0 (fully transparent) to 255
+ * (fully opaque).
+ * @param red red component ranging from 0 to 255.
+ * @param green green component ranging from 0 to 255.
+ * @param blue blue component ranging from 0 to 255.
+ */
+ virtual void set_bg_color(color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Commit the fill style in the current buffer.
+ *
+ * @return index of the committed fill style, to be passed on to the
+ * import_xf::set_fill() method as its argument.
+ */
+ virtual size_t commit() = 0;
+};
+
+/**
+ * Interface for importing border style items.
+ */
+class ORCUS_DLLPUBLIC import_border_style
+{
+public:
+ virtual ~import_border_style();
+
+ /**
+ * Set the border style to a specified border position.
+ *
+ * @param dir position of a border to set the style to.
+ * @param style border style to set.
+ */
+ virtual void set_style(border_direction_t dir, border_style_t style) = 0;
+
+ /**
+ * Set the color of a border.
+ *
+ * @param dir position of a border to set the color to.
+ * @param alpha alpha element of the color.
+ * @param red red element of the color.
+ * @param green green element of the color.
+ * @param blue blue element of the color.
+ */
+ virtual void set_color(
+ border_direction_t dir, color_elem_t alpha, color_elem_t red, color_elem_t green, color_elem_t blue) = 0;
+
+ /**
+ * Set the width of a border.
+ *
+ * @param dir position of a border.
+ * @param width width of a border.
+ * @param unit unit of measurement to use in the border width.
+ */
+ virtual void set_width(border_direction_t dir, double width, orcus::length_unit_t unit) = 0;
+
+ /**
+ * Commit the border style in the current buffer.
+ *
+ * @return index of the committed border style, to be passed on to the
+ * import_xf::set_border() method as its argument.
+ */
+ virtual size_t commit() = 0;
+};
+
+/**
+ * Interface for importing cell protection items.
+ */
+class ORCUS_DLLPUBLIC import_cell_protection
+{
+public:
+ virtual ~import_cell_protection();
+
+ /**
+ * Hide the entire cell content when the sheet is protected.
+ *
+ * @param b whether to hide the entire cell content when the sheet is
+ * protected.
+ */
+ virtual void set_hidden(bool b) = 0;
+
+ /**
+ * Lock the cell when the sheet is protected.
+ *
+ * @param b whether or not to lock the cell when the sheet is protected.
+ */
+ virtual void set_locked(bool b) = 0;
+
+ /**
+ * Specify whether or not to print the cell content when the sheet is
+ * protected.
+ *
+ *
+ * @param b whether or not to print the cell content when the sheet is
+ * protected.
+ */
+ virtual void set_print_content(bool b) = 0;
+
+ /**
+ * Hide the formula when the sheet is protected and the cell contains a
+ * formula.
+ *
+ * @param b whether or not to hide the formula when the sheet is protected
+ * and the cell contains a formula.
+ */
+ virtual void set_formula_hidden(bool b) = 0;
+
+ /**
+ * Commit the cell protection data in the current buffer.
+ *
+ * @return index of the committed cell protection data, to be passed on to
+ * the import_xf::set_protection() method as its argument.
+ */
+ virtual std::size_t commit() = 0;
+};
+
+/**
+ * Interface for importing number format items.
+ */
+class ORCUS_DLLPUBLIC import_number_format
+{
+public:
+ virtual ~import_number_format();
+
+ /**
+ * Set the integral identifier of a number format.
+ *
+ * @param id integral identifier of a number format.
+ *
+ * @note This is specific to xlsx format. In xlsx, this identifier gets
+ * used to reference number formats instead of the identifier returned
+ * by the commit() method.
+ *
+ * @todo Perhaps when this method is called, the commit() method of the
+ * corresponding item should return the value set in this method
+ * instead.
+ */
+ virtual void set_identifier(std::size_t id) = 0;
+
+ /**
+ * Set the number format code.
+ *
+ * @param s number format code.
+ */
+ virtual void set_code(std::string_view s) = 0;
+
+ /**
+ * Commit the number format item in the current buffer.
+ *
+ * @return index of the committed number format item, to be passed on to the
+ * import_xf::set_number_format() method as its argument.
+ *
+ * @todo Look into returning the identifier set through the set_identifier()
+ * method.
+ */
+ virtual size_t commit() = 0;
+};
+
+/**
+ * This interface is used to import cell format records for direct cell
+ * formats, named cell style formats, and differential cell formats.
+ *
+ * The following cell format types:
+ * <ul>
+ * <li>font</li>
+ * <li>fill</li>
+ * <li>border</li>
+ * <li>protection</li>
+ * <li>number format</li>
+ * </ul>
+ * use indices to reference their records in their respective record pools.
+ *
+ * The horizontal and vertical alignments are specified directly.
+ */
+class ORCUS_DLLPUBLIC import_xf
+{
+public:
+ virtual ~import_xf();
+
+ /**
+ * Set the index of the font record, as returned from the
+ * import_font_style::commit() method.
+ *
+ * @param index index of the font record to reference.
+ */
+ virtual void set_font(size_t index) = 0;
+
+ /**
+ * Set the index of the fill record, as returned from the
+ * import_fill_style::commit() method.
+ *
+ * @param index index of the fill record to reference.
+ */
+ virtual void set_fill(size_t index) = 0;
+
+ /**
+ * Set the index of the border record, as returned from the
+ * import_border_style::commit() method.
+ *
+ * @param index index of the border record to reference.
+ */
+ virtual void set_border(size_t index) = 0;
+
+ /**
+ * Set the index of the cell protection record, as returned from the
+ * import_cell_protection::commit() method.
+ *
+ * @param index index of the cell protection record to reference.
+ */
+ virtual void set_protection(size_t index) = 0;
+
+ /**
+ * Set the index of the number format record, as returned from the
+ * import_number_format::commit() method.
+ *
+ * @param index index of the number format record to reference.
+ */
+ virtual void set_number_format(size_t index) = 0;
+
+ /**
+ * Set the index into the cell style record to specify a named cell style it
+ * uses as its base format in case the cell has an underlying style applied.
+ * This can be used for a direct cell format i.e. when the xf category is
+ * xf_category_t::cell or for a cell style format i.e. the xf category is
+ * xf_category_t::cell_style. In a cell style format, this can be used to
+ * reference a parent style.
+ *
+ * @param index index into the cell style record it uses as its basis.
+ */
+ virtual void set_style_xf(size_t index) = 0;
+
+ /**
+ * Set the flag indicating whether or not to apply the alignment attribute.
+ *
+ * @param b flag indicating whether or not to apply the alignment attribute.
+ *
+ * @note This is specific to Excel format.
+ */
+ virtual void set_apply_alignment(bool b) = 0;
+
+ /**
+ * Set the horizontal alignment of a style.
+ *
+ * @param align horizontal alignment of a style.
+ */
+ virtual void set_horizontal_alignment(hor_alignment_t align) = 0;
+
+ /**
+ * Set the vertical alignment of a style.
+ *
+ * @param align vertical alignment of a style.
+ */
+ virtual void set_vertical_alignment(ver_alignment_t align) = 0;
+
+ /**
+ * Specify whether or not to wrap text when the text spills over the cell
+ * region.
+ *
+ * @param b whether or not to wrap text when the text spills over the cell
+ * region.
+ */
+ virtual void set_wrap_text(bool b) = 0;
+
+ /**
+ * Specify whether or not to shrink the text within cell until it fits
+ * inside the cell.
+ *
+ * @param b whether or not to shrink the text.
+ */
+ virtual void set_shrink_to_fit(bool b) = 0;
+
+ /**
+ * Commit the cell format in the current buffer to the storage.
+ *
+ * @return index of the cell format data in the storage. This index may be
+ * passed to the import_cell_style::set_xf() method.
+ */
+ virtual size_t commit() = 0;
+};
+
+/**
+ * This interface is used to import named cell style records.
+ *
+ * @note The actual cell format data for named cell styles are imported
+ * through import_xf, and this interface references its index through
+ * the import_cell_style::set_xf() method.
+ *
+ */
+class ORCUS_DLLPUBLIC import_cell_style
+{
+public:
+ virtual ~import_cell_style();
+
+ /**
+ * Set the name associated with the named cell style.
+ *
+ * @param s name of the named cell style.
+ */
+ virtual void set_name(std::string_view s) = 0;
+
+ /**
+ * Set the name associated with the named cell style intended for display
+ * purposes.
+ *
+ * @param s name to use for display purposes.
+ *
+ * @note Not all supported formats make use of this property. Also, the
+ * style may not always have this property even if the format supports
+ * it. ODF uses this property when the original name contains
+ * characters that cannot be used in internal symbols.
+ */
+ virtual void set_display_name(std::string_view s) = 0;
+
+ /**
+ * Set the index into the cell format record. The named cell style applies
+ * the format referenced by this index.
+ *
+ * @param index index into the cell format record.
+ */
+ virtual void set_xf(size_t index) = 0;
+
+ /**
+ * Set the index into the built-in cell style record.
+ *
+ * @note This is Excel-specific, and unclear whether it's useful outside of
+ * Excel's implementation. Built-in styles are not stored in file, and
+ * Excel likely has its own internal styles stored in the application
+ * itself.
+ *
+ * @param index index into the built-in cell style record.
+ */
+ virtual void set_builtin(size_t index) = 0;
+
+ /**
+ * Set the name of the parent cell style it uses as its basis.
+ *
+ * @note ODF uses this but Excel does not use this value.
+ *
+ * @param s name of the parent cell style.
+ */
+ virtual void set_parent_name(std::string_view s) = 0;
+
+ /**
+ * Commit the cell style format in the current buffer to the storage.
+ *
+ * @note This method does @em not return an index.
+ */
+ virtual void commit() = 0;
+};
+
+}}}
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/import_interface_view.hpp b/include/orcus/spreadsheet/import_interface_view.hpp
new file mode 100644
index 0000000..8e6b53e
--- /dev/null
+++ b/include/orcus/spreadsheet/import_interface_view.hpp
@@ -0,0 +1,78 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef IMPORT_ORCUS_SPREADSHEET_IMPORT_INTERFACE_VIEW_HPP
+#define IMPORT_ORCUS_SPREADSHEET_IMPORT_INTERFACE_VIEW_HPP
+
+#include <cstdlib>
+
+#include "view_types.hpp"
+#include "../types.hpp"
+#include "../env.hpp"
+
+namespace orcus { namespace spreadsheet { namespace iface {
+
+/**
+ * Interface for importing view properties. This interface may be obtained
+ * from the import_sheet interface.
+ */
+class ORCUS_DLLPUBLIC import_sheet_view
+{
+public:
+ virtual ~import_sheet_view();
+
+ /**
+ * Set the current sheet as the active sheet.
+ */
+ virtual void set_sheet_active() = 0;
+
+ /**
+ * Set the information about split view in the current sheet.
+ *
+ * @param hor_split horizontal position of the split in 1/20th of a point,
+ * or 0 if none. "Horizontal" in this case indicates the
+ * column direction.
+ * @param ver_split vertical position of the split in 1/20th of a point,
+ * or 0 if none. "Vertical" in this case indicates the
+ * row direction.
+ * @param top_left_cell the top left visible cell in the bottom right
+ * pane.
+ * @param active_pane active pane in this sheet.
+ */
+ virtual void set_split_pane(
+ double hor_split, double ver_split, const address_t& top_left_cell,
+ sheet_pane_t active_pane) = 0;
+
+ /**
+ * Set the state of frozen view in the current sheet.
+ *
+ * @param visible_columns number of visible columns in the left pane.
+ * @param visible_rows number of visible rows in the top pane.
+ * @param top_left_cell the top left visible cell in the bottom right
+ * pane.
+ * @param active_pane active pane in this sheet.
+ */
+ virtual void set_frozen_pane(
+ col_t visible_columns, row_t visible_rows, const address_t& top_left_cell,
+ sheet_pane_t active_pane) = 0;
+
+ /**
+ * Set the selected cursor range in a specified sheet pane.
+ *
+ * @param pane sheet pane associated with the selection. The top-left
+ * pane is used for a non-split sheet view.
+ * @param range selected cursor range. The range will be 1 column by 1
+ * row when the cursor is on a single cell only.
+ */
+ virtual void set_selected_range(sheet_pane_t pane, range_t range) = 0;
+};
+
+}}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/pivot.hpp b/include/orcus/spreadsheet/pivot.hpp
new file mode 100644
index 0000000..dee2559
--- /dev/null
+++ b/include/orcus/spreadsheet/pivot.hpp
@@ -0,0 +1,254 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_PIVOT_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_PIVOT_HPP
+
+#include "../env.hpp"
+#include "../types.hpp"
+#include "types.hpp"
+
+#include <memory>
+#include <vector>
+#include <limits>
+#include <variant>
+#include <optional>
+
+namespace ixion {
+
+struct abs_range_t;
+
+}
+
+namespace orcus {
+
+class string_pool;
+
+namespace spreadsheet {
+
+class document;
+
+using pivot_cache_indices_t = std::vector<size_t>;
+
+struct ORCUS_SPM_DLLPUBLIC pivot_cache_record_value_t
+{
+ using value_type = std::variant<bool, double, std::size_t, std::string_view, date_time_t>;
+
+ enum class record_type
+ {
+ unknown = 0,
+ boolean,
+ date_time,
+ character,
+ numeric,
+ blank,
+ error,
+ shared_item_index
+ };
+
+ record_type type;
+ value_type value;
+
+ pivot_cache_record_value_t();
+ pivot_cache_record_value_t(std::string_view s);
+ pivot_cache_record_value_t(double v);
+ pivot_cache_record_value_t(size_t index);
+
+ bool operator== (const pivot_cache_record_value_t& other) const;
+ bool operator!= (const pivot_cache_record_value_t& other) const;
+};
+
+using pivot_cache_record_t = std::vector<pivot_cache_record_value_t>;
+
+struct ORCUS_SPM_DLLPUBLIC pivot_cache_item_t
+{
+ using value_type = std::variant<bool, double, std::string_view, date_time_t, error_value_t>;
+
+ enum class item_type
+ {
+ unknown = 0, boolean, date_time, character, numeric, blank, error
+ };
+
+ item_type type;
+ value_type value;
+
+ pivot_cache_item_t();
+ pivot_cache_item_t(std::string_view s);
+ pivot_cache_item_t(double numeric);
+ pivot_cache_item_t(bool boolean);
+ pivot_cache_item_t(const date_time_t& date_time);
+ pivot_cache_item_t(error_value_t error);
+
+ pivot_cache_item_t(const pivot_cache_item_t& other);
+ pivot_cache_item_t(pivot_cache_item_t&& other);
+
+ bool operator< (const pivot_cache_item_t& other) const;
+ bool operator== (const pivot_cache_item_t& other) const;
+
+ pivot_cache_item_t& operator= (pivot_cache_item_t other);
+
+ void swap(pivot_cache_item_t& other);
+};
+
+using pivot_cache_items_t = std::vector<pivot_cache_item_t>;
+
+/**
+ * Group data for a pivot cache field.
+ */
+struct ORCUS_SPM_DLLPUBLIC pivot_cache_group_data_t
+{
+ struct ORCUS_SPM_DLLPUBLIC range_grouping_type
+ {
+ pivot_cache_group_by_t group_by = pivot_cache_group_by_t::range;
+
+ bool auto_start = true;
+ bool auto_end = true;
+
+ double start = 0.0;
+ double end = 0.0;
+ double interval = 1.0;
+
+ date_time_t start_date;
+ date_time_t end_date;
+
+ range_grouping_type() = default;
+ range_grouping_type(const range_grouping_type& other) = default;
+ };
+
+ /**
+ * Mapping of base field member indices to the group field item indices.
+ */
+ pivot_cache_indices_t base_to_group_indices;
+
+ std::optional<range_grouping_type> range_grouping;
+
+ /**
+ * Individual items comprising the group.
+ */
+ pivot_cache_items_t items;
+
+ /** 0-based index of the base field. */
+ size_t base_field;
+
+ pivot_cache_group_data_t(size_t _base_field);
+ pivot_cache_group_data_t(const pivot_cache_group_data_t& other);
+ pivot_cache_group_data_t(pivot_cache_group_data_t&& other);
+
+ pivot_cache_group_data_t() = delete;
+};
+
+struct ORCUS_SPM_DLLPUBLIC pivot_cache_field_t
+{
+ /**
+ * Field name. It must be interned with the string pool belonging to the
+ * document.
+ */
+ std::string_view name;
+
+ pivot_cache_items_t items;
+
+ std::optional<double> min_value;
+ std::optional<double> max_value;
+
+ std::optional<date_time_t> min_date;
+ std::optional<date_time_t> max_date;
+
+ std::unique_ptr<pivot_cache_group_data_t> group_data;
+
+ pivot_cache_field_t();
+ pivot_cache_field_t(std::string_view _name);
+ pivot_cache_field_t(const pivot_cache_field_t& other);
+ pivot_cache_field_t(pivot_cache_field_t&& other);
+};
+
+class ORCUS_SPM_DLLPUBLIC pivot_cache
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ using fields_type = std::vector<pivot_cache_field_t>;
+ using records_type = std::vector<pivot_cache_record_t>;
+
+ pivot_cache(pivot_cache_id_t cache_id, string_pool& sp);
+ ~pivot_cache();
+
+ /**
+ * Bulk-insert all the fields in one step. Note that this will replace all
+ * pre-existing fields, if any.
+ *
+ * @param fields field instances to move into storage.
+ */
+ void insert_fields(fields_type fields);
+
+ void insert_records(records_type record);
+
+ size_t get_field_count() const;
+
+ /**
+ * Retrieve the data of a field by its index.
+ *
+ * @param index index of the field to retrieve.
+ *
+ * @return pointer to the field instance, or nullptr if the index is
+ * out-of-range.
+ */
+ const pivot_cache_field_t* get_field(size_t index) const;
+
+ pivot_cache_id_t get_id() const;
+
+ const records_type& get_all_records() const;
+};
+
+class ORCUS_SPM_DLLPUBLIC pivot_collection
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ pivot_collection(document& doc);
+ ~pivot_collection();
+
+ /**
+ * Insert a new pivot cache associated with a worksheet source.
+ *
+ * @param sheet_name name of the sheet where the source data is.
+ * @param range range of the source data. Note that the sheet indices are
+ * not used.
+ * @param cache pivot cache instance to store.
+ */
+ void insert_worksheet_cache(
+ std::string_view sheet_name, const ixion::abs_range_t& range, std::unique_ptr<pivot_cache>&& cache);
+
+ /**
+ * Insert a new pivot cache associated with a table name.
+ *
+ * @param table_name source table name.
+ * @param cache pivot cache instance to store.
+ */
+ void insert_worksheet_cache(std::string_view table_name, std::unique_ptr<pivot_cache>&& cache);
+
+ /**
+ * Count the number of pivot caches currently stored.
+ *
+ * @return number of pivot caches currently stored in the document.
+ */
+ size_t get_cache_count() const;
+
+ const pivot_cache* get_cache(
+ std::string_view sheet_name, const ixion::abs_range_t& range) const;
+
+ pivot_cache* get_cache(pivot_cache_id_t cache_id);
+
+ const pivot_cache* get_cache(pivot_cache_id_t cache_id) const;
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/shared_strings.hpp b/include/orcus/spreadsheet/shared_strings.hpp
new file mode 100644
index 0000000..d447cb3
--- /dev/null
+++ b/include/orcus/spreadsheet/shared_strings.hpp
@@ -0,0 +1,77 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_SHARED_STRINGS_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_SHARED_STRINGS_HPP
+
+#include "document_types.hpp"
+
+#include <vector>
+#include <memory>
+#include <string>
+
+namespace ixion { class model_context; }
+
+namespace orcus {
+
+namespace spreadsheet {
+
+/**
+ * This class manages access to a pool of shared string instances for both
+ * unformatted strings and rich-text strings. The underlying string values
+ * themselves are stored externally in the `ixion::model_context` instance
+ * which this class references; this class itself only stores the format
+ * properties of the rich-text strings.
+ */
+class ORCUS_SPM_DLLPUBLIC shared_strings
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ shared_strings() = delete;
+ shared_strings(const shared_strings&) = delete;
+ shared_strings& operator=(const shared_strings&) = delete;
+
+ shared_strings(ixion::model_context& cxt);
+ ~shared_strings();
+
+ /**
+ * Set the entire format runs of a string.
+ *
+ * @param sindex index of the string to associate the format runs with.
+ * @param runs format runs.
+ */
+ void set_format_runs(std::size_t sindex, std::unique_ptr<format_runs_t> runs);
+
+ /**
+ * Get the entire format runs of a string.
+ *
+ * @param index index of the string to get the format runs of.
+ *
+ * @return pointer to the format runs, or @p nullptr if no format runs exist
+ * for the specified string index.
+ */
+ const format_runs_t* get_format_runs(std::size_t index) const;
+
+ /**
+ * Get an underlying string value associated with an index.
+ *
+ * @param index index of a string value.
+ *
+ * @return pointer to a string value associated with the index, or @p
+ * nullptr in case of an invalid string index.
+ */
+ const std::string* get_string(std::size_t index) const;
+
+ void dump(std::ostream& os) const;
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/sheet.hpp b/include/orcus/spreadsheet/sheet.hpp
new file mode 100644
index 0000000..2ea6392
--- /dev/null
+++ b/include/orcus/spreadsheet/sheet.hpp
@@ -0,0 +1,150 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_ODSTABLE_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_ODSTABLE_HPP
+
+#include "../env.hpp"
+#include "types.hpp"
+
+#include <ostream>
+#include <memory>
+
+#include <ixion/address.hpp>
+#include <ixion/formula_tokens.hpp>
+#include <ixion/formula_result.hpp>
+
+namespace orcus {
+
+struct date_time_t;
+
+namespace spreadsheet {
+
+class document;
+struct auto_filter_t;
+
+namespace detail {
+
+struct sheet_impl;
+
+}
+
+/**
+ * This class represents a single sheet instance in the internal document
+ * model.
+ */
+class ORCUS_SPM_DLLPUBLIC sheet
+{
+ friend class document;
+ friend struct detail::sheet_impl;
+
+ static const row_t max_row_limit;
+ static const col_t max_col_limit;
+
+public:
+ sheet(document& doc, sheet_t sheet_index);
+ ~sheet() noexcept;
+
+ void set_auto(row_t row, col_t col, std::string_view s);
+ void set_string(row_t row, col_t col, string_id_t sindex);
+ void set_value(row_t row, col_t col, double value);
+ void set_bool(row_t row, col_t col, bool value);
+ void set_date_time(row_t row, col_t col, int year, int month, int day, int hour, int minute, double second);
+ void set_format(row_t row, col_t col, size_t index);
+ void set_format(row_t row_start, col_t col_start, row_t row_end, col_t col_end, size_t index);
+ void set_column_format(col_t col, col_t col_span, std::size_t index);
+ void set_row_format(row_t row, std::size_t index);
+
+ void set_formula(row_t row, col_t col, const ixion::formula_tokens_store_ptr_t& tokens);
+ void set_formula(row_t row, col_t col, const ixion::formula_tokens_store_ptr_t& tokens, ixion::formula_result result);
+ void set_grouped_formula(const range_t& range, ixion::formula_tokens_t tokens);
+ void set_grouped_formula(const range_t& range, ixion::formula_tokens_t tokens, ixion::formula_result result);
+
+ void set_col_width(col_t col, col_t col_span, col_width_t width);
+
+ /**
+ * Get column width in twips.
+ *
+ * @param col column index
+ * @param col_start pointer to a variable to store the index of the starting
+ * column of the range with the same width. Pass nullptr if
+ * the caller doesn't need this information.
+ * @param col_end pointer to a variable to store the index of the ending
+ * column plus one, of the range with the same width. Pass
+ * nullptr if the caller doesn't need this information.
+ *
+ * @return width of the specified column index (in twips).
+ */
+ col_width_t get_col_width(col_t col, col_t* col_start, col_t* col_end) const;
+
+ void set_col_hidden(col_t col, col_t col_span, bool hidden);
+ bool is_col_hidden(col_t col, col_t* col_start, col_t* col_end) const;
+
+ void set_row_height(row_t row, row_height_t height);
+ row_height_t get_row_height(row_t row, row_t* row_start, row_t* row_end) const;
+
+ void set_row_hidden(row_t row, bool hidden);
+ bool is_row_hidden(row_t row, row_t* row_start, row_t* row_end) const;
+
+ void set_merge_cell_range(const range_t& range);
+
+ void fill_down_cells(row_t src_row, col_t src_col, row_t range_size);
+
+ /**
+ * Return the size of a merged cell range.
+ *
+ * @param row row position of the upper-left cell.
+ * @param col column position of the upper-left cell.
+ *
+ * @return merged cell range.
+ */
+ range_t get_merge_cell_range(row_t row, col_t col) const;
+
+ size_t get_string_identifier(row_t row, col_t col) const;
+
+ auto_filter_t* get_auto_filter_data();
+ const auto_filter_t* get_auto_filter_data() const;
+ void set_auto_filter_data(auto_filter_t* p);
+
+ // Sheet dimension methods
+
+ /**
+ * Return the smallest range that contains all non-empty cells in this
+ * sheet. The top-left corner of the returned range is always column 0 and
+ * row 0.
+ *
+ * @return smallest range that contains all non-empty cells.
+ */
+ ixion::abs_range_t get_data_range() const;
+
+ sheet_t get_index() const;
+
+ date_time_t get_date_time(row_t row, col_t col) const;
+
+ void dump_flat(std::ostream& os) const;
+ void dump_check(std::ostream& os, std::string_view sheet_name) const;
+ void dump_html(std::ostream& os) const;
+ void dump_json(std::ostream& os) const;
+ void dump_csv(std::ostream& os) const;
+
+ void dump_debug_state(const std::string& output_dir, std::string_view sheet_name) const;
+
+ /**
+ * Get the cell format ID of specified cell.
+ */
+ size_t get_cell_format(row_t row, col_t col) const;
+
+private:
+ void finalize_import();
+
+ std::unique_ptr<detail::sheet_impl> mp_impl;
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/styles.hpp b/include/orcus/spreadsheet/styles.hpp
new file mode 100644
index 0000000..5458b1f
--- /dev/null
+++ b/include/orcus/spreadsheet/styles.hpp
@@ -0,0 +1,268 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_STYLES_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_STYLES_HPP
+
+#include "../env.hpp"
+#include "../measurement.hpp"
+#include "document_types.hpp"
+
+#include <memory>
+#include <string_view>
+#include <optional>
+
+namespace orcus { namespace spreadsheet {
+
+class document;
+
+/**
+ * Font style record.
+ */
+struct ORCUS_SPM_DLLPUBLIC font_t
+{
+ std::optional<std::string_view> name;
+ std::optional<std::string_view> name_asian;
+ std::optional<std::string_view> name_complex;
+ std::optional<double> size;
+ std::optional<double> size_asian;
+ std::optional<double> size_complex;
+ std::optional<bool> bold;
+ std::optional<bool> bold_asian;
+ std::optional<bool> bold_complex;
+ std::optional<bool> italic;
+ std::optional<bool> italic_asian;
+ std::optional<bool> italic_complex;
+ std::optional<underline_t> underline_style;
+ std::optional<underline_width_t> underline_width;
+ std::optional<underline_mode_t> underline_mode;
+ std::optional<underline_type_t> underline_type;
+ std::optional<color_t> underline_color;
+ std::optional<color_t> color;
+ std::optional<strikethrough_style_t> strikethrough_style;
+ std::optional<strikethrough_width_t> strikethrough_width;
+ std::optional<strikethrough_type_t> strikethrough_type;
+ std::optional<strikethrough_text_t> strikethrough_text;
+
+ font_t();
+ font_t(const font_t& other);
+ ~font_t();
+
+ font_t& operator=(const font_t& other);
+
+ bool operator==(const font_t& other) const;
+ bool operator!=(const font_t& other) const;
+
+ void reset();
+
+ struct ORCUS_SPM_DLLPUBLIC hash
+ {
+ std::size_t operator()(const font_t& v) const;
+ };
+};
+
+/**
+ * Fill style record.
+ */
+struct ORCUS_SPM_DLLPUBLIC fill_t
+{
+ std::optional<fill_pattern_t> pattern_type;
+ std::optional<color_t> fg_color;
+ std::optional<color_t> bg_color;
+
+ fill_t();
+ void reset();
+};
+
+/**
+ * Attributes for a single border.
+ */
+struct ORCUS_SPM_DLLPUBLIC border_attrs_t
+{
+ std::optional<border_style_t> style;
+ std::optional<color_t> border_color;
+ std::optional<length_t> border_width;
+
+ border_attrs_t();
+ void reset();
+};
+
+/**
+ * Style record for the borders of a single cell.
+ */
+struct ORCUS_SPM_DLLPUBLIC border_t
+{
+ border_attrs_t top;
+ border_attrs_t bottom;
+ border_attrs_t left;
+ border_attrs_t right;
+ border_attrs_t diagonal;
+ border_attrs_t diagonal_bl_tr;
+ border_attrs_t diagonal_tl_br;
+
+ border_t();
+ void reset();
+};
+
+/**
+ * Style record for cell protection attributes.
+ */
+struct ORCUS_SPM_DLLPUBLIC protection_t
+{
+ std::optional<bool> locked;
+ std::optional<bool> hidden;
+ std::optional<bool> print_content;
+ std::optional<bool> formula_hidden;
+
+ protection_t();
+ void reset();
+};
+
+/**
+ * Style record for a number format.
+ */
+struct ORCUS_SPM_DLLPUBLIC number_format_t
+{
+ std::optional<std::size_t> identifier;
+ std::optional<std::string_view> format_string;
+
+ number_format_t();
+ void reset();
+
+ bool operator== (const number_format_t& other) const noexcept;
+ bool operator!= (const number_format_t& other) const noexcept;
+};
+
+/**
+ * Format attributes for a single cell. It references the format entries via
+ * integer indices, with some exceptions.
+ */
+struct ORCUS_SPM_DLLPUBLIC cell_format_t
+{
+ /** ID of a font style record. */
+ std::size_t font;
+ /** ID of a fill style record. */
+ std::size_t fill;
+ /** ID of a border style record. */
+ std::size_t border;
+ /** ID for a cell protection record. */
+ std::size_t protection;
+ /** ID for a number format record. */
+ std::size_t number_format;
+ /** ID for a parent named style. */
+ std::size_t style_xf;
+ /** Horizontal alignment of a cell. */
+ hor_alignment_t hor_align;
+ /** Vertical alignment of a cell. */
+ ver_alignment_t ver_align;
+ /** Flag on whether or not wrap text is enabled. */
+ std::optional<bool> wrap_text;
+ /** Flag on whether or not shrink to fit is enabled. */
+ std::optional<bool> shrink_to_fit;
+ bool apply_num_format:1;
+ bool apply_font:1;
+ bool apply_fill:1;
+ bool apply_border:1;
+ bool apply_alignment:1;
+ bool apply_protection:1;
+
+ cell_format_t();
+ void reset();
+};
+
+/**
+ * Attributes of a named cell style.
+ *
+ * Refer to @ref orcus::spreadsheet::iface::import_cell_style for how the data
+ * members of this struct are used in practice.
+ */
+struct ORCUS_SPM_DLLPUBLIC cell_style_t
+{
+ std::string_view name;
+ std::string_view display_name;
+ std::size_t xf;
+ std::size_t builtin;
+ std::string_view parent_name;
+
+ cell_style_t();
+ void reset();
+};
+
+ORCUS_SPM_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const color_t& c);
+
+/**
+ * Stores various style records so that they can be referenced via integer
+ * indices.
+ */
+class ORCUS_SPM_DLLPUBLIC styles
+{
+ friend class document;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ styles();
+ ~styles();
+
+ void reserve_font_store(size_t n);
+ std::size_t append_font(const font_t& font);
+
+ void reserve_fill_store(size_t n);
+ std::size_t append_fill(const fill_t& fill);
+
+ void reserve_border_store(size_t n);
+ std::size_t append_border(const border_t& border);
+
+ std::size_t append_protection(const protection_t& protection);
+
+ void reserve_number_format_store(size_t n);
+ std::size_t append_number_format(const number_format_t& nf);
+
+ void reserve_cell_style_format_store(size_t n);
+ size_t append_cell_style_format(const cell_format_t& cf);
+
+ void reserve_cell_format_store(size_t n);
+ size_t append_cell_format(const cell_format_t& cf);
+
+ void reserve_diff_cell_format_store(size_t n);
+ size_t append_diff_cell_format(const cell_format_t& cf);
+
+ void reserve_cell_style_store(size_t n);
+ void append_cell_style(const cell_style_t& cs);
+
+ const font_t* get_font(size_t index) const;
+ const fill_t* get_fill(size_t index) const;
+ const border_t* get_border(size_t index) const;
+ const protection_t* get_protection(size_t index) const;
+ const number_format_t* get_number_format(size_t index) const;
+ const cell_format_t* get_cell_format(size_t index) const;
+ const cell_format_t* get_cell_style_format(size_t index) const;
+ const cell_format_t* get_dxf_format(size_t index) const;
+ const cell_style_t* get_cell_style(size_t index) const;
+ const cell_style_t* get_cell_style_by_xf(size_t xfid) const;
+
+ size_t get_font_count() const;
+ size_t get_fill_count() const;
+ size_t get_border_count() const;
+ size_t get_protection_count() const;
+ size_t get_number_format_count() const;
+ size_t get_cell_formats_count() const;
+ size_t get_cell_style_formats_count() const;
+ size_t get_dxf_count() const;
+ size_t get_cell_styles_count() const;
+
+ void clear();
+
+private:
+ void finalize_import();
+};
+
+}}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/types.hpp b/include/orcus/spreadsheet/types.hpp
new file mode 100644
index 0000000..df7b27e
--- /dev/null
+++ b/include/orcus/spreadsheet/types.hpp
@@ -0,0 +1,751 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_SPREADSHEET_TYPES_HPP
+#define ORCUS_SPREADSHEET_TYPES_HPP
+
+#include "../env.hpp"
+
+#include <cstdlib>
+#include <cstdint>
+#include <iosfwd>
+#include <initializer_list>
+#include <string_view>
+#include <vector>
+
+// NB: This header should only define primitive data types, enums and structs.
+
+namespace orcus { namespace spreadsheet {
+
+/** Row ID type. */
+using row_t = int32_t;
+/** Column ID type. */
+using col_t = int32_t;
+/** Sheet ID type. */
+using sheet_t = int32_t;
+/** Individual color element type. */
+using color_elem_t = uint8_t;
+/** Type for column width values. Column width values are stored in twips. */
+using col_width_t = uint16_t;
+/** Type for row height values. Row height values are stored in twips. */
+using row_height_t = uint16_t;
+/** Type for string ID's for string cells. */
+using string_id_t = uint32_t;
+/** Pivot cache ID type. */
+using pivot_cache_id_t = uint32_t;
+
+/**
+ * Get the special column width value that represents the default column
+ * width. The value itself is not to be used as an actual width value.
+ *
+ * @return value that represents the default column width.
+ */
+ORCUS_DLLPUBLIC col_width_t get_default_column_width();
+
+/**
+ * Get the special row height value that represents the default row height.
+ * The value itself is not to be used as an actual row height value.
+ *
+ * @return value that represents the default row height.
+ */
+ORCUS_DLLPUBLIC row_height_t get_default_row_height();
+
+/**
+ * Type of error value in cells.
+ */
+enum class error_value_t
+{
+ /**
+ * Error type unknown, typically used as an initial error value or generic
+ * default value.
+ */
+ unknown = 0,
+ /** Null reference error, displayed as `#NULL!`. */
+ null,
+ /** Division-by-zero error, displayed as `#DIV/0!`. */
+ div0,
+ /** Formula expression error, displayed as `#VALUE!`. */
+ value,
+ /** Reference error, displayed as `#REF!`. */
+ ref,
+ /** Invalid named-expression error, displayed as `#NAME?`. */
+ name,
+ /** Invalid numeric value error, displayed as `#NUM!`. */
+ num,
+ /** No value is available error, displayed as `#N/A!`. */
+ na
+};
+
+/**
+ * Type of border direction, used to reference the position of a border in a
+ * cell.
+ */
+enum class border_direction_t
+{
+ /** Unknown or uninitialized border direction value. */
+ unknown = 0,
+ /** Top border of a cell. */
+ top,
+ /** Bottom border of a cell. */
+ bottom,
+ /** Left border of a cell. */
+ left,
+ /** Right border of a cell. */
+ right,
+ /**
+ * Cross-diagonal borders of a cell. This is equivalent to both
+ * @p diagonal_bl_tr and @p diagonal_tl_br combined.
+ */
+ diagonal,
+ /** Diagonal border of a cell that runs from bottom-left to top-right. */
+ diagonal_bl_tr,
+ /** Diagonal border of a cell that runs from top-left to bottom-right. */
+ diagonal_tl_br
+};
+
+/**
+ * Type of border style.
+ */
+enum class border_style_t
+{
+ unknown = 0,
+ none,
+ solid,
+ dash_dot,
+ dash_dot_dot,
+ dashed,
+ dotted,
+ double_border,
+ hair,
+ medium,
+ medium_dash_dot,
+ medium_dash_dot_dot,
+ medium_dashed,
+ slant_dash_dot,
+ thick,
+ thin,
+ double_thin,
+ fine_dashed
+};
+
+/**
+ * Type of fill pattern for cell background.
+ */
+enum class fill_pattern_t
+{
+ none = 0,
+ solid,
+ dark_down,
+ dark_gray,
+ dark_grid,
+ dark_horizontal,
+ dark_trellis,
+ dark_up,
+ dark_vertical,
+ gray_0625,
+ gray_125,
+ light_down,
+ light_gray,
+ light_grid,
+ light_horizontal,
+ light_trellis,
+ light_up,
+ light_vertical,
+ medium_gray
+};
+
+/**
+ * Strikethrough style as applied to a cell value.
+ *
+ * @note This is specific to ODS format.
+ */
+enum class strikethrough_style_t
+{
+ none = 0,
+ solid,
+ dash,
+ dot_dash,
+ dot_dot_dash,
+ dotted,
+ long_dash,
+ wave
+};
+
+/**
+ * Strikethrough type as applied to a cell value.
+ *
+ * @note This is specific to ODS format.
+ */
+enum class strikethrough_type_t
+{
+ unknown = 0,
+ none,
+ single_type,
+ double_type
+};
+
+/**
+ * Width of strikethrough applied to a cell value.
+ *
+ * @note This is specific to ODS format.
+ */
+enum class strikethrough_width_t
+{
+ unknown = 0,
+ width_auto,
+ thin,
+ medium,
+ thick,
+ bold
+};
+
+/**
+ * Text used for strike-through.
+ *
+ * @note This is specific to ODS format.
+ */
+enum class strikethrough_text_t
+{
+ unknown = 0,
+ /** `/` is used as the text. */
+ slash,
+ /** `X` is used as the text. */
+ cross
+};
+
+/**
+ * Type that specifies the grammar of a formula expression. Each grammar
+ * may exhibit a different set of syntax rules.
+ */
+enum class formula_grammar_t
+{
+ /** Grammar type is either unknown or unspecified. */
+ unknown = 0,
+ /** Grammar used by the Excel 2003 XML (aka XML Spreadsheet) format. */
+ xls_xml,
+ /** Grammar used by the Office Open XML spreadsheet format. */
+ xlsx,
+ /** Grammar used by the OpenDocument Spreadsheet format. */
+ ods,
+ /** Grammar used by the Gnumeric XML format. */
+ gnumeric
+};
+
+/**
+ * Type of formula expression.
+ */
+enum class formula_t
+{
+ /** Formula expression type unknown, or generic default value. */
+ unknown = 0,
+ /** Formula expression in an array of cells. */
+ array,
+ /** Formula expression in a data table. */
+ data_table,
+ /** Formula expression in a normal formula cell. */
+ normal,
+ /** Formula expression in a shared formula cell. */
+ shared
+};
+
+/**
+ * Formula reference context specifies the location where a formula
+ * expression is used. This is used mainly for those document formats that
+ * make use of multiple formula reference syntaxes, such as ODS.
+ */
+enum class formula_ref_context_t
+{
+ /**
+ * Default context, that is, the context that is NOT any of the other
+ * contexts specified below.
+ */
+ global = 0,
+
+ /** Base cell position of either a named range or expression. */
+ named_expression_base,
+
+ /**
+ * Named range is a special case of named expression where the expression
+ * consists of only one range token.
+ */
+ named_range,
+};
+
+/**
+ * Type of policy on how to handle a formula cell with an erroneous expression
+ * that has been parsed unsuccessfully.
+ */
+enum class formula_error_policy_t
+{
+ /** Error policy is either unknown or unspecified. */
+ unknown,
+ /** Loading of the document will be halted. */
+ fail,
+ /** The error cell will be skipped. */
+ skip
+};
+
+/**
+ * Underline type for a cell value.
+ */
+enum class underline_t
+{
+ /** Underline is absent. */
+ none = 0,
+ /** Underline consists of a single line. */
+ single_line,
+ /**
+ * Single line for accounting format.
+ *
+ * @note This is unique to xlsx format.
+ */
+ single_accounting,
+ /** Underline consists of a double line. */
+ double_line,
+ /**
+ * Double line for accounting format.
+ *
+ * @note This is unique to xlsx format.
+ */
+ double_accounting,
+ /** Underline is dotted. */
+ dotted,
+ /** Underline is dashed. */
+ dash,
+ /** Underline consists of repeated long dash segments. */
+ long_dash,
+ /** Underline consists of repeated dot and dash segments. */
+ dot_dash,
+ /** Underline consists of repeated dot, dot and dash segments. */
+ dot_dot_dash,
+ /** Underline is waved. */
+ wave
+};
+
+/**
+ * Underline width types, specific to ODF. When the enum value is either
+ * percent, positive_integer, or positive_length, the actual value should be
+ * given separately.
+ *
+ * @note The automatic enum value corresponds with the "auto" text value,
+ * which could not be used since it's a keyword in C++.
+ */
+enum class underline_width_t
+{
+ none = 0,
+ automatic,
+ bold,
+ dash,
+ medium,
+ thick,
+ thin,
+ percent,
+ positive_integer,
+ positive_length
+};
+
+/**
+ * Underline mode that determines whether an underline is applied to both
+ * words and spaces, or words only.
+ *
+ * @note This is specific to ODS format.
+ */
+enum class underline_mode_t
+{
+ /** Underline is applied to both words and spaces. */
+ continuous = 0,
+ /** Underline is applied only to words. */
+ skip_white_space
+};
+
+/**
+ * Whether a single line or a double line is used as an underline.
+ *
+ * @todo Perhaps we should merge this with underline_t.
+ */
+enum class underline_type_t
+{
+ none = 0,
+ /** A single line is used as an underline. */
+ single_type,
+ /** A double line is used as an underline. */
+ double_type
+};
+
+/**
+ * Type of horizontal alignment applied to a cell content.
+ */
+enum class hor_alignment_t
+{
+ unknown = 0,
+ left,
+ center,
+ right,
+ justified,
+ distributed,
+ filled
+};
+
+/**
+ * Type of vertical alignment applied to a cell content.
+ */
+enum class ver_alignment_t
+{
+ unknown = 0,
+ top,
+ middle,
+ bottom,
+ justified,
+ distributed
+};
+
+/**
+ * Cell format categories. The abbreviation "xf" stands for "cell format"
+ * where the "x" is short for cell.
+ */
+enum class xf_category_t
+{
+ /** Cell format category is either unknown or unspecified. */
+ unknown,
+ /** Direct cell format, also often referenced as xf. */
+ cell,
+ /** Cell format for named styles. */
+ cell_style,
+ /** Incremental cell format, also referenced as dxf. */
+ differential,
+};
+
+/**
+ * Type of data table. A data table can be either of a single-variable
+ * column, a single-variable row, or a double-variable type that uses both
+ * column and row input cells.
+ */
+enum class data_table_type_t
+{
+ column,
+ row,
+ both
+};
+
+/**
+ * Function type used in the totals row of a table.
+ */
+enum class totals_row_function_t
+{
+ none = 0,
+ sum,
+ minimum,
+ maximum,
+ average,
+ count,
+ count_numbers,
+ standard_deviation,
+ variance,
+ custom
+};
+
+/**
+ * Type of conditional format.
+ */
+enum class conditional_format_t
+{
+ unknown = 0,
+ condition,
+ date,
+ formula,
+ colorscale,
+ databar,
+ iconset
+};
+
+/**
+ * Operator type associated with a conditional format rule.
+ */
+enum class condition_operator_t
+{
+ unknown = 0,
+ equal,
+ less,
+ greater,
+ greater_equal,
+ less_equal,
+ not_equal,
+ between,
+ not_between,
+ duplicate,
+ unique,
+ top_n,
+ bottom_n,
+ above_average,
+ below_average,
+ above_equal_average,
+ below_equal_average,
+ contains_error,
+ contains_no_error,
+ begins_with,
+ ends_with,
+ contains,
+ contains_blanks,
+ not_contains,
+ expression
+};
+
+/**
+ * Type of a condition in a conditional format rule. This is applicable only
+ * when the type of a conditional format entry is either:
+ *
+ * @li @p colorscale,
+ * @li @p databar or
+ * @li @p iconset.
+ */
+enum class condition_type_t
+{
+ unknown = 0,
+ value,
+ automatic,
+ max,
+ min,
+ formula,
+ percent,
+ percentile
+};
+
+/**
+ * Type of a date condition when the type of a conditional format entry is
+ * @p date.
+ */
+enum class condition_date_t
+{
+ unknown = 0,
+ today,
+ yesterday,
+ tomorrow,
+ last_7_days,
+ this_week,
+ next_week,
+ last_week,
+ this_month,
+ next_month,
+ last_month,
+ this_year,
+ next_year,
+ last_year,
+};
+
+/**
+ * Databar axis type, applicable only when the type of a conditional format
+ * entry is @p databar.
+ */
+enum class databar_axis_t
+{
+ none = 0,
+ middle,
+ automatic
+};
+
+/**
+ * Type of range grouping in a group field of a pivot table cache.
+ */
+enum class pivot_cache_group_by_t
+{
+ /**
+ * Type of range grouping is unknown.
+ *
+ * This is an implicit default value of this type.
+ */
+ unknown = 0,
+ /** Grouping on "days" for date values. */
+ days,
+ /** Grouping on "hours" for date values. */
+ hours,
+ /** Grouping on "minutes" for date values. */
+ minutes,
+ /** Grouping on "months" for date values. */
+ months,
+ /** Grouping on "quarters" for date values. */
+ quarters,
+ /** Grouping by numeric ranges for numeric values. */
+ range,
+ /** Grouping on "seconds" for date values. */
+ seconds,
+ /** Grouping on "years" for date values. */
+ years
+};
+
+/**
+ * Stores a 2-dimensional cell address.
+ */
+struct address_t
+{
+ row_t row;
+ col_t column;
+};
+
+/**
+ * Stores the size of a range of a spreadsheet.
+ */
+struct range_size_t
+{
+ row_t rows;
+ col_t columns;
+};
+
+/**
+ * Stores a 2-dimensional cell range by storing the positions of the top-left
+ * and bottom-right corners of the range.
+ */
+struct range_t
+{
+ address_t first;
+ address_t last;
+};
+
+/**
+ * Stores 3-dimensional cell address. The 'src' abbreviation stands for
+ * sheet-row-column.
+ */
+struct src_address_t
+{
+ sheet_t sheet;
+ row_t row;
+ col_t column;
+};
+
+/**
+ * Stores 3-dimensional cell range address. The 'src' abbreviation stands for
+ * sheet-row-column.
+ */
+struct src_range_t
+{
+ src_address_t first;
+ src_address_t last;
+};
+
+/**
+ * Convert a 3-dimensional cell address to a 2-dimensional counterpart by
+ * dropping the sheet index.
+ */
+ORCUS_DLLPUBLIC address_t to_rc_address(const src_address_t& r);
+
+/**
+ * Convert a 3-dimensional cell range address to a 2-dimensional counterpart
+ * by dropping the sheet indices.
+ */
+ORCUS_DLLPUBLIC range_t to_rc_range(const src_range_t& r);
+
+ORCUS_DLLPUBLIC bool operator== (const address_t& left, const address_t& right);
+ORCUS_DLLPUBLIC bool operator!= (const address_t& left, const address_t& right);
+
+ORCUS_DLLPUBLIC bool operator== (const src_address_t& left, const src_address_t& right);
+ORCUS_DLLPUBLIC bool operator!= (const src_address_t& left, const src_address_t& right);
+
+ORCUS_DLLPUBLIC bool operator== (const range_t& left, const range_t& right);
+ORCUS_DLLPUBLIC bool operator!= (const range_t& left, const range_t& right);
+
+ORCUS_DLLPUBLIC bool operator== (const src_range_t& left, const src_range_t& right);
+ORCUS_DLLPUBLIC bool operator!= (const src_range_t& left, const src_range_t& right);
+
+ORCUS_DLLPUBLIC bool operator< (const range_t& left, const range_t& right);
+ORCUS_DLLPUBLIC bool operator> (const range_t& left, const range_t& right);
+
+ORCUS_DLLPUBLIC range_t& operator+= (range_t& left, const address_t& right);
+ORCUS_DLLPUBLIC range_t& operator-= (range_t& left, const address_t& right);
+
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const address_t& v);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const src_address_t& v);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const range_t& v);
+
+/**
+ * Stores a color value in RGB format.
+ */
+struct color_rgb_t
+{
+ color_elem_t red;
+ color_elem_t green;
+ color_elem_t blue;
+};
+
+/**
+ * Convert a string representation of a totals row function name to its
+ * equivalent enum value.
+ *
+ * @param s string value for totals row function name.
+ *
+ * @return enum value representing the totals row function.
+ */
+ORCUS_DLLPUBLIC totals_row_function_t to_totals_row_function_enum(std::string_view s);
+
+/**
+ * Convert a string representation of a pivot cache group-by type to its
+ * equivalent enum value.
+ *
+ * @param s string value for pivot cache group-by type.
+ *
+ * @return enum value representing the pivot cache group-by type.
+ */
+ORCUS_DLLPUBLIC pivot_cache_group_by_t to_pivot_cache_group_by_enum(std::string_view s);
+
+/**
+ * Convert a string representation of an error value to its equivalent enum
+ * value.
+ *
+ * @param s error value string.
+ *
+ * @return enum value representing the error value.
+ */
+ORCUS_DLLPUBLIC error_value_t to_error_value_enum(std::string_view s);
+
+/**
+ * Convert a string representation of an RGB value to an equivalent struct
+ * value. The string representation is expected to be a 6 digit hexadecimal
+ * value string that may or may not be prefixed with a '#'.
+ *
+ * @param s string representation of the RGB value.
+ *
+ * @return struct value representing an RGB value.
+ */
+ORCUS_DLLPUBLIC color_rgb_t to_color_rgb(std::string_view s);
+
+/**
+ * Convert a color name to an RGB value. It supports SVG 1.0 color keyword
+ * names minus those gray colors with 'grey' spelling variants. Note that
+ * the name must be all in lowercase.
+ *
+ * @param s color name.
+ *
+ * @return struct value representing an RGB value.
+ */
+ORCUS_DLLPUBLIC color_rgb_t to_color_rgb_from_name(std::string_view s);
+
+/**
+ * Convert a formula error policy name to its enum value equivalent.
+ *
+ * @param s policy name.
+ *
+ * @return enum value equivalent for the original error policy name.
+ */
+ORCUS_DLLPUBLIC formula_error_policy_t to_formula_error_policy(std::string_view s);
+
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, error_value_t ev);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, border_style_t border);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, formula_grammar_t grammar);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, underline_t uline);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, underline_width_t ulwidth);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, underline_mode_t ulmode);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, underline_type_t ultype);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, hor_alignment_t halign);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, ver_alignment_t valign);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const color_rgb_t& color);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const fill_pattern_t& fill);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const strikethrough_style_t& ss);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const strikethrough_type_t& st);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const strikethrough_width_t& sw);
+ORCUS_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const strikethrough_text_t& st);
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/view.hpp b/include/orcus/spreadsheet/view.hpp
new file mode 100644
index 0000000..7b5552f
--- /dev/null
+++ b/include/orcus/spreadsheet/view.hpp
@@ -0,0 +1,65 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_SPREADSHEET_VIEW_HPP
+#define INCLUDED_ORCUS_SPREADSHEET_VIEW_HPP
+
+#include "orcus/env.hpp"
+#include "orcus/spreadsheet/types.hpp"
+#include "orcus/spreadsheet/view_types.hpp"
+
+#include <memory>
+
+namespace orcus { namespace spreadsheet {
+
+class sheet_view;
+class document;
+
+class ORCUS_SPM_DLLPUBLIC view
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+public:
+ view(document& doc);
+ ~view();
+
+ sheet_view* get_or_create_sheet_view(sheet_t sheet);
+ const sheet_view* get_sheet_view(sheet_t sheet) const;
+
+ void set_active_sheet(sheet_t sheet);
+ sheet_t get_active_sheet() const;
+};
+
+class ORCUS_SPM_DLLPUBLIC sheet_view
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+public:
+ sheet_view(view& doc_view);
+ ~sheet_view();
+
+ const range_t& get_selection(sheet_pane_t pos) const;
+
+ void set_selection(sheet_pane_t pos, const range_t& range);
+
+ void set_active_pane(sheet_pane_t pos);
+ sheet_pane_t get_active_pane() const;
+
+ void set_split_pane(double hor_split, double ver_split, const address_t& top_left_cell);
+ const split_pane_t& get_split_pane() const;
+
+ void set_frozen_pane(col_t visible_cols, row_t visible_rows, const address_t& top_left_cell);
+ const frozen_pane_t& get_frozen_pane() const;
+
+ view& get_document_view();
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/spreadsheet/view_types.hpp b/include/orcus/spreadsheet/view_types.hpp
new file mode 100644
index 0000000..ae6e728
--- /dev/null
+++ b/include/orcus/spreadsheet/view_types.hpp
@@ -0,0 +1,95 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef ORCUS_SPREADSHEET_VIEW_TYPES_HPP
+#define ORCUS_SPREADSHEET_VIEW_TYPES_HPP
+
+#include "orcus/spreadsheet/types.hpp"
+
+namespace orcus { namespace spreadsheet {
+
+/**
+ * Sheet pane position in a split sheet view. When the sheet is split, it is
+ * split into four panes.
+ */
+enum class sheet_pane_t : uint8_t
+{
+ unspecified = 0,
+ /** Top-left pane. */
+ top_left,
+ /** Top-right pane. */
+ top_right,
+ /** Bottom-left pane. */
+ bottom_left,
+ /** Bottom-right pane. */
+ bottom_right
+};
+
+/**
+ * State of a split pane - whether it's frozen, split, or both.
+ */
+enum class pane_state_t : uint8_t
+{
+ /** The state of the pane is not specified. */
+ unspecified = 0,
+ /** The pane is frozen. */
+ frozen,
+ /** The pane is split. */
+ split,
+ /** The pane is both frozen and split. */
+ frozen_split
+};
+
+/**
+ * Store information about the state of a split sheet view.
+ */
+struct split_pane_t
+{
+ /**
+ * Horizontal distance to the vertical split bar in 1/20th of a point, or
+ * 0 if not horizontally split.
+ */
+ double hor_split;
+
+ /**
+ * Vertical distance to the horizontal split bar in 1/20th of a point, or
+ * 0 if not vertically split.
+ */
+ double ver_split;
+
+ /**
+ * Top-left visible cell of the bottom-right pane. This value is valid
+ * only when either the horizontal distance or the vertical distance is
+ * non-zero.
+ */
+ address_t top_left_cell;
+};
+
+/**
+ * Store the state of a frozen sheet view.
+ */
+struct frozen_pane_t
+{
+ /**
+ * The number of visible columns in the top-left pane.
+ */
+ col_t visible_columns;
+ /**
+ * The number of visible rows in the top-left pane.
+ */
+ row_t visible_rows;
+ /**
+ * The position of the top-left cell in the bottom-right pane.
+ */
+ address_t top_left_cell;
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/stream.hpp b/include/orcus/stream.hpp
new file mode 100644
index 0000000..dd094bb
--- /dev/null
+++ b/include/orcus/stream.hpp
@@ -0,0 +1,188 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_STREAM_HPP
+#define INCLUDED_ORCUS_STREAM_HPP
+
+#include "env.hpp"
+
+#include <memory>
+#include <string>
+
+namespace orcus {
+
+/**
+ * Represents the content of a file.
+ *
+ * The file content is memory-mapped initially, but may later become in-memory
+ * if the non-utf-8 content gets converted to utf-8.
+ */
+class ORCUS_PSR_DLLPUBLIC file_content
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+public:
+ /** Copying is disabled; an instance can only be moved or swapped. */
+ file_content(const file_content&) = delete;
+ file_content& operator= (const file_content&) = delete;
+
+ file_content();
+ file_content(file_content&& other);
+ /**
+ * Construct an instance from a file path.
+ *
+ * NOTE(review): presumably loads the file right away the same way load()
+ * does - confirm against the implementation.
+ *
+ * @param filepath path of the file to take the content from.
+ */
+ file_content(std::string_view filepath);
+ ~file_content();
+
+ /**
+ * Obtain the memory address to the first character in the content buffer.
+ *
+ * @return pointer to the first character in the buffer.
+ */
+ const char* data() const;
+
+ /**
+ * Return the size of the content i.e. the number of characters in the
+ * content buffer.
+ *
+ * @return size of the content.
+ */
+ size_t size() const;
+
+ /**
+ * Query whether or not the content is empty.
+ *
+ * @return true if the content is empty, otherwise false.
+ */
+ bool empty() const;
+
+ /**
+ * Swap content with another instance.
+ *
+ * @param other another instance to swap content with.
+ */
+ void swap(file_content& other);
+
+ /**
+ * Load from a new file. This will invalidate the pointer returned from the
+ * data() method prior to the call.
+ *
+ * @param filepath path of the file to load from.
+ */
+ void load(std::string_view filepath);
+
+ /**
+ * Convert a non-utf-8 stream to a utf-8 one if the source stream contains
+ * a byte order mark. If not, it does nothing. When the conversion
+ * happens, the converted content will be stored in-memory.
+ */
+ void convert_to_utf8();
+
+ /**
+ * Obtain the content as a string view.
+ *
+ * NOTE(review): presumably a view over the same buffer that data() and
+ * size() describe - confirm against the implementation.
+ */
+ std::string_view str() const;
+};
+
+/**
+ * Represents the content of an in-memory buffer. Note that this class will
+ * NOT own the content of the source buffer but simply will reference it,
+ * except when the original buffer is a non-utf-8 stream and the caller
+ * chooses to convert it to utf-8 by calling its convert_to_utf8() method.
+ */
+class ORCUS_PSR_DLLPUBLIC memory_content
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+public:
+ /**
+ * Copying is disabled; an instance can only be moved or swapped. The
+ * deleted operations must name memory_content itself in order to be the
+ * copy constructor and the copy assignment operator of this class.
+ */
+ memory_content(const memory_content&) = delete;
+ memory_content& operator= (const memory_content&) = delete;
+
+ memory_content();
+ /**
+ * Construct an instance that references an existing buffer. The buffer
+ * is not copied, so it must stay alive while this instance refers to it.
+ *
+ * @param s view of the source buffer to reference.
+ */
+ memory_content(std::string_view s);
+ memory_content(memory_content&& other);
+ ~memory_content();
+
+ // NOTE(review): the accessors below appear to mirror those of
+ // file_content (pointer to the first character, number of characters,
+ // emptiness check) - confirm against the implementation.
+ const char* data() const;
+ size_t size() const;
+ bool empty() const;
+
+ /**
+ * Swap content with another instance.
+ *
+ * @param other another instance to swap content with.
+ */
+ void swap(memory_content& other);
+
+ /**
+ * Convert a non-utf-8 stream to a utf-8 one if the source stream contains
+ * a byte order mark. If not, it does nothing. When the conversion
+ * happens, the converted content will be owned by the object.
+ */
+ void convert_to_utf8();
+
+ /**
+ * Obtain the content as a string view.
+ *
+ * NOTE(review): presumably a view over the same buffer that data() and
+ * size() describe - confirm against the implementation.
+ */
+ std::string_view str() const;
+};
+
+struct ORCUS_PSR_DLLPUBLIC line_with_offset
+{
+ /** content of the entire line. */
+ std::string line;
+ /** 0-based line number. */
+ std::size_t line_number;
+ /** 0-based offset within the line. */
+ std::size_t offset_on_line;
+
+ line_with_offset(std::string _line, std::size_t _line_number, std::size_t _offset_on_line);
+ line_with_offset(const line_with_offset& other);
+ line_with_offset(line_with_offset&& other);
+ ~line_with_offset();
+
+ bool operator== (const line_with_offset& other) const;
+ bool operator!= (const line_with_offset& other) const;
+};
+
+/**
+ * Generate a sensible error output for parse error including the line where
+ * the error occurred and the offset of the error position on that line.
+ *
+ * @param strm entire character stream where the error occurred.
+ * @param offset offset of the error position within the stream.
+ *
+ * @return string formatted to be usable as an error message for stdout.
+ */
+ORCUS_PSR_DLLPUBLIC std::string create_parse_error_output(std::string_view strm, std::ptrdiff_t offset);
+
+/**
+ * Given a string consisting of multiple lines i.e. multiple line breaks,
+ * find the line that contains the specified offset position.
+ *
+ * @param strm string stream containing multiple lines to search.
+ * @param offset offset position.
+ *
+ * @return structure containing information about the line containing the
+ * offset position.
+ *
+ * @exception std::invalid_argument if the offset value equals or exceeds the
+ * length of the string stream being searched.
+ */
+ORCUS_PSR_DLLPUBLIC line_with_offset locate_line_with_offset(std::string_view strm, std::ptrdiff_t offset);
+
+/**
+ * Given two strings, locate the position of the first character that is
+ * different between the two strings. Note that if one of the strings is
+ * empty (or both of them are empty), it returns 0.
+ *
+ * @param left one of the strings to compare.
+ * @param right one of the strings to compare.
+ *
+ * @return position of the first character that is different between the two
+ * compared strings.
+ */
+ORCUS_PSR_DLLPUBLIC size_t locate_first_different_char(std::string_view left, std::string_view right);
+
+/**
+ * Calculate the logical length of a UTF-8 encoded string.
+ *
+ * @param s string to calculate the logical length of.
+ * @return logical length of the UTF-8 encoded string.
+ */
+ORCUS_PSR_DLLPUBLIC std::size_t calc_logical_string_length(std::string_view s);
+
+} // namespace orcus
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/string_pool.hpp b/include/orcus/string_pool.hpp
new file mode 100644
index 0000000..12419bc
--- /dev/null
+++ b/include/orcus/string_pool.hpp
@@ -0,0 +1,99 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_STRING_POOL_HPP
+#define INCLUDED_ORCUS_STRING_POOL_HPP
+
+#include "env.hpp"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace orcus {
+
+/**
+ * This class implements a shared string pool with the ability to merge with
+ * other pools.
+ *
+ * @note This class is not copy-constructible, but is move-constructible.
+ */
+class ORCUS_PSR_DLLPUBLIC string_pool
+{
+public:
+ string_pool(const string_pool&) = delete;
+ string_pool& operator=(const string_pool&) = delete;
+
+ string_pool();
+ string_pool(string_pool&& other);
+ ~string_pool();
+
+ /**
+ * Intern a string.
+ *
+ * @param str string to intern.
+ *
+ * @return pair whose first value is the interned string, and the second
+ * value specifies whether it is a newly created instance (true)
+ * or a reuse of an existing instance (false).
+ */
+ std::pair<std::string_view, bool> intern(std::string_view str);
+
+ /**
+ * Return all interned strings.
+ *
+ * @return sequence of all interned strings. The sequence will be sorted.
+ */
+ std::vector<std::string_view> get_interned_strings() const;
+
+ /**
+ * Dump pool's content to stdout.
+ *
+ * @todo This needs to be reworked to make it more generally usable.
+ */
+ void dump() const;
+
+ /**
+ * Clear pool's content.
+ */
+ void clear();
+
+ /**
+ * Query the total number of strings stored in the pool.
+ *
+ * @return total number of strings in the pool.
+ */
+ size_t size() const;
+
+ /**
+ * Swap the content with another string-pool instance.
+ *
+ *
+ * @param other string-pool instance to swap contents with.
+ */
+ void swap(string_pool& other);
+
+ /**
+ * Merge another string pool instance in. This will not invalidate any
+ * string references to the other pool.
+ *
+ * The other string pool instance will become empty when this call
+ * returns.
+ *
+ * @param other string pool instance to merge in.
+ */
+ void merge(string_pool& other);
+
+private:
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/threaded_json_parser.hpp b/include/orcus/threaded_json_parser.hpp
new file mode 100644
index 0000000..09bddfa
--- /dev/null
+++ b/include/orcus/threaded_json_parser.hpp
@@ -0,0 +1,185 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_THREADED_JSON_PARSER_HPP
+#define INCLUDED_ORCUS_THREADED_JSON_PARSER_HPP
+
+#include "json_parser_thread.hpp"
+#include "json_parser_base.hpp"
+#include "detail/thread.hpp"
+
+#include <algorithm>
+
+namespace orcus {
+
+template<typename _Handler>
+class threaded_json_parser
+{
+public:
+
+ typedef _Handler handler_type;
+
+ /**
+ * Constructor.
+ *
+ * @param p pointer to a string stream containing JSON string.
+ * @param n size of the stream.
+ * @param hdl handler class instance.
+ * @param min_token_size minimum size of the internal token buffer.
+ */
+ threaded_json_parser(
+ const char* p, size_t n, handler_type& hdl, size_t min_token_size);
+
+ /**
+ * Constructor.
+ *
+ * @param p pointer to a string stream containing JSON string.
+ * @param n size of the stream.
+ * @param hdl handler class instance.
+ * @param min_token_size minimum size of the internal token buffer.
+ * @param max_token_size maximum size of the internal token buffer.
+ */
+ threaded_json_parser(
+ const char* p, size_t n, handler_type& hdl, size_t min_token_size,
+ size_t max_token_size);
+
+ /**
+ * Call this method to start parsing.
+ */
+ void parse();
+
+ /**
+ * Get statistics on the parsing session. Call this only after the
+ * parsing has finished.
+ *
+ * @return structure containing statistics of the parsing session.
+ */
+ json::parser_stats get_stats() const;
+
+ void swap_string_pool(string_pool& pool);
+
+private:
+ void thread_parse();
+
+ void process_tokens(json::parse_tokens_t& tokens);
+
+private:
+ json::parser_thread m_parser_thread;
+ handler_type& m_handler;
+};
+
+template<typename _Handler>
+threaded_json_parser<_Handler>::threaded_json_parser(
+ const char* p, size_t n, handler_type& hdl, size_t min_token_size) :
+ m_parser_thread(p, n, min_token_size), m_handler(hdl) {}
+
+template<typename _Handler>
+threaded_json_parser<_Handler>::threaded_json_parser(
+ const char* p, size_t n, handler_type& hdl, size_t min_token_size, size_t max_token_size) :
+ m_parser_thread(p, n, min_token_size, max_token_size), m_handler(hdl) {}
+
+template<typename _Handler>
+void threaded_json_parser<_Handler>::parse()
+{
+ std::thread t(&threaded_json_parser::thread_parse, this);
+ detail::thread::scoped_guard guard(std::move(t));
+
+ json::parse_tokens_t tokens;
+
+ while (m_parser_thread.next_tokens(tokens))
+ process_tokens(tokens);
+
+ process_tokens(tokens);
+}
+
+template<typename _Handler>
+json::parser_stats threaded_json_parser<_Handler>::get_stats() const
+{
+ return m_parser_thread.get_stats();
+}
+
+template<typename _Handler>
+void threaded_json_parser<_Handler>::swap_string_pool(string_pool& pool)
+{
+ m_parser_thread.swap_string_pool(pool);
+}
+
+template<typename _Handler>
+void threaded_json_parser<_Handler>::thread_parse()
+{
+ // Start parsing.
+ m_parser_thread.start();
+}
+
+template<typename _Handler>
+void threaded_json_parser<_Handler>::process_tokens(json::parse_tokens_t& tokens)
+{
+ std::for_each(tokens.begin(), tokens.end(),
+ [this](const json::parse_token& t)
+ {
+ switch (t.type)
+ {
+ case json::parse_token_t::begin_array:
+ m_handler.begin_array();
+ break;
+ case json::parse_token_t::begin_object:
+ m_handler.begin_object();
+ break;
+ case json::parse_token_t::begin_parse:
+ m_handler.begin_parse();
+ break;
+ case json::parse_token_t::boolean_false:
+ m_handler.boolean_false();
+ break;
+ case json::parse_token_t::boolean_true:
+ m_handler.boolean_true();
+ break;
+ case json::parse_token_t::end_array:
+ m_handler.end_array();
+ break;
+ case json::parse_token_t::end_object:
+ m_handler.end_object();
+ break;
+ case json::parse_token_t::end_parse:
+ m_handler.end_parse();
+ break;
+ case json::parse_token_t::null:
+ m_handler.null();
+ break;
+ case json::parse_token_t::number:
+ m_handler.number(std::get<double>(t.value));
+ break;
+ case json::parse_token_t::object_key:
+ {
+ auto s = std::get<std::string_view>(t.value);
+ m_handler.object_key(s.data(), s.size(), false);
+ break;
+ }
+ case json::parse_token_t::string:
+ {
+ auto s = std::get<std::string_view>(t.value);
+ m_handler.string(s.data(), s.size(), false);
+ break;
+ }
+ case json::parse_token_t::parse_error:
+ {
+ auto v = std::get<parse_error_value_t>(t.value);
+ throw parse_error(std::string{v.str}, v.offset);
+ }
+ case json::parse_token_t::unknown:
+ default:
+ throw general_error("unknown token type encountered.");
+ }
+ }
+ );
+}
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/threaded_sax_token_parser.hpp b/include/orcus/threaded_sax_token_parser.hpp
new file mode 100644
index 0000000..aa9019f
--- /dev/null
+++ b/include/orcus/threaded_sax_token_parser.hpp
@@ -0,0 +1,165 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_THREADED_SAX_TOKEN_PARSER_HPP
+#define INCLUDED_ORCUS_THREADED_SAX_TOKEN_PARSER_HPP
+
+#include "tokens.hpp"
+#include "xml_namespace.hpp"
+#include "sax_token_parser_thread.hpp"
+#include "sax_parser_base.hpp"
+#include "exception.hpp"
+#include "detail/thread.hpp"
+
+#include <thread>
+
+namespace orcus {
+
+class xmlns_context;
+class string_pool;
+
+template<typename _Handler>
+class threaded_sax_token_parser
+{
+public:
+
+ typedef _Handler handler_type;
+
+ /**
+ * Constructor.
+ *
+ * @param p pointer to a string stream containing XML content.
+ * @param n size of the stream.
+ * @param tks XML token map instance.
+ * @param ns_cxt namespace context instance.
+ * @param hdl handler class instance.
+ * @param min_token_size minimum size of the internal token buffer.
+ */
+ threaded_sax_token_parser(
+ const char* p, size_t n, const tokens& tks, xmlns_context& ns_cxt,
+ handler_type& hdl, size_t min_token_size);
+
+ /**
+ * Constructor.
+ *
+ * @param p pointer to a string stream containing XML content.
+ * @param n size of the stream.
+ * @param tks XML token map instance.
+ * @param ns_cxt namespace context instance.
+ * @param hdl handler class instance.
+ * @param min_token_size minimum size of the internal token buffer.
+ * @param max_token_size maximum size of the internal token buffer.
+ */
+ threaded_sax_token_parser(
+ const char* p, size_t n, const tokens& tks, xmlns_context& ns_cxt,
+ handler_type& hdl, size_t min_token_size, size_t max_token_size);
+
+ /**
+ * Call this method to start parsing.
+ */
+ void parse();
+
+ void swap_string_pool(string_pool& pool);
+
+private:
+ void thread_parse();
+
+ void process_tokens(const sax::parse_tokens_t& tokens);
+
+private:
+ sax::parser_thread m_parser_thread;
+ handler_type& m_handler;
+};
+
+template<typename _Handler>
+threaded_sax_token_parser<_Handler>::threaded_sax_token_parser(
+ const char* p, size_t n, const tokens& tks, xmlns_context& ns_cxt,
+ handler_type& hdl, size_t min_token_size) :
+ m_parser_thread(p, n, tks, ns_cxt, min_token_size), m_handler(hdl) {}
+
+template<typename _Handler>
+threaded_sax_token_parser<_Handler>::threaded_sax_token_parser(
+ const char* p, size_t n, const tokens& tks, xmlns_context& ns_cxt, handler_type& hdl,
+ size_t min_token_size, size_t max_token_size) :
+ m_parser_thread(p, n, tks, ns_cxt, min_token_size, max_token_size), m_handler(hdl) {}
+
+template<typename _Handler>
+void threaded_sax_token_parser<_Handler>::parse()
+{
+ std::thread t(&threaded_sax_token_parser::thread_parse, this);
+ detail::thread::scoped_guard guard(std::move(t));
+
+ sax::parse_tokens_t tokens;
+
+ try
+ {
+ while (m_parser_thread.next_tokens(tokens))
+ process_tokens(tokens);
+
+ process_tokens(tokens);
+ }
+ catch (const std::exception&)
+ {
+ m_parser_thread.abort();
+ throw;
+ }
+}
+
+template<typename _Handler>
+void threaded_sax_token_parser<_Handler>::swap_string_pool(string_pool& pool)
+{
+ m_parser_thread.swap_string_pool(pool);
+}
+
+template<typename _Handler>
+void threaded_sax_token_parser<_Handler>::thread_parse()
+{
+ // Start parsing.
+ m_parser_thread.start();
+}
+
+template<typename _Handler>
+void threaded_sax_token_parser<_Handler>::process_tokens(const sax::parse_tokens_t& tks)
+{
+ for (const sax::parse_token& t : tks)
+ {
+ switch (t.type)
+ {
+ case sax::parse_token_t::start_element:
+ {
+ const auto* elem = std::get<const xml_token_element_t*>(t.value);
+ m_handler.start_element(*elem);
+ break;
+ }
+ case sax::parse_token_t::end_element:
+ {
+ const auto* elem = std::get<const xml_token_element_t*>(t.value);
+ m_handler.end_element(*elem);
+ break;
+ }
+ case sax::parse_token_t::characters:
+ {
+ auto s = std::get<std::string_view>(t.value);
+ m_handler.characters(s, false);
+ break;
+ }
+ case sax::parse_token_t::parse_error:
+ {
+ auto v = std::get<parse_error_value_t>(t.value);
+ throw malformed_xml_error(std::string{v.str}, v.offset);
+ }
+ default:
+ throw general_error("unknown token type encountered.");
+ }
+ }
+}
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/tokens.hpp b/include/orcus/tokens.hpp
new file mode 100644
index 0000000..9edc877
--- /dev/null
+++ b/include/orcus/tokens.hpp
@@ -0,0 +1,74 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_TOKENS_HPP
+#define INCLUDED_ORCUS_TOKENS_HPP
+
+#include "types.hpp"
+
+#include <algorithm>
+#include <unordered_map>
+
+namespace orcus {
+
+/**
+ * XML token store that provides mapping of integral token identifiers and
+ * their original names. Instances of this class are typically used as global
+ * constants.
+ *
+ * @note The string values for the original token names should be static
+ * values whose values and memory addresses remain unchanged during the
+ * life cycle of the instance that references them.
+ *
+ * @note This class is not copy-constructible.
+ */
+class ORCUS_PSR_DLLPUBLIC tokens
+{
+public:
+ tokens() = delete;
+ tokens(const tokens&) = delete;
+ tokens(const char** token_names, size_t token_name_count);
+ ~tokens();
+
+ /**
+ * Check if a token returned from get_token() method is valid.
+ *
+ * @return true if valid, false otherwise.
+ */
+ bool is_valid_token(xml_token_t token) const;
+
+ /**
+ * Get token from a specified name.
+ *
+ * @param name textual token name
+ *
+ * @return token value representing the given textual token.
+ */
+ xml_token_t get_token(std::string_view name) const;
+
+ /**
+ * Get textual token name from a token value.
+ *
+ * @param token numeric token value
+ *
+ * @return textual token name, or empty string in case the given token is
+ * not valid.
+ */
+ std::string_view get_token_name(xml_token_t token) const;
+
+private:
+ using token_map_type = std::unordered_map<std::string_view, xml_token_t>;
+
+ token_map_type m_tokens;
+ const char** m_token_names;
+ size_t m_token_name_count;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/types.hpp b/include/orcus/types.hpp
new file mode 100644
index 0000000..34c968a
--- /dev/null
+++ b/include/orcus/types.hpp
@@ -0,0 +1,634 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_TYPES_HPP
+#define INCLUDED_ORCUS_TYPES_HPP
+
+#include <cstdint>
+#include <cstdlib>
+#include <vector>
+#include <string>
+#include <unordered_set>
+#include "env.hpp"
+
+namespace orcus {
+
+class xmlns_context;
+class xmlns_repository;
+
+/**
+ * Integral type that represents a tokenized XML element name.
+ */
+using xml_token_t = std::size_t;
+
+/**
+ * Type that represents a normalized XML namespace identifier. Internally it
+ * is a pointer value that points to a static char buffer that stores a
+ * namespace name.
+ */
+using xmlns_id_t = const char*;
+
+/**
+ * Parser token that represents the state of a parse error, used by
+ * threaded_json_parser and threaded_sax_token_parser when transferring
+ * parse status between threads.
+ */
+struct ORCUS_PSR_DLLPUBLIC parse_error_value_t
+{
+ /** error message associated with the parse error. */
+ std::string_view str;
+ /** offset in stream where the error occurred. */
+ std::ptrdiff_t offset;
+
+ parse_error_value_t();
+ parse_error_value_t(const parse_error_value_t& other);
+ parse_error_value_t(std::string_view _str, std::ptrdiff_t _offset);
+
+ parse_error_value_t& operator=(const parse_error_value_t& other);
+
+ bool operator==(const parse_error_value_t& other) const;
+ bool operator!=(const parse_error_value_t& other) const;
+};
+
+/**
+ * Represents a name with a normalized namespace in XML documents. This can
+ * be used either as an element name or as an attribute name.
+ */
+struct ORCUS_PSR_DLLPUBLIC xml_name_t
+{
+ enum to_string_type { use_alias, use_short_name };
+
+ xmlns_id_t ns;
+ std::string_view name;
+
+ xml_name_t() noexcept;
+ xml_name_t(xmlns_id_t _ns, std::string_view _name);
+ xml_name_t(const xml_name_t& other);
+
+ xml_name_t& operator= (const xml_name_t& other);
+
+ bool operator== (const xml_name_t& other) const noexcept;
+ bool operator!= (const xml_name_t& other) const noexcept;
+
+ /**
+ * Convert a namespace-name value pair to a string representation with the
+ * namespace value converted to either an alias or a unique "short name".
+ * Refer to @link xmlns_context::get_alias() get_alias() @endlink and
+ * @link xmlns_context::get_short_name() get_short_name() @endlink
+ * for the explanations of an alias and short name.
+ *
+ * @param cxt namespace context object associated with the XML stream
+ * currently being parsed.
+ * @param type policy on how to convert a namespace identifier to a string
+ * representation.
+ *
+ * @return string representation of a namespace-name value pair.
+ */
+ std::string to_string(const xmlns_context& cxt, to_string_type type) const;
+
+ /**
+ * Convert a namespace-name value pair to a string representation with the
+ * namespace value converted to a unique "short name". Refer to @link
+ * xmlns_repository::get_short_name() get_short_name() @endlink for the
+ * explanations of a short name.
+ *
+ * @param repo namespace repository.
+ *
+ * @return string representation of a namespace-name value pair.
+ */
+ std::string to_string(const xmlns_repository& repo) const;
+};
+
+/**
+ * Struct containing properties of a tokenized XML attribute.
+ */
+struct ORCUS_PSR_DLLPUBLIC xml_token_attr_t
+{
+ xmlns_id_t ns;
+ xml_token_t name;
+ std::string_view raw_name;
+ std::string_view value;
+
+ /**
+ * Whether or not the attribute value is transient. A transient value is
+ * only guaranteed to be valid until the end of the start_element call,
+ * after which its validity is not guaranteed. A non-transient value is
+ * guaranteed to be valid during the life cycle of the xml stream it
+ * belongs to.
+ */
+ bool transient;
+
+ xml_token_attr_t();
+ xml_token_attr_t(const xml_token_attr_t& other);
+ xml_token_attr_t(
+ xmlns_id_t _ns, xml_token_t _name, std::string_view _value, bool _transient);
+ xml_token_attr_t(
+ xmlns_id_t _ns, xml_token_t _name, std::string_view _raw_name,
+ std::string_view _value, bool _transient);
+
+ xml_token_attr_t& operator=(const xml_token_attr_t& other);
+};
+
+using xml_token_attrs_t = std::vector<xml_token_attr_t>;
+
+/**
+ * Struct containing XML element properties passed to the handler of
+ * sax_token_parser via its @p start_element() and @p end_element()
+ * calls.
+ *
+ * @see
+ * @li sax_token_handler::start_element
+ * @li sax_token_handler::end_element
+ */
+struct ORCUS_PSR_DLLPUBLIC xml_token_element_t
+{
+ xmlns_id_t ns;
+ xml_token_t name;
+ std::string_view raw_name;
+ xml_token_attrs_t attrs;
+
+ xml_token_element_t& operator= (xml_token_element_t) = delete;
+
+ xml_token_element_t();
+ xml_token_element_t(xmlns_id_t _ns, xml_token_t _name, std::string_view _raw_name, std::vector<xml_token_attr_t>&& _attrs);
+ xml_token_element_t(const xml_token_element_t& other);
+ xml_token_element_t(xml_token_element_t&& other);
+};
+
+/**
+ * Character set types, generated from IANA character-sets specifications.
+ *
+ * @see https://www.iana.org/assignments/character-sets/character-sets.xhtml
+ */
+enum class character_set_t
+{
+ unspecified = 0,
+ adobe_standard_encoding,
+ adobe_symbol_encoding,
+ amiga_1251,
+ ansi_x3_110_1983,
+ asmo_449,
+ big5,
+ big5_hkscs,
+ bocu_1,
+ brf,
+ bs_4730,
+ bs_viewdata,
+ cesu_8,
+ cp50220,
+ cp51932,
+ csa_z243_4_1985_1,
+ csa_z243_4_1985_2,
+ csa_z243_4_1985_gr,
+ csn_369103,
+ dec_mcs,
+ din_66003,
+ dk_us,
+ ds_2089,
+ ebcdic_at_de,
+ ebcdic_at_de_a,
+ ebcdic_ca_fr,
+ ebcdic_dk_no,
+ ebcdic_dk_no_a,
+ ebcdic_es,
+ ebcdic_es_a,
+ ebcdic_es_s,
+ ebcdic_fi_se,
+ ebcdic_fi_se_a,
+ ebcdic_fr,
+ ebcdic_it,
+ ebcdic_pt,
+ ebcdic_uk,
+ ebcdic_us,
+ ecma_cyrillic,
+ es,
+ es2,
+ euc_jp,
+ euc_kr,
+ extended_unix_code_fixed_width_for_japanese,
+ gb18030,
+ gb2312,
+ gb_1988_80,
+ gb_2312_80,
+ gbk,
+ gost_19768_74,
+ greek7,
+ greek7_old,
+ greek_ccitt,
+ hp_desktop,
+ hp_legal,
+ hp_math8,
+ hp_pi_font,
+ hp_roman8,
+ hz_gb_2312,
+ ibm00858,
+ ibm00924,
+ ibm01140,
+ ibm01141,
+ ibm01142,
+ ibm01143,
+ ibm01144,
+ ibm01145,
+ ibm01146,
+ ibm01147,
+ ibm01148,
+ ibm01149,
+ ibm037,
+ ibm038,
+ ibm1026,
+ ibm1047,
+ ibm273,
+ ibm274,
+ ibm275,
+ ibm277,
+ ibm278,
+ ibm280,
+ ibm281,
+ ibm284,
+ ibm285,
+ ibm290,
+ ibm297,
+ ibm420,
+ ibm423,
+ ibm424,
+ ibm437,
+ ibm500,
+ ibm775,
+ ibm850,
+ ibm851,
+ ibm852,
+ ibm855,
+ ibm857,
+ ibm860,
+ ibm861,
+ ibm862,
+ ibm863,
+ ibm864,
+ ibm865,
+ ibm866,
+ ibm868,
+ ibm869,
+ ibm870,
+ ibm871,
+ ibm880,
+ ibm891,
+ ibm903,
+ ibm904,
+ ibm905,
+ ibm918,
+ ibm_symbols,
+ ibm_thai,
+ iec_p27_1,
+ inis,
+ inis_8,
+ inis_cyrillic,
+ invariant,
+ iso_10367_box,
+ iso_10646_j_1,
+ iso_10646_ucs_2,
+ iso_10646_ucs_4,
+ iso_10646_ucs_basic,
+ iso_10646_unicode_latin1,
+ iso_10646_utf_1,
+ iso_11548_1,
+ iso_2022_cn,
+ iso_2022_cn_ext,
+ iso_2022_jp,
+ iso_2022_jp_2,
+ iso_2022_kr,
+ iso_2033_1983,
+ iso_5427,
+ iso_5427_1981,
+ iso_5428_1980,
+ iso_646_basic_1983,
+ iso_646_irv_1983,
+ iso_6937_2_25,
+ iso_6937_2_add,
+ iso_8859_1,
+ iso_8859_10,
+ iso_8859_13,
+ iso_8859_14,
+ iso_8859_15,
+ iso_8859_16,
+ iso_8859_1_windows_3_0_latin_1,
+ iso_8859_1_windows_3_1_latin_1,
+ iso_8859_2,
+ iso_8859_2_windows_latin_2,
+ iso_8859_3,
+ iso_8859_4,
+ iso_8859_5,
+ iso_8859_6,
+ iso_8859_6_e,
+ iso_8859_6_i,
+ iso_8859_7,
+ iso_8859_8,
+ iso_8859_8_e,
+ iso_8859_8_i,
+ iso_8859_9,
+ iso_8859_9_windows_latin_5,
+ iso_8859_supp,
+ iso_ir_90,
+ iso_unicode_ibm_1261,
+ iso_unicode_ibm_1264,
+ iso_unicode_ibm_1265,
+ iso_unicode_ibm_1268,
+ iso_unicode_ibm_1276,
+ it,
+ jis_c6220_1969_jp,
+ jis_c6220_1969_ro,
+ jis_c6226_1978,
+ jis_c6226_1983,
+ jis_c6229_1984_a,
+ jis_c6229_1984_b,
+ jis_c6229_1984_b_add,
+ jis_c6229_1984_hand,
+ jis_c6229_1984_hand_add,
+ jis_c6229_1984_kana,
+ jis_encoding,
+ jis_x0201,
+ jis_x0212_1990,
+ jus_i_b1_002,
+ jus_i_b1_003_mac,
+ jus_i_b1_003_serb,
+ koi7_switched,
+ koi8_r,
+ koi8_u,
+ ks_c_5601_1987,
+ ksc5636,
+ kz_1048,
+ latin_greek,
+ latin_greek_1,
+ latin_lap,
+ macintosh,
+ microsoft_publishing,
+ mnem,
+ mnemonic,
+ msz_7795_3,
+ nats_dano,
+ nats_dano_add,
+ nats_sefi,
+ nats_sefi_add,
+ nc_nc00_10_81,
+ nf_z_62_010,
+ nf_z_62_010_1973,
+ ns_4551_1,
+ ns_4551_2,
+ osd_ebcdic_df03_irv,
+ osd_ebcdic_df04_1,
+ osd_ebcdic_df04_15,
+ pc8_danish_norwegian,
+ pc8_turkish,
+ pt,
+ pt2,
+ ptcp154,
+ scsu,
+ sen_850200_b,
+ sen_850200_c,
+ shift_jis,
+ t_101_g2,
+ t_61_7bit,
+ t_61_8bit,
+ tis_620,
+ tscii,
+ unicode_1_1,
+ unicode_1_1_utf_7,
+ unknown_8bit,
+ us_ascii,
+ us_dk,
+ utf_16,
+ utf_16be,
+ utf_16le,
+ utf_32,
+ utf_32be,
+ utf_32le,
+ utf_7,
+ utf_7_imap,
+ utf_8,
+ ventura_international,
+ ventura_math,
+ ventura_us,
+ videotex_suppl,
+ viqr,
+ viscii,
+ windows_1250,
+ windows_1251,
+ windows_1252,
+ windows_1253,
+ windows_1254,
+ windows_1255,
+ windows_1256,
+ windows_1257,
+ windows_1258,
+ windows_31j,
+ windows_874,
+};
+
+/**
+ * Struct holding XML declaration properties.
+ */
+struct ORCUS_PSR_DLLPUBLIC xml_declaration_t
+{
+ uint8_t version_major;
+ uint8_t version_minor;
+ character_set_t encoding;
+ bool standalone;
+
+ xml_declaration_t();
+ xml_declaration_t(uint8_t _version_major, uint8_t _version_minor, character_set_t _encoding, bool _standalone);
+ xml_declaration_t(const xml_declaration_t& other);
+ ~xml_declaration_t();
+
+ xml_declaration_t& operator= (const xml_declaration_t& other);
+
+ bool operator== (const xml_declaration_t& other) const;
+ bool operator!= (const xml_declaration_t& other) const;
+};
+
+/**
+ * Unit of length, as used in length_t.
+ */
+enum class length_unit_t
+{
+ unknown = 0,
+ centimeter,
+ millimeter,
+ /**
+ * Special unit of length used by Excel, defined as the maximum digit width
+ * of font used as the "Normal" style font.
+ *
+ * @note Since it's not possible to determine the actual length using this
+ * unit, it is approximated by 1.9 millimeters.
+ */
+ xlsx_column_digit,
+ inch,
+ point,
+ /** One twip is a twentieth of a point, i.e. 1/1440 of an inch. */
+ twip,
+ pixel
+};
+
+/**
+ * Input formats that orcus can import.
+ */
+enum class format_t
+{
+ unknown = 0,
+ ods,
+ xlsx,
+ gnumeric,
+ xls_xml,
+ csv,
+ parquet
+};
+
+/**
+ * Formats supported by orcus as output formats.
+ */
+enum class dump_format_t
+{
+ unknown = 0,
+ none,
+ check,
+ csv,
+ flat,
+ html,
+ json,
+ xml,
+ yaml,
+ debug_state
+};
+
+/**
+ * Holds a length value with unit of measurement.
+ */
+struct ORCUS_PSR_DLLPUBLIC length_t
+{
+ length_unit_t unit;
+ double value;
+
+ length_t();
+ length_t(length_unit_t _unit, double _value);
+ length_t(const length_t& other);
+ length_t& operator= (const length_t& other);
+
+ std::string to_string() const;
+
+ bool operator== (const length_t& other) const noexcept;
+ bool operator!= (const length_t& other) const noexcept;
+};
+
+/**
+ * Struct that holds a date or date-time value.
+ */
+struct ORCUS_PSR_DLLPUBLIC date_time_t
+{
+ int year;
+ int month;
+ int day;
+ int hour;
+ int minute;
+ double second;
+
+ date_time_t();
+ date_time_t(int _year, int _month, int _day);
+ date_time_t(int _year, int _month, int _day, int _hour, int _minute, double _second);
+ date_time_t(const date_time_t& other);
+ ~date_time_t();
+
+ date_time_t& operator= (date_time_t other);
+
+ bool operator== (const date_time_t& other) const;
+ bool operator!= (const date_time_t& other) const;
+ bool operator< (const date_time_t& other) const;
+
+ /**
+ * Convert the date-time value to an ISO-formatted string representation.
+ *
+ * @return ISO-formatted string representation of the date-time value.
+ */
+ std::string to_string() const;
+
+ /**
+ * Swap the value with another instance.
+ *
+ * @param other another instance to swap values with.
+ */
+ void swap(date_time_t& other);
+
+ /**
+ * Parse an ISO-formatted string representation of a date-time value, and
+ * convert it into a date_time_t value. A string representation allows
+ * either a date only or a date and time value, but it does not allow a time
+ * only value.
+ *
+ * Here are some examples of ISO-formatted date and date-time values:
+ *
+ * @li <b>2013-04-09</b> (date only)
+ * @li <b>2013-04-09T21:34:09.55</b> (date and time)
+ *
+ * @param str string representation of a date-time value.
+ * @return converted date-time value consisting of a set of numeric values.
+ */
+ static date_time_t from_chars(std::string_view str);
+};
+
+/**
+ * Parse a string that represents an output format type and convert it to a
+ * corresponding enum value.
+ *
+ * @param s string representing an output format type.
+ *
+ * @return enum value representing an output format type, or
+ * dump_format_t::unknown in case it cannot be
+ * determined.
+ */
+ORCUS_PSR_DLLPUBLIC dump_format_t to_dump_format_enum(std::string_view s);
+
+/**
+ * Parse a string that represents a character set and convert it to a
+ * corresponding enum value.
+ *
+ * @param s string representing a character set.
+ *
+ * @return enum value representing a character set, or
+ * character_set_t::unspecified in case it cannot be
+ * determined.
+ */
+ORCUS_PSR_DLLPUBLIC character_set_t to_character_set(std::string_view s);
+
+/**
+ * Get a list of available output format entries. Each entry consists of the
+ * name of a format and its enum value equivalent.
+ *
+ * @return list of available output format entries.
+ */
+ORCUS_PSR_DLLPUBLIC std::vector<std::pair<std::string_view, dump_format_t>> get_dump_format_entries();
+
+ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const length_t& v);
+ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, const date_time_t& v);
+ORCUS_PSR_DLLPUBLIC std::ostream& operator<< (std::ostream& os, format_t v);
+
+/**
+ * Generic constant to be used to indicate that a valid index value is
+ * expected but not found.
+ */
+ORCUS_PSR_DLLPUBLIC extern const std::size_t INDEX_NOT_FOUND;
+
+/**
+ * Value associated with an unknown XML namespace.
+ */
+ORCUS_PSR_DLLPUBLIC extern const xmlns_id_t XMLNS_UNKNOWN_ID;
+
+/**
+ * Value associated with an unknown XML token.
+ */
+ORCUS_PSR_DLLPUBLIC extern const xml_token_t XML_UNKNOWN_TOKEN;
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/xml_namespace.hpp b/include/orcus/xml_namespace.hpp
new file mode 100644
index 0000000..cf9b270
--- /dev/null
+++ b/include/orcus/xml_namespace.hpp
@@ -0,0 +1,195 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_XML_NAMESPACE_MANAGER_HPP
+#define INCLUDED_ORCUS_XML_NAMESPACE_MANAGER_HPP
+
+#include "types.hpp"
+
+#include <ostream>
+#include <memory>
+
+namespace orcus {
+
+class xmlns_context;
+struct xmlns_repository_impl;
+struct xmlns_context_impl;
+
+/**
+ * Central XML namespace repository that stores all namespaces that are used
+ * in the current session.
+ *
+ * @warning this class is not copyable, but is movable; however, the
+ * moved-from object will not be usable after the move.
+ */
+class ORCUS_PSR_DLLPUBLIC xmlns_repository
+{
+ friend class xmlns_context;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ xmlns_id_t intern(std::string_view uri);
+
+ size_t get_index(xmlns_id_t ns_id) const;
+
+public:
+ xmlns_repository(const xmlns_repository&) = delete;
+ xmlns_repository& operator= (const xmlns_repository&) = delete;
+
+ xmlns_repository();
+ xmlns_repository(xmlns_repository&& other);
+ ~xmlns_repository();
+
+ xmlns_repository& operator= (xmlns_repository&&);
+
+ /**
+ * Add a set of predefined namespace values to the repository.
+ *
+ * @param predefined_ns predefined set of namespace values. This is a
+ * null-terminated array of xmlns_id_t. This
+ * xmlns_repository instance will assume that the
+ * instances of these xmlns_id_t values will be
+ * available throughout its life cycle; caller needs
+ * to ensure that they won't get deleted before the
+ * corresponding xmlns_repository instance is
+ * deleted.
+ */
+ void add_predefined_values(const xmlns_id_t* predefined_ns);
+
+ /**
+ * Create a context object associated with this namespace repository.
+ *
+ * @warning Since this context object references values stored in the repo,
+ * make sure that it will not out-live the repository object
+ * itself.
+ *
+ * @return context object to use for a new XML stream.
+ */
+ xmlns_context create_context();
+
+ /**
+ * Get XML namespace identifier from its numerical index.
+ *
+ * @param index numeric index of namespace.
+ *
+ * @return valid namespace identifier, or XMLNS_UNKNOWN_ID if not found.
+ */
+ xmlns_id_t get_identifier(size_t index) const;
+
+ /**
+ * See xmlns_context::get_short_name() for the explanation of this method,
+ * which works identically to it.
+ */
+ std::string get_short_name(xmlns_id_t ns_id) const;
+};
+
+/**
+ * XML namespace context. A new context should be used for each xml stream
+ * since the namespace keys themselves are not interned. Don't hold an
+ * instance of this class any longer than the life cycle of the xml stream
+ * it is used in.
+ *
+ * An empty key value i.e. `""` is associated with a default namespace.
+ */
+class ORCUS_PSR_DLLPUBLIC xmlns_context
+{
+ friend class xmlns_repository;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ xmlns_context(xmlns_repository& repo);
+public:
+ xmlns_context();
+ xmlns_context(xmlns_context&&);
+ xmlns_context(const xmlns_context& r);
+ ~xmlns_context();
+
+ xmlns_context& operator= (const xmlns_context& r);
+ xmlns_context& operator= (xmlns_context&& r);
+
+ /**
+ * Push a new namespace alias-value pair to the stack.
+ *
+ * @param alias namespace alias to push onto the stack. If the same alias
+ * is already present, this overwrites it until it gets popped
+ * off the stack.
+ * @param uri namespace name to associate with the alias.
+ *
+ * @return normalized namespace identifier for the namespace name.
+ */
+ xmlns_id_t push(std::string_view alias, std::string_view uri);
+
+ /**
+ * Pop a namespace alias from the stack.
+ *
+ * @param alias namespace alias to pop from the stack.
+ */
+ void pop(std::string_view alias);
+
+ /**
+ * Get the current namespace identifier for a specified namespace alias.
+ *
+ * @param alias namespace alias to get the current namespace identifier for.
+ *
+ * @return current namespace identifier associated with the alias.
+ */
+ xmlns_id_t get(std::string_view alias) const;
+
+ /**
+ * Get a unique index value associated with a specified identifier. An
+ * index value is guaranteed to be unique regardless of contexts.
+ *
+ * @param ns_id a namespace identifier to obtain index for.
+ *
+ * @return index value associated with the identifier.
+ */
+ size_t get_index(xmlns_id_t ns_id) const;
+
+ /**
+ * Get a 'short' name associated with a specified identifier. A short
+ * name is a string value conveniently short enough for display purposes,
+ * but still guaranteed to be unique to the identifier it is associated
+ * with.
+ *
+ * @note The xmlns_repository class has a method of the same name, and that
+ * method works identically to this method.
+ *
+ * @param ns_id a namespace identifier to obtain short name for.
+ *
+ * @return short name for the specified identifier.
+ */
+ std::string get_short_name(xmlns_id_t ns_id) const;
+
+ /**
+ * Get an alias currently associated with a given namespace identifier.
+ *
+ * @param ns_id namespace identifier.
+ *
+ * @return alias name currently associated with the given namespace
+ * identifier, or an empty string if the given namespace is
+ * currently not associated with any aliases.
+ */
+ std::string_view get_alias(xmlns_id_t ns_id) const;
+
+ std::vector<xmlns_id_t> get_all_namespaces() const;
+
+ void dump(std::ostream& os) const;
+
+ /**
+ * Dump the internal state for debugging in YAML format.
+ */
+ void dump_state(std::ostream& os) const;
+
+ void swap(xmlns_context& other) noexcept;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/xml_structure_tree.hpp b/include/orcus/xml_structure_tree.hpp
new file mode 100644
index 0000000..423ede4
--- /dev/null
+++ b/include/orcus/xml_structure_tree.hpp
@@ -0,0 +1,198 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP
+#define INCLUDED_ORCUS_XML_STRUCTURE_TREE_HPP
+
+#include "env.hpp"
+#include "types.hpp"
+
+#include <ostream>
+#include <memory>
+#include <functional>
+
+namespace orcus {
+
+class xmlns_context;
+
+/**
+ * Two lists of strings describing one table range. An instance of this
+ * type is what gets passed to the handler given to
+ * xml_structure_tree::process_ranges().
+ *
+ * NOTE(review): judging by the member names, 'paths' presumably holds the
+ * paths of the elements that make up the range and 'row_groups' the paths
+ * of the elements that delimit its rows - confirm against the
+ * implementation.
+ */
+struct ORCUS_DLLPUBLIC xml_table_range_t
+{
+ std::vector<std::string> paths;
+ std::vector<std::string> row_groups;
+
+ xml_table_range_t();
+ ~xml_table_range_t();
+};
+
+/**
+ * Tree representing the structure of elements in XML content. Recurring
+ * elements under the same parent are represented by a single element
+ * instance. This tree only includes elements; no attributes and content
+ * nodes appear in this tree.
+ *
+ * Instances cannot be default-constructed or copied; they can be
+ * move-constructed. The implementation is hidden behind a private impl
+ * structure.
+ */
+class ORCUS_DLLPUBLIC xml_structure_tree
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ xml_structure_tree() = delete;
+ xml_structure_tree(const xml_structure_tree&) = delete;
+ xml_structure_tree& operator= (const xml_structure_tree&) = delete;
+
+ /**
+ * Name of an entity, expressed as a namespace identifier plus a name.
+ * This is the type used for the names of both child elements and
+ * attributes returned from the walker.
+ *
+ * NOTE(review): the name is stored as a std::string_view, i.e. it does
+ * not own its characters; what owns them is not visible from this
+ * header - confirm before keeping instances around.
+ */
+ struct ORCUS_DLLPUBLIC entity_name
+ {
+ xmlns_id_t ns;
+ std::string_view name;
+
+ entity_name();
+ entity_name(xmlns_id_t _ns, std::string_view _name);
+
+ bool operator< (const entity_name& r) const;
+ bool operator== (const entity_name& r) const;
+
+ /**
+ * Hash functor for entity_name.
+ */
+ struct ORCUS_DLLPUBLIC hash
+ {
+ size_t operator ()(const entity_name& val) const;
+ };
+ };
+
+ typedef std::vector<entity_name> entity_names_type;
+
+ /**
+ * Element as seen by the walker: its name plus two flags.
+ *
+ * NOTE(review): 'repeat' presumably marks an element that recurs under
+ * the same parent, and 'has_content' one that carries text content -
+ * confirm against the implementation.
+ */
+ struct ORCUS_DLLPUBLIC element
+ {
+ entity_name name;
+ bool repeat;
+ bool has_content;
+
+ element();
+ element(const entity_name& _name, bool _repeat, bool _has_content);
+ };
+
+ struct walker_impl;
+
+ /**
+ * This class allows client to traverse the tree. It cannot be
+ * default-constructed; obtain an instance from
+ * xml_structure_tree::get_walker(). Unlike the tree itself, a walker
+ * is copyable.
+ */
+ class ORCUS_DLLPUBLIC walker
+ {
+ friend class xml_structure_tree;
+
+ std::unique_ptr<walker_impl> mp_impl;
+
+ walker(const xml_structure_tree::impl& parent_impl);
+ public:
+ walker() = delete;
+ walker(const walker& r);
+ ~walker();
+ walker& operator= (const walker& r);
+
+ /**
+ * Set current position to the root element, and return the root
+ * element.
+ *
+ * @return root element.
+ */
+ element root();
+
+ /**
+ * Descend into a specified child element.
+ *
+ * @param name name of a child element.
+ *
+ * @return child element
+ *
+ * @throw general_error if no child elements exist for the specified
+ * name.
+ */
+ element descend(const entity_name& name);
+
+ /**
+ * Move up to the parent element.
+ *
+ * NOTE(review): the returned element is presumably the parent element
+ * that becomes the current position, and the behavior when already at
+ * the root is not visible from this header - confirm.
+ */
+ element ascend();
+
+ /**
+ * Move to the element specified by a path expression. The path
+ * expression may be generated by
+ * <code>xml_structure_tree::walker::get_path</code>.
+ *
+ * @param path a simple XPath like expression
+ *
+ * @return element pointed to by the path.
+ */
+ element move_to(const std::string& path);
+
+ /**
+ * Get a list of names of all child elements at the current element
+ * position. The list of names is in order of appearance.
+ *
+ * @return list of child element names in order of appearance.
+ */
+ entity_names_type get_children();
+
+ /**
+ * Get a list of names of all attributes that belong to current
+ * element. The list of names is in order of appearance.
+ *
+ * @return list of attribute names in order of appearance.
+ */
+ entity_names_type get_attributes();
+
+ /**
+ * Get a numerical, 0-based index of given XML namespace.
+ *
+ * NOTE(review): the constant <code>index_not_found</code> referred to
+ * below is not declared in this class as seen in this header - confirm
+ * where it is defined.
+ *
+ * @param ns XML namespace ID.
+ *
+ * @return numeric, 0-based index of XML namespace if found, or
+ * <code>xml_structure_tree::walker::index_not_found</code> if
+ * the namespace is not found in this structure.
+ */
+ size_t get_xmlns_index(xmlns_id_t ns) const;
+
+ /**
+ * Get a short name for a given XML namespace.
+ *
+ * NOTE(review): presumably equivalent to
+ * xmlns_context::get_short_name() - confirm.
+ *
+ * @param ns XML namespace ID.
+ *
+ * @return short name of the namespace.
+ */
+ std::string get_xmlns_short_name(xmlns_id_t ns) const;
+
+ /**
+ * Convert an entity name to its proper string representation.
+ *
+ * @param name entity name to convert to string.
+ *
+ * @return string representation of the entity name, including the
+ * namespace.
+ */
+ std::string to_string(const entity_name& name) const;
+
+ /**
+ * Get a XPath like ID for the element inside of the XML tree.
+ *
+ * @return path expression; per the documentation of move_to(), it may
+ * be passed to that method.
+ */
+ std::string get_path() const;
+ };
+
+ /**
+ * @param xmlns_cxt namespace context to use with this tree. It is taken
+ * by non-const reference. NOTE(review): presumably it must
+ * outlive the tree - confirm.
+ */
+ xml_structure_tree(xmlns_context& xmlns_cxt);
+ xml_structure_tree(xml_structure_tree&& other);
+ ~xml_structure_tree();
+
+ /**
+ * Parse XML content to build the tree.
+ *
+ * @param s XML content to parse.
+ */
+ void parse(std::string_view s);
+
+ /**
+ * Write the tree to an output stream. NOTE(review): the exact "compact"
+ * format is not visible from this header.
+ *
+ * @param os output stream to write to.
+ */
+ void dump_compact(std::ostream& os) const;
+
+ /**
+ * Create a walker instance for traversing this tree.
+ *
+ * @return walker instance associated with this tree.
+ */
+ walker get_walker() const;
+
+ /**
+ * Type of the callback passed to process_ranges(). It receives each
+ * range as an rvalue.
+ */
+ using range_handler_type = std::function<void(xml_table_range_t&&)>;
+
+ /**
+ * Invoke a callback with table ranges. NOTE(review): how ranges are
+ * detected, and how many times the callback fires, is not visible from
+ * this header.
+ *
+ * @param rh callback to receive the ranges.
+ */
+ void process_ranges(range_handler_type rh) const;
+
+ /**
+ * Swap the content with another tree instance.
+ *
+ * @param other tree instance to swap the content with.
+ */
+ void swap(xml_structure_tree& other);
+};
+
+}
+
+
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/xml_writer.hpp b/include/orcus/xml_writer.hpp
new file mode 100644
index 0000000..b55485c
--- /dev/null
+++ b/include/orcus/xml_writer.hpp
@@ -0,0 +1,122 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_XML_WRITER_HPP
+#define INCLUDED_ORCUS_XML_WRITER_HPP
+
+#include "orcus/types.hpp"
+
+#include <memory>
+
+namespace orcus {
+
+class xmlns_repository;
+
+/**
+ * This class lets you produce XML contents from scratch. It writes its
+ * content to any object supporting the std::ostream interface.
+ *
+ * Instances are move-only: the copy constructor and the copy assignment
+ * operator are deleted.
+ */
+class ORCUS_PSR_DLLPUBLIC xml_writer
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ // NOTE(review): private helpers; their exact duties are not visible from
+ // this header. Per the destructor's documentation, remaining elements get
+ // popped on destruction - presumably via pop_elements().
+ void close_current_element();
+ void pop_elements();
+
+public:
+ /**
+ * Object returned by push_element_scope(). Per that method's
+ * documentation, the element it was created for gets popped when this
+ * object goes out of scope. It can be moved but not copied, and only
+ * xml_writer can create one since the constructor is private.
+ */
+ class ORCUS_PSR_DLLPUBLIC scope
+ {
+ friend class xml_writer;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ scope(xml_writer* parent, const xml_name_t& name);
+ public:
+ scope(const scope&) = delete;
+ scope(scope&& other);
+ ~scope();
+
+ scope& operator= (scope&& other);
+ };
+
+ xml_writer(const xml_writer&) = delete;
+ xml_writer& operator= (const xml_writer&) = delete;
+
+ /**
+ * @param ns_repo namespace repository to use. NOTE(review): presumably
+ * it must outlive the writer - confirm.
+ * @param os output stream to write the XML content to.
+ */
+ xml_writer(xmlns_repository& ns_repo, std::ostream& os);
+ xml_writer(xml_writer&& other);
+
+ xml_writer& operator= (xml_writer&& other);
+
+ /**
+ * Destructor. Any remaining element(s) on the stack will get popped when
+ * the destructor is called.
+ */
+ ~xml_writer();
+
+ /**
+ * Push a new element to the stack, and write an opening element to the
+ * output stream. It differs from the {@link push_element} method in that
+ * the new element will be automatically popped when the returned object
+ * goes out of scope.
+ *
+ * @param name name of the new element.
+ *
+ * @return scope object which automatically pops the element when it goes
+ * out of scope.
+ */
+ scope push_element_scope(const xml_name_t& name);
+
+ /**
+ * Push a new element to the stack, and write an opening element to the
+ * output stream.
+ *
+ * @param name name of the element.
+ */
+ void push_element(const xml_name_t& name);
+
+ /**
+ * Add a namespace definition for the next element to be pushed.
+ *
+ * @param alias alias for the namespace.
+ * @param value value of the namespace definition.
+ *
+ * @return ID for the namespace being added.
+ */
+ xmlns_id_t add_namespace(std::string_view alias, std::string_view value);
+
+ /**
+ * Add a new attribute for the next element to be pushed.
+ *
+ * @param name name of the attribute to be added.
+ * @param value value of the attribute to be added.
+ */
+ void add_attribute(const xml_name_t& name, std::string_view value);
+
+ /**
+ * Add a content to the current element on the stack. The content will be
+ * properly encoded.
+ *
+ * @param content content to be added to the current element.
+ */
+ void add_content(std::string_view content);
+
+ /**
+ * Pop the current element from the stack, and write a closing element to
+ * the output stream.
+ *
+ * @return the name of the element being popped.
+ */
+ xml_name_t pop_element();
+};
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/yaml_document_tree.hpp b/include/orcus/yaml_document_tree.hpp
new file mode 100644
index 0000000..d22a588
--- /dev/null
+++ b/include/orcus/yaml_document_tree.hpp
@@ -0,0 +1,109 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_YAML_DOCUMENT_TREE_HPP
+#define INCLUDED_ORCUS_YAML_DOCUMENT_TREE_HPP
+
+#include "env.hpp"
+#include "exception.hpp"
+
+#include <string>
+#include <memory>
+#include <vector>
+
+namespace orcus {
+
+namespace yaml {
+
+class document_tree;
+
+/**
+ * Exception type for the YAML document tree, derived from general_error.
+ *
+ * NOTE(review): which operations throw it is not visible from this header;
+ * presumably the accessors of const_node and document_tree - confirm.
+ */
+class ORCUS_DLLPUBLIC document_error : public general_error
+{
+public:
+ /**
+ * @param msg error message.
+ */
+ document_error(const std::string& msg);
+ virtual ~document_error();
+};
+
+/**
+ * Type of a YAML node, as returned from const_node::type(). The underlying
+ * type is uint8_t, and 'unset' is the first enumerator (value 0).
+ */
+enum class node_t : uint8_t
+{
+ unset,
+ string,
+ number,
+ map,
+ sequence,
+ boolean_true,
+ boolean_false,
+ null
+};
+
+struct yaml_value;
+
+/**
+ * Read-only handle to a node in a YAML document tree. It cannot be
+ * default-constructed; the constructor taking a yaml_value pointer is
+ * private and document_tree is a friend, so instances originate from
+ * document_tree (see document_tree::get_document_root()) or from other
+ * nodes. Handles are copyable.
+ *
+ * NOTE(review): a handle is built from a raw pointer to a yaml_value, so it
+ * presumably stays valid only while the owning document_tree is alive -
+ * confirm.
+ */
+class ORCUS_DLLPUBLIC const_node
+{
+ friend class ::orcus::yaml::document_tree;
+
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+ const_node(const yaml_value* yv);
+
+public:
+ const_node() = delete;
+
+ const_node(const const_node& other);
+ const_node(const_node&& rhs);
+ ~const_node();
+
+ /** @return type of this node. */
+ node_t type() const;
+
+ /** @return number of child nodes. */
+ size_t child_count() const;
+
+ /** NOTE(review): presumably only meaningful for a map node - confirm. */
+ std::vector<const_node> keys() const;
+
+ /** @param index 0-based position. NOTE(review): out-of-range behavior not visible here. */
+ const_node key(size_t index) const;
+
+ /** @param index 0-based position. NOTE(review): out-of-range behavior not visible here. */
+ const_node child(size_t index) const;
+
+ /** @param key key node to look up a child by. NOTE(review): presumably for map nodes - confirm. */
+ const_node child(const const_node& key) const;
+
+ /** NOTE(review): the result for a root node is not visible here. */
+ const_node parent() const;
+
+ /** NOTE(review): presumably valid only when type() is node_t::string - confirm. */
+ std::string_view string_value() const;
+ /** NOTE(review): presumably valid only when type() is node_t::number - confirm. */
+ double numeric_value() const;
+
+ const_node& operator=(const const_node& other);
+
+ /**
+ * @return integer value identifying the node. NOTE(review): given the
+ * uintptr_t return type it is presumably derived from the
+ * address of the underlying value - confirm.
+ */
+ uintptr_t identity() const;
+};
+
+/**
+ * Container for parsed YAML content, which may hold more than one
+ * document (see get_document_count()). Instances are not copyable but can
+ * be move-constructed.
+ */
+class ORCUS_DLLPUBLIC document_tree
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+public:
+ document_tree();
+ document_tree(const document_tree&) = delete;
+ document_tree(document_tree&& other);
+ ~document_tree();
+
+ /**
+ * Load YAML content into this tree.
+ *
+ * @param s YAML content. NOTE(review): whether the tree keeps referring to
+ * this buffer after the call is not visible here - confirm.
+ */
+ void load(std::string_view s);
+
+ /** @return number of documents stored in this tree. */
+ size_t get_document_count() const;
+
+ /**
+ * @param index 0-based document index. NOTE(review): out-of-range
+ * behavior is not visible here.
+ *
+ * @return root node of the specified document.
+ */
+ const_node get_document_root(size_t index) const;
+
+ /** @return content of this tree as a YAML string. */
+ std::string dump_yaml() const;
+
+ /** @return content of this tree as a JSON string. */
+ std::string dump_json() const;
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/yaml_parser.hpp b/include/orcus/yaml_parser.hpp
new file mode 100644
index 0000000..836a902
--- /dev/null
+++ b/include/orcus/yaml_parser.hpp
@@ -0,0 +1,691 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_YAML_PARSER_HPP
+#define INCLUDED_ORCUS_YAML_PARSER_HPP
+
+#include "orcus/yaml_parser_base.hpp"
+#include "orcus/parser_global.hpp"
+
+namespace orcus {
+
+/**
+ * Do-nothing handler for yaml_parser.  Every callback the parser invokes is
+ * defined here with an empty body, so a handler type can derive from this
+ * class and redefine only the callbacks it is interested in.
+ */
+class yaml_handler
+{
+public:
+    /** Called when the parser starts parsing a content. */
+    void begin_parse() {}
+
+    /** Called when the parser finishes parsing an entire content. */
+    void end_parse() {}
+
+    /** Called when a new document is encountered. */
+    void begin_document() {}
+
+    /** Called when the parser has finished parsing a document. */
+    void end_document() {}
+
+    /** Called when a sequence begins. */
+    void begin_sequence() {}
+
+    /** Called when a sequence ends. */
+    void end_sequence() {}
+
+    /** Called when a map begins. */
+    void begin_map() {}
+
+    /** Called when the parser starts parsing a map key. */
+    void begin_map_key() {}
+
+    /** Called when the parser finishes parsing a map key. */
+    void end_map_key() {}
+
+    /** Called when the parser finishes parsing an entire map. */
+    void end_map() {}
+
+    /**
+     * Called when a string value is encountered.
+     *
+     * @param value string value.
+     */
+    void string([[maybe_unused]] std::string_view value) {}
+
+    /**
+     * Called when a numeric value is encountered.
+     *
+     * @param val numeric value.
+     */
+    void number([[maybe_unused]] double val) {}
+
+    /** Called when a boolean 'true' keyword is encountered. */
+    void boolean_true() {}
+
+    /** Called when a boolean 'false' keyword is encountered. */
+    void boolean_false() {}
+
+    /** Called when a 'null' keyword is encountered. */
+    void null() {}
+};
+
+/**
+ * Parser for YAML documents.  It reports what it parses by calling member
+ * functions on a handler object, which it holds by reference.
+ *
+ * @tparam HandlerT Handler type with member functions for event callbacks.
+ *         Refer to yaml_handler.
+ *
+ * @warning This parser is still highly experimental.  Use with caution.
+ */
+template<typename HandlerT>
+class yaml_parser : public yaml::parser_base
+{
+public:
+    using handler_type = HandlerT;
+
+    yaml_parser(std::string_view content, handler_type& hdl);
+
+    void parse();
+
+private:
+    // Line-level parsing and scope bookkeeping.
+    size_t end_scope();
+    void check_or_begin_document();
+    void check_or_begin_map();
+    void check_or_begin_sequence();
+    void parse_value(const char* p, size_t len);
+    void push_value(const char* p, size_t len);
+    void parse_line(const char* p, size_t len);
+    void parse_map_key(const char* p, size_t len);
+
+    // Wrappers that record a parse token and then call the handler.
+    void handler_begin_parse();
+    void handler_end_parse();
+    void handler_begin_document();
+    void handler_end_document();
+    void handler_begin_sequence();
+    void handler_end_sequence();
+    void handler_begin_map();
+    void handler_end_map();
+    void handler_begin_map_key();
+    void handler_end_map_key();
+    void handler_string(const char* p, size_t n);
+    void handler_number(double val);
+    void handler_boolean_true();
+    void handler_boolean_false();
+    void handler_null();
+
+    handler_type& m_handler;
+};
+
+/** Record the begin_parse token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_begin_parse()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::begin_parse);
+    m_handler.begin_parse();
+}
+
+/** Record the end_parse token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_end_parse()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::end_parse);
+    m_handler.end_parse();
+}
+
+/** Record the begin_document token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_begin_document()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::begin_document);
+    m_handler.begin_document();
+}
+
+/** Record the end_document token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_end_document()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::end_document);
+    m_handler.end_document();
+}
+
+/** Record the begin_sequence token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_begin_sequence()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::begin_sequence);
+    m_handler.begin_sequence();
+}
+
+/** Record the end_sequence token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_end_sequence()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::end_sequence);
+    m_handler.end_sequence();
+}
+
+/** Record the begin_map token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_begin_map()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::begin_map);
+    m_handler.begin_map();
+}
+
+/** Record the end_map token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_end_map()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::end_map);
+    m_handler.end_map();
+}
+
+/** Record the begin_map_key token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_begin_map_key()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::begin_map_key);
+    m_handler.begin_map_key();
+}
+
+/** Record the end_map_key token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_end_map_key()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::end_map_key);
+    m_handler.end_map_key();
+}
+
+/**
+ * Record the string token, then pass the string to the handler.
+ *
+ * @param p pointer to the first character of the string.
+ * @param n length of the string.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_string(const char* p, size_t n)
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::string);
+
+    // The braced argument is kept as-is so that the handler's own parameter
+    // type decides how the (pointer, length) pair gets converted.
+    m_handler.string({p, n});
+}
+
+/**
+ * Record the number token, then pass the value to the handler.
+ *
+ * @param val numeric value.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_number(double val)
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::number);
+    m_handler.number(val);
+}
+
+/** Record the boolean_true token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_boolean_true()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::boolean_true);
+    m_handler.boolean_true();
+}
+
+/** Record the boolean_false token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_boolean_false()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::boolean_false);
+    m_handler.boolean_false();
+}
+
+/** Record the null token, then notify the handler. */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::handler_null()
+{
+    using token_t = yaml::detail::parse_token_t;
+
+    push_parse_token(token_t::null);
+    m_handler.null();
+}
+
+/**
+ * @param content YAML content to parse; it is handed to the base class.
+ * @param hdl handler instance to receive the callbacks.  Only a reference to
+ *            it is stored.
+ */
+template<typename HandlerT>
+yaml_parser<HandlerT>::yaml_parser(std::string_view content, handler_type& hdl) :
+    yaml::parser_base(content),
+    m_handler(hdl)
+{
+}
+
+/**
+ * Parse the whole content line by line.  For each line, the indent is
+ * measured first and compared against the current scope in order to open a
+ * new scope or close existing ones; the rest of the line is then handed to
+ * parse_line().  Once the content is exhausted, all remaining scopes are
+ * closed, followed by the current document if one has been started.
+ *
+ * @throw parse_error when the first node is indented, or when the indent of
+ *        a line does not match any of the enclosing scopes.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::parse()
+{
+    handler_begin_parse();
+
+    while (has_char())
+    {
+        reset_on_new_line();
+
+        const size_t indent = parse_indent();
+
+        if (indent == parse_indent_end_of_stream)
+            break;
+
+        if (indent == parse_indent_blank_line)
+            continue;
+
+        size_t scope_width = get_scope();
+
+        // A line indented at least as deep as the current scope may be a
+        // continuation of a literal block or of a multi-line string.
+        const bool within_scope = scope_width <= indent;
+
+        if (within_scope && in_literal_block())
+        {
+            handle_line_in_literal(indent);
+            continue;
+        }
+
+        if (within_scope && has_line_buffer())
+        {
+            // Part of a multi-line string; the line is buffered as-is.
+            handle_line_in_multi_line_string();
+            continue;
+        }
+
+        if (scope_width == scope_empty)
+        {
+            if (indent > 0)
+                throw parse_error(
+                    "first node of the document should not be indented.", offset());
+
+            push_scope(indent);
+        }
+        else if (indent > scope_width)
+        {
+            push_scope(indent);
+        }
+        else if (indent < scope_width)
+        {
+            // The indent is shallower than the current scope.  Keep closing
+            // scopes until one with a matching width is found.
+            while (indent < scope_width)
+            {
+                scope_width = end_scope();
+                if (scope_width < indent)
+                    throw parse_error("parse: invalid indent level.", offset());
+            }
+        }
+
+        // Handle what remains of the line after the indent.
+        std::string_view line = trim(parse_to_end_of_line());
+
+        assert(!line.empty());
+        parse_line(line.data(), line.size());
+    }
+
+    // Close every scope that is still open.
+    for (size_t scope_width = get_scope(); scope_width != scope_empty;)
+        scope_width = end_scope();
+
+    if (get_doc_hash())
+        handler_end_document();
+
+    handler_end_parse();
+}
+
+/**
+ * Close the current scope.  What gets emitted to the handler depends on the
+ * type of the scope being closed: a map or a sequence is ended (preceded by
+ * a null value when the last token was a map key end or a sequence element
+ * start, respectively), a multi-line string is merged and emitted as one
+ * string, and for any other scope type a buffered line, if present, is
+ * parsed as a value.
+ *
+ * @return scope width after the current scope is popped.
+ */
+template<typename HandlerT>
+size_t yaml_parser<HandlerT>::end_scope()
+{
+    using scope_kind = yaml::detail::scope_t;
+    using token_t = yaml::detail::parse_token_t;
+
+    const scope_kind closing = get_scope_type();
+
+    if (closing == scope_kind::map)
+    {
+        // A key without a value gets a null value.
+        if (get_last_parse_token() == token_t::end_map_key)
+            handler_null();
+
+        handler_end_map();
+    }
+    else if (closing == scope_kind::sequence)
+    {
+        // A sequence element without a value gets a null value.
+        if (get_last_parse_token() == token_t::begin_sequence_element)
+            handler_null();
+
+        handler_end_sequence();
+    }
+    else if (closing == scope_kind::multi_line_string)
+    {
+        std::string_view merged = merge_line_buffer();
+        handler_string(merged.data(), merged.size());
+    }
+    else if (has_line_buffer())
+    {
+        assert(get_line_buffer_count() == 1);
+        std::string_view line = pop_line_front();
+        parse_value(line.data(), line.size());
+    }
+
+    return pop_scope();
+}
+
+/**
+ * Start a new document unless one has already been started.  The current
+ * character position is stored as the hash of the new document.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::check_or_begin_document()
+{
+    if (get_doc_hash())
+        return; // a document is already in progress.
+
+    set_doc_hash(mp_char);
+    handler_begin_document();
+}
+
+/**
+ * Make sure a map is in progress before a map key gets emitted.  When the
+ * type of the current scope is still unset, the scope becomes a map (and a
+ * document is started if necessary).  When the scope is already a map whose
+ * last token was a key end, that key is given a null value.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::check_or_begin_map()
+{
+    using scope_kind = yaml::detail::scope_t;
+
+    const scope_kind current = get_scope_type();
+
+    if (current == scope_kind::unset)
+    {
+        check_or_begin_document();
+        set_scope_type(scope_kind::map);
+        handler_begin_map();
+        return;
+    }
+
+    if (current != scope_kind::map)
+        return;
+
+    if (get_last_parse_token() == yaml::detail::parse_token_t::end_map_key)
+        handler_null();
+}
+
+/**
+ * Make sure a sequence is in progress, then record the start of a new
+ * sequence element.  When the type of the current scope is still unset, the
+ * scope becomes a sequence (and a document is started if necessary).  When
+ * the scope is already a sequence whose previous element has not received
+ * any value, that element is given a null value.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::check_or_begin_sequence()
+{
+    using scope_kind = yaml::detail::scope_t;
+    using token_t = yaml::detail::parse_token_t;
+
+    const scope_kind current = get_scope_type();
+
+    if (current == scope_kind::unset)
+    {
+        check_or_begin_document();
+        set_scope_type(scope_kind::sequence);
+        handler_begin_sequence();
+    }
+    else if (current == scope_kind::sequence)
+    {
+        if (get_last_parse_token() == token_t::begin_sequence_element)
+            handler_null();
+    }
+
+    // This token is not forwarded to the handler.
+    push_parse_token(token_t::begin_sequence_element);
+}
+
+/**
+ * Classify a scalar and emit it to the handler.  The text is tried as a
+ * number first, then as a keyword (null, true or false); when neither
+ * applies it is emitted as a string.
+ *
+ * @param p pointer to the first character of the scalar.
+ * @param len length of the scalar.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::parse_value(const char* p, size_t len)
+{
+    using keyword_t = yaml::detail::keyword_t;
+
+    check_or_begin_document();
+
+    const char* const head = p;
+    const char* const tail = p + len;
+
+    // It is a number only when the numeric parse consumes the whole text.
+    double val;
+    if (parse_numeric(p, tail, val) == tail)
+    {
+        handler_number(val);
+        return;
+    }
+
+    switch (parse_keyword(head, len))
+    {
+        case keyword_t::unknown:
+            // Neither a number nor a keyword.  It must be a string.
+            handler_string(head, len);
+            break;
+        case keyword_t::null:
+            handler_null();
+            break;
+        case keyword_t::boolean_true:
+            handler_boolean_true();
+            break;
+        case keyword_t::boolean_false:
+            handler_boolean_false();
+            break;
+        default:
+            ;
+    }
+}
+
+/**
+ * Append a line to the line buffer instead of emitting it right away.  When
+ * the buffer already holds a line and the type of the current scope is
+ * still unset, the scope becomes a multi-line string.
+ *
+ * @param p pointer to the first character of the line.
+ * @param len length of the line.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::push_value(const char* p, size_t len)
+{
+    using scope_kind = yaml::detail::scope_t;
+
+    check_or_begin_document();
+
+    const bool follows_buffered_line =
+        has_line_buffer() && get_scope_type() == scope_kind::unset;
+
+    if (follows_buffered_line)
+        set_scope_type(scope_kind::multi_line_string);
+
+    push_line_back(p, len);
+}
+
+/**
+ * Parse one line whose indent has already been consumed.  Three forms are
+ * recognized by their leading characters: a lone '-' or "- " starts a
+ * sequence element, "---" starts a new document, and anything else is
+ * passed on to parse_map_key().
+ *
+ * @param p pointer to the first character of the line.
+ * @param len length of the line.
+ *
+ * @throw parse_error on a malformed document marker, on a sequence element
+ *        marker not followed by any content, or when the current scope is a
+ *        sequence but the line is not a sequence element.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::parse_line(const char* p, size_t len)
+{
+    const char* const line_head = p; // where the line started.
+    const char* const line_end = p + len;
+
+    if (*p == '-')
+    {
+        ++p;
+        if (p == line_end)
+        {
+            // A lone '-' starts a sequence element with no inline content.
+            check_or_begin_sequence();
+            return;
+        }
+
+        if (*p == '-')
+        {
+            // Two dashes so far; a third one makes it a document marker.
+            ++p;
+            if (p == line_end)
+                throw parse_error("parse_line: line ended with '--'.", offset_last_char_of_line());
+
+            if (*p != '-')
+                parse_error::throw_with(
+                    "parse_line: '-' expected but '", *p, "' found.",
+                    offset_last_char_of_line() - std::ptrdiff_t(line_end-p));
+
+            ++p; // Skip the '-'.
+            set_doc_hash(p);
+            handler_begin_document();
+            clear_scopes();
+
+            if (p != line_end)
+            {
+                skip_blanks(p, line_end-p);
+
+                // What follows '---' is treated as the first node.
+                assert(p != line_end);
+                push_scope(0);
+                parse_line(p, line_end-p);
+            }
+            return;
+        }
+
+        if (*p == ' ')
+        {
+            check_or_begin_sequence();
+
+            // Sequence element whose content starts on the same line.
+            ++p;
+            if (p == line_end)
+                throw parse_error(
+                    "parse_line: list item expected, but the line ended prematurely.",
+                    offset_last_char_of_line() - std::ptrdiff_t(line_end-p));
+
+            skip_blanks(p, line_end-p);
+
+            // The inline content opens a scope of its own, as wide as the
+            // current scope plus the characters consumed so far.
+            push_scope(get_scope() + (p-line_head));
+            parse_line(p, line_end-p);
+            return;
+        }
+
+        // None of the above.  Start over from the head of the line.
+        p = line_head;
+    }
+
+    if (get_scope_type() == yaml::detail::scope_t::sequence)
+        parse_error::throw_with(
+            "'-' was expected for a sequence element, but '", *p, "' was found.",
+            offset_last_char_of_line()-len+1);
+
+    // A line that does not start with "- " is handled as a map key.
+    parse_map_key(p, len);
+}
+
+/**
+ * Parse a line that is not a sequence element.  The line may be a quoted
+ * string (optionally followed by ':' which makes it a map key), a
+ * 'key: value' pair, the start of a literal block ('|'), or a plain value.
+ * When a key is followed by an inline value, a new scope is pushed and the
+ * value is parsed through parse_line().
+ *
+ * @param p pointer to the first character of the line.
+ * @param len length of the line.
+ *
+ * @throw parse_error when a quoted string is followed by something other
+ *        than ':', or when the inline value of a key starts with '-'.
+ */
+template<typename HandlerT>
+void yaml_parser<HandlerT>::parse_map_key(const char* p, size_t len)
+{
+    const char* const line_head = p; // where the line started.
+    const char* const line_end = p + len;
+
+    const char first = *p;
+
+    if (first == '"' || first == '\'')
+    {
+        // The two quote styles only differ in how the string gets parsed.
+        std::string_view quoted_str = (first == '"')
+            ? parse_double_quoted_string_value(p, len)
+            : parse_single_quoted_string_value(p, len);
+
+        if (p == line_end)
+        {
+            // Nothing follows the quoted string; emit it as a string value.
+            handler_string(quoted_str.data(), quoted_str.size());
+            return;
+        }
+
+        skip_blanks(p, line_end-p);
+
+        if (*p != ':')
+            throw parse_error(
+                "parse_map_key: ':' is expected after the quoted string key.",
+                offset() - std::ptrdiff_t(line_end-p+1));
+
+        check_or_begin_map();
+        handler_begin_map_key();
+        handler_string(quoted_str.data(), quoted_str.size());
+        handler_end_map_key();
+
+        ++p; // skip the ':'.
+        if (p == line_end)
+            return;
+
+        skip_blanks(p, line_end-p);
+    }
+    else
+    {
+        key_value kv = parse_key_value(p, line_end-p);
+
+        if (kv.key.empty())
+        {
+            // No map key on this line.
+            if (*p == '|')
+            {
+                start_literal_block();
+                return;
+            }
+
+            push_value(p, len);
+            return;
+        }
+
+        check_or_begin_map();
+        handler_begin_map_key();
+        parse_value(kv.key.data(), kv.key.size());
+        handler_end_map_key();
+
+        if (kv.value.empty())
+            return;
+
+        p = kv.value.data();
+    }
+
+    // From here on, p points to the inline value that follows the key.
+
+    if (*p == '|')
+    {
+        start_literal_block();
+        return;
+    }
+
+    if (*p == '-')
+        throw parse_error(
+            "parse_map_key: sequence entry is not allowed as an inline map item.",
+            offset() - std::ptrdiff_t(line_end-p+1));
+
+    // The inline value opens a scope of its own, as wide as the current
+    // scope plus the characters consumed so far.
+    push_scope(get_scope() + (p-line_head));
+    parse_line(p, line_end-p);
+}
+
+}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/yaml_parser_base.hpp b/include/orcus/yaml_parser_base.hpp
new file mode 100644
index 0000000..13b4c91
--- /dev/null
+++ b/include/orcus/yaml_parser_base.hpp
@@ -0,0 +1,195 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_YAML_PARSER_BASE_HPP
+#define INCLUDED_ORCUS_YAML_PARSER_BASE_HPP
+
+#include "orcus/parser_base.hpp"
+
+#include <memory>
+#include <cassert>
+
+namespace orcus { namespace yaml {
+
+namespace detail {
+
+/**
+ * Type of a parser scope, as stored via parser_base::set_scope_type() and
+ * read back via parser_base::get_scope_type(). 'unset' is the first
+ * enumerator; yaml_parser treats it as "type not decided yet".
+ */
+enum class scope_t
+{
+ unset,
+ sequence,
+ map,
+ multi_line_string
+};
+
+/**
+ * Result of parser_base::parse_keyword(). 'unknown' means the text is not
+ * one of the recognized keywords.
+ */
+enum class keyword_t
+{
+ unknown,
+ boolean_true,
+ boolean_false,
+ null
+};
+
+/**
+ * Token pushed via parser_base::push_parse_token(); the last one pushed is
+ * available from parser_base::get_last_parse_token().
+ */
+enum class parse_token_t
+{
+ unknown,
+
+ // handler tokens (tokens associated with handler events)
+
+ begin_parse,
+ end_parse,
+ begin_document,
+ end_document,
+ begin_sequence,
+ end_sequence,
+ begin_map,
+ end_map,
+ begin_map_key,
+ end_map_key,
+ string,
+ number,
+ boolean_true,
+ boolean_false,
+ null,
+
+ // non-handler tokens
+
+ begin_sequence_element
+};
+
+}
+
+/**
+ * Base class for the YAML parser. It keeps the non-template parts of the
+ * parser: scope bookkeeping, line buffering, and low-level parsing helpers.
+ * All of its members are either private or protected, so it is only usable
+ * through a derived class such as yaml_parser. Its state is hidden behind a
+ * private impl structure.
+ */
+class ORCUS_PSR_DLLPUBLIC parser_base : public ::orcus::parser_base
+{
+ struct impl;
+ std::unique_ptr<impl> mp_impl;
+
+protected:
+
+ // The entire line is empty.
+ static const size_t parse_indent_blank_line;
+
+ // End of stream has reached while parsing in the indent part of a line.
+ static const size_t parse_indent_end_of_stream;
+
+ // Scope width value that signifies no scope is present.
+ // NOTE(review): inferred from how yaml_parser::parse() compares it with
+ // the return values of get_scope() and end_scope() - confirm.
+ static const size_t scope_empty;
+
+ /**
+ * Key and value parts of a line, as returned from parse_key_value().
+ * Both are non-owning views.
+ */
+ struct key_value
+ {
+ std::string_view key;
+ std::string_view value;
+ };
+
+ parser_base() = delete;
+ parser_base(const parser_base&) = delete;
+ parser_base& operator=(const parser_base&) = delete;
+
+ /**
+ * @param content content to parse.
+ */
+ parser_base(std::string_view content);
+ ~parser_base();
+
+ /**
+ * Store a parse token as the latest token.
+ *
+ * @param t token to store.
+ */
+ void push_parse_token(detail::parse_token_t t);
+
+ /**
+ * @return the token most recently stored via push_parse_token().
+ */
+ detail::parse_token_t get_last_parse_token() const;
+
+ /**
+ * Get the offset position of the last character of the current line
+ * without comment or trailing whitespaces (if present). Call this only
+ * after the current line has been parsed to the end, that is, only after
+ * parse_to_end_of_line() has been called.
+ *
+ * @return offset position of the last character of the current line.
+ */
+ size_t offset_last_char_of_line() const;
+
+ /**
+ * Parse the prefix indent part of a line.
+ *
+ * @return number of whitespace characters encountered. The caller in
+ * yaml_parser::parse() also checks the returned value against
+ * parse_indent_blank_line and parse_indent_end_of_stream.
+ */
+ size_t parse_indent();
+
+ /**
+ * Once a non-whitespace character is reached, parse until the end of the
+ * line.
+ */
+ std::string_view parse_to_end_of_line();
+
+ /**
+ * Upon encountering a '#', skip until either the line-feed or the
+ * end-of-stream is reached.
+ */
+ void skip_comment();
+
+ // NOTE(review): called at the top of each line by yaml_parser::parse();
+ // what state it resets is not visible from this header.
+ void reset_on_new_line();
+
+ /**
+ * @return width of the current scope.
+ */
+ size_t get_scope() const;
+
+ /**
+ * Push a new scope.
+ *
+ * @param scope_width width of the new scope.
+ */
+ void push_scope(size_t scope_width);
+
+ /**
+ * Remove all scopes.
+ */
+ void clear_scopes();
+
+ /**
+ * @return type of the current scope.
+ */
+ detail::scope_t get_scope_type() const;
+
+ /**
+ * @param type type to assign to the current scope.
+ */
+ void set_scope_type(detail::scope_t type);
+
+ /**
+ * Pop the current scope and return the new scope width after the pop.
+ *
+ * @return new scope width after the pop.
+ */
+ size_t pop_scope();
+
+ /**
+ * Append a line to the line buffer.
+ *
+ * @param p pointer to the first character of the line.
+ * @param n length of the line.
+ */
+ void push_line_back(const char* p, size_t n);
+
+ /**
+ * Remove the first line from the line buffer and return it.
+ */
+ std::string_view pop_line_front();
+
+ /**
+ * @return true if the line buffer holds at least one line.
+ * NOTE(review): inferred from the name and its use in yaml_parser.
+ */
+ bool has_line_buffer() const;
+
+ /**
+ * @return number of lines in the line buffer.
+ */
+ size_t get_line_buffer_count() const;
+
+ /**
+ * Merge the buffered lines into a single string.
+ * NOTE(review): how the lines are joined, and whether the buffer is
+ * emptied afterward, is not visible from this header.
+ */
+ std::string_view merge_line_buffer();
+
+ /**
+ * Get the hash value of current document, or nullptr if a document has
+ * not started.
+ *
+ * @return hash value of current document.
+ */
+ const char* get_doc_hash() const;
+
+ /**
+ * Set the hash value representing the current document. For now the
+ * memory address of the first character of the document is used as its
+ * hash value.
+ *
+ * @param hash hash value of a document.
+ */
+ void set_doc_hash(const char* hash);
+
+ /**
+ * @return keyword type of the given text, or keyword_t::unknown.
+ */
+ detail::keyword_t parse_keyword(const char* p, size_t len);
+
+ /**
+ * Split a line into its key and value parts. yaml_parser treats an empty
+ * key in the result as "no map key found".
+ */
+ key_value parse_key_value(const char* p, size_t len);
+
+ /**
+ * @param p position to start parsing from; it is taken by reference and
+ * gets updated.
+ * @param max_length maximum number of characters to parse.
+ */
+ std::string_view parse_single_quoted_string_value(const char*& p, size_t max_length);
+
+ /**
+ * @param p position to start parsing from; it is taken by reference and
+ * gets updated.
+ * @param max_length maximum number of characters to parse.
+ */
+ std::string_view parse_double_quoted_string_value(const char*& p, size_t max_length);
+
+ /**
+ * @param p position to start skipping from; it is taken by reference and
+ * gets updated.
+ * @param len maximum number of characters to skip.
+ */
+ void skip_blanks(const char*& p, size_t len);
+
+ void start_literal_block();
+
+ bool in_literal_block() const;
+
+ /**
+ * @param indent indent of the line, as returned from parse_indent().
+ */
+ void handle_line_in_literal(size_t indent);
+
+ void handle_line_in_multi_line_string();
+};
+
+}}
+
+#endif
+
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/zip_archive.hpp b/include/orcus/zip_archive.hpp
new file mode 100644
index 0000000..afc6727
--- /dev/null
+++ b/include/orcus/zip_archive.hpp
@@ -0,0 +1,126 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ZIP_ARCHIVE_HPP
+#define INCLUDED_ORCUS_ZIP_ARCHIVE_HPP
+
+#include "env.hpp"
+#include "exception.hpp"
+
+#include <string_view>
+#include <vector>
+#include <memory>
+#include <ostream>
+
+namespace orcus {
+
+/**
+ * Structure containing file entry header attributes.
+ *
+ * All numeric members are initialized to zero by their default member
+ * initializers.
+ *
+ * NOTE(review): the member names appear to mirror the fields of the file
+ * header record of the ZIP format (signature, version needed to extract,
+ * general purpose bit flag, compression method, last modified time and
+ * date in MS-DOS format, CRC-32, compressed and uncompressed sizes, file
+ * name and extra field) - confirm against the code that populates them.
+ */
+struct ORCUS_PSR_DLLPUBLIC zip_file_entry_header
+{
+ uint32_t header_signature = 0;
+ uint16_t required_version = 0;
+ uint16_t flag = 0;
+ uint16_t compression_method = 0;
+ uint16_t last_modified_time = 0;
+ uint16_t last_modified_date = 0;
+ uint32_t crc32 = 0;
+ uint32_t compressed_size = 0;
+ uint32_t uncompressed_size = 0;
+
+ std::string filename;
+ std::vector<uint8_t> extra_field;
+
+ zip_file_entry_header();
+ zip_file_entry_header(const zip_file_entry_header& other);
+ zip_file_entry_header(zip_file_entry_header&& other);
+ ~zip_file_entry_header();
+
+ zip_file_entry_header& operator=(const zip_file_entry_header& other);
+ zip_file_entry_header& operator=(zip_file_entry_header&& other);
+};
+
+/**
+ * Write a file entry header to an output stream.
+ * NOTE(review): the output format is not visible from this header.
+ *
+ * @param os output stream to write to.
+ * @param header file entry header to write.
+ *
+ * @return the output stream.
+ */
+ORCUS_PSR_DLLPUBLIC std::ostream& operator<<(std::ostream& os, const zip_file_entry_header& header);
+
+class zip_archive_stream;
+
+/**
+ * Reader for a zip archive.  It reads the archive through a
+ * zip_archive_stream instance, and gives access to the file entries stored
+ * in it.  Call load() before calling any of the accessors.
+ *
+ * Instances can neither be default-constructed nor copied.
+ */
+class ORCUS_PSR_DLLPUBLIC zip_archive
+{
+    class impl;
+
+    std::unique_ptr<impl> mp_impl;
+
+public:
+    zip_archive() = delete;
+    zip_archive(const zip_archive&) = delete;
+    // The parameter is taken by const reference like the copy constructor
+    // above; it was previously declared as taking its argument by value.
+    zip_archive& operator= (const zip_archive&) = delete;
+
+    /**
+     * @param stream stream to read the archive from.  NOTE(review): it is
+     *               passed as a raw pointer; the archive presumably does not
+     *               take ownership of it and expects it to outlive the
+     *               archive - confirm against the implementation.
+     */
+    zip_archive(zip_archive_stream* stream);
+    ~zip_archive();
+
+    /**
+     * Loading involves the parsing of the central directory of a zip archive
+     * (located toward the end of the stream) and building of file entry data
+     * which are stored in the central directory.
+     */
+    void load();
+
+    /**
+     * Retrieve the header information for a file entry specified by index.
+     *
+     * @param index file entry index.
+     *
+     * @return header information for a file entry.
+     */
+    zip_file_entry_header get_file_entry_header(std::size_t index) const;
+
+    /**
+     * Retrieve the header information for a file entry specified by name.
+     *
+     * @param name file entry name.
+     *
+     * @return header information for a file entry.
+     */
+    zip_file_entry_header get_file_entry_header(std::string_view name) const;
+
+    /**
+     * Get file entry name from its index.
+     *
+     * @param index file entry index
+     *
+     * @return file entry name
+     */
+    std::string_view get_file_entry_name(std::size_t index) const;
+
+    /**
+     * Return the number of file entries stored in this zip archive.  Note
+     * that a file entry may be a directory, so the number of files stored in
+     * the zip archive may not equal the number of file entries.
+     *
+     * @return number of file entries.
+     */
+    size_t get_file_entry_count() const;
+
+    /**
+     * Retrieve data stream of specified file entry. The retrieved data stream
+     * gets uncompressed if the original stream is compressed.
+     *
+     * @param entry_name file entry name.
+     *
+     * @return buffer containing the data stream for specified entry.
+     *
+     * @exception zip_error thrown when any problem is encountered during data
+     *                 stream retrieval.
+     */
+    std::vector<unsigned char> read_file_entry(std::string_view entry_name) const;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/include/orcus/zip_archive_stream.hpp b/include/orcus/zip_archive_stream.hpp
new file mode 100644
index 0000000..7a6bb02
--- /dev/null
+++ b/include/orcus/zip_archive_stream.hpp
@@ -0,0 +1,71 @@
+/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+/*
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ */
+
+#ifndef INCLUDED_ORCUS_ZIP_ARCHIVE_STREAM_HPP
+#define INCLUDED_ORCUS_ZIP_ARCHIVE_STREAM_HPP
+
+#include "env.hpp"
+#include <cstdlib>
+#include <cstdio>
+#include <cstdint>
+
+namespace orcus {
+
+/**
+ * Abstract interface of a seekable byte stream that zip_archive reads an
+ * archive from. All operations are pure virtual.
+ */
+class ORCUS_PSR_DLLPUBLIC zip_archive_stream
+{
+public:
+ virtual ~zip_archive_stream();
+
+ /** @return total size of the stream. */
+ virtual size_t size() const = 0;
+ /** @return current position in the stream. */
+ virtual size_t tell() const = 0;
+ /** @param pos position to move to. NOTE(review): presumably measured from the start of the stream - confirm. */
+ virtual void seek(size_t pos) = 0;
+ /**
+ * @param buffer buffer to receive the bytes read.
+ * @param length number of bytes to read. NOTE(review): whether the
+ * position advances, and what happens on a short read, is
+ * up to the implementations and not visible here.
+ */
+ virtual void read(unsigned char* buffer, size_t length) const = 0;
+};
+
+/**
+ * Zip archive based on file descriptor.  The caller needs to provide the
+ * file path to the zip archive.
+ *
+ * NOTE(review): this class stores a raw FILE pointer and declares a
+ * destructor, but does not delete its copy operations.  If the destructor
+ * closes the stream, copying an instance would lead to the stream being
+ * closed twice - confirm against the implementation.
+ */
+class ORCUS_PSR_DLLPUBLIC zip_archive_stream_fd : public zip_archive_stream
+{
+    FILE* m_stream;
+
+public:
+    zip_archive_stream_fd() = delete;
+
+    /**
+     * @param filepath path to the zip archive file.
+     */
+    zip_archive_stream_fd(const char* filepath);
+    ~zip_archive_stream_fd() override;
+
+    // Implementation of the zip_archive_stream interface.  'override' makes
+    // the compiler verify that each one matches a virtual function of the
+    // base class.
+    size_t size() const override;
+    size_t tell() const override;
+    void seek(size_t pos) override;
+    void read(unsigned char* buffer, size_t length) const override;
+};
+
+/**
+ * Zip archive whose content is already loaded onto memory.  The instance
+ * stores only pointers into the memory block plus its size.
+ * NOTE(review): the block is presumably not copied, in which case it must
+ * stay alive for as long as this instance is in use - confirm.
+ */
+class ORCUS_PSR_DLLPUBLIC zip_archive_stream_blob : public zip_archive_stream
+{
+    const uint8_t* m_blob;
+    const uint8_t* m_cur;
+    std::size_t m_size;
+
+public:
+    zip_archive_stream_blob() = delete;
+
+    /**
+     * @param blob pointer to the first byte of the in-memory archive.
+     * @param size size of the in-memory archive.
+     */
+    zip_archive_stream_blob(const uint8_t* blob, std::size_t size);
+    ~zip_archive_stream_blob() override;
+
+    // Implementation of the zip_archive_stream interface.  'override' makes
+    // the compiler verify that each one matches a virtual function of the
+    // base class.
+    size_t size() const override;
+    size_t tell() const override;
+    void seek(size_t pos) override;
+    void read(unsigned char* buffer, size_t length) const override;
+};
+
+}
+
+#endif
+/* vim:set shiftwidth=4 softtabstop=4 expandtab: */