summaryrefslogtreecommitdiffstats
path: root/src/boost/libs/wave/samples/cpp_tokens/slex
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-27 18:24:20 +0000
commit483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch)
treee5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/boost/libs/wave/samples/cpp_tokens/slex
parentInitial commit. (diff)
downloadceph-upstream.tar.xz
ceph-upstream.zip
Adding upstream version 14.2.21.upstream/14.2.21upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/boost/libs/wave/samples/cpp_tokens/slex')
-rw-r--r--src/boost/libs/wave/samples/cpp_tokens/slex/cpp_slex_lexer.hpp827
-rw-r--r--src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp2932
2 files changed, 3759 insertions, 0 deletions
diff --git a/src/boost/libs/wave/samples/cpp_tokens/slex/cpp_slex_lexer.hpp b/src/boost/libs/wave/samples/cpp_tokens/slex/cpp_slex_lexer.hpp
new file mode 100644
index 00000000..b37b21fb
--- /dev/null
+++ b/src/boost/libs/wave/samples/cpp_tokens/slex/cpp_slex_lexer.hpp
@@ -0,0 +1,827 @@
+/*=============================================================================
+ Boost.Wave: A Standard compliant C++ preprocessor library
+
+ SLex (Spirit Lex) based C++ lexer
+
+ http://www.boost.org/
+
+ Copyright (c) 2001-2012 Hartmut Kaiser. Distributed under the Boost
+ Software License, Version 1.0. (See accompanying file
+ LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+=============================================================================*/
+
+#if !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
+#define SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED
+
+#include <string>
+#if defined(BOOST_SPIRIT_DEBUG)
+#include <iostream>
+#endif // defined(BOOST_SPIRIT_DEBUG)
+
+#include <boost/assert.hpp>
+#include <boost/spirit/include/classic_core.hpp>
+
+#include <boost/wave/wave_config.hpp>
+#include <boost/wave/language_support.hpp>
+#include <boost/wave/token_ids.hpp>
+#include <boost/wave/util/file_position.hpp>
+#include <boost/wave/util/time_conversion_helper.hpp>
+#include <boost/wave/cpplexer/validate_universal_char.hpp>
+#include <boost/wave/cpplexer/convert_trigraphs.hpp>
+#include <boost/wave/cpplexer/cpplexer_exceptions.hpp>
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+#include <boost/wave/cpplexer/detect_include_guards.hpp>
+#endif
+#include <boost/wave/cpplexer/cpp_lex_interface.hpp>
+
+#include "../slex_interface.hpp"
+#include "../slex_token.hpp"
+#include "../slex_iterator.hpp"
+
+#include "lexer.hpp" // "spirit/lexer.hpp"
+
+///////////////////////////////////////////////////////////////////////////////
+namespace boost {
+namespace wave {
+namespace cpplexer {
+namespace slex {
+namespace lexer {
+
+///////////////////////////////////////////////////////////////////////////////
+// The following numbers are the array sizes of the token regex's which we
+// need to specify to make the CW compiler happy (at least up to V9.5).
+#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
+#define INIT_DATA_SIZE 175
+#else
+#define INIT_DATA_SIZE 158
+#endif
+#define INIT_DATA_CPP_SIZE 15
+#define INIT_DATA_PP_NUMBER_SIZE 2
+#define INIT_DATA_CPP0X_SIZE 15
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// encapsulation of the boost::spirit::classic::slex based cpp lexer
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+// The following lexer_base class was necessary to workaround a CodeWarrior
+// bug (at least up to CW V9.5).
+template <typename IteratorT, typename PositionT>
+class lexer_base
+: public boost::spirit::classic::lexer<
+ boost::wave::util::position_iterator<IteratorT, PositionT> >
+{
+protected:
+ typedef boost::wave::util::position_iterator<IteratorT, PositionT>
+ iterator_type;
+ typedef typename std::iterator_traits<IteratorT>::value_type char_type;
+ typedef boost::spirit::classic::lexer<iterator_type> base_type;
+
+ lexer_base();
+
+// initialization data (regular expressions for the token definitions)
+ struct lexer_data {
+ token_id tokenid; // token data
+ char_type const *tokenregex; // associated token to match
+ typename base_type::callback_t tokencb; // associated callback function
+ unsigned int lexerstate; // valid for lexer state
+ };
+};
+
+///////////////////////////////////////////////////////////////////////////////
+template <typename IteratorT, typename PositionT>
+class lexer
+: public lexer_base<IteratorT, PositionT>
+{
+public:
+ typedef boost::wave::cpplexer::slex_token<PositionT> token_type;
+
+ void init_dfa(boost::wave::language_support language);
+
+// get time of last compilation
+ static std::time_t get_compilation_time()
+ { return compilation_time.get_time(); }
+
+// helper for calculation of the time of last compilation
+ static boost::wave::util::time_conversion_helper compilation_time;
+
+private:
+ typedef lexer_base<IteratorT, PositionT> base_type;
+
+ static typename base_type::lexer_data const init_data[INIT_DATA_SIZE]; // common patterns
+ static typename base_type::lexer_data const init_data_cpp[INIT_DATA_CPP_SIZE]; // C++ only patterns
+ static typename base_type::lexer_data const init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE]; // pp-number only patterns
+ static typename base_type::lexer_data const init_data_cpp0x[INIT_DATA_CPP0X_SIZE]; // C++0X only patterns
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// data required for initialization of the lexer (token definitions)
+#define OR "|"
+#define Q(c) "\\" c
+#define TRI(c) Q("?") Q("?") c
+
+// definition of some sub-token regexps to simplify the regex definitions
+#define BLANK "[ \\t]"
+#define CCOMMENT \
+ Q("/") Q("*") "[^*]*" Q("*") "+" "(" "[^/*][^*]*" Q("*") "+" ")*" Q("/")
+
+#define PPSPACE "(" BLANK OR CCOMMENT ")*"
+
+#define OCTALDIGIT "[0-7]"
+#define DIGIT "[0-9]"
+#define HEXDIGIT "[0-9a-fA-F]"
+#define OPTSIGN "[-+]?"
+#define EXPSTART "[eE]" "[-+]"
+#define EXPONENT "(" "[eE]" OPTSIGN "[0-9]+" ")"
+#define NONDIGIT "[a-zA-Z_]"
+
+#define INTEGER \
+ "(" "(0x|0X)" HEXDIGIT "+" OR "0" OCTALDIGIT "*" OR "[1-9]" DIGIT "*" ")"
+
+#define INTEGER_SUFFIX "(" "[uU][lL]?|[lL][uU]?" ")"
+#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
+#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
+ "(" "[lL][lL]" ")" "[uU]" "?" OR \
+ "i64" \
+ ")"
+#else
+#define LONGINTEGER_SUFFIX "(" "[uU]" "(" "[lL][lL]" ")" OR \
+ "(" "[lL][lL]" ")" "[uU]" "?" ")"
+#endif
+#define FLOAT_SUFFIX "(" "[fF][lL]?" OR "[lL][fF]?" ")"
+#define CHAR_SPEC "L?"
+#define EXTCHAR_SPEC "(" "[uU]" OR "u8" ")"
+
+#define BACKSLASH "(" Q("\\") OR TRI(Q("/")) ")"
+#define ESCAPESEQ "(" BACKSLASH "(" \
+ "[abfnrtv?'\"]" OR \
+ BACKSLASH OR \
+ "x" HEXDIGIT "+" OR \
+ OCTALDIGIT OCTALDIGIT "?" OCTALDIGIT "?" \
+ "))"
+#define HEXQUAD "(" HEXDIGIT HEXDIGIT HEXDIGIT HEXDIGIT ")"
+#define UNIVERSALCHAR "(" BACKSLASH "(" \
+ "u" HEXQUAD OR \
+ "U" HEXQUAD HEXQUAD \
+ "))"
+
+#define POUNDDEF "(" "#" OR TRI("=") OR Q("%:") ")"
+#define NEWLINEDEF "(" "\n" OR "\r" OR "\r\n" ")"
+
+#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
+#define INCLUDEDEF "(include|include_next)"
+#else
+#define INCLUDEDEF "include"
+#endif
+
+#define PP_NUMBERDEF Q(".") "?" DIGIT "(" DIGIT OR NONDIGIT OR EXPSTART OR Q(".") ")*"
+
+///////////////////////////////////////////////////////////////////////////////
+// lexer state constants
+#define LEXER_STATE_NORMAL 0
+#define LEXER_STATE_PP 1
+
+#define NUM_LEXER_STATES 1
+
+// helper for initializing token data
+#define TOKEN_DATA(id, regex) \
+ { T_##id, regex, 0, LEXER_STATE_NORMAL } \
+ /**/
+
+#define TOKEN_DATA_EX(id, regex, callback) \
+ { T_##id, regex, callback, LEXER_STATE_NORMAL } \
+ /**/
+
+///////////////////////////////////////////////////////////////////////////////
+// common C++/C99 token definitions
+template <typename IteratorT, typename PositionT>
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data[INIT_DATA_SIZE] =
+{
+ TOKEN_DATA(AND, "&"),
+ TOKEN_DATA(ANDAND, "&&"),
+ TOKEN_DATA(ASSIGN, "="),
+ TOKEN_DATA(ANDASSIGN, "&="),
+ TOKEN_DATA(OR, Q("|")),
+ TOKEN_DATA(OR_TRIGRAPH, TRI("!")),
+ TOKEN_DATA(ORASSIGN, Q("|=")),
+ TOKEN_DATA(ORASSIGN_TRIGRAPH, TRI("!=")),
+ TOKEN_DATA(XOR, Q("^")),
+ TOKEN_DATA(XOR_TRIGRAPH, TRI("'")),
+ TOKEN_DATA(XORASSIGN, Q("^=")),
+ TOKEN_DATA(XORASSIGN_TRIGRAPH, TRI("'=")),
+ TOKEN_DATA(COMMA, ","),
+ TOKEN_DATA(COLON, ":"),
+ TOKEN_DATA(DIVIDEASSIGN, Q("/=")),
+ TOKEN_DATA(DIVIDE, Q("/")),
+ TOKEN_DATA(DOT, Q(".")),
+ TOKEN_DATA(ELLIPSIS, Q(".") Q(".") Q(".")),
+ TOKEN_DATA(EQUAL, "=="),
+ TOKEN_DATA(GREATER, ">"),
+ TOKEN_DATA(GREATEREQUAL, ">="),
+ TOKEN_DATA(LEFTBRACE, Q("{")),
+ TOKEN_DATA(LEFTBRACE_ALT, "<" Q("%")),
+ TOKEN_DATA(LEFTBRACE_TRIGRAPH, TRI("<")),
+ TOKEN_DATA(LESS, "<"),
+ TOKEN_DATA(LESSEQUAL, "<="),
+ TOKEN_DATA(LEFTPAREN, Q("(")),
+ TOKEN_DATA(LEFTBRACKET, Q("[")),
+ TOKEN_DATA(LEFTBRACKET_ALT, "<:"),
+ TOKEN_DATA(LEFTBRACKET_TRIGRAPH, TRI(Q("("))),
+ TOKEN_DATA(MINUS, Q("-")),
+ TOKEN_DATA(MINUSASSIGN, Q("-=")),
+ TOKEN_DATA(MINUSMINUS, Q("-") Q("-")),
+ TOKEN_DATA(PERCENT, Q("%")),
+ TOKEN_DATA(PERCENTASSIGN, Q("%=")),
+ TOKEN_DATA(NOT, "!"),
+ TOKEN_DATA(NOTEQUAL, "!="),
+ TOKEN_DATA(OROR, Q("|") Q("|")),
+ TOKEN_DATA(OROR_TRIGRAPH, TRI("!") Q("|") OR Q("|") TRI("!") OR TRI("!") TRI("!")),
+ TOKEN_DATA(PLUS, Q("+")),
+ TOKEN_DATA(PLUSASSIGN, Q("+=")),
+ TOKEN_DATA(PLUSPLUS, Q("+") Q("+")),
+ TOKEN_DATA(ARROW, Q("->")),
+ TOKEN_DATA(QUESTION_MARK, Q("?")),
+ TOKEN_DATA(RIGHTBRACE, Q("}")),
+ TOKEN_DATA(RIGHTBRACE_ALT, Q("%>")),
+ TOKEN_DATA(RIGHTBRACE_TRIGRAPH, TRI(">")),
+ TOKEN_DATA(RIGHTPAREN, Q(")")),
+ TOKEN_DATA(RIGHTBRACKET, Q("]")),
+ TOKEN_DATA(RIGHTBRACKET_ALT, ":>"),
+ TOKEN_DATA(RIGHTBRACKET_TRIGRAPH, TRI(Q(")"))),
+ TOKEN_DATA(SEMICOLON, ";"),
+ TOKEN_DATA(SHIFTLEFT, "<<"),
+ TOKEN_DATA(SHIFTLEFTASSIGN, "<<="),
+ TOKEN_DATA(SHIFTRIGHT, ">>"),
+ TOKEN_DATA(SHIFTRIGHTASSIGN, ">>="),
+ TOKEN_DATA(STAR, Q("*")),
+ TOKEN_DATA(COMPL, Q("~")),
+ TOKEN_DATA(COMPL_TRIGRAPH, TRI("-")),
+ TOKEN_DATA(STARASSIGN, Q("*=")),
+ TOKEN_DATA(ASM, "asm"),
+ TOKEN_DATA(AUTO, "auto"),
+ TOKEN_DATA(BOOL, "bool"),
+ TOKEN_DATA(FALSE, "false"),
+ TOKEN_DATA(TRUE, "true"),
+ TOKEN_DATA(BREAK, "break"),
+ TOKEN_DATA(CASE, "case"),
+ TOKEN_DATA(CATCH, "catch"),
+ TOKEN_DATA(CHAR, "char"),
+ TOKEN_DATA(CLASS, "class"),
+ TOKEN_DATA(CONST, "const"),
+ TOKEN_DATA(CONSTCAST, "const_cast"),
+ TOKEN_DATA(CONTINUE, "continue"),
+ TOKEN_DATA(DEFAULT, "default"),
+ TOKEN_DATA(DELETE, "delete"),
+ TOKEN_DATA(DO, "do"),
+ TOKEN_DATA(DOUBLE, "double"),
+ TOKEN_DATA(DYNAMICCAST, "dynamic_cast"),
+ TOKEN_DATA(ELSE, "else"),
+ TOKEN_DATA(ENUM, "enum"),
+ TOKEN_DATA(EXPLICIT, "explicit"),
+ TOKEN_DATA(EXPORT, "export"),
+ TOKEN_DATA(EXTERN, "extern"),
+ TOKEN_DATA(FLOAT, "float"),
+ TOKEN_DATA(FOR, "for"),
+ TOKEN_DATA(FRIEND, "friend"),
+ TOKEN_DATA(GOTO, "goto"),
+ TOKEN_DATA(IF, "if"),
+ TOKEN_DATA(INLINE, "inline"),
+ TOKEN_DATA(INT, "int"),
+ TOKEN_DATA(LONG, "long"),
+ TOKEN_DATA(MUTABLE, "mutable"),
+ TOKEN_DATA(NAMESPACE, "namespace"),
+ TOKEN_DATA(NEW, "new"),
+ TOKEN_DATA(OPERATOR, "operator"),
+ TOKEN_DATA(PRIVATE, "private"),
+ TOKEN_DATA(PROTECTED, "protected"),
+ TOKEN_DATA(PUBLIC, "public"),
+ TOKEN_DATA(REGISTER, "register"),
+ TOKEN_DATA(REINTERPRETCAST, "reinterpret_cast"),
+ TOKEN_DATA(RETURN, "return"),
+ TOKEN_DATA(SHORT, "short"),
+ TOKEN_DATA(SIGNED, "signed"),
+ TOKEN_DATA(SIZEOF, "sizeof"),
+ TOKEN_DATA(STATIC, "static"),
+ TOKEN_DATA(STATICCAST, "static_cast"),
+ TOKEN_DATA(STRUCT, "struct"),
+ TOKEN_DATA(SWITCH, "switch"),
+ TOKEN_DATA(TEMPLATE, "template"),
+ TOKEN_DATA(THIS, "this"),
+ TOKEN_DATA(THROW, "throw"),
+ TOKEN_DATA(TRY, "try"),
+ TOKEN_DATA(TYPEDEF, "typedef"),
+ TOKEN_DATA(TYPEID, "typeid"),
+ TOKEN_DATA(TYPENAME, "typename"),
+ TOKEN_DATA(UNION, "union"),
+ TOKEN_DATA(UNSIGNED, "unsigned"),
+ TOKEN_DATA(USING, "using"),
+ TOKEN_DATA(VIRTUAL, "virtual"),
+ TOKEN_DATA(VOID, "void"),
+ TOKEN_DATA(VOLATILE, "volatile"),
+ TOKEN_DATA(WCHART, "wchar_t"),
+ TOKEN_DATA(WHILE, "while"),
+ TOKEN_DATA(PP_DEFINE, POUNDDEF PPSPACE "define"),
+ TOKEN_DATA(PP_IF, POUNDDEF PPSPACE "if"),
+ TOKEN_DATA(PP_IFDEF, POUNDDEF PPSPACE "ifdef"),
+ TOKEN_DATA(PP_IFNDEF, POUNDDEF PPSPACE "ifndef"),
+ TOKEN_DATA(PP_ELSE, POUNDDEF PPSPACE "else"),
+ TOKEN_DATA(PP_ELIF, POUNDDEF PPSPACE "elif"),
+ TOKEN_DATA(PP_ENDIF, POUNDDEF PPSPACE "endif"),
+ TOKEN_DATA(PP_ERROR, POUNDDEF PPSPACE "error"),
+ TOKEN_DATA(PP_QHEADER, POUNDDEF PPSPACE \
+ INCLUDEDEF PPSPACE Q("\"") "[^\\n\\r\"]+" Q("\"")),
+ TOKEN_DATA(PP_HHEADER, POUNDDEF PPSPACE \
+ INCLUDEDEF PPSPACE "<" "[^\\n\\r>]+" ">"),
+ TOKEN_DATA(PP_INCLUDE, POUNDDEF PPSPACE \
+ INCLUDEDEF PPSPACE),
+ TOKEN_DATA(PP_LINE, POUNDDEF PPSPACE "line"),
+ TOKEN_DATA(PP_PRAGMA, POUNDDEF PPSPACE "pragma"),
+ TOKEN_DATA(PP_UNDEF, POUNDDEF PPSPACE "undef"),
+ TOKEN_DATA(PP_WARNING, POUNDDEF PPSPACE "warning"),
+#if BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
+ TOKEN_DATA(MSEXT_INT8, "__int8"),
+ TOKEN_DATA(MSEXT_INT16, "__int16"),
+ TOKEN_DATA(MSEXT_INT32, "__int32"),
+ TOKEN_DATA(MSEXT_INT64, "__int64"),
+ TOKEN_DATA(MSEXT_BASED, "_?" "_based"),
+ TOKEN_DATA(MSEXT_DECLSPEC, "_?" "_declspec"),
+ TOKEN_DATA(MSEXT_CDECL, "_?" "_cdecl"),
+ TOKEN_DATA(MSEXT_FASTCALL, "_?" "_fastcall"),
+ TOKEN_DATA(MSEXT_STDCALL, "_?" "_stdcall"),
+ TOKEN_DATA(MSEXT_TRY , "__try"),
+ TOKEN_DATA(MSEXT_EXCEPT, "__except"),
+ TOKEN_DATA(MSEXT_FINALLY, "__finally"),
+ TOKEN_DATA(MSEXT_LEAVE, "__leave"),
+ TOKEN_DATA(MSEXT_INLINE, "_?" "_inline"),
+ TOKEN_DATA(MSEXT_ASM, "_?" "_asm"),
+ TOKEN_DATA(MSEXT_PP_REGION, POUNDDEF PPSPACE "region"),
+ TOKEN_DATA(MSEXT_PP_ENDREGION, POUNDDEF PPSPACE "endregion"),
+#endif // BOOST_WAVE_SUPPORT_MS_EXTENSIONS != 0
+// TOKEN_DATA(OCTALINT, "0" OCTALDIGIT "*" INTEGER_SUFFIX "?"),
+// TOKEN_DATA(DECIMALINT, "[1-9]" DIGIT "*" INTEGER_SUFFIX "?"),
+// TOKEN_DATA(HEXAINT, "(0x|0X)" HEXDIGIT "+" INTEGER_SUFFIX "?"),
+ TOKEN_DATA(LONGINTLIT, INTEGER LONGINTEGER_SUFFIX),
+ TOKEN_DATA(INTLIT, INTEGER INTEGER_SUFFIX "?"),
+ TOKEN_DATA(FLOATLIT,
+ "(" DIGIT "*" Q(".") DIGIT "+" OR DIGIT "+" Q(".") ")"
+ EXPONENT "?" FLOAT_SUFFIX "?" OR
+ DIGIT "+" EXPONENT FLOAT_SUFFIX "?"),
+ TOKEN_DATA(CCOMMENT, CCOMMENT),
+ TOKEN_DATA(CPPCOMMENT, Q("/") Q("/[^\\n\\r]*") NEWLINEDEF ),
+ TOKEN_DATA(CHARLIT, CHAR_SPEC "'"
+ "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
+ TOKEN_DATA(STRINGLIT, CHAR_SPEC Q("\"")
+ "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
+#if BOOST_WAVE_USE_STRICT_LEXER != 0
+ TOKEN_DATA(IDENTIFIER, "([a-zA-Z_]" OR UNIVERSALCHAR ")([a-zA-Z0-9_]" OR UNIVERSALCHAR ")*"),
+#else
+ TOKEN_DATA(IDENTIFIER, "([a-zA-Z_$]" OR UNIVERSALCHAR ")([a-zA-Z0-9_$]" OR UNIVERSALCHAR ")*"),
+#endif
+ TOKEN_DATA(SPACE, "[ \t\v\f]+"),
+// TOKEN_DATA(SPACE2, "[\\v\\f]+"),
+ TOKEN_DATA(CONTLINE, Q("\\") "\n"),
+ TOKEN_DATA(NEWLINE, NEWLINEDEF),
+ TOKEN_DATA(POUND_POUND, "##"),
+ TOKEN_DATA(POUND_POUND_ALT, Q("%:") Q("%:")),
+ TOKEN_DATA(POUND_POUND_TRIGRAPH, TRI("=") TRI("=")),
+ TOKEN_DATA(POUND, "#"),
+ TOKEN_DATA(POUND_ALT, Q("%:")),
+ TOKEN_DATA(POUND_TRIGRAPH, TRI("=")),
+ TOKEN_DATA(ANY_TRIGRAPH, TRI(Q("/"))),
+ TOKEN_DATA(ANY, "."), // this should be the last recognized token
+ { token_id(0) } // this should be the last entry
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// C++ only token definitions
+template <typename IteratorT, typename PositionT>
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data_cpp[INIT_DATA_CPP_SIZE] =
+{
+ TOKEN_DATA(AND_ALT, "bitand"),
+ TOKEN_DATA(ANDASSIGN_ALT, "and_eq"),
+ TOKEN_DATA(ANDAND_ALT, "and"),
+ TOKEN_DATA(OR_ALT, "bitor"),
+ TOKEN_DATA(ORASSIGN_ALT, "or_eq"),
+ TOKEN_DATA(OROR_ALT, "or"),
+ TOKEN_DATA(XORASSIGN_ALT, "xor_eq"),
+ TOKEN_DATA(XOR_ALT, "xor"),
+ TOKEN_DATA(NOTEQUAL_ALT, "not_eq"),
+ TOKEN_DATA(NOT_ALT, "not"),
+ TOKEN_DATA(COMPL_ALT, "compl"),
+#if BOOST_WAVE_SUPPORT_IMPORT_KEYWORD != 0
+ TOKEN_DATA(IMPORT, "import"),
+#endif
+ TOKEN_DATA(ARROWSTAR, Q("->") Q("*")),
+ TOKEN_DATA(DOTSTAR, Q(".") Q("*")),
+ TOKEN_DATA(COLON_COLON, "::"),
+ { token_id(0) } // this should be the last entry
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// C++ only token definitions
+template <typename IteratorT, typename PositionT>
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data_pp_number[INIT_DATA_PP_NUMBER_SIZE] =
+{
+ TOKEN_DATA(PP_NUMBER, PP_NUMBERDEF),
+ { token_id(0) } // this should be the last entry
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// C++ only token definitions
+
+#define T_EXTCHARLIT token_id(T_CHARLIT|AltTokenType)
+#define T_EXTSTRINGLIT token_id(T_STRINGLIT|AltTokenType)
+#define T_EXTRAWSTRINGLIT token_id(T_RAWSTRINGLIT|AltTokenType)
+
+template <typename IteratorT, typename PositionT>
+typename lexer_base<IteratorT, PositionT>::lexer_data const
+lexer<IteratorT, PositionT>::init_data_cpp0x[INIT_DATA_CPP0X_SIZE] =
+{
+ TOKEN_DATA(EXTCHARLIT, EXTCHAR_SPEC "'"
+ "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\']" ")+" "'"),
+ TOKEN_DATA(EXTSTRINGLIT, EXTCHAR_SPEC Q("\"")
+ "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\n\\r\\\\\"]" ")*" Q("\"")),
+ TOKEN_DATA(RAWSTRINGLIT, CHAR_SPEC "R" Q("\"")
+ "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
+ TOKEN_DATA(EXTRAWSTRINGLIT, EXTCHAR_SPEC "R" Q("\"")
+ "(" ESCAPESEQ OR UNIVERSALCHAR OR "[^\\\\\"]" ")*" Q("\"")),
+ TOKEN_DATA(ALIGNAS, "alignas"),
+ TOKEN_DATA(ALIGNOF, "alignof"),
+ TOKEN_DATA(CHAR16_T, "char16_t"),
+ TOKEN_DATA(CHAR32_T, "char32_t"),
+ TOKEN_DATA(CONSTEXPR, "constexpr"),
+ TOKEN_DATA(DECLTYPE, "decltype"),
+ TOKEN_DATA(NOEXCEPT, "noexcept"),
+ TOKEN_DATA(NULLPTR, "nullptr"),
+ TOKEN_DATA(STATICASSERT, "static_assert"),
+ TOKEN_DATA(THREADLOCAL, "threadlocal"),
+ { token_id(0) } // this should be the last entry
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// undefine macros, required for regular expression definitions
+#undef INCLUDEDEF
+#undef POUNDDEF
+#undef CCOMMENT
+#undef PPSPACE
+#undef DIGIT
+#undef OCTALDIGIT
+#undef HEXDIGIT
+#undef NONDIGIT
+#undef OPTSIGN
+#undef EXPSTART
+#undef EXPONENT
+#undef LONGINTEGER_SUFFIX
+#undef INTEGER_SUFFIX
+#undef INTEGER
+#undef FLOAT_SUFFIX
+#undef CHAR_SPEC
+#undef BACKSLASH
+#undef ESCAPESEQ
+#undef HEXQUAD
+#undef UNIVERSALCHAR
+#undef PP_NUMBERDEF
+
+#undef Q
+#undef TRI
+#undef OR
+
+#undef TOKEN_DATA
+#undef TOKEN_DATA_EX
+
+///////////////////////////////////////////////////////////////////////////////
+// initialize cpp lexer with token data
+template <typename IteratorT, typename PositionT>
+inline
+lexer_base<IteratorT, PositionT>::lexer_base()
+: base_type(NUM_LEXER_STATES)
+{
+}
+
+template <typename IteratorT, typename PositionT>
+inline void
+lexer<IteratorT, PositionT>::init_dfa(boost::wave::language_support lang)
+{
+ if (this->has_compiled_dfa())
+ return;
+
+// if pp-numbers should be preferred, insert the corresponding rule first
+ if (boost::wave::need_prefer_pp_numbers(lang)) {
+ for (int j = 0; 0 != init_data_pp_number[j].tokenid; ++j) {
+ this->register_regex(init_data_pp_number[j].tokenregex,
+ init_data_pp_number[j].tokenid, init_data_pp_number[j].tokencb,
+ init_data_pp_number[j].lexerstate);
+ }
+ }
+
+// if in C99 mode, some of the keywords are not valid
+ if (!boost::wave::need_c99(lang)) {
+ for (int j = 0; 0 != init_data_cpp[j].tokenid; ++j) {
+ this->register_regex(init_data_cpp[j].tokenregex,
+ init_data_cpp[j].tokenid, init_data_cpp[j].tokencb,
+ init_data_cpp[j].lexerstate);
+ }
+ }
+
+// if in C++0x mode, add all new keywords
+#if BOOST_WAVE_SUPPORT_CPP0X != 0
+ if (boost::wave::need_cpp0x(lang)) {
+ for (int j = 0; 0 != init_data_cpp0x[j].tokenid; ++j) {
+ this->register_regex(init_data_cpp0x[j].tokenregex,
+ init_data_cpp0x[j].tokenid, init_data_cpp0x[j].tokencb,
+ init_data_cpp0x[j].lexerstate);
+ }
+ }
+#endif
+
+ for (int i = 0; 0 != init_data[i].tokenid; ++i) {
+ this->register_regex(init_data[i].tokenregex, init_data[i].tokenid,
+ init_data[i].tokencb, init_data[i].lexerstate);
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+// get time of last compilation of this file
+template <typename IteratorT, typename PositionT>
+boost::wave::util::time_conversion_helper
+ lexer<IteratorT, PositionT>::compilation_time(__DATE__ " " __TIME__);
+
+///////////////////////////////////////////////////////////////////////////////
+} // namespace lexer
+
+///////////////////////////////////////////////////////////////////////////////
+//
+template <typename IteratorT, typename PositionT>
+inline void
+init_lexer (lexer::lexer<IteratorT, PositionT> &lexer,
+ boost::wave::language_support language, bool force_reinit = false)
+{
+ if (lexer.has_compiled_dfa())
+ return; // nothing to do
+
+ using std::ifstream;
+ using std::ofstream;
+ using std::ios;
+ using std::cerr;
+ using std::endl;
+
+ifstream dfa_in("wave_slex_lexer.dfa", ios::in|ios::binary);
+
+ lexer.init_dfa(language);
+ if (force_reinit || !dfa_in.is_open() ||
+ !lexer.load (dfa_in, (long)lexer.get_compilation_time()))
+ {
+#if defined(BOOST_SPIRIT_DEBUG)
+ cerr << "Compiling regular expressions for slex ...";
+#endif // defined(BOOST_SPIRIT_DEBUG)
+
+ dfa_in.close();
+ lexer.create_dfa();
+
+ ofstream dfa_out ("wave_slex_lexer.dfa", ios::out|ios::binary|ios::trunc);
+
+ if (dfa_out.is_open())
+ lexer.save (dfa_out, (long)lexer.get_compilation_time());
+
+#if defined(BOOST_SPIRIT_DEBUG)
+ cerr << " Done." << endl;
+#endif // defined(BOOST_SPIRIT_DEBUG)
+ }
+}
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// lex_functor
+//
+///////////////////////////////////////////////////////////////////////////////
+
+template <typename IteratorT, typename PositionT = wave::util::file_position_type>
+class slex_functor
+: public slex_input_interface<
+ typename lexer::lexer<IteratorT, PositionT>::token_type
+ >
+{
+public:
+
+ typedef boost::wave::util::position_iterator<IteratorT, PositionT>
+ iterator_type;
+ typedef typename std::iterator_traits<IteratorT>::value_type char_type;
+ typedef BOOST_WAVE_STRINGTYPE string_type;
+ typedef typename lexer::lexer<IteratorT, PositionT>::token_type token_type;
+
+ slex_functor(IteratorT const &first_, IteratorT const &last_,
+ PositionT const &pos_, boost::wave::language_support language_)
+ : first(first_, last_, pos_), language(language_), at_eof(false)
+ {
+ // initialize lexer dfa tables
+ init_lexer(lexer, language_);
+ }
+ virtual ~slex_functor() {}
+
+// get the next token from the input stream
+ token_type& get(token_type& result)
+ {
+ if (!at_eof) {
+ do {
+ // generate and return the next token
+ std::string value;
+ PositionT pos = first.get_position(); // begin of token position
+ token_id id = token_id(lexer.next_token(first, last, &value));
+
+ if ((token_id)(-1) == id)
+ id = T_EOF; // end of input reached
+
+ string_type token_val(value.c_str());
+
+ if (boost::wave::need_emit_contnewlines(language) ||
+ T_CONTLINE != id)
+ {
+ // The cast should avoid spurious warnings about missing case labels
+ // for the other token ids's.
+ switch (id) {
+ case T_IDENTIFIER:
+ // test identifier characters for validity (throws if
+ // invalid chars found)
+ if (!boost::wave::need_no_character_validation(language)) {
+ using boost::wave::cpplexer::impl::validate_identifier_name;
+ validate_identifier_name(token_val,
+ pos.get_line(), pos.get_column(), pos.get_file());
+ }
+ break;
+
+ case T_EXTCHARLIT:
+ case T_EXTSTRINGLIT:
+ case T_EXTRAWSTRINGLIT:
+ id = token_id(id & ~AltTokenType);
+ BOOST_FALLTHROUGH;
+
+ case T_CHARLIT:
+ case T_STRINGLIT:
+ case T_RAWSTRINGLIT:
+ // test literal characters for validity (throws if invalid
+ // chars found)
+ if (boost::wave::need_convert_trigraphs(language)) {
+ using boost::wave::cpplexer::impl::convert_trigraphs;
+ token_val = convert_trigraphs(token_val);
+ }
+ if (!boost::wave::need_no_character_validation(language)) {
+ using boost::wave::cpplexer::impl::validate_literal;
+ validate_literal(token_val,
+ pos.get_line(), pos.get_column(), pos.get_file());
+ }
+ break;
+
+ case T_LONGINTLIT: // supported in C99 and long_long mode
+ if (!boost::wave::need_long_long(language)) {
+ // syntax error: not allowed in C++ mode
+ BOOST_WAVE_LEXER_THROW(
+ boost::wave::cpplexer::lexing_exception,
+ invalid_long_long_literal, value.c_str(),
+ pos.get_line(), pos.get_column(),
+ pos.get_file().c_str());
+ }
+ break;
+
+#if BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
+ case T_PP_HHEADER:
+ case T_PP_QHEADER:
+ case T_PP_INCLUDE:
+ // convert to the corresponding ..._next token, if appropriate
+ {
+ // Skip '#' and whitespace and see whether we find an
+ // 'include_next' here.
+ typename string_type::size_type start = value.find("include");
+ if (0 == value.compare(start, 12, "include_next", 12))
+ id = token_id(id | AltTokenType);
+ break;
+ }
+#endif // BOOST_WAVE_SUPPORT_INCLUDE_NEXT != 0
+
+ case T_EOF:
+ // T_EOF is returned as a valid token, the next call will
+ // return T_EOI, i.e. the actual end of input
+ at_eof = true;
+ token_val.clear();
+ break;
+
+ case T_OR_TRIGRAPH:
+ case T_XOR_TRIGRAPH:
+ case T_LEFTBRACE_TRIGRAPH:
+ case T_RIGHTBRACE_TRIGRAPH:
+ case T_LEFTBRACKET_TRIGRAPH:
+ case T_RIGHTBRACKET_TRIGRAPH:
+ case T_COMPL_TRIGRAPH:
+ case T_POUND_TRIGRAPH:
+ case T_ANY_TRIGRAPH:
+ if (boost::wave::need_convert_trigraphs(language))
+ {
+ using boost::wave::cpplexer::impl::convert_trigraph;
+ token_val = convert_trigraph(token_val);
+ }
+ break;
+ }
+
+ result = token_type(id, token_val, pos);
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+ return guards.detect_guard(result);
+#else
+ return result;
+#endif
+ }
+
+ // skip the T_CONTLINE token
+ } while (true);
+ }
+ return result = token_type(); // return T_EOI
+ }
+
+ void set_position(PositionT const &pos)
+ {
+ // set position has to change the file name and line number only
+ first.get_position().set_file(pos.get_file());
+ first.get_position().set_line(pos.get_line());
+ }
+
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+ bool has_include_guards(std::string& guard_name) const
+ { return guards.detected(guard_name); }
+#endif
+
+private:
+ iterator_type first;
+ iterator_type last;
+ boost::wave::language_support language;
+ static lexer::lexer<IteratorT, PositionT> lexer; // needed only once
+
+ bool at_eof;
+
+#if BOOST_WAVE_SUPPORT_PRAGMA_ONCE != 0
+ include_guards<token_type> guards;
+#endif
+};
+
+template <typename IteratorT, typename PositionT>
+lexer::lexer<IteratorT, PositionT> slex_functor<IteratorT, PositionT>::lexer;
+
+#undef T_EXTCHARLIT
+#undef T_EXTSTRINGLIT
+#undef T_EXTRAWSTRINGLIT
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// The 'new_lexer' function allows the opaque generation of a new lexer object.
+// It is coupled to the iterator type to allow to decouple the lexer/iterator
+// configurations at compile time.
+//
+// This function is declared inside the cpp_slex_token.hpp file, which is
+// referenced by the source file calling the lexer and the source file, which
+// instantiates the lex_functor. But it is defined here, so it will be
+// instantiated only while compiling the source file, which instantiates the
+// lex_functor. While the cpp_slex_token.hpp file may be included everywhere,
+// this file (cpp_slex_lexer.hpp) should be included only once. This allows
+// to decouple the lexer interface from the lexer implementation and reduces
+// compilation time.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// The new_lexer_gen<>::new_lexer function (declared in cpp_slex_token.hpp)
+// should be defined inline, if the lex_functor shouldn't be instantiated
+// separately from the lex_iterator.
+//
+// Separate (explicit) instantiation helps to reduce compilation time.
+//
+///////////////////////////////////////////////////////////////////////////////
+
+#if BOOST_WAVE_SEPARATE_LEXER_INSTANTIATION != 0
+#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE
+#else
+#define BOOST_WAVE_SLEX_NEW_LEXER_INLINE inline
+#endif
+
+template <typename IteratorT, typename PositionT>
+BOOST_WAVE_SLEX_NEW_LEXER_INLINE
+lex_input_interface<slex_token<PositionT> > *
+new_lexer_gen<IteratorT, PositionT>::new_lexer(IteratorT const &first,
+ IteratorT const &last, PositionT const &pos,
+ boost::wave::language_support language)
+{
+ return new slex_functor<IteratorT, PositionT>(first, last, pos,
+ language);
+}
+
+#undef BOOST_WAVE_SLEX_NEW_LEXER_INLINE
+
+///////////////////////////////////////////////////////////////////////////////
+} // namespace slex
+} // namespace cpplexer
+} // namespace wave
+} // namespace boost
+
+#endif // !defined(SLEX_LEXER_HPP_5E8E1DF0_BB41_4938_B7E5_A4BB68222FF6_INCLUDED)
diff --git a/src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp b/src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp
new file mode 100644
index 00000000..32459cb4
--- /dev/null
+++ b/src/boost/libs/wave/samples/cpp_tokens/slex/lexer.hpp
@@ -0,0 +1,2932 @@
+/*=============================================================================
+ Boost.Wave: A Standard compliant C++ preprocessor library
+
+ Spirit based lexer
+
+ http://www.boost.org/
+
+ Copyright (c) 2001, Daniel C. Nuffer.
+ Copyright (c) 2001-2012 Hartmut Kaiser.
+ Distributed under the Boost Software License, Version 1.0. (See accompanying
+ file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+ TODO List:
+ X callback objects (called when a match is made.)
+ X callback passed first & last iterator, and
+ a reference to a lexercontrol object that supports some
+ operations on the lexer.
+ set state
+ terminate
+ state stack (push, pop, top)
+ set new token return value
+ ignore the current token
+ yymore
+ get length of matched token
+ get current lexer state
+ X DFA serialization to save recomputing the DFA.
+
+ lexer states.
+ organize the file into hpp and ipp. arrange the classes in a logical order.
+ use buffering - iterator only needs be an input iterator,
+ lexer & callback are not templatized on iterator type, but char type.
+ next_token is templatized on iterator type.
+ beginning/ending contexts.
+ ^ and $
+ DFA minimization.
+ DFA table compression.
+
+=============================================================================*/
+#ifndef BOOST_SPIRIT_LEXER_HPP
+#define BOOST_SPIRIT_LEXER_HPP
+
+///////////////////////////////////////////////////////////////////////////////
+#include <boost/config.hpp>
+#include <boost/throw_exception.hpp>
+
+#include <boost/spirit/include/classic_core.hpp>
+#include <boost/spirit/include/classic_symbols.hpp>
+#include <boost/spirit/include/classic_chset.hpp>
+#include <boost/spirit/include/classic_escape_char.hpp>
+
+#include <set>
+#include <map>
+#include <memory> // for auto_ptr/unique_ptr
+#include <vector>
+#include <stack>
+#include <utility> // for pair
+#include <iostream>
+#include <fstream>
+#include <boost/assert.hpp>
+#include <boost/limits.hpp>
+
+#if defined(BOOST_NO_STD_ITERATOR_TRAITS)
+#define BOOST_SPIRIT_IT_NS impl
+#else
+#define BOOST_SPIRIT_IT_NS std
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+namespace boost {
+namespace spirit {
+namespace classic {
+
+typedef unsigned char uchar;
+typedef unsigned int node_id_t;
+const node_id_t invalid_node = node_id_t(-1);
+typedef std::set<node_id_t> node_set;
+typedef std::vector<uchar> uchar_vector;
+typedef std::map<node_id_t, node_set> followpos_t;
+typedef std::vector<uchar_vector> state_match_t;
+
+template <typename TokenT>
+class lexer_control;
+
+class bad_regex : public std::exception
+{
+};
+
+namespace lexerimpl
+{
+
+class node
+{
+public:
+
+ virtual ~node() {}
+
+ virtual node* clone() const = 0;
+ virtual bool nullable() const = 0;
+ virtual node_set firstpos() const = 0;
+ virtual node_set lastpos() const = 0;
+ virtual void compute_followpos(followpos_t& followpos) const = 0;
+ virtual void compute_state_match(state_match_t& state_match) const = 0;
+ virtual void get_eof_ids(node_set& eof_set) const = 0;
+ virtual void assign_node_ids(node_id_t& node_count) = 0;
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const = 0;
+#endif
+
+};
+
+class char_node : public node
+{
+
+public:
+
+ char_node(const uchar c);
+ char_node(const char_node& x);
+ virtual ~char_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+ uchar m_char;
+ node_id_t m_node_num;
+};
+
+inline
+char_node::char_node(const uchar c)
+ : node()
+ , m_char(c)
+ , m_node_num(0)
+{
+}
+
+inline
+char_node::char_node(const char_node& x)
+ : node(x)
+ , m_char(x.m_char)
+ , m_node_num(x.m_node_num)
+{
+}
+
+inline node *
+char_node::clone() const
+{
+ return new char_node(*this);
+}
+
+inline bool
+char_node::nullable() const
+{
+ return false;
+}
+
+inline node_set
+char_node::firstpos() const
+{
+ node_set rval;
+ rval.insert(m_node_num);
+ return rval;
+}
+
+inline node_set
+char_node::lastpos() const
+{
+ return firstpos();
+}
+
+inline void
+char_node::compute_followpos(followpos_t&) const
+{
+ return;
+}
+
+inline void
+char_node::compute_state_match(state_match_t& state_match) const
+{
+ if (state_match.size() < m_node_num + 1)
+ state_match.resize(m_node_num + 1);
+ state_match[m_node_num].resize(256);
+ state_match[m_node_num][m_char] = 1;
+}
+
+inline void
+char_node::get_eof_ids(node_set&) const
+{
+ return;
+}
+
+inline void
+char_node::assign_node_ids(node_id_t& node_count)
+{
+ m_node_num = node_count++;
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+char_node::dump(std::ostream& out) const
+{
+ out << "\nchar_node m_char = " << m_char;
+ out << " m_node_num = " << m_node_num;
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+}
+#endif
+
+
+class epsilon_node : public node
+{
+
+public:
+
+ epsilon_node();
+ epsilon_node(const epsilon_node& x);
+ virtual ~epsilon_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+ node_id_t m_node_num;
+};
+
+inline
+epsilon_node::epsilon_node()
+ : node()
+ , m_node_num(0)
+{
+}
+
+inline
+epsilon_node::epsilon_node(const epsilon_node& x)
+ : node(x)
+ , m_node_num(x.m_node_num)
+{
+}
+
+inline node *
+epsilon_node::clone() const
+{
+ return new epsilon_node(*this);
+}
+
+inline bool
+epsilon_node::nullable() const
+{
+ return true;
+}
+
+inline node_set
+epsilon_node::firstpos() const
+{
+ return node_set();
+}
+
+inline node_set
+epsilon_node::lastpos() const
+{
+ return node_set();
+}
+
+inline void
+epsilon_node::compute_followpos(followpos_t&) const
+{
+ return;
+}
+
+inline void
+epsilon_node::compute_state_match(state_match_t& state_match) const
+{
+ if (state_match.size() < m_node_num + 1)
+ state_match.resize(m_node_num + 1);
+ state_match[m_node_num].resize(256, 1);
+}
+
+inline void
+epsilon_node::get_eof_ids(node_set&) const
+{
+ return;
+}
+
+inline void
+epsilon_node::assign_node_ids(node_id_t& node_count)
+{
+ m_node_num = node_count++;
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+epsilon_node::dump(std::ostream& out) const
+{
+ out << "\nepsilon_node";
+ out << " m_node_num = " << m_node_num;
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+}
+#endif
+
+
+class or_node : public node
+{
+
+public:
+
+ or_node(node* left, node* right);
+ or_node(const or_node& x);
+ virtual ~or_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ std::unique_ptr<node> m_left;
+ std::unique_ptr<node> m_right;
+#else
+ std::auto_ptr<node> m_left;
+ std::auto_ptr<node> m_right;
+#endif
+};
+
+inline
+or_node::or_node(node* left, node* right)
+ : node()
+ , m_left(left)
+ , m_right(right)
+{
+}
+
+inline
+or_node::or_node(const or_node& x)
+ : node(x)
+ , m_left(x.m_left->clone())
+ , m_right(x.m_right->clone())
+{
+}
+
+inline node *
+or_node::clone() const
+{
+ return new or_node(m_left->clone(), m_right->clone());
+}
+
+inline bool
+or_node::nullable() const
+{
+ return m_left->nullable() || m_right->nullable();
+}
+
+inline node_set
+or_node::firstpos() const
+{
+ node_set rval;
+ node_set l = m_left->firstpos();
+ node_set r = m_right->firstpos();
+ std::set_union(l.begin(), l.end(), r.begin(), r.end(),
+ std::inserter(rval, rval.begin()));
+ return rval;
+}
+
+inline node_set
+or_node::lastpos() const
+{
+ node_set rval;
+ node_set l = m_left->lastpos();
+ node_set r = m_right->lastpos();
+ std::set_union(l.begin(), l.end(), r.begin(), r.end(),
+ std::inserter(rval, rval.begin()));
+ return rval;
+}
+
+inline void
+or_node::compute_followpos(followpos_t& followpos) const
+{
+ m_left->compute_followpos(followpos);
+ m_right->compute_followpos(followpos);
+}
+
+inline void
+or_node::compute_state_match(state_match_t& state_match) const
+{
+ m_left->compute_state_match(state_match);
+ m_right->compute_state_match(state_match);
+}
+
+inline void
+or_node::get_eof_ids(node_set& eof_nodes) const
+{
+ m_left->get_eof_ids(eof_nodes);
+ m_right->get_eof_ids(eof_nodes);
+}
+
+inline void
+or_node::assign_node_ids(node_id_t& node_count)
+{
+ m_left->assign_node_ids(node_count);
+ m_right->assign_node_ids(node_count);
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+or_node::dump(std::ostream& out) const
+{
+ m_left->dump(out);
+
+ out << "\nor_node";
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ m_right->dump(out);
+}
+#endif
+
+
+class cat_node : public node
+{
+
+public:
+
+ cat_node(node* left, node* right);
+ cat_node(const cat_node& x);
+ virtual ~cat_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ std::unique_ptr<node> m_left;
+ std::unique_ptr<node> m_right;
+#else
+ std::auto_ptr<node> m_left;
+ std::auto_ptr<node> m_right;
+#endif
+};
+
+inline
+cat_node::cat_node(node* left, node* right)
+ : node()
+ , m_left(left)
+ , m_right(right)
+{
+}
+
+inline
+cat_node::cat_node(const cat_node& x)
+ : node(x)
+ , m_left(x.m_left->clone())
+ , m_right(x.m_right->clone())
+{
+}
+
+inline node *
+cat_node::clone() const
+{
+ return new cat_node(m_left->clone(), m_right->clone());
+}
+
+inline bool
+cat_node::nullable() const
+{
+ return m_left->nullable() && m_right->nullable();
+}
+
+inline node_set
+cat_node::firstpos() const
+{
+ if (m_left->nullable())
+ {
+ node_set rval;
+ node_set l = m_left->firstpos();
+ node_set r = m_right->firstpos();
+ std::set_union(l.begin(), l.end(), r.begin(), r.end(),
+ std::inserter(rval, rval.begin()));
+ return rval;
+ }
+ else
+ {
+ return m_left->firstpos();
+ }
+}
+
+inline node_set
+cat_node::lastpos() const
+{
+ if (m_right->nullable())
+ {
+ node_set rval;
+ node_set l = m_left->lastpos();
+ node_set r = m_right->lastpos();
+ std::set_union(l.begin(), l.end(), r.begin(), r.end(),
+ std::inserter(rval, rval.begin()));
+ return rval;
+ }
+ else
+ {
+ return m_right->lastpos();
+ }
+}
+
+inline void
+cat_node::compute_followpos(followpos_t& followpos) const
+{
+ node_set l = m_left->lastpos();
+ for (node_set::iterator i = l.begin();
+ i != l.end();
+ ++i)
+ {
+ node_set rf = m_right->firstpos();
+ followpos[*i].insert(rf.begin(), rf.end());
+ }
+
+ m_left->compute_followpos(followpos);
+ m_right->compute_followpos(followpos);
+}
+
+inline void
+cat_node::compute_state_match(state_match_t& state_match) const
+{
+ m_left->compute_state_match(state_match);
+ m_right->compute_state_match(state_match);
+}
+
+inline void
+cat_node::get_eof_ids(node_set& eof_nodes) const
+{
+ m_left->get_eof_ids(eof_nodes);
+ m_right->get_eof_ids(eof_nodes);
+}
+
+inline void
+cat_node::assign_node_ids(node_id_t& node_count)
+{
+ m_left->assign_node_ids(node_count);
+ m_right->assign_node_ids(node_count);
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+cat_node::dump(std::ostream& out) const
+{
+ m_left->dump(out);
+
+ out << "\ncat_node";
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ m_right->dump(out);
+}
+#endif
+
+
+class star_node : public node
+{
+
+public:
+
+ star_node(node* left);
+ star_node(const star_node& x);
+ virtual ~star_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ std::unique_ptr<node> m_left;
+#else
+ std::auto_ptr<node> m_left;
+#endif
+};
+
+inline
+star_node::star_node(node* left)
+ : node()
+ , m_left(left)
+{
+}
+
+inline
+star_node::star_node(const star_node& x)
+ : node(x)
+ , m_left(x.m_left->clone())
+{
+}
+
+inline node *
+star_node::clone() const
+{
+ return new star_node(m_left->clone());
+}
+
+inline bool
+star_node::nullable() const
+{
+ return true;
+}
+
+inline node_set
+star_node::firstpos() const
+{
+ return m_left->firstpos();
+}
+
+inline node_set
+star_node::lastpos() const
+{
+ return m_left->lastpos();
+}
+
+inline void
+star_node::compute_followpos(followpos_t& followpos) const
+{
+ node_set lp = this->lastpos();
+ for (node_set::iterator i = lp.begin();
+ i != lp.end();
+ ++i)
+ {
+ node_set fp = this->firstpos();
+ followpos[*i].insert(fp.begin(), fp.end());
+ }
+
+ m_left->compute_followpos(followpos);
+}
+
+inline void
+star_node::compute_state_match(state_match_t& state_match) const
+{
+ m_left->compute_state_match(state_match);
+}
+
+inline void
+star_node::get_eof_ids(node_set& eof_nodes) const
+{
+ m_left->get_eof_ids(eof_nodes);
+}
+
+inline void
+star_node::assign_node_ids(node_id_t& node_count)
+{
+ m_left->assign_node_ids(node_count);
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+star_node::dump(std::ostream& out) const
+{
+ m_left->dump(out);
+
+ out << "\nstar_node";
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+}
+#endif
+
+
+class eof_node : public node
+{
+
+public:
+
+ eof_node();
+ eof_node(const eof_node& x);
+ virtual ~eof_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+ node_id_t m_node_num;
+};
+
+inline
+eof_node::eof_node()
+ : node()
+ , m_node_num(0)
+{
+}
+
+inline
+eof_node::eof_node(const eof_node& x)
+ : node(x)
+ , m_node_num(x.m_node_num)
+{
+}
+
+inline node *
+eof_node::clone() const
+{
+ return new eof_node(*this);
+}
+
+inline bool
+eof_node::nullable() const
+{
+ return false;
+}
+
+inline node_set
+eof_node::firstpos() const
+{
+ node_set rval;
+ rval.insert(m_node_num);
+ return rval;
+}
+
+inline node_set
+eof_node::lastpos() const
+{
+ node_set rval;
+ rval.insert(m_node_num);
+ return rval;
+}
+
+inline void
+eof_node::compute_followpos(followpos_t&) const
+{
+ return;
+}
+
+inline void
+eof_node::compute_state_match(state_match_t& state_match) const
+{
+ if (state_match.size() < m_node_num + 1)
+ state_match.resize(m_node_num + 1);
+ state_match[m_node_num].resize(256, 0);
+}
+
+inline void
+eof_node::get_eof_ids(node_set& eof_nodes) const
+{
+ eof_nodes.insert(m_node_num);
+}
+
+inline void
+eof_node::assign_node_ids(node_id_t& node_count)
+{
+ m_node_num = node_count++;
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+eof_node::dump(std::ostream& out) const
+{
+ out << "\neof_node";
+ out << " m_node_num = " << m_node_num;
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+}
+#endif
+
+class ccl_node : public node
+{
+
+public:
+
+ ccl_node(const std::vector<uchar>& v);
+ ccl_node(const uchar c1, const uchar c2);
+ ccl_node(const ccl_node& x);
+ virtual ~ccl_node(){}
+
+ virtual node* clone() const;
+ virtual bool nullable() const;
+ virtual node_set firstpos() const;
+ virtual node_set lastpos() const;
+ virtual void compute_followpos(followpos_t& followpos) const;
+ virtual void compute_state_match(state_match_t& state_match ) const;
+ virtual void get_eof_ids(node_set& eof_set) const;
+ virtual void assign_node_ids(node_id_t& node_count);
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ virtual void dump(std::ostream& out) const;
+#endif
+
+private:
+
+ std::vector<uchar> m_match;
+ node_id_t m_node_num;
+};
+
+inline
+ccl_node::ccl_node(const std::vector<uchar>& v)
+ : node()
+ , m_match(v)
+ , m_node_num(0)
+{
+ m_match.resize(256); // make sure it's the right size
+}
+
+inline
+ccl_node::ccl_node(const uchar c1, const uchar c2)
+ : node()
+ , m_match(256, uchar(0))
+ , m_node_num(0)
+{
+ BOOST_ASSERT(c1 < c2);
+ for (std::size_t i = c1; i <= std::size_t(c2); ++i)
+ {
+ m_match[i] = 1;
+ }
+}
+
+inline
+ccl_node::ccl_node(const ccl_node& x)
+ : node(x)
+ , m_match(x.m_match)
+ , m_node_num(x.m_node_num)
+{
+}
+
+inline node *
+ccl_node::clone() const
+{
+ return new ccl_node(*this);
+}
+
+inline bool
+ccl_node::nullable() const
+{
+ return false;
+}
+
+inline node_set
+ccl_node::firstpos() const
+{
+ node_set rval;
+ rval.insert(m_node_num);
+ return rval;
+}
+
+inline node_set
+ccl_node::lastpos() const
+{
+ return firstpos();
+}
+
+inline void
+ccl_node::compute_followpos(followpos_t&) const
+{
+ return;
+}
+
+inline void
+ccl_node::compute_state_match(state_match_t& state_match) const
+{
+ if (state_match.size() < m_node_num + 1)
+ state_match.resize(m_node_num + 1);
+ state_match[m_node_num] = m_match;
+}
+
+inline void
+ccl_node::get_eof_ids(node_set&) const
+{
+ return;
+}
+
+inline void
+ccl_node::assign_node_ids(node_id_t& node_count)
+{
+ m_node_num = node_count++;
+}
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+inline void
+ccl_node::dump(std::ostream& out) const
+{
+ out << "\nccl_node m_match = ";
+ for (std::size_t i = 0; i < m_match.size(); ++i)
+ {
+ if (m_match[i])
+ out << i << ", ";
+ }
+ out << " m_node_num = " << m_node_num;
+ out << " nullable() = " << (nullable() ? "true" : "false");
+ out << " firstpos() = ";
+ node_set fp = firstpos();
+ std::copy(fp.begin(), fp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+
+ out << " lastpos() = ";
+ node_set lp = lastpos();
+ std::copy(lp.begin(), lp.end(),
+ std::ostream_iterator<node_id_t>(out, ","));
+}
+#endif
+
+template <typename ScannerT>
+class make_concat
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+
+ make_concat(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const &, iterator_type const &) const
+ {
+ node* right = m_stack.top();
+ m_stack.pop();
+ node* left = m_stack.top();
+ m_stack.pop();
+ node* newnode = new cat_node(left, right);
+ m_stack.push(newnode);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+template <int CharTSize>
+struct get_byte_aux;
+
+template<>
+struct get_byte_aux<1>
+{
+ template <typename CharT>
+ unsigned char operator()(CharT c, unsigned int byte)
+ {
+ BOOST_ASSERT(byte == 0);
+ return c;
+ }
+};
+
+template<>
+struct get_byte_aux<2>
+{
+ template <typename CharT>
+ unsigned char operator()(CharT c, unsigned int byte)
+ {
+ static unsigned long mask[] =
+ {
+ 0xFF00,
+ 0x00FF
+ };
+
+ BOOST_ASSERT(byte < 2);
+ return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
+ }
+};
+
+template<>
+struct get_byte_aux<4>
+{
+ template <typename CharT>
+ unsigned char operator()(CharT c, unsigned int byte)
+ {
+ static unsigned long mask[] =
+ {
+ 0xFF000000,
+ 0x00FF0000,
+ 0x0000FF00,
+ 0x000000FF
+ };
+
+ BOOST_ASSERT(byte < 4);
+ return (c & mask[byte]) >> ((sizeof(c) - 1 - byte) * 8);
+ }
+};
+
+template <typename CharT>
+inline unsigned char
+get_byte(CharT c, unsigned int byte)
+{
+ return get_byte_aux<sizeof(c)>()(c, byte);
+}
+
+template <typename ScannerT>
+class make_star
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ make_star(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(const char_t) const
+ {
+ node* left = m_stack.top();
+ m_stack.pop();
+ node* newnode = new star_node(left);
+ m_stack.push(newnode);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+template <typename ScannerT>
+class make_or
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+
+ make_or(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const&, iterator_type const&) const
+ {
+ node* right = m_stack.top();
+ m_stack.pop();
+ node* left = m_stack.top();
+ m_stack.pop();
+ node* newnode = new or_node(left, right);
+ m_stack.push(newnode);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+template <typename ScannerT>
+class make_plus
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ make_plus(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(const char_t) const
+ {
+ node* left = m_stack.top();
+ m_stack.pop();
+
+ node* copy = left->clone();
+
+ node* new_star = new star_node(copy);
+ node* new_cat = new cat_node(left, new_star);
+ m_stack.push(new_cat);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+template <typename ScannerT>
+class make_optional
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ make_optional(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(const char_t) const
+ {
+ node* left = m_stack.top();
+ m_stack.pop();
+
+ node* new_or = new or_node(left, new epsilon_node());
+ m_stack.push(new_or);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+///////////////////////////////////////////////////////////////////////////////
+// utility function
+template <typename CharT>
+inline utility::impl::range<CharT> const&
+full_range()
+{
+ static utility::impl::range<CharT> full((std::numeric_limits<CharT>::min)(),
+ (std::numeric_limits<CharT>::max)());
+ return full;
+}
+
+namespace ccl_utils
+{
+ template <typename char_t>
+ inline utility::impl::range_run<char_t>
+ negate_range_run(
+ const utility::impl::range_run<char_t>& rr)
+ {
+ utility::impl::range_run<char_t> newrr;
+ newrr.set(full_range<char_t>());
+ for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
+ iter != rr.end(); ++iter)
+ newrr.clear(*iter);
+ return newrr;
+ }
+
+ template <typename char_t>
+ inline node*
+ create_mb_node_seq(char_t c)
+ {
+ node* newnode = new char_node(get_byte(c, 0));
+ for (unsigned int i = 1; i < sizeof(c); ++i)
+ {
+ node* cnode = new char_node(get_byte(c, i));
+ node* top_node = new cat_node(newnode, cnode);
+ newnode = top_node;
+ }
+ return newnode;
+ }
+
+ template <typename char_t>
+ inline void
+ handle_mb_char(char_t c, bool first_time,
+ std::stack<node*>& stack)
+ {
+ node* newnode = create_mb_node_seq(c);
+
+ if (first_time)
+ {
+ stack.push(newnode);
+ }
+ else
+ {
+ node* top = stack.top();
+ stack.pop();
+
+ node* newtop = new or_node(top, newnode);
+ stack.push(newtop);
+ }
+ }
+
+ // forward decl only
+ template <typename char_t>
+ inline void
+ handle_mb_range(char_t c1, char_t c2, bool first_time,
+ std::stack<node*>& stack);
+
+ template <typename char_t>
+ inline void
+ create_nodes(const utility::impl::range_run<char_t>& rr,
+ std::stack<node*>& stack)
+ {
+
+ if (sizeof(char_t) == 1)
+ {
+ std::vector<uchar> ccl;
+ ccl.resize(256);
+ for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
+ iter != rr.end(); ++iter)
+ {
+ for (int i = iter->first; i <= iter->last; ++i)
+ {
+// this is always true because of the limited datatype
+// BOOST_ASSERT(uchar(i) < 256 && ccl.size() == 256);
+ ccl[uchar(i)] = 1;
+ }
+ }
+
+ node* new_ccl = new ccl_node(ccl);
+ stack.push(new_ccl);
+ }
+ else
+ {
+ bool mb_first_time = true;
+ for (typename utility::impl::range_run<char_t>::const_iterator iter = rr.begin();
+ iter != rr.end(); ++iter)
+ {
+ if (iter->first == iter->last)
+ {
+ handle_mb_char(iter->first, mb_first_time, stack);
+ }
+ else
+ {
+ handle_mb_range(iter->first, iter->last, mb_first_time, stack);
+ }
+ mb_first_time = false;
+ }
+ }
+ }
+
+ template <typename char_t>
+ inline std::size_t
+ compute_differing_byte(char_t c1, char_t c2)
+ {
+ std::size_t rval = 0;
+ while (rval < sizeof(c1) &&
+ get_byte(c1, (unsigned int)rval) == get_byte(c2, (unsigned int)rval))
+ {
+ ++rval;
+ }
+ return rval;
+ }
+
+ template <typename char_t>
+ inline node*
+ create_mb_node_type1(std::size_t j, char_t c1, char_t c2)
+ {
+ std::size_t diff = get_byte(c2, (unsigned int)j) -
+ get_byte(c1, (unsigned int)j);
+ if (diff == 1) {
+ return 0;
+ }
+ else if (diff == 2) {
+ return new char_node(get_byte(c1, (unsigned int)j)+1);
+ }
+ else {
+ return new ccl_node(get_byte(c1, (unsigned int)j)+1,
+ get_byte(c2, (unsigned int)j)-1);
+ }
+ }
+
+ template <typename char_t>
+ inline node *
+ create_mb_node_for_byte(std::size_t i, std::size_t j, std::size_t sizem1,
+ std::size_t differing_byte, char_t c1, char_t c2, node* newnode)
+ {
+ node* cnode;
+ if (i == sizem1 && j == differing_byte && j != sizem1)
+ {
+ node* tmp = create_mb_node_type1(j, c1, c2);
+ if (tmp == 0)
+ {
+ delete newnode;
+ return 0;
+ }
+ else
+ cnode = tmp;
+ }
+ else if (i == differing_byte && j == sizem1)
+ {
+ if (i != sizem1) {
+ cnode = new ccl_node(get_byte(c1, (unsigned int)j), 0xFF);
+ }
+ else {
+ cnode = new ccl_node(get_byte(c1, (unsigned int)j),
+ get_byte(c2, (unsigned int)j));
+ }
+ }
+ else if (i != differing_byte && i != sizem1 &&
+ j == (sizem1 - i + differing_byte))
+ {
+ cnode = new ccl_node(get_byte(c1, (unsigned int)j)+1, 0xFF);
+ }
+ else if (i + j - differing_byte > sizem1) {
+ cnode = new ccl_node(0, 0xFF);
+ }
+ else {//if (is plain)
+ cnode = new char_node(get_byte(c1, (unsigned int)j));
+ }
+
+ node* top_node = new cat_node(newnode, cnode);
+ return top_node;
+ }
+
+// On platforms, where wchar_t is a typedef for unsigned short, the
+// comparision for a negative value is pointless
+ template <bool is_signed>
+ struct correct_char_aux {
+ };
+
+ template <>
+ struct correct_char_aux<true> {
+
+ template <typename char_t>
+ static char_t correct(char_t c) { if (c < 0) c = 0; return c; }
+ };
+
+ template <>
+ struct correct_char_aux<false> {
+
+ template <typename char_t>
+ static char_t correct(char_t c) { return c; }
+ };
+
+ template <typename char_t>
+ struct correct_char
+ {
+ static char_t correct(char_t c)
+ {
+ return correct_char_aux<std::numeric_limits<char_t>::is_signed >::
+ correct(c);
+ }
+ };
+
+ template <typename char_t>
+ inline void
+ handle_mb_range(char_t c1, char_t c2, bool first_time,
+ std::stack<node*>& stack)
+ {
+ // The algorithm can't handle negative value chars, which don't make
+ // much sense anyway. This comparision is pointless for wchar_t's on
+ // platforms, where wchar_t is a typedef for unsigned short
+
+ c1 = correct_char<char_t>::correct(c1);
+ //if (c1 < 0)
+ // c1 = 0;
+
+ BOOST_ASSERT(c1 < c2);
+ node* newnode = 0;
+ node* savednode = 0;
+ const std::size_t differing_byte = compute_differing_byte(c1, c2);
+ const std::size_t sizem1 = sizeof(c1) - 1;
+ const std::size_t ndb = sizem1 - differing_byte;
+ for (std::size_t i = differing_byte; i < sizeof(c1); ++i)
+ {
+ // generate node for the first byte
+ if (differing_byte == 0 && i == ndb)
+ {
+ node* tmp = create_mb_node_type1(0, c1, c2);
+ if (tmp == 0)
+ continue;
+ else
+ newnode = tmp;
+ }
+ else
+ {
+ newnode = new char_node(get_byte(c1, 0));
+ }
+ for (std::size_t j = 1; j < sizeof(c1); ++j)
+ {
+ newnode = create_mb_node_for_byte(i, j, sizem1, differing_byte,
+ c1, c2, newnode);
+ if (newnode == 0)
+ goto end_outer_for;
+ }
+
+ // or together the various parts
+ if (savednode)
+ {
+ node* top_node = new or_node(savednode, newnode);
+ savednode = top_node;
+ }
+ else
+ {
+ savednode = newnode;
+ }
+end_outer_for:
+ continue;
+ }
+
+ for (std::size_t k = 0; k < ndb; ++k)
+ {
+ newnode = new char_node(get_byte(c2, 0));
+ for (std::size_t j = 1; j < sizeof(c2); ++j)
+ {
+ node* cnode;
+ if (k == differing_byte && j == sizem1 && k != sizem1)
+ cnode = new ccl_node(0, get_byte(c2, (unsigned int)j));
+
+ else if (k != differing_byte && k != sizem1 &&
+ j == (sizem1 - k + differing_byte))
+ cnode = new ccl_node(0, get_byte(c2, (unsigned int)j)-1);
+
+ else if (k + j - differing_byte > sizem1)
+ cnode = new ccl_node(0, 0xFF);
+
+ else //if (is plain)
+ cnode = new char_node(get_byte(c2, (unsigned int)j));
+
+
+ node* top_node = new cat_node(newnode, cnode);
+ newnode = top_node;
+ }
+
+ // or together the various parts
+ if (savednode)
+ {
+ node* top_node = new or_node(savednode, newnode);
+ savednode = top_node;
+ }
+ else
+ {
+ savednode = newnode;
+ }
+ }
+
+
+ if (first_time)
+ {
+ stack.push(savednode);
+ }
+ else
+ {
+ node* top = stack.top();
+ stack.pop();
+
+ node* newtop = new or_node(top, savednode);
+ stack.push(newtop);
+ }
+ }
+} // namespace ccl_utils
+
+template <typename ScannerT>
+class make_char
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ make_char(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const& first, iterator_type const& last) const
+ {
+ const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
+ escape_char_parser<lex_escapes, char_t>();
+ char_t the_char;
+ iterator_type first_ = first;
+ ScannerT scan(first_, last);
+ lex_escape_ch[assign(the_char)].parse(scan);
+ node* newnode = ccl_utils::create_mb_node_seq(the_char);
+ m_stack.push(newnode);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+
+template <typename ScannerT>
+class make_ccl
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ make_ccl(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ static bool is_equal_to_string(iterator_type first,
+ iterator_type const & last, const char* str)
+ {
+ while (first != last &&*str &&*first ==*str)
+ {
+ ++first;
+ ++str;
+ }
+ return*str == 0;
+ }
+
+ template <typename ParserT>
+ static void fill_ccl(utility::impl::range_run<char_t>& rr, const ParserT& parser)
+ {
+ for (int i = 0; i < 256; ++i)
+ {
+ if (parser.test(static_cast<char_t>(uchar(i))))
+ rr.set(utility::impl::range<char_t>(char_t(i), char_t(i)));
+ }
+ }
+
+ void operator()(iterator_type const& first_, iterator_type const& last) const
+ {
+ BOOST_ASSERT(*first_ == '[');
+
+ iterator_type first = first_;
+ ++first; // skip over '['
+ bool negated_ccl = false;
+ if (*first == '^')
+ {
+ negated_ccl = true;
+ ++first;
+ }
+
+ utility::impl::range_run<char_t> rr;
+ while (first != last &&*first != ']')
+ {
+ if (*first == '[') // it's a ccl_expr like [:space:]
+ {
+ // check for [:space:], etc.
+ if (is_equal_to_string(first, last, "[:alnum:]"))
+ {
+ fill_ccl(rr, alnum_p);
+ }
+ else if (is_equal_to_string(first, last, "[:alpha:]"))
+ {
+ fill_ccl(rr, alpha_p);
+ }
+ else if (is_equal_to_string(first, last, "[:blank:]"))
+ {
+ fill_ccl(rr, blank_p);
+ }
+ else if (is_equal_to_string(first, last, "[:cntrl:]"))
+ {
+ fill_ccl(rr, cntrl_p);
+ }
+ else if (is_equal_to_string(first, last, "[:digit:]"))
+ {
+ fill_ccl(rr, digit_p);
+ }
+ else if (is_equal_to_string(first, last, "[:graph:]"))
+ {
+ fill_ccl(rr, graph_p);
+ }
+ else if (is_equal_to_string(first, last, "[:lower:]"))
+ {
+ fill_ccl(rr, lower_p);
+ }
+ else if (is_equal_to_string(first, last, "[:print:]"))
+ {
+ fill_ccl(rr, print_p);
+ }
+ else if (is_equal_to_string(first, last, "[:punct:]"))
+ {
+ fill_ccl(rr, punct_p);
+ }
+ else if (is_equal_to_string(first, last, "[:space:]"))
+ {
+ fill_ccl(rr, space_p);
+ }
+ else if (is_equal_to_string(first, last, "[:upper:]"))
+ {
+ fill_ccl(rr, upper_p);
+ }
+ else if (is_equal_to_string(first, last, "[:xdigit:]"))
+ {
+ fill_ccl(rr, xdigit_p);
+ }
+ // this can't happen, because it's parsed before we get here.
+ //else
+ // throw bad_regex();
+
+ // Advance past the character class expression
+ while (first != last &&*first != ']')
+ ++first;
+ BOOST_ASSERT(*first == ']');
+ ++first;
+ }
+ else {
+ const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
+ escape_char_parser<lex_escapes, char_t>();
+
+ char_t c1;
+ ScannerT scan(first, last);
+ lex_escape_ch[assign(c1)].parse(scan);
+ if (*scan.first == '-') // insert a range
+ {
+ ++scan.first;
+ char_t c2;
+ lex_escape_ch[assign(c2)].parse(scan);
+ BOOST_ASSERT(c1 < c2); // Throw exception?
+ rr.set(utility::impl::range<char_t>(c1, c2));
+ }
+ else // insert 1 char
+ {
+ rr.set(utility::impl::range<char_t>(c1, c1));
+ }
+ }
+ }
+
+ if (negated_ccl)
+ {
+ rr = ccl_utils::negate_range_run(rr);
+ }
+
+ ccl_utils::create_nodes(rr, m_stack);
+ }
+
+ std::stack<node*>& m_stack;
+};
+
+template <typename ScannerT>
+class make_any_char
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ std::stack<node*>& m_stack;
+
+ make_any_char(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(const char_t c) const
+ {
+ BOOST_ASSERT(c == '.');
+ do_any_char();
+ }
+
+ void do_any_char() const
+ {
+ static utility::impl::range_run<char_t> rr;
+ rr.set(full_range<char_t>());
+ char_t newline = '\n';
+ rr.clear(utility::impl::range<char_t>(newline, newline));
+
+ ccl_utils::create_nodes(rr, m_stack);
+ }
+};
+
+template <typename ScannerT>
+class make_string
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ std::stack<node*>& m_stack;
+
+ make_string(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const& first, iterator_type const& last) const
+ {
+ BOOST_ASSERT(*first == '"');
+
+ iterator_type first_ = first;
+ ScannerT scan(first_, last);
+ ++scan.first; // skip over '"'
+
+ // empty string not allowed
+ if (*scan.first == '"')
+ {
+ boost::throw_exception(bad_regex());
+ }
+
+ const escape_char_parser<lex_escapes, char_t> lex_escape_ch =
+ escape_char_parser<lex_escapes, char_t>();
+
+ char_t c;
+ lex_escape_ch[assign(c)].parse(scan);
+ node* top_node = ccl_utils::create_mb_node_seq(c);
+
+ while (*scan.first != '"' && scan.first != scan.last)
+ {
+ lex_escape_ch[assign(c)].parse(scan);
+ node* cur_node = ccl_utils::create_mb_node_seq(c);
+ top_node = new cat_node(top_node, cur_node);
+ }
+ m_stack.push(top_node);
+ }
+};
+
+inline
+node* repeat_node(node* n, int num)
+{
+ node* list_of_nodes = n;
+ for (int i = 1; i < num; ++i)
+ {
+ list_of_nodes = new cat_node(list_of_nodes, n->clone());
+ }
+ return list_of_nodes;
+}
+
+inline
+node* optional_node(node* n)
+{
+ return new or_node(n, new epsilon_node());
+}
+
+template <typename ScannerT>
+class make_rep1
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ std::stack<node*>& m_stack;
+
+ make_rep1(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const& first, iterator_type const& last) const
+ {
+ BOOST_ASSERT(*first == '{');
+
+ iterator_type first_ = first;
+ ScannerT scan(first_, last);
+ ++scan.first; // skip over '{'
+
+ unsigned int count;
+ uint_p[assign(count)].parse(scan);
+ if (count == 0)
+ boost::throw_exception(bad_regex());
+
+ node* top_node = m_stack.top();
+ m_stack.pop();
+ top_node = repeat_node(top_node, count);
+ m_stack.push(top_node);
+ }
+};
+
+template <typename ScannerT>
+class make_rep2
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ std::stack<node*>& m_stack;
+
+ make_rep2(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const& first, iterator_type const& last) const
+ {
+ BOOST_ASSERT(*first == '{');
+
+ iterator_type first_ = first;
+ ScannerT scan (first_, last);
+ ++scan.first; // skip over '{'
+
+ unsigned int count;
+ uint_p[assign(count)].parse(scan);
+ if (count == 0)
+ boost::throw_exception(bad_regex());
+
+ node* top_node = m_stack.top();
+ m_stack.pop();
+ top_node = new cat_node(repeat_node(top_node, count),
+ new star_node(top_node->clone()));
+ m_stack.push(top_node);
+
+ }
+};
+
+template <typename ScannerT>
+class make_rep3
+{
+ typedef typename ScannerT::iterator_t iterator_type;
+
+public:
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ std::stack<node*>& m_stack;
+
+ make_rep3(std::stack<node*>& the_stack)
+ : m_stack(the_stack)
+ {}
+
+ void operator()(iterator_type const& first, iterator_type const& last) const
+ {
+ BOOST_ASSERT(*first == '{');
+
+ iterator_type first_ = first;
+ ScannerT scan(first_, last);
+ ++scan.first; // skip over '{'
+
+ unsigned int count1, count2;
+ uint_p[assign(count1)].parse(scan);
+ if (count1 == 0)
+ boost::throw_exception(bad_regex());
+
+ ++scan.first; // skip over ','
+
+ uint_p[assign(count2)].parse(scan);
+ if (count2 <= count1)
+ boost::throw_exception(bad_regex());
+
+ node* top_node = m_stack.top();
+ m_stack.pop();
+ node* repeats = repeat_node(top_node, count1);
+ top_node = new cat_node(repeats,
+ repeat_node(optional_node(top_node->clone()),
+ count2 - count1));
+
+ m_stack.push(top_node);
+ }
+};
+
+///////////////////////////////////////////////////////////////////////////////
+//
+// Lexer grammar
+//
+// Defines the grammar, which mandates the syntax of the understood
+// lexeme definitions passed to lexer::register_regex.
+//
+///////////////////////////////////////////////////////////////////////////////
+class lexer_grammar : public boost::spirit::classic::grammar<lexer_grammar>
+{
+public:
+ lexer_grammar(std::stack<node*> &node_stack_)
+ : node_stack(node_stack_) {}
+
+ template <typename ScannerT>
+ struct definition
+ {
+ typedef rule<ScannerT> rule_t;
+ typedef typename ScannerT::iterator_t iterator_type;
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<iterator_type>::value_type
+ char_t;
+
+ rule_t regex, re, series, singleton, singleton2, fullccl, ccl, string,
+ escseq, ccl_char;
+ symbols<> ccl_expr;
+
+ definition(lexer_grammar const &self)
+ {
+ regex =
+ re >> !('/' >> re) >> !ch_p('$')
+ ;
+
+ re =
+ series
+ >>*( ('|' >> series)[make_or<ScannerT>(self.node_stack)] )
+ ;
+
+ series =
+ singleton
+ >>*( singleton[make_concat<ScannerT>(self.node_stack)] )
+ ;
+
+ singleton =
+ ch_p('.')[make_any_char<ScannerT>(self.node_stack)]
+ >> singleton2
+ | fullccl
+ >> singleton2
+ | ('"' >> string >> '"')
+ [
+ make_string<ScannerT>(self.node_stack)
+ ]
+ >> singleton2
+ | '(' >> re >> ')'
+ >> singleton2
+ | ((anychar_p - chset<>("/|*+?.(){}\\")) | escseq)
+ [
+ make_char<ScannerT>(self.node_stack)
+ ]
+ >> singleton2
+ ;
+
+ singleton2 =
+ ch_p('*')[make_star<ScannerT>(self.node_stack)]
+ >> singleton2
+ | ch_p('+')[make_plus<ScannerT>(self.node_stack)]
+ >> singleton2
+ | ch_p('?')[make_optional<ScannerT>(self.node_stack)]
+ >> singleton2
+ | ('{' >> uint_p >> '}')
+ [
+ make_rep1<ScannerT>(self.node_stack)
+ ]
+ >> singleton2
+ | ('{' >> uint_p >> ',' >> '}')
+ [
+ make_rep2<ScannerT>(self.node_stack)
+ ]
+ >> singleton2
+ | ('{' >> uint_p >> ',' >> uint_p >> '}')
+ [
+ make_rep3<ScannerT>(self.node_stack)
+ ]
+ >> singleton2
+ | epsilon_p
+ ;
+
+ fullccl =
+ ('[' >> !ch_p('^') >> ccl >> ']')
+ [
+ make_ccl<ScannerT>(self.node_stack)
+ ]
+ ;
+
+ ccl =
+ *(ccl_expr | (ccl_char >> !('-' >> ccl_char)))
+ ;
+
+ ccl_char =
+ ( (anychar_p - chset<>("\\\n]")) | escseq )
+ ;
+
+ ccl_expr =
+ "[:alnum:]",
+ "[:alpha:]",
+ "[:blank:]",
+ "[:cntrl:]",
+ "[:digit:]",
+ "[:graph:]",
+ "[:lower:]",
+ "[:print:]",
+ "[:punct:]",
+ "[:space:]",
+ "[:upper:]",
+ "[:xdigit:]"
+ ;
+
+ string =
+ +( (anychar_p - chset<>("\"\\")) | escseq )
+ ;
+
+ typedef
+ uint_parser<char_t, 8, 1,
+ std::numeric_limits<char_t>::digits / 3 + 1
+ > oct_parser_t;
+ typedef
+ uint_parser<char_t, 16, 1,
+ std::numeric_limits<char_t>::digits / 4 + 1
+ > hex_parser_t;
+
+ escseq =
+ ch_p('\\')
+ >> (
+ oct_parser_t()
+ | as_lower_d['x'] >> hex_parser_t()
+ | (anychar_p - chset<>('\n'))
+ )
+ ;
+
+#define BOOST_SLEX_DEBUG (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(regex, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(re, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(series, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(singleton2, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(fullccl, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(string, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(escseq, BOOST_SLEX_DEBUG);
+ BOOST_SPIRIT_DEBUG_TRACE_RULE(ccl_char, BOOST_SLEX_DEBUG);
+
+#undef BOOST_SLEX_DEBUG
+ }
+
+ rule<ScannerT> const&
+ start() const { return regex; }
+ };
+
+ std::stack<node*> &node_stack;
+
+}; // class lexer_grammar
+
+template <typename StringT>
+inline node *
+parse(lexer_grammar& g, StringT const& str)
+{
+ typedef
+ scanner<typename StringT::const_iterator, scanner_policies<> >
+ scanner_t;
+ typedef typename rule<scanner_t>::template result<scanner_t>::type
+ result_t;
+
+ typename StringT::const_iterator first = str.begin();
+ typename StringT::const_iterator last = str.end();
+
+ scanner_t scan(first, last);
+// typename rule<scanner_t>::result_t hit = g.parse(scan);
+ result_t hit = g.parse(scan);
+ if (!hit || !scan.at_end())
+ {
+ while (g.node_stack.size())
+ {
+ delete g.node_stack.top();
+ g.node_stack.pop();
+ }
+ return 0;
+ }
+
+ BOOST_ASSERT(g.node_stack.size() == 1);
+ node* rval = g.node_stack.top();
+ g.node_stack.pop();
+ node* an_eof_node = new eof_node();
+ rval = new cat_node(rval, an_eof_node);
+ return rval;
+}
+
+inline
+void make_case_insensitive(state_match_t& state_match)
+{
+ // TODO: Fix this.
+ // This doesn't take into account foreign languages, figure out how to
+ // do that. Also this approach is broken for this implementation of
+ // wide chars.
+ for (state_match_t::iterator iter = state_match.begin();
+ iter != state_match.end(); ++iter)
+ {
+ int i, j;
+ for (i = 'A', j = 'a'; i <= 'Z'; ++i, ++j)
+ {
+ // if either is set, turn them both on
+ (*iter)[i] = (*iter)[j] = uchar((*iter)[i] | (*iter)[j]);
+ }
+ }
+}
+
+template<bool wide_char>
+struct regex_match_helper;
+
+template<>
+struct regex_match_helper<false> // single byte char
+{
+ template <typename DfaT, typename IteratorT>
+ static bool
+ do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
+ int& regex_index,
+ std::basic_string<
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ > *token)
+ {
+ typedef std::basic_string<
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ > string_type;
+ typedef typename string_type::size_type size_type;
+
+ node_id_t s = 0;
+ node_id_t last_accepting_index = invalid_node;
+ IteratorT p = first;
+ IteratorT last_accepting_cpos = first;
+ while (p != last)
+ {
+ s = dfa.transition_table[s][(uchar)*p];
+ if (s == invalid_node)
+ break;
+ if (token) token->append((size_type)1, *p);
+ ++p;
+ if (dfa.acceptance_index[s] != invalid_node)
+ {
+ last_accepting_index = s;
+ last_accepting_cpos = p;
+ }
+ }
+ if (last_accepting_index != invalid_node)
+ {
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
+ dfa.acceptance_index[last_accepting_index] << '\n';
+#endif
+
+ first = last_accepting_cpos;
+ regex_index = dfa.acceptance_index[last_accepting_index];
+ return true;
+ }
+ else
+ return false;
+ }
+};
+
+template<>
+struct regex_match_helper<true> // wide char
+{
+ template <typename DfaT, typename IteratorT>
+ static bool
+ do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
+ int& regex_index,
+ std::basic_string<
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ > *token)
+ {
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ char_t;
+ typedef std::basic_string<char_t> string_type;
+ typedef typename string_type::size_type size_type;
+
+ node_id_t s = 0;
+ node_id_t last_accepting_index = invalid_node;
+ IteratorT wp = first;
+ IteratorT last_accepting_cpos = first;
+
+ while (wp != last)
+ {
+ for (unsigned int i = 0; i < sizeof(char_t); ++i)
+ {
+ s = dfa.transition_table[s][get_byte(*wp,i)];
+ if (s == invalid_node)
+ {
+ goto break_while;
+ }
+ }
+ if (token) token->append((size_type)1, *wp);
+ ++wp;
+ if (dfa.acceptance_index[s] != invalid_node)
+ {
+ last_accepting_index = s;
+ last_accepting_cpos = wp;
+ }
+
+ }
+
+ break_while:
+ if (last_accepting_index != invalid_node)
+ {
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ std::cout << "dfa.acceptance_index[" << last_accepting_index << "] = " <<
+ dfa.acceptance_index[last_accepting_index] << '\n';
+#endif
+ first = last_accepting_cpos;
+ regex_index = dfa.acceptance_index[last_accepting_index];
+
+ return true;
+ }
+ else
+ return false;
+ }
+};
+
+template <typename DfaT, typename IteratorT, bool wide_char>
+struct regex_match
+{
+ static bool
+ do_match(DfaT const& dfa, IteratorT &first, IteratorT const& last,
+ int& regex_index,
+ std::basic_string<
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ > *token)
+ {
+ return regex_match_helper<wide_char>::do_match(
+ dfa, first, last, regex_index, token);
+ }
+};
+
+} // namespace lexerimpl
+
+///////////////////////////////////////////////////////////////////////////////
+//
+template <typename IteratorT = char const*, typename TokenT = int,
+ typename CallbackT = void(*)(IteratorT const &,
+ IteratorT &,
+ IteratorT const&,
+ TokenT const&,
+ lexer_control<TokenT> &)>
+class lexer
+{
+public:
+ typedef CallbackT callback_t;
+ typedef
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ char_t;
+
+ struct regex_info
+ {
+ std::basic_string<char_t> str;
+ TokenT token;
+ CallbackT callback;
+
+ regex_info(const std::basic_string<char_t>& _str,
+ const TokenT& _token,
+ const CallbackT& _callback)
+ : str(_str)
+ , token(_token)
+ , callback(_callback)
+ {}
+
+ };
+
+ struct dfa_table
+ {
+ std::vector<std::vector<node_id_t> > transition_table;
+ std::vector<node_id_t> acceptance_index;
+ };
+ typedef std::vector<node_id_t> node_table_t;
+ typedef std::vector<node_table_t> transition_table_t;
+ typedef std::vector<dfa_table> dfa_t;
+
+
+ lexer(unsigned int states = 1);
+
+ void register_regex(const std::basic_string<char_t>& regex,
+ const TokenT& id, const CallbackT& cb = CallbackT(),
+ unsigned int state = 0);
+
+ TokenT next_token(IteratorT &first, IteratorT const &last,
+ std::basic_string<char_t> *token = 0);
+
+ void create_dfa();
+ bool has_compiled_dfa() { return m_compiled_dfa; }
+
+ void set_case_insensitive(bool insensitive);
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ void dump(std::ostream& out);
+#endif
+ typedef std::vector<std::vector<regex_info> > regex_list_t;
+
+ bool load (std::ifstream &in, long unique_id = 0);
+ bool save (std::ofstream &out, long unique_id = 0);
+ enum {
+ SLEX_SIGNATURE = 0x58454C53, // "SLEX"
+ SLEX_VERSION_100 = 0x0100, // persistance version
+ SLEX_LAST_KNOWN_VERSION = SLEX_VERSION_100,
+ SLEX_MINOR_VERSION_MASK = 0xFF
+ };
+
+private:
+
+ void create_dfa_for_state(int state);
+
+ static bool regex_match(const dfa_t& dfa, IteratorT& first,
+ IteratorT& last, int& regex_index);
+
+ mutable std::stack<lexerimpl::node*> node_stack;
+ lexerimpl::lexer_grammar g;
+
+ mutable bool m_compiled_dfa;
+ mutable dfa_t m_dfa;
+
+ regex_list_t m_regex_list;
+ bool m_case_insensitive;
+
+ unsigned int m_state;
+ std::stack<unsigned int> m_state_stack;
+ unsigned int m_num_states;
+};
+
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline
+lexer<IteratorT, TokenT, CallbackT>::lexer(unsigned int states)
+ : g(node_stack)
+ , m_compiled_dfa(false)
+ , m_regex_list(states)
+ , m_case_insensitive(false)
+ , m_state(0)
+ , m_state_stack()
+ , m_num_states(states)
+{
+ BOOST_SPIRIT_DEBUG_TRACE_NODE_NAME(g, "slex::lexer",
+ BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX);
+}
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline void
+lexer<IteratorT, TokenT, CallbackT>::register_regex(
+ const std::basic_string<char_t>& regex, const TokenT& id,
+ const CallbackT& callback, unsigned int state)
+{
+ if (state > m_num_states) {
+ m_regex_list.resize(state);
+ m_num_states = state;
+ }
+ m_regex_list[state].push_back(regex_info(regex, id, callback));
+}
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline TokenT
+lexer<IteratorT, TokenT, CallbackT>::next_token(
+ IteratorT &first, IteratorT const& last,
+ std::basic_string<
+ typename BOOST_SPIRIT_IT_NS::iterator_traits<IteratorT>::value_type
+ > *token)
+{
+ if (!m_compiled_dfa)
+ {
+ create_dfa();
+ }
+
+ IteratorT saved = first;
+ int regex_index;
+ if (!lexerimpl::regex_match<dfa_table, IteratorT, (sizeof(char_t) > 1)>::
+ do_match(m_dfa[m_state], first, last, regex_index, token))
+ return -1; // TODO: can't return -1, need to return some invalid token.
+ // how to figure this out? We can use traits I guess.
+ else
+ {
+ regex_info regex = m_regex_list[m_state][regex_index];
+ TokenT rval = regex.token;
+ if (regex.callback)
+ {
+ // execute corresponding callback
+ lexer_control<TokenT> controller(rval, m_state, m_state_stack);
+ regex.callback(saved, first, last, regex.token, controller);
+ if (controller.ignore_current_token_set()) {
+ if (token)
+ token->erase();
+ return next_token(first, last, token);
+ }
+ }
+ return rval;
+ }
+}
+
+namespace lexerimpl
+{
+
+inline
+bool find_acceptance_state(const node_set& eof_node_ids,
+ const node_set& current_set,
+ node_id_t& acceptance_node_id)
+{
+ for(node_set::const_iterator nsi = eof_node_ids.begin();
+ nsi != eof_node_ids.end(); ++nsi)
+ {
+ node_id_t eof_node_id =*nsi;
+ if (current_set.end() != current_set.find(eof_node_id))
+ {
+ // store the first one we come to as the
+ // matched pattern
+ acceptance_node_id = eof_node_id;
+ // don't bother searching for more
+ return true;
+ }
+ }
+ return false;
+}
+
+template <typename RegexListT, typename GrammarT>
+#ifndef BOOST_NO_CXX11_SMART_PTR
+inline std::unique_ptr<node>
+#else
+inline std::auto_ptr<node>
+#endif
+parse_regexes(const RegexListT& regex_list, GrammarT& g)
+{
+ // parse the expressions into a tree
+ if (regex_list.begin() == regex_list.end())
+ boost::throw_exception(bad_regex());
+
+ typename RegexListT::const_iterator ri = regex_list.begin();
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ std::unique_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
+#else
+ std::auto_ptr<node> tree(lexerimpl::parse(g, (*ri).str));
+#endif
+ if (tree.get() == 0)
+ boost::throw_exception(bad_regex());
+
+ ++ri;
+ for (/**/; ri != regex_list.end(); ++ri)
+ {
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ std::unique_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
+#else
+ std::auto_ptr<node> next_tree(lexerimpl::parse(g, (*ri).str));
+#endif
+ if (next_tree.get() == 0)
+ boost::throw_exception(bad_regex());
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ tree = std::unique_ptr<node>(new or_node(tree.release(), next_tree.release()));
+#else
+ tree = std::auto_ptr<node>(new or_node(tree.release(), next_tree.release()));
+#endif
+ }
+ return tree;
+}
+
+} //namespace lexerimpl
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline void
+lexer<IteratorT, TokenT, CallbackT>::create_dfa()
+{
+ m_dfa.resize(m_num_states);
+ for (unsigned int i = 0; i < m_num_states; ++i)
+ create_dfa_for_state(i);
+}
+
+// Algorithm from Compilers: Principles, Techniques, and Tools p. 141
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline void
+lexer<IteratorT, TokenT, CallbackT>::create_dfa_for_state(int state)
+{
+ using lexerimpl::node;
+#ifndef BOOST_NO_CXX11_SMART_PTR
+ std::unique_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
+#else
+ std::auto_ptr<node> tree = lexerimpl::parse_regexes(m_regex_list[state], g);
+#endif
+ node_id_t dummy = 0;
+ tree->assign_node_ids(dummy);
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+ tree->dump(std::cout);
+#endif
+
+ // compute followpos(root)
+ followpos_t followpos;
+ tree->compute_followpos(followpos);
+
+ // the dfa states <-> nfa state groups
+ std::map<node_set, node_id_t> dstates1;
+ std::map<node_id_t, node_set> dstates2;
+
+ // the dfa transitions
+ m_dfa[state].transition_table.push_back(
+ std::vector<node_id_t>(256, invalid_node));
+ m_dfa[state].acceptance_index.push_back(invalid_node);
+
+ // whether the dfa state has been processed yet
+ std::vector<node_id_t> marked;
+
+ // used to give a unique id to each dfa state
+ node_id_t num_states = 0;
+
+ // initially, the only unmarked state in Dstates is firstpos(root).
+ marked.push_back(0);
+ node_set fpr = tree->firstpos();
+ dstates1[fpr] = 0;
+ dstates2[0] = fpr;
+ state_match_t state_match;
+ tree->compute_state_match(state_match);
+
+ if (m_case_insensitive)
+ lexerimpl::make_case_insensitive(state_match);
+
+ node_set eof_node_ids;
+ tree->get_eof_ids(eof_node_ids);
+ // translate the eof_node_ids into a 0-based index
+ std::map<node_id_t, node_id_t> eof_node_id_map;
+ unsigned int x = 0;
+ for (node_set::iterator node_id_it = eof_node_ids.begin();
+ node_id_it != eof_node_ids.end();
+ ++node_id_it)
+ {
+ eof_node_id_map[*node_id_it] = x++;
+ }
+
+ // figure out if this is an acceptance state
+ node_id_t eof_node_id;
+ if (lexerimpl::find_acceptance_state(eof_node_ids, fpr, eof_node_id))
+ {
+ m_dfa[state].acceptance_index[0] = eof_node_id_map[eof_node_id];
+ }
+
+ std::vector<node_id_t>::iterator i = std::find(marked.begin(), marked.end(),
+ node_id_t(0));
+ while (marked.end() != i)
+ {
+ *i = 1;
+ node_id_t T = node_id_t(std::distance(marked.begin(), i));
+ BOOST_ASSERT(T < dstates2.size());
+ node_set Tstates = dstates2[T];
+ for (node_id_t j = 0; j < 256; ++j)
+ {
+ node_set U;
+ for (node_set::iterator k = Tstates.begin();
+ k != Tstates.end(); ++k)
+ {
+ node_id_t p =*k;
+ BOOST_ASSERT(p < state_match.size());
+ BOOST_ASSERT(j < state_match[p].size());
+ if (state_match[p][j])
+ {
+ node_set fpp = followpos[p];
+ U.insert(fpp.begin(), fpp.end());
+ }
+ }
+ if (U.size() > 0)
+ {
+ std::map<node_set, node_id_t>::iterator l = dstates1.find(U);
+ node_id_t target_state;
+ if (l == dstates1.end()) // not in the states yet
+ {
+ ++num_states;
+ dstates1[U] = target_state = num_states;
+ dstates2[target_state] = U;
+ marked.push_back(0);
+ m_dfa[state].transition_table.push_back(
+ std::vector<node_id_t>(256, invalid_node));
+ m_dfa[state].acceptance_index.push_back(invalid_node);
+ // figure out if this is an acceptance state
+ node_id_t eof_node_id;
+ if (lexerimpl::find_acceptance_state(eof_node_ids, U, eof_node_id))
+ {
+ m_dfa[state].acceptance_index[target_state] =
+ eof_node_id_map[eof_node_id];
+ }
+ }
+ else
+ {
+ target_state = dstates1[U];
+ }
+
+ BOOST_ASSERT(T < m_dfa[state].transition_table.size());
+ BOOST_ASSERT(j < m_dfa[state].transition_table[T].size());
+ m_dfa[state].transition_table[T][j] = target_state;
+ }
+
+ }
+
+ i = std::find(marked.begin(), marked.end(), node_id_t(0));
+ }
+ m_compiled_dfa = true;
+}
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline void
+lexer<IteratorT, TokenT, CallbackT>::set_case_insensitive(bool insensitive)
+{
+ m_case_insensitive = insensitive;
+}
+
+
+#if defined(BOOST_SPIRIT_DEBUG) && (BOOST_SPIRIT_DEBUG_FLAGS & BOOST_SPIRIT_DEBUG_FLAGS_SLEX)
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline void
+lexer<IteratorT, TokenT, CallbackT>::dump(std::ostream& out)
+{
+ for (unsigned x = 0; x < m_dfa.size(); ++x)
+ {
+ out << "\nm_dfa[" << x << "] has " << m_dfa[x].transition_table.size() << " states\n";
+ for (node_id_t i = 0; i < m_dfa[x].transition_table.size(); ++i)
+ {
+ out << "state " << i << ":";
+ for (node_id_t j = 0; j < m_dfa[x].transition_table[i].size(); ++j)
+ {
+ if (m_dfa[x].transition_table[i][j] != invalid_node)
+ out << j << "->" << m_dfa[x].transition_table[i][j] << " ";
+ }
+ out << "\n";
+ }
+ out << "acceptance states: ";
+ for(unsigned int k = 0; k < m_dfa[x].acceptance_index.size(); ++k)
+ {
+ if (m_dfa[x].acceptance_index[k] != invalid_node)
+ out << '<' << k << ',' << m_dfa[x].acceptance_index[k] << "> ";
+ }
+ out << endl;
+ }
+}
+#endif
+
+///////////////////////////////////////////////////////////////////////////////
+// load the lexer tables
+#define slex_in(strm, val) \
+ strm.read((char*)&val, sizeof(val)); \
+ if (std::ios::goodbit != strm.rdstate()) return false
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline bool
+lexer<IteratorT, TokenT, CallbackT>::load (std::ifstream &in, long unique_id)
+{
+// ensure correct signature and version
+long signature = 0;
+
+ slex_in (in, signature);
+ if (signature != SLEX_SIGNATURE)
+ return false; // not for us
+
+long version = 0;
+
+ slex_in (in, version);
+ if ((version & ~SLEX_MINOR_VERSION_MASK) > SLEX_LAST_KNOWN_VERSION)
+ return false; // to new for us
+
+long uid = 0;
+
+ slex_in (in, uid);
+ if (uid != unique_id)
+ return false; // not saved by us
+
+// load auxiliary members
+int num_states = 0;
+
+ slex_in (in, num_states);
+
+// load the dfa tables
+dfa_t in_dfa;
+std::size_t dfa_size = 0;
+
+ slex_in (in, dfa_size);
+ in_dfa.resize(dfa_size);
+ for (std::size_t dfa = 0; dfa < dfa_size; ++dfa)
+ {
+ // load the transition tables
+ std::size_t tt_size = 0;
+ transition_table_t &tt_table = in_dfa[dfa].transition_table;
+
+ slex_in (in, tt_size);
+ tt_table.resize(tt_size);
+ for (std::size_t tt = 0; tt < tt_size; ++tt)
+ {
+ std::size_t nt_size = 0;
+ node_table_t &nt_table = tt_table[tt];
+
+ slex_in (in, nt_size);
+ nt_table.resize(nt_size);
+ for (std::size_t nt = 0; nt < nt_size; ++nt)
+ {
+ slex_in (in, nt_table[nt]);
+ }
+ }
+
+ // load the acceptance index table
+ std::size_t ai_size = 0;
+ node_table_t &ai_table = in_dfa[dfa].acceptance_index;
+
+ slex_in (in, ai_size);
+ ai_table.resize(ai_size);
+ for (std::size_t ai = 0; ai < ai_size; ++ai)
+ {
+ slex_in (in, ai_table[ai]);
+ }
+ }
+
+ m_dfa.swap(in_dfa); // success, swap in the read values
+ m_num_states = num_states;
+
+ m_compiled_dfa = true;
+ return true;
+}
+
+#undef slex_in
+
+///////////////////////////////////////////////////////////////////////////////
+// save the lexer tables
+#define slex_out(strm, val) \
+ strm.write((char*)&val, sizeof(val)); \
+ if (std::ios::goodbit != strm.rdstate()) return false
+
+template <typename IteratorT, typename TokenT, typename CallbackT>
+inline bool
+lexer<IteratorT, TokenT, CallbackT>::save (std::ofstream &out, long unique_id)
+{
+// save signature and version information
+long out_long = SLEX_SIGNATURE;
+
+ slex_out(out, out_long);
+ out_long = SLEX_VERSION_100;
+ slex_out(out, out_long);
+ slex_out(out, unique_id);
+
+// save auxiliary members
+ slex_out(out, m_num_states);
+
+// save the dfa tables
+ typedef typename dfa_t::const_iterator dfa_iter_t;
+ typedef transition_table_t::const_iterator transition_table_iter_t;
+ typedef node_table_t::const_iterator node_table_iter_t;
+
+ std::size_t out_size_t = m_dfa.size();
+ slex_out(out, out_size_t);
+
+ dfa_iter_t end = m_dfa.end();
+ for (dfa_iter_t it = m_dfa.begin(); it != end; ++it)
+ {
+ // save the transition table
+ out_size_t = (*it).transition_table.size();
+ slex_out(out, out_size_t);
+
+ transition_table_iter_t tt_end = (*it).transition_table.end();
+ for (transition_table_iter_t tt_it = (*it).transition_table.begin();
+ tt_it != tt_end;
+ ++tt_it)
+ {
+ out_size_t = (*tt_it).size();
+ slex_out(out, out_size_t);
+
+ node_table_iter_t nt_end = (*tt_it).end();
+ for (node_table_iter_t nt_it = (*tt_it).begin();
+ nt_it != nt_end;
+ ++nt_it)
+ {
+ slex_out(out, (*nt_it));
+ }
+ }
+
+ // save the acceptance index table
+ out_size_t = (*it).acceptance_index.size();
+ slex_out(out, out_size_t);
+
+ node_table_iter_t nt_end = (*it).acceptance_index.end();
+ for (node_table_iter_t nt_it = (*it).acceptance_index.begin();
+ nt_it != nt_end;
+ ++nt_it)
+ {
+ slex_out(out, (*nt_it));
+ }
+ }
+ return true;
+}
+
+#undef slex_out
+
+/*
+a lexer_control object supports some operations on the lexer.
+ get current lexer state
+ set state
+ terminate
+ state stack (push, pop, top)
+ set new token return value
+ ignore the current token
+ yymore
+ get length of matched token
+*/
+template <typename TokenT>
+class lexer_control
+{
+public:
+
+ lexer_control(TokenT& token, unsigned int& current_state,
+ std::stack<unsigned int>& state_stack);
+ // functions dealing with the lexer state
+
+ // set the state to state
+ void set_state(unsigned int state);
+
+ // get the current state
+ unsigned int get_state();
+
+ // pushes the current state onto the top of the state stack and
+ // switches to new_state
+ void push_state(unsigned int new_state);
+
+ // pops the top of the state stack and switches to it.
+ void pop_state();
+
+ // returns the top of the stack without altering the stack's contents.
+ unsigned int top_state();
+
+ // functions dealing with the token returned.
+
+ // set a new TokenT return value, overriding that one that was
+ // registered via. register_regex()
+ void set_token(const TokenT& token);
+
+ // tell the lexer to return an invalid token, signifying termination.
+ void terminate();
+
+ // ignore the current token, and move on to the next one. The current
+ // token will NOT be returned from next_token()
+ void ignore_current_token();
+
+ // returns true if ignore_current_token() has been called,
+ // false otherwise.
+ bool ignore_current_token_set();
+
+private:
+ TokenT& m_token;
+ bool m_ignore_current_token;
+ unsigned int& m_current_state;
+ std::stack<unsigned int>& m_state_stack;
+};
+
+template <typename TokenT>
+inline
+lexer_control<TokenT>::lexer_control(TokenT& token, unsigned int& current_state,
+ std::stack<unsigned int>& state_stack)
+ : m_token(token)
+ , m_ignore_current_token(false)
+ , m_current_state(current_state)
+ , m_state_stack(state_stack)
+{
+}
+
+template <typename TokenT>
+inline void
+lexer_control<TokenT>::set_state(unsigned int state)
+{
+ m_current_state = state;
+}
+
+template <typename TokenT>
+inline unsigned int
+lexer_control<TokenT>::get_state()
+{
+ return m_current_state;
+}
+
+template <typename TokenT>
+inline void
+lexer_control<TokenT>::push_state(unsigned int new_state)
+{
+ m_state_stack.push(m_current_state);
+ m_current_state = new_state;
+}
+
+template <typename TokenT>
+inline void
+lexer_control<TokenT>::pop_state()
+{
+ m_current_state = m_state_stack.top();
+ m_state_stack.pop();
+}
+
+template <typename TokenT>
+inline unsigned int
+lexer_control<TokenT>::top_state()
+{
+ return m_state_stack.top();
+}
+
+template <typename TokenT>
+inline void
+lexer_control<TokenT>::set_token(const TokenT& token)
+{
+ m_token = token;
+}
+
+template <typename TokenT>
+inline void
+lexer_control<TokenT>::terminate()
+{
+ m_token = -1; // TOOD: fix this, need to be determined by traits
+}
+
+template <typename TokenT>
+inline void
+lexer_control<TokenT>::ignore_current_token()
+{
+ m_ignore_current_token = true;
+}
+
+template <typename TokenT>
+inline bool
+lexer_control<TokenT>::ignore_current_token_set()
+{
+ return m_ignore_current_token;
+}
+
+} // namespace classic
+} // namespace spirit
+} // namespace boost
+
+#undef BOOST_SPIRIT_IT_NS
+
+#endif
+