diff options
Diffstat (limited to 'src/boost/libs/spirit/example/lex/static_lexer')
10 files changed, 747 insertions, 0 deletions
diff --git a/src/boost/libs/spirit/example/lex/static_lexer/Jamfile b/src/boost/libs/spirit/example/lex/static_lexer/Jamfile new file mode 100644 index 00000000..25c30d0d --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/Jamfile @@ -0,0 +1,15 @@ +#============================================================================== +# Copyright (c) 2001-2009 Hartmut Kaiser +# +# Distributed under the Boost Software License, Version 1.0. (See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#============================================================================== + +project spirit-static-lexer-example ; + +exe word_count_generate : word_count_generate.cpp ; +exe word_count_static : word_count_static.cpp ; + +exe word_count_lexer_generate : word_count_lexer_generate.cpp ; +exe word_count_lexer_static : word_count_lexer_static.cpp ; + diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count.input b/src/boost/libs/spirit/example/lex/static_lexer/word_count.input new file mode 100644 index 00000000..2f768330 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count.input @@ -0,0 +1,7 @@ +Our hiking boots are ready. So, let's pack! + +Have you the plane tickets for there and back? + +I do, I do. We're all ready to go. Grab my hand and be my beau. + + diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp new file mode 100644 index 00000000..87f0527f --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp @@ -0,0 +1,45 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . 
To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens (this file) +// . To integrate the generated C++ lexer into the /Spirit/ framework. +// (see the file: word_count_static.cpp) + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include <boost/config/warning_disable.hpp> +#include <boost/spirit/include/lex_lexertl.hpp> +#include <boost/spirit/include/lex_generate_static_lexertl.hpp> + +#include <fstream> + +#include "word_count_tokens.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_generate_main +int main(int argc, char* argv[]) +{ + // create the lexer object instance needed to invoke the generator + word_count_tokens<lex::lexertl::lexer<> > word_count; // the token definition + + // open the output file, where the generated tokenizer function will be + // written to + std::ofstream out(argc < 2 ? "word_count_static.hpp" : argv[1]); + + // invoke the generator, passing the token definition, the output stream + // and the name suffix of the tables and functions to be generated + // + // The suffix "wc" used below results in a type lexertl::static_::lexer_wc + // to be generated, which needs to be passed as a template parameter to the + // lexertl::static_lexer template (see word_count_static.cpp). + return lex::lexertl::generate_static_dfa(word_count, out, "wc") ? 0 : -1; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp new file mode 100644 index 00000000..65593cda --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp @@ -0,0 +1,45 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens (this file) +// . To integrate the generated C++ lexer into the /Spirit/ framework. +// (see the file: word_count_lexer_static.cpp) + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include <boost/config/warning_disable.hpp> +#include <boost/spirit/include/lex_lexertl.hpp> +#include <boost/spirit/include/lex_generate_static_lexertl.hpp> + +#include <fstream> + +#include "word_count_lexer_tokens.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +//[wcl_static_generate_main +int main(int argc, char* argv[]) +{ + // create the lexer object instance needed to invoke the generator + word_count_lexer_tokens<lex::lexertl::actor_lexer<> > word_count; // the token definition + + // open the output file, where the generated tokenizer function will be + // written to + std::ofstream out(argc < 2 ? "word_count_lexer_static.hpp" : argv[1]); + + // invoke the generator, passing the token definition, the output stream + // and the name suffix of the tables and functions to be generated + // + // The suffix "wcl" used below results in a type lexertl::static_::lexer_wcl + // to be generated, which needs to be passed as a template parameter to the + // lexertl::static_actor_lexer template (see word_count_lexer_static.cpp). + return lex::lexertl::generate_static_dfa(word_count, out, "wcl") ?
0 : -1; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp new file mode 100644 index 00000000..2bda5adc --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp @@ -0,0 +1,84 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens +// . To integrate the generated C++ lexer into the /Spirit/ framework. +// + +// #define BOOST_SPIRIT_DEBUG +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include <boost/config/warning_disable.hpp> +#include <boost/spirit/include/lex_static_lexertl.hpp> + +#include <iostream> +#include <string> + +#include "../example.hpp" +#include "word_count_lexer_tokens.hpp" // token definition +#include "word_count_lexer_static.hpp" // generated tokenizer + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +//[wcl_static_main +int main(int argc, char* argv[]) +{ + // read input from the given file + std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + + // Specifying 'omit' as the token attribute type generates a token class + // not holding any token attribute at all (not even the iterator_range of the + // matched input sequence), therefore optimizing the token, the lexer, and + // possibly the parser implementation as much as possible. + // + // Specifying mpl::false_ as the 3rd template parameter generates a token + // type and an iterator, both holding no lexer state, allowing for even more + // aggressive optimizations.
+ // + // As a result the token instances contain the token ids as the only data + // member. + typedef lex::lexertl::token<char const*, lex::omit, boost::mpl::false_> token_type; + + // Define the lexer type to be used as the base class for our token + // definition. + // + // This is the only place where the code is different from an equivalent + // dynamic lexical analyzer. We use `lexertl::static_actor_lexer<>` instead of + // `lexertl::actor_lexer<>` as the base class for our token definition type. + // + // As we specified the suffix "wcl" while generating the static tables we + // need to pass the type lexertl::static_::lexer_wcl as the second template + // parameter below (see word_count_lexer_generate.cpp). + typedef lex::lexertl::static_actor_lexer< + token_type, lex::lexertl::static_::lexer_wcl + > lexer_type; + + // create the lexer object instance needed to invoke the lexical analysis + word_count_lexer_tokens<lexer_type> word_count_lexer; + + // tokenize the given string, all generated tokens are discarded + char const* first = str.c_str(); + char const* last = &first[str.size()]; + bool r = lex::tokenize(first, last, word_count_lexer); + + if (r) { + std::cout << "lines: " << word_count_lexer.l + << ", words: " << word_count_lexer.w + << ", characters: " << word_count_lexer.c + << "\n"; + } + else { + std::string rest(first, last); + std::cout << "Lexical analysis failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp new file mode 100644 index 00000000..e69b936e --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp @@ -0,0 +1,164 @@ +// Copyright (c) 2008-2009 Ben Hanson +// Copyright (c) 2008-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0.
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Auto-generated by boost::lexer, do not edit + +#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_WCL_NOV_10_2009_17_20_29) +#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_WCL_NOV_10_2009_17_20_29 + +#include <boost/spirit/home/support/detail/lexer/char_traits.hpp> + +//////////////////////////////////////////////////////////////////////////////// +// the generated table of state names and the tokenizer have to be +// defined in the boost::spirit::lex::lexertl::static_ namespace +namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace static_ { + +//////////////////////////////////////////////////////////////////////////////// +// this table defines the names of the lexer states +char const* const lexer_state_names_wcl[1] = +{ + "INITIAL" +}; + +//////////////////////////////////////////////////////////////////////////////// +// this variable defines the number of lexer states +std::size_t const lexer_state_count_wcl = 1; + +//////////////////////////////////////////////////////////////////////////////// +// this function returns the next matched token +template<typename Iterator> +std::size_t next_token_wcl (std::size_t& /*start_state_*/, bool& /*bol_*/, + Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) +{ + enum {end_state_index, id_index, unique_id_index, state_index, bol_index, + eol_index, dead_state_index, dfa_offset}; + + static const std::size_t npos = static_cast<std::size_t>(~0); + static const std::size_t lookup_[256] = { + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 8, 7, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 8, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 
9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9 }; + static const std::size_t dfa_alphabet_ = 10; + static const std::size_t dfa_[50] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 4, 2, 1, 65536, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 65537, + 1, 0, 0, 0, 0, 0, 0, 0, + 1, 65538, 2, 0, 0, 0, 0, 0, + 0, 0 }; + + if (start_token_ == end_) + { + unique_id_ = npos; + return 0; + } + + std::size_t const* ptr_ = dfa_ + dfa_alphabet_; + Iterator curr_ = start_token_; + bool end_state_ = *ptr_ != 0; + std::size_t id_ = *(ptr_ + id_index); + std::size_t uid_ = *(ptr_ + unique_id_index); + Iterator end_token_ = start_token_; + + while (curr_ != end_) + { + std::size_t const state_ = + ptr_[lookup_[static_cast<unsigned char>(*curr_++)]]; + + if (state_ == 0) break; + + ptr_ = &dfa_[state_ * dfa_alphabet_]; + + if (*ptr_) + { + end_state_ = true; + id_ = *(ptr_ + id_index); + uid_ = *(ptr_ + unique_id_index); + end_token_ = curr_; + } + } + + if (end_state_) + { + // return longest match + start_token_ = end_token_; + } + else + { + id_ = npos; + uid_ = npos; + } + + unique_id_ = uid_; + return id_; +} + +//////////////////////////////////////////////////////////////////////////////// +// this defines a generic accessors for the information above +struct lexer_wcl +{ + // version number and feature-set of compatible static lexer engine + enum + { + static_version = 65536, + supports_bol = false, + supports_eol = false + }; + + // return the number of lexer states + static std::size_t state_count() + { + return lexer_state_count_wcl; + } + + // return the name of the lexer state as given by 'idx' + static 
char const* state_name(std::size_t idx) + { + return lexer_state_names_wcl[idx]; + } + + // return the next matched token + template<typename Iterator> + static std::size_t next(std::size_t &start_state_, bool& bol_ + , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) + { + return next_token_wcl(start_state_, bol_, start_token_, end_, unique_id_); + } +}; + +}}}}} // namespace boost::spirit::lex::lexertl::static_ + +#endif diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp new file mode 100644 index 00000000..af52a209 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp @@ -0,0 +1,62 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_LEXER_TOKENS_FEB_10_2008_0739PM) +#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_LEXER_TOKENS_FEB_10_2008_0739PM + +#include <boost/spirit/include/phoenix_operator.hpp> +#include <boost/spirit/include/phoenix_statement.hpp> +#include <boost/spirit/include/phoenix_core.hpp> +#include <boost/iterator/iterator_traits.hpp> + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. +// +// Note, the token definition type is derived from the 'lexertl_actor_lexer' +// template, which is necessary to be able to use lexer semantic actions.
+/////////////////////////////////////////////////////////////////////////////// +struct distance_func +{ + template <typename Iterator1, typename Iterator2> + struct result : boost::iterator_difference<Iterator1> {}; + + template <typename Iterator1, typename Iterator2> + typename result<Iterator1, Iterator2>::type + operator()(Iterator1& begin, Iterator2& end) const + { + return std::distance(begin, end); + } +}; +boost::phoenix::function<distance_func> const distance = distance_func(); + +//[wcl_static_token_definition +template <typename Lexer> +struct word_count_lexer_tokens : boost::spirit::lex::lexer<Lexer> +{ + word_count_lexer_tokens() + : c(0), w(0), l(0) + , word("[^ \t\n]+") // define tokens + , eol("\n") + , any(".") + { + using boost::spirit::lex::_start; + using boost::spirit::lex::_end; + using boost::phoenix::ref; + + // associate tokens with the lexer + this->self + = word [++ref(w), ref(c) += distance(_start, _end)] + | eol [++ref(c), ++ref(l)] + | any [++ref(c)] + ; + } + + std::size_t c, w, l; + boost::spirit::lex::token_def<> word, eol, any; +}; +//] + +#endif diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp new file mode 100644 index 00000000..4ba3bb53 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp @@ -0,0 +1,120 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens +// . To integrate the generated C++ lexer into the /Spirit/ framework. 
+// + +// #define BOOST_SPIRIT_LEXERTL_DEBUG +#define BOOST_VARIANT_MINIMIZE_SIZE + +#include <boost/config/warning_disable.hpp> +#include <boost/spirit/include/qi.hpp> +//[wc_static_include +#include <boost/spirit/include/lex_static_lexertl.hpp> +//] +#include <boost/spirit/include/phoenix_operator.hpp> +#include <boost/spirit/include/phoenix_statement.hpp> +#include <boost/spirit/include/phoenix_container.hpp> + +#include <iostream> +#include <string> + +#include "../example.hpp" +#include "word_count_tokens.hpp" // token definition +#include "word_count_static.hpp" // generated tokenizer + +using namespace boost::spirit; +using namespace boost::spirit::ascii; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_grammar +// This is an ordinary grammar definition following the rules defined by +// Spirit.Qi. There is nothing specific about it, except it gets the token +// definition class instance passed to the constructor to allow accessing the +// embedded token_def<> instances. 
+template <typename Iterator> +struct word_count_grammar : qi::grammar<Iterator> +{ + template <typename TokenDef> + word_count_grammar(TokenDef const& tok) + : word_count_grammar::base_type(start) + , c(0), w(0), l(0) + { + using boost::phoenix::ref; + using boost::phoenix::size; + + // associate the defined tokens with the lexer, at the same time + // defining the actions to be executed + start = *( tok.word [ ++ref(w), ref(c) += size(_1) ] + | lit('\n') [ ++ref(l), ++ref(c) ] + | qi::token(IDANY) [ ++ref(c) ] + ) + ; + } + + std::size_t c, w, l; // counter for characters, words, and lines + qi::rule<Iterator> start; +}; +//] + +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_main +int main(int argc, char* argv[]) +{ + // Define the token type to be used: 'std::string' is available as the type + // of the token value. + typedef lex::lexertl::token< + char const*, boost::mpl::vector<std::string> + > token_type; + + // Define the lexer type to be used as the base class for our token + // definition. + // + // This is the only place where the code is different from an equivalent + // dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of + // the `lexertl::lexer<>` as the base class for our token definition type. + // + // As we specified the suffix "wc" while generating the static tables we + // need to pass the type lexertl::static_::lexer_wc as the second template + // parameter below (see word_count_generate.cpp). + typedef lex::lexertl::static_lexer< + token_type, lex::lexertl::static_::lexer_wc + > lexer_type; + + // Define the iterator type exposed by the lexer. + typedef word_count_tokens<lexer_type>::iterator_type iterator_type; + + // Now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process.
+ word_count_tokens<lexer_type> word_count; // Our lexer + word_count_grammar<iterator_type> g (word_count); // Our parser + + // Read in the file into memory. + std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + char const* first = str.c_str(); + char const* last = &first[str.size()]; + + // Parsing is done based on the token stream, not the character stream. + bool r = lex::tokenize_and_parse(first, last, word_count, g); + + if (r) { // success + std::cout << "lines: " << g.l << ", words: " << g.w + << ", characters: " << g.c << "\n"; + } + else { + std::string rest(first, last); + std::cerr << "Parsing failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp new file mode 100644 index 00000000..4a7aa3c6 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp @@ -0,0 +1,164 @@ +// Copyright (c) 2008-2009 Ben Hanson +// Copyright (c) 2008-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Auto-generated by boost::lexer, do not edit + +#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_WC_NOV_10_2009_17_20_04) +#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_WC_NOV_10_2009_17_20_04 + +#include <boost/spirit/home/support/detail/lexer/char_traits.hpp> + +//////////////////////////////////////////////////////////////////////////////// +// the generated table of state names and the tokenizer have to be +// defined in the boost::spirit::lex::lexertl::static_ namespace +namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace static_ { + +//////////////////////////////////////////////////////////////////////////////// +// this table defines the names of the lexer states +char const* const lexer_state_names_wc[1] = +{ + "INITIAL" +}; + +//////////////////////////////////////////////////////////////////////////////// +// this variable defines the number of lexer states +std::size_t const lexer_state_count_wc = 1; + +//////////////////////////////////////////////////////////////////////////////// +// this function returns the next matched token +template<typename Iterator> +std::size_t next_token_wc (std::size_t& /*start_state_*/, bool& /*bol_*/, + Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) +{ + enum {end_state_index, id_index, unique_id_index, state_index, bol_index, + eol_index, dead_state_index, dfa_offset}; + + static const std::size_t npos = static_cast<std::size_t>(~0); + static const std::size_t lookup_[256] = { + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 9, 7, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 9, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, 
+ 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8 }; + static const std::size_t dfa_alphabet_ = 10; + static const std::size_t dfa_[50] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 2, 4, 1, 65536, 0, 0, + 0, 0, 0, 0, 2, 0, 1, 10, + 1, 0, 0, 0, 0, 0, 0, 0, + 1, 65537, 2, 0, 0, 0, 0, 0, + 0, 0 }; + + if (start_token_ == end_) + { + unique_id_ = npos; + return 0; + } + + std::size_t const* ptr_ = dfa_ + dfa_alphabet_; + Iterator curr_ = start_token_; + bool end_state_ = *ptr_ != 0; + std::size_t id_ = *(ptr_ + id_index); + std::size_t uid_ = *(ptr_ + unique_id_index); + Iterator end_token_ = start_token_; + + while (curr_ != end_) + { + std::size_t const state_ = + ptr_[lookup_[static_cast<unsigned char>(*curr_++)]]; + + if (state_ == 0) break; + + ptr_ = &dfa_[state_ * dfa_alphabet_]; + + if (*ptr_) + { + end_state_ = true; + id_ = *(ptr_ + id_index); + uid_ = *(ptr_ + unique_id_index); + end_token_ = curr_; + } + } + + if (end_state_) + { + // return longest match + start_token_ = end_token_; + } + else + { + id_ = npos; + uid_ = npos; + } + + unique_id_ = uid_; + return id_; +} + +//////////////////////////////////////////////////////////////////////////////// +// this defines a generic accessors for the information above +struct lexer_wc +{ + // version number and feature-set of compatible static lexer engine + enum + { + static_version = 65536, + supports_bol = false, + supports_eol = false + }; + + // return the number of lexer states + static std::size_t state_count() + { + return lexer_state_count_wc; + } + + // return the name of the lexer state as given by 'idx' + static char 
const* state_name(std::size_t idx) + { + return lexer_state_names_wc[idx]; + } + + // return the next matched token + template<typename Iterator> + static std::size_t next(std::size_t &start_state_, bool& bol_ + , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) + { + return next_token_wc(start_state_, bol_, start_token_, end_, unique_id_); + } +}; + +}}}}} // namespace boost::spirit::lex::lexertl::static_ + +#endif diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp new file mode 100644 index 00000000..5828adde --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp @@ -0,0 +1,41 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM) +#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We keep the base class for the token definition as a +// template parameter to allow this class to be used for +// both: the code generation and the lexical analysis +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_tokenids +enum tokenids +{ + IDANY = boost::spirit::lex::min_token_id + 1, +}; +//] + +//[wc_static_tokendef +// This token definition class can be used without any change for all three +// possible use cases: a dynamic lexical analyzer, a code generator, and a +// static lexical analyzer. 
+template <typename BaseLexer> +struct word_count_tokens : boost::spirit::lex::lexer<BaseLexer> +{ + word_count_tokens() + : word_count_tokens::base_type( + boost::spirit::lex::match_flags::match_not_dot_newline) + { + // define tokens and associate them with the lexer + word = "[^ \t\n]+"; + this->self = word | '\n' | boost::spirit::lex::token_def<>(".", IDANY); + } + + boost::spirit::lex::token_def<std::string> word; +}; +//] + +#endif |