From 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Sat, 27 Apr 2024 20:24:20 +0200 Subject: Adding upstream version 14.2.21. Signed-off-by: Daniel Baumann --- src/boost/libs/spirit/example/lex/Jamfile | 32 + .../spirit/example/lex/custom_token_attribute.cpp | 112 ++ src/boost/libs/spirit/example/lex/example.hpp | 26 + src/boost/libs/spirit/example/lex/example1.cpp | 133 ++ src/boost/libs/spirit/example/lex/example1.input | 5 + src/boost/libs/spirit/example/lex/example2.cpp | 169 +++ src/boost/libs/spirit/example/lex/example2.input | 7 + src/boost/libs/spirit/example/lex/example3.cpp | 150 ++ src/boost/libs/spirit/example/lex/example3.input | 5 + src/boost/libs/spirit/example/lex/example4.cpp | 227 +++ src/boost/libs/spirit/example/lex/example4.input | 17 + src/boost/libs/spirit/example/lex/example5.cpp | 273 ++++ src/boost/libs/spirit/example/lex/example5.input | 16 + src/boost/libs/spirit/example/lex/example6.cpp | 249 ++++ src/boost/libs/spirit/example/lex/example6.input | 17 + .../spirit/example/lex/lexer_debug_support.cpp | 109 ++ .../spirit/example/lex/print_number_tokenids.cpp | 121 ++ .../libs/spirit/example/lex/print_numbers.cpp | 118 ++ .../libs/spirit/example/lex/print_numbers.input | 17 + src/boost/libs/spirit/example/lex/reference.cpp | 30 + .../libs/spirit/example/lex/static_lexer/Jamfile | 15 + .../example/lex/static_lexer/word_count.input | 7 + .../lex/static_lexer/word_count_generate.cpp | 45 + .../lex/static_lexer/word_count_lexer_generate.cpp | 45 + .../lex/static_lexer/word_count_lexer_static.cpp | 84 ++ .../lex/static_lexer/word_count_lexer_static.hpp | 164 ++ .../lex/static_lexer/word_count_lexer_tokens.hpp | 62 + .../example/lex/static_lexer/word_count_static.cpp | 120 ++ .../example/lex/static_lexer/word_count_static.hpp | 164 ++ .../example/lex/static_lexer/word_count_tokens.hpp | 41 + .../libs/spirit/example/lex/strip_comments.cpp | 163 ++ .../libs/spirit/example/lex/strip_comments.input | 162 
++ .../spirit/example/lex/strip_comments_lexer.cpp | 172 +++ src/boost/libs/spirit/example/lex/word_count.cpp | 166 +++ src/boost/libs/spirit/example/lex/word_count.input | 7 + .../libs/spirit/example/lex/word_count_functor.cpp | 183 +++ .../spirit/example/lex/word_count_functor.flex | 59 + .../spirit/example/lex/word_count_functor_flex.cpp | 1576 ++++++++++++++++++++ .../libs/spirit/example/lex/word_count_lexer.cpp | 152 ++ 39 files changed, 5220 insertions(+) create mode 100644 src/boost/libs/spirit/example/lex/Jamfile create mode 100644 src/boost/libs/spirit/example/lex/custom_token_attribute.cpp create mode 100644 src/boost/libs/spirit/example/lex/example.hpp create mode 100644 src/boost/libs/spirit/example/lex/example1.cpp create mode 100644 src/boost/libs/spirit/example/lex/example1.input create mode 100644 src/boost/libs/spirit/example/lex/example2.cpp create mode 100644 src/boost/libs/spirit/example/lex/example2.input create mode 100644 src/boost/libs/spirit/example/lex/example3.cpp create mode 100644 src/boost/libs/spirit/example/lex/example3.input create mode 100644 src/boost/libs/spirit/example/lex/example4.cpp create mode 100644 src/boost/libs/spirit/example/lex/example4.input create mode 100644 src/boost/libs/spirit/example/lex/example5.cpp create mode 100644 src/boost/libs/spirit/example/lex/example5.input create mode 100644 src/boost/libs/spirit/example/lex/example6.cpp create mode 100644 src/boost/libs/spirit/example/lex/example6.input create mode 100644 src/boost/libs/spirit/example/lex/lexer_debug_support.cpp create mode 100644 src/boost/libs/spirit/example/lex/print_number_tokenids.cpp create mode 100644 src/boost/libs/spirit/example/lex/print_numbers.cpp create mode 100644 src/boost/libs/spirit/example/lex/print_numbers.input create mode 100644 src/boost/libs/spirit/example/lex/reference.cpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/Jamfile create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count.input 
create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp create mode 100644 src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp create mode 100644 src/boost/libs/spirit/example/lex/strip_comments.cpp create mode 100644 src/boost/libs/spirit/example/lex/strip_comments.input create mode 100644 src/boost/libs/spirit/example/lex/strip_comments_lexer.cpp create mode 100644 src/boost/libs/spirit/example/lex/word_count.cpp create mode 100644 src/boost/libs/spirit/example/lex/word_count.input create mode 100644 src/boost/libs/spirit/example/lex/word_count_functor.cpp create mode 100644 src/boost/libs/spirit/example/lex/word_count_functor.flex create mode 100644 src/boost/libs/spirit/example/lex/word_count_functor_flex.cpp create mode 100644 src/boost/libs/spirit/example/lex/word_count_lexer.cpp (limited to 'src/boost/libs/spirit/example/lex') diff --git a/src/boost/libs/spirit/example/lex/Jamfile b/src/boost/libs/spirit/example/lex/Jamfile new file mode 100644 index 00000000..79887762 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/Jamfile @@ -0,0 +1,32 @@ +#============================================================================== +# Copyright (c) 2001-2009 Joel de Guzman +# Copyright (c) 2001-2009 Hartmut Kaiser +# +# Distributed under the Boost Software License, Version 1.0. 
(See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#============================================================================== +project spirit-lexer-example + : requirements + 300 + : + : + ; + + +exe example1 : example1.cpp ; +exe example2 : example2.cpp ; +exe example3 : example3.cpp ; +exe example4 : example4.cpp ; +exe example5 : example5.cpp ; +exe example6 : example6.cpp ; +exe print_numbers : print_numbers.cpp ; +exe print_number_tokenids : print_number_tokenids.cpp ; +exe word_count : word_count.cpp ; +exe word_count_functor : word_count_functor.cpp ; +exe word_count_lexer : word_count_lexer.cpp ; +exe word_count_functor_flex : word_count_functor_flex.cpp ; +exe strip_comments : strip_comments.cpp ; +exe strip_comments_lexer : strip_comments_lexer.cpp ; +exe custom_token_attribute : custom_token_attribute.cpp ; + +exe lexer_debug_support : lexer_debug_support.cpp ; diff --git a/src/boost/libs/spirit/example/lex/custom_token_attribute.cpp b/src/boost/libs/spirit/example/lex/custom_token_attribute.cpp new file mode 100644 index 00000000..e29bbcee --- /dev/null +++ b/src/boost/libs/spirit/example/lex/custom_token_attribute.cpp @@ -0,0 +1,112 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to demonstrate how custom, user defined types +// can be easily integrated with the lexer as token value types. Moreover, the +// custom token values are properly exposed to the parser as well, allowing to +// retrieve the custom values using the built in parser attribute propagation +// rules. 
+ +#include +#include + +namespace lex = boost::spirit::lex; +namespace qi = boost::spirit::qi; +namespace mpl = boost::mpl; + +/////////////////////////////////////////////////////////////////////////////// +// This is just a simple custom rational data structure holding two ints to be +// interpreted as a rational number +struct rational +{ + rational(int n = 0, int d = 0) + : nominator_(n), denominator_(d) + {} + + int nominator_; + int denominator_; +}; + +/////////////////////////////////////////////////////////////////////////////// +// A rational is represented as "{n,d}", where 'n' and 'd' are the nominator +// and denominator of the number. We use Spirit.Qi to do the low level parsing +// of the input sequence as matched by the lexer. Certainly, any other +// conversion could be used instead. +// +// The lexer uses the template assign_to_attribute_from_iterators<> to convert +// the matched input sequence (pair of iterators) to the token value type as +// specified while defining the lex::token_def<>. +// +// Our specialization of assign_to_attribute_from_iterators<> for the rational +// data type defined above has to be placed into the +// namespace boost::spirit::traits, otherwise it won't be found by the library. 
+namespace boost { namespace spirit { namespace traits +{ + template + struct assign_to_attribute_from_iterators + { + static void + call(Iterator const& first, Iterator const& last, rational& attr) + { + int x, y; + Iterator b = first; + qi::parse(b, last, + '{' >> qi::int_ >> ',' >> qi::int_ >> '}', x, y); + attr = rational(x, y); + } + }; +}}} + +/////////////////////////////////////////////////////////////////////////////// +// a lexer recognizing a single token type: rational +template +struct lex_rational : lex::lexer +{ + lex_rational() + { + this->self.add_pattern("INT", "[1-9][0-9]*"); + + rt = "\\{{INT},{INT}\\}"; + this->self.add(rt); + } + lex::token_def rt; +}; + + +int main() +{ + // the token type needs to know the iterator type of the underlying + // input and the set of used token value types + typedef lex::lexertl::token > token_type; + + // use actor_lexer<> here if your token definitions have semantic + // actions + typedef lex::lexertl::lexer lexer_type; + + // this is the iterator exposed by the lexer, we use this for parsing + typedef lexer_type::iterator_type iterator_type; + + // create a lexer instance + std::string input("{3,4}"); + std::string::iterator s = input.begin(); + + lex_rational lex; + iterator_type b = lex.begin(s, input.end()); + + // use the embedded token_def as a parser, it exposes its token value type + // as its parser attribute type + rational r; + if (!qi::parse(b, lex.end(), lex.rt, r)) + { + std::cerr << "Parsing failed!" 
<< std::endl; + return -1; + } + + std::cout << "Parsing succeeded: {" + << r.nominator_ << ", " << r.denominator_ << "}" << std::endl; + return 0; +} + diff --git a/src/boost/libs/spirit/example/lex/example.hpp b/src/boost/libs/spirit/example/lex/example.hpp new file mode 100644 index 00000000..41399a5f --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example.hpp @@ -0,0 +1,26 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// Copyright (c) 2001-2007 Joel de Guzman +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#include +#include + +/////////////////////////////////////////////////////////////////////////////// +// Helper function reading a file into a string +/////////////////////////////////////////////////////////////////////////////// +inline std::string +read_from_file(char const* infile) +{ + std::ifstream instream(infile); + if (!instream.is_open()) { + std::cerr << "Couldn't open file: " << infile << std::endl; + exit(-1); + } + instream.unsetf(std::ios::skipws); // No white space skipping! + return std::string(std::istreambuf_iterator(instream.rdbuf()), + std::istreambuf_iterator()); +} + diff --git a/src/boost/libs/spirit/example/lex/example1.cpp b/src/boost/libs/spirit/example/lex/example1.cpp new file mode 100644 index 00000000..b157b082 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example1.cpp @@ -0,0 +1,133 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Simple lexer/parser to test the Spirit installation. +// +// This example shows, how to create a simple lexer recognizing 5 different +// tokens, and how to use a single token definition as the skip parser during +// the parsing. 
Additionally, it demonstrates how to use one of the defined +// tokens as a parser component in the grammar. +// +// The grammar recognizes a simple input structure, for instance: +// +// { +// hello world, hello it is me +// } +// +// Any number of simple sentences (optionally comma separated) inside a pair +// of curly braces will be matched. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include + +#include +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example1_tokens : lex::lexer +{ + example1_tokens() + { + // define tokens and associate them with the lexer + identifier = "[a-zA-Z_][a-zA-Z0-9_]*"; + this->self = lex::char_(',') | '{' | '}' | identifier; + + // any token definition to be used as the skip parser during parsing + // has to be associated with a separate lexer state (here 'WS') + this->white_space = "[ \\t\\n]+"; + this->self("WS") = white_space; + } + + lex::token_def<> identifier, white_space; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example1_grammar + : qi::grammar > > +{ + template + example1_grammar(TokenDef const& tok) + : example1_grammar::base_type(start) + { + start = '{' >> *(tok.identifier >> -ascii::char_(',')) >> '}'; + } + + qi::rule > > start; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // This is the token type to return from the lexer iterator + typedef lex::lexertl::token token_type; + + // This is the lexer type to use to tokenize the input. 
+ // We use the lexertl based lexer engine. + typedef lex::lexertl::lexer lexer_type; + + // This is the lexer type (derived from the given lexer type). + typedef example1_tokens example1_lex; + + // This is the iterator type exposed by the lexer + typedef example1_lex::iterator_type iterator_type; + + // This is the type of the grammar to parse + typedef example1_grammar example1_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example1_lex lex; // Our lexer + example1_grammar calc(lex); // Our grammar definition + + std::string str (read_from_file("example1.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = lex.begin(it, str.end()); + iterator_type end = lex.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. + // Note, how we use the token_def defined above as the skip parser. It must + // be explicitly wrapped inside a state directive, switching the lexer + // state for the duration of skipping whitespace. + bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[lex.white_space]); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else + { + std::string rest(iter, end); + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "stopped at: \"" << rest << "\"\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... 
:-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/example1.input b/src/boost/libs/spirit/example/lex/example1.input new file mode 100644 index 00000000..e2aa2b62 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example1.input @@ -0,0 +1,5 @@ +{ + hello world, + hello world, + goodbye +} diff --git a/src/boost/libs/spirit/example/lex/example2.cpp b/src/boost/libs/spirit/example/lex/example2.cpp new file mode 100644 index 00000000..68a47667 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example2.cpp @@ -0,0 +1,169 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example shows how to create a simple lexer recognizing a couple of +// different tokens and how to use this with a grammar. This example has a +// heavily backtracking grammar which makes it a candidate for lexer based +// parsing (all tokens are scanned and generated only once, even if +// backtracking is required) which speeds up the overall parsing process +// considerably, out-weighting the overhead needed for setting up the lexer. +// Additionally it demonstrates how to use one of the defined tokens as a +// parser component in the grammar. +// +// The grammar recognizes a simple input structure: any number of English +// simple sentences (statements, questions and commands) are recognized and +// are being counted separately. 
+ +// #define BOOST_SPIRIT_DEBUG +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include +#include + +#include +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; +using namespace boost::spirit::ascii; +using boost::phoenix::ref; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example2_tokens : lex::lexer +{ + example2_tokens() + { + // A 'word' is comprised of one or more letters and an optional + // apostrophe. If it contains an apostrophe, there may only be one and + // the apostrophe must be preceded and succeeded by at least 1 letter. + // For example, "I'm" and "doesn't" meet the definition of 'word' we + // define below. + word = "[a-zA-Z]+('[a-zA-Z]+)?"; + + // Associate the tokens and the token set with the lexer. Note that + // single character token definitions as used below always get + // interpreted literally and never as special regex characters. This is + // done to be able to assign single characters the id of their character + // code value, allowing to reference those as literals in Qi grammars. + this->self = lex::token_def<>(',') | '!' | '.' | '?' | ' ' | '\n' | word; + } + + lex::token_def<> word; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example2_grammar : qi::grammar +{ + template + example2_grammar(TokenDef const& tok) + : example2_grammar::base_type(story) + , paragraphs(0), commands(0), questions(0), statements(0) + { + story + = +paragraph + ; + + paragraph + = ( +( command [ ++ref(commands) ] + | question [ ++ref(questions) ] + | statement [ ++ref(statements) ] + ) + >> *char_(' ') >> +char_('\n') + ) + [ ++ref(paragraphs) ] + ; + + command + = +(tok.word | ' ' | ',') >> '!' 
+ ; + + question + = +(tok.word | ' ' | ',') >> '?' + ; + + statement + = +(tok.word | ' ' | ',') >> '.' + ; + + BOOST_SPIRIT_DEBUG_NODE(story); + BOOST_SPIRIT_DEBUG_NODE(paragraph); + BOOST_SPIRIT_DEBUG_NODE(command); + BOOST_SPIRIT_DEBUG_NODE(question); + BOOST_SPIRIT_DEBUG_NODE(statement); + } + + qi::rule story, paragraph, command, question, statement; + int paragraphs, commands, questions, statements; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // This is the token type to return from the lexer iterator + typedef lex::lexertl::token token_type; + + // This is the lexer type to use to tokenize the input. + // Here we use the lexertl based lexer engine. + typedef lex::lexertl::lexer lexer_type; + + // This is the token definition type (derived from the given lexer type). + typedef example2_tokens example2_tokens; + + // this is the iterator type exposed by the lexer + typedef example2_tokens::iterator_type iterator_type; + + // this is the type of the grammar to parse + typedef example2_grammar example2_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example2_tokens tokens; // Our lexer + example2_grammar calc(tokens); // Our parser + + std::string str (read_from_file("example2.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = tokens.begin(it, str.end()); + iterator_type end = tokens.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. 
+ bool r = qi::parse(iter, end, calc); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "There were " + << calc.commands << " commands, " + << calc.questions << " questions, and " + << calc.statements << " statements.\n"; + std::cout << "-------------------------\n"; + } + else + { + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/example2.input b/src/boost/libs/spirit/example/lex/example2.input new file mode 100644 index 00000000..2f768330 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example2.input @@ -0,0 +1,7 @@ +Our hiking boots are ready. So, let's pack! + +Have you the plane tickets for there and back? + +I do, I do. We're all ready to go. Grab my hand and be my beau. + + diff --git a/src/boost/libs/spirit/example/lex/example3.cpp b/src/boost/libs/spirit/example/lex/example3.cpp new file mode 100644 index 00000000..83800331 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example3.cpp @@ -0,0 +1,150 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example shows how to create a simple lexer recognizing a couple of +// different tokens and how to use this with a grammar. This example has a +// heavily backtracking grammar which makes it a candidate for lexer based +// parsing (all tokens are scanned and generated only once, even if +// backtracking is required) which speeds up the overall parsing process +// considerably, out-weighting the overhead needed for setting up the lexer. 
+// +// Additionally, this example demonstrates, how to define a token set usable +// as the skip parser during parsing, allowing to define several tokens to be +// ignored. +// +// This example recognizes couplets, which are sequences of numbers enclosed +// in matching pairs of parenthesis. See the comments below to for details +// and examples. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG +// #define BOOST_SPIRIT_DEBUG + +#include +#include +#include + +#include +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example3_tokens : lex::lexer +{ + example3_tokens() + { + // define the tokens to match + ellipses = "\\.\\.\\."; + number = "[0-9]+"; + + // associate the tokens and the token set with the lexer + this->self = ellipses | '(' | ')' | number; + + // define the whitespace to ignore (spaces, tabs, newlines and C-style + // comments) + this->self("WS") + = lex::token_def<>("[ \\t\\n]+") // whitespace + | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" // C style comments + ; + } + + // these tokens expose the iterator_range of the matched input sequence + lex::token_def<> ellipses, identifier, number; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example3_grammar + : qi::grammar > +{ + template + example3_grammar(TokenDef const& tok) + : example3_grammar::base_type(start) + { + start + = +(couplet | tok.ellipses) + ; + + // A couplet matches nested left and right parenthesis. + // For example: + // (1) (1 2) (1 2 3) ... + // ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... + // (((1))) ... 
+ couplet + = tok.number + | '(' >> +couplet >> ')' + ; + + BOOST_SPIRIT_DEBUG_NODE(start); + BOOST_SPIRIT_DEBUG_NODE(couplet); + } + + qi::rule > start, couplet; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // This is the token type to return from the lexer iterator + typedef lex::lexertl::token token_type; + + // This is the lexer type to use to tokenize the input. + // Here we use the lexertl based lexer engine. + typedef lex::lexertl::lexer lexer_type; + + // This is the token definition type (derived from the given lexer type). + typedef example3_tokens example3_tokens; + + // this is the iterator type exposed by the lexer + typedef example3_tokens::iterator_type iterator_type; + + // this is the type of the grammar to parse + typedef example3_grammar example3_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example3_tokens tokens; // Our lexer + example3_grammar calc(tokens); // Our parser + + std::string str (read_from_file("example3.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = tokens.begin(it, str.end()); + iterator_type end = tokens.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. + // Note how we use the lexer defined above as the skip parser. 
+ bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else + { + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/example3.input b/src/boost/libs/spirit/example/lex/example3.input new file mode 100644 index 00000000..497cab43 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example3.input @@ -0,0 +1,5 @@ +/* the following are couplets */ +(1) (1 2) (1 2 3) ... +((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... +(((1))) ... + diff --git a/src/boost/libs/spirit/example/lex/example4.cpp b/src/boost/libs/spirit/example/lex/example4.cpp new file mode 100644 index 00000000..2970340a --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example4.cpp @@ -0,0 +1,227 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example shows how to create a simple lexer recognizing a couple of +// different tokens aimed at a simple language and how to use this lexer with +// a grammar. It shows how to associate attributes to tokens and how to access +// the token attributes from inside the grammar. +// +// We use explicit token attribute types, making the corresponding token instances +// carry convert the matched input into an instance of that type. The token +// attribute is exposed as the parser attribute if this token is used as a +// parser component somewhere in a grammar. +// +// Additionally, this example demonstrates, how to define a token set usable +// as the skip parser during parsing, allowing to define several tokens to be +// ignored. 
+// +// This example recognizes a very simple programming language having +// assignment statements and if and while control structures. Look at the file +// example4.input for an example. + +#include +#include +#include +#include + +#include +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; +using boost::phoenix::val; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example4_tokens : lex::lexer +{ + example4_tokens() + { + // define the tokens to match + identifier = "[a-zA-Z_][a-zA-Z0-9_]*"; + constant = "[0-9]+"; + if_ = "if"; + else_ = "else"; + while_ = "while"; + + // associate the tokens and the token set with the lexer + this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant; + this->self += if_ | else_ | while_ | identifier; + + // define the whitespace to ignore (spaces, tabs, newlines and C-style + // comments) + this->self("WS") + = lex::token_def<>("[ \\t\\n]+") + | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" + ; + } + +//[example4_token_def + // these tokens expose the iterator_range of the matched input sequence + lex::token_def<> if_, else_, while_; + + // The following two tokens have an associated attribute type, 'identifier' + // carries a string (the identifier name) and 'constant' carries the + // matched integer value. + // + // Note: any token attribute type explicitly specified in a token_def<> + // declaration needs to be listed during token type definition as + // well (see the typedef for the token_type below). + // + // The conversion of the matched input to an instance of this type occurs + // once (on first access), which makes token attributes as efficient as + // possible. Moreover, token instances are constructed once by the lexer + // library. 
From this point on tokens are passed by reference only, + // avoiding them being copied around. + lex::token_def identifier; + lex::token_def constant; +//] +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example4_grammar + : qi::grammar > +{ + template + example4_grammar(TokenDef const& tok) + : example4_grammar::base_type(program) + { + using boost::spirit::_val; + + program + = +block + ; + + block + = '{' >> *statement >> '}' + ; + + statement + = assignment + | if_stmt + | while_stmt + ; + + assignment + = (tok.identifier >> '=' >> expression >> ';') + [ + std::cout << val("assignment statement to: ") << _1 << "\n" + ] + ; + + if_stmt + = ( tok.if_ >> '(' >> expression >> ')' >> block + >> -(tok.else_ >> block) + ) + [ + std::cout << val("if expression: ") << _2 << "\n" + ] + ; + + while_stmt + = (tok.while_ >> '(' >> expression >> ')' >> block) + [ + std::cout << val("while expression: ") << _2 << "\n" + ] + ; + + // since expression has a variant return type accommodating for + // std::string and unsigned integer, both possible values may be + // returned to the calling rule + expression + = tok.identifier [ _val = _1 ] + | tok.constant [ _val = _1 ] + ; + } + + typedef boost::variant expression_type; + + qi::rule > program, block, statement; + qi::rule > assignment, if_stmt; + qi::rule > while_stmt; + + // the expression is the only rule having a return value + qi::rule > expression; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + +//[example4_token + // This is the lexer token type to use. The second template parameter lists + // all attribute types used for token_def's during token definition (see + // calculator_tokens<> above). 
Here we use the predefined lexertl token + // type, but any compatible token type may be used instead. + // + // If you don't list any token attribute types in the following declaration + // (or just use the default token type: lexertl_token) + // it will compile and work just fine, just a bit less efficient. This is + // because the token attribute will be generated from the matched input + // sequence every time it is requested. But as soon as you specify at + // least one token attribute type you'll have to list all attribute types + // used for token_def<> declarations in the token definition class above, + // otherwise compilation errors will occur. + typedef lex::lexertl::token< + base_iterator_type, boost::mpl::vector + > token_type; +//] + // Here we use the lexertl based lexer engine. + typedef lex::lexertl::lexer lexer_type; + + // This is the token definition type (derived from the given lexer type). + typedef example4_tokens example4_tokens; + + // this is the iterator type exposed by the lexer + typedef example4_tokens::iterator_type iterator_type; + + // this is the type of the grammar to parse + typedef example4_grammar example4_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example4_tokens tokens; // Our lexer + example4_grammar calc(tokens); // Our parser + + std::string str (read_from_file("example4.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = tokens.begin(it, str.end()); + iterator_type end = tokens.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. + // Note how we use the lexer defined above as the skip parser. It must + // be explicitly wrapped inside a state directive, switching the lexer + // state for the duration of skipping whitespace. 
+ bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else + { + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/example4.input b/src/boost/libs/spirit/example/lex/example4.input new file mode 100644 index 00000000..7a5ff76d --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example4.input @@ -0,0 +1,17 @@ +/* example4.input */ +{ + + if (variable) { a = b ; } + + while (10) { + + if (2) { b = 10 ; } + if (x) { c = x ; } else { d = 10 ; } + + } + + variable = 42; + +} + + diff --git a/src/boost/libs/spirit/example/lex/example5.cpp b/src/boost/libs/spirit/example/lex/example5.cpp new file mode 100644 index 00000000..8083042c --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example5.cpp @@ -0,0 +1,273 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example shows how to create a simple lexer recognizing a couple of +// different tokens aimed at a simple language and how to use this lexer with +// a grammar. It shows how to associate attributes to tokens and how to access the +// token attributes from inside the grammar. +// +// Additionally, this example demonstrates, how to define a token set usable +// as the skip parser during parsing, allowing to define several tokens to be +// ignored. +// +// The main purpose of this example is to show how inheritance can be used to +// overload parts of a base grammar and add token definitions to a base lexer. 
+// +// Further, it shows how you can use the 'omit' attribute type specifier +// for token definitions to force the token to have no attribute (expose an +// unused attribute). +// +// This example recognizes a very simple programming language having +// assignment statements and if and while control structures. Look at the file +// example5.input for an example. + +#include +#include +#include +#include + +#include +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; +using boost::phoenix::val; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition base, defines all tokens for the base grammar below +/////////////////////////////////////////////////////////////////////////////// +template +struct example5_base_tokens : lex::lexer +{ +protected: + // this lexer is supposed to be used as a base type only + example5_base_tokens() {} + +public: + void init_token_definitions() + { + // define the tokens to match + identifier = "[a-zA-Z_][a-zA-Z0-9_]*"; + constant = "[0-9]+"; + if_ = "if"; + while_ = "while"; + + // associate the tokens and the token set with the lexer + this->self += lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';' | constant; + this->self += if_ | while_ | identifier; + + // define the whitespace to ignore (spaces, tabs, newlines and C-style + // comments) + this->self("WS") + = lex::token_def<>("[ \\t\\n]+") + | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" + ; + } + + // these tokens have no attribute + lex::token_def if_, while_; + + // The following two tokens have an associated attribute type, 'identifier' + // carries a string (the identifier name) and 'constant' carries the + // matched integer value. + // + // Note: any token attribute type explicitly specified in a token_def<> + // declaration needs to be listed during token type definition as + // well (see the typedef for the token_type below). 
+ // + // The conversion of the matched input to an instance of this type occurs + // once (on first access), which makes token attributes as efficient as + // possible. Moreover, token instances are constructed once by the lexer + // library. From this point on tokens are passed by reference only, + // avoiding them being copied around. + lex::token_def identifier; + lex::token_def constant; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition base, defines a basic language +/////////////////////////////////////////////////////////////////////////////// +template +struct example5_base_grammar + : qi::grammar > +{ + template + example5_base_grammar(TokenDef const& tok) + : example5_base_grammar::base_type(program) + { + using boost::spirit::_val; + + program + = +block + ; + + block + = '{' >> *statement >> '}' + ; + + statement + = assignment + | if_stmt + | while_stmt + ; + + assignment + = (tok.identifier >> '=' >> expression >> ';') + [ + std::cout << val("assignment statement to: ") << _1 << "\n" + ] + ; + + if_stmt + = (tok.if_ >> '(' >> expression >> ')' >> block) + [ + std::cout << val("if expression: ") << _1 << "\n" + ] + ; + + while_stmt + = (tok.while_ >> '(' >> expression >> ')' >> block) + [ + std::cout << val("while expression: ") << _1 << "\n" + ] + ; + + // since expression has a variant return type accommodating for + // std::string and unsigned integer, both possible values may be + // returned to the calling rule + expression + = tok.identifier [ _val = _1 ] + | tok.constant [ _val = _1 ] + ; + } + + typedef qi::in_state_skipper skipper_type; + + qi::rule program, block, statement; + qi::rule assignment, if_stmt; + qi::rule while_stmt; + + // the expression is the only rule having a return value + typedef boost::variant expression_type; + qi::rule expression; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition for derived lexer, 
defines additional tokens +/////////////////////////////////////////////////////////////////////////////// +template +struct example5_tokens : example5_base_tokens +{ + typedef example5_base_tokens base_type; + + example5_tokens() + { + // define the additional token to match + else_ = "else"; + + // associate the new token with the lexer, note we add 'else' before + // anything else to add it to the token set before the identifier + // token, otherwise "else" would be matched as an identifier + this->self = else_; + + // now add the token definitions from the base class + this->base_type::init_token_definitions(); + } + + // this token has no attribute + lex::token_def else_; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Derived grammar definition, defines a language extension +/////////////////////////////////////////////////////////////////////////////// +template +struct example5_grammar : example5_base_grammar +{ + template + example5_grammar(TokenDef const& tok) + : example5_base_grammar(tok) + { + // we alter the if_stmt only + this->if_stmt + = this->if_stmt.copy() >> -(tok.else_ >> this->block) + ; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // This is the lexer token type to use. The second template parameter lists + // all attribute types used for token_def's during token definition (see + // example5_base_tokens<> above). Here we use the predefined lexertl token + // type, but any compatible token type may be used instead. + // + // If you don't list any token attribute types in the following declaration + // (or just use the default token type: lexertl_token) + // it will compile and work just fine, just a bit less efficient. 
This is + // because the token attribute will be generated from the matched input + // sequence every time it is requested. But as soon as you specify at + // least one token attribute type you'll have to list all attribute types + // used for token_def<> declarations in the token definition class above, + // otherwise compilation errors will occur. + typedef lex::lexertl::token< + base_iterator_type, boost::mpl::vector + > token_type; + + // Here we use the lexertl based lexer engine. + typedef lex::lexertl::lexer lexer_type; + + // This is the token definition type (derived from the given lexer type). + typedef example5_tokens example5_tokens; + + // this is the iterator type exposed by the lexer + typedef example5_tokens::iterator_type iterator_type; + + // this is the type of the grammar to parse + typedef example5_grammar example5_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example5_tokens tokens; // Our lexer + example5_grammar calc(tokens); // Our parser + + std::string str (read_from_file("example5.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = tokens.begin(it, str.end()); + iterator_type end = tokens.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. + // Note how we use the lexer defined above as the skip parser. It must + // be explicitly wrapped inside a state directive, switching the lexer + // state for the duration of skipping whitespace. 
+ std::string ws("WS"); + bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else + { + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/example5.input b/src/boost/libs/spirit/example/lex/example5.input new file mode 100644 index 00000000..1cbecf52 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example5.input @@ -0,0 +1,16 @@ +/* example5.input */ +{ + + if (variable) { a = b ; } + + while (10) { + + if (2) { b = 10 ; } + if (x) { c = x ; } else { d = 10 ; } + + } + + variable = 42; + if (variable) { a = b ; } else { } +} + diff --git a/src/boost/libs/spirit/example/lex/example6.cpp b/src/boost/libs/spirit/example/lex/example6.cpp new file mode 100644 index 00000000..843b2d98 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example6.cpp @@ -0,0 +1,249 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example shows how to create a simple lexer recognizing a couple of +// different tokens aimed at a simple language and how to use this lexer with +// a grammar. It shows how to associate attributes to tokens and how to access the +// token attributes from inside the grammar. +// +// Additionally, this example demonstrates, how to define a token set usable +// as the skip parser during parsing, allowing to define several tokens to be +// ignored. +// +// The example demonstrates how to use the add(...)(...) 
syntax to associate +// token definitions with the lexer and how token ids can be used in the +// parser to refer to a token, without having to directly reference its +// definition. +// +// This example recognizes a very simple programming language having +// assignment statements and if and while control structures. Look at the file +// example6.input for an example. +// +// This example is essentially identical to example4.cpp. The only difference +// is that we use the self.add() syntax to define tokens and to associate them +// with the lexer. + +#include +#include +#include +#include + +#include +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; +using boost::phoenix::val; + +/////////////////////////////////////////////////////////////////////////////// +// Token id definitions +/////////////////////////////////////////////////////////////////////////////// +enum token_ids +{ + ID_CONSTANT = 1000, + ID_IF, + ID_ELSE, + ID_WHILE, + ID_IDENTIFIER +}; + +/////////////////////////////////////////////////////////////////////////////// +// Token definitions +/////////////////////////////////////////////////////////////////////////////// +template +struct example6_tokens : lex::lexer +{ + example6_tokens() + { + // define the tokens to match + identifier = "[a-zA-Z_][a-zA-Z0-9_]*"; + constant = "[0-9]+"; + + // associate the tokens and the token set with the lexer + this->self = lex::token_def<>('(') | ')' | '{' | '}' | '=' | ';'; + + // Token definitions can be added by using some special syntactic + // construct as shown below. + // Note, that the token definitions added this way expose the iterator + // pair pointing to the matched input stream as their attribute. 
+ this->self.add + (constant, ID_CONSTANT) + ("if", ID_IF) + ("else", ID_ELSE) + ("while", ID_WHILE) + (identifier, ID_IDENTIFIER) + ; + + // define the whitespace to ignore (spaces, tabs, newlines and C-style + // comments) and add those to another lexer state (here: "WS") + this->self("WS") + = lex::token_def<>("[ \\t\\n]+") + | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" + ; + } + + // The following two tokens have an associated attribute type, identifier + // carries a string (the identifier name) and constant carries the matched + // integer value. + // + // Note: any token attribute type explicitly specified in a token_def<> + // declaration needs to be listed during token type definition as + // well (see the typedef for the token_type below). + // + // The conversion of the matched input to an instance of this type occurs + // once (on first access), which makes token attributes as efficient as + // possible. Moreover, token instances are constructed once by the lexer + // library. From this point on tokens are passed by reference only, + // avoiding them being copied around. 
+ lex::token_def identifier; + lex::token_def constant; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct example6_grammar + : qi::grammar > +{ + template + example6_grammar(TokenDef const& tok) + : example6_grammar::base_type(program) + { + using boost::spirit::_val; + + program + = +block + ; + + block + = '{' >> *statement >> '}' + ; + + statement + = assignment + | if_stmt + | while_stmt + ; + + assignment + = (tok.identifier >> '=' >> expression >> ';') + [ + std::cout << val("assignment statement to: ") + << _1 << "\n" + ] + ; + + if_stmt + = ( token(ID_IF) >> '(' >> expression >> ')' >> block + >> -(token(ID_ELSE) >> block) + ) + [ + std::cout << val("if expression: ") + << _2 << "\n" + ] + ; + + while_stmt + = (token(ID_WHILE) >> '(' >> expression >> ')' >> block) + [ + std::cout << val("while expression: ") + << _2 << "\n" + ] + ; + + // since expression has a variant return type accommodating for + // std::string and unsigned integer, both possible values may be + // returned to the calling rule + expression + = tok.identifier [ _val = _1 ] + | tok.constant [ _val = _1 ] + ; + } + + typedef boost::variant expression_type; + + qi::rule > program, block, statement; + qi::rule > assignment, if_stmt; + qi::rule > while_stmt; + + // the expression is the only rule having a return value + qi::rule > expression; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // This is the lexer token type to use. The second template parameter lists + // all attribute types used for token_def's during token definition (see + // example6_tokens<> above). 
Here we use the predefined lexertl token + // type, but any compatible token type may be used instead. + // + // If you don't list any token attribute types in the following declaration + // (or just use the default token type: lexertl_token) + // it will compile and work just fine, just a bit less efficient. This is + // because the token attribute will be generated from the matched input + // sequence every time it is requested. But as soon as you specify at + // least one token attribute type you'll have to list all attribute types + // used for token_def<> declarations in the token definition class above, + // otherwise compilation errors will occur. + typedef lex::lexertl::token< + base_iterator_type, boost::mpl::vector + > token_type; + + // Here we use the lexertl based lexer engine. + typedef lex::lexertl::lexer lexer_type; + + // This is the token definition type (derived from the given lexer type). + typedef example6_tokens example6_tokens; + + // this is the iterator type exposed by the lexer + typedef example6_tokens::iterator_type iterator_type; + + // this is the type of the grammar to parse + typedef example6_grammar example6_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example6_tokens tokens; // Our lexer + example6_grammar calc(tokens); // Our parser + + std::string str (read_from_file("example6.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = tokens.begin(it, str.end()); + iterator_type end = tokens.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. + // Note how we use the lexer defined above as the skip parser. It must + // be explicitly wrapped inside a state directive, switching the lexer + // state for the duration of skipping whitespace. 
+ std::string ws("WS"); + bool r = qi::phrase_parse(iter, end, calc, qi::in_state(ws)[tokens.self]); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else + { + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/example6.input b/src/boost/libs/spirit/example/lex/example6.input new file mode 100644 index 00000000..992cbeb3 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example6.input @@ -0,0 +1,17 @@ +/* example6.input */ +{ + + if (variable) { a = b ; } + + while (10) { + + if (2) { b = 10 ; } + if (x) { c = x ; } else { d = 10 ; } + + } + + variable = 42; + +} + + diff --git a/src/boost/libs/spirit/example/lex/lexer_debug_support.cpp b/src/boost/libs/spirit/example/lex/lexer_debug_support.cpp new file mode 100644 index 00000000..c6d3f76d --- /dev/null +++ b/src/boost/libs/spirit/example/lex/lexer_debug_support.cpp @@ -0,0 +1,109 @@ +// Copyright (c) 2001-2011 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// #define BOOST_SPIRIT_LEXERTL_DEBUG 1 + +#include + +#include +#include +#include + +namespace lex = boost::spirit::lex; +namespace qi = boost::spirit::qi; +namespace phoenix = boost::phoenix; + +/////////////////////////////////////////////////////////////////////////////// +template +struct language_tokens : lex::lexer +{ + language_tokens() + { + tok_float = "float"; + tok_int = "int"; + floatlit = "[0-9]+\\.[0-9]*"; + intlit = "[0-9]+"; + ws = "[ \t\n]+"; + identifier = "[a-zA-Z_][a-zA-Z_0-9]*"; + + this->self = ws [lex::_pass = lex::pass_flags::pass_ignore]; + this->self += tok_float | tok_int | floatlit | intlit | identifier; + this->self += lex::char_('='); + } + + lex::token_def<> tok_float, tok_int; + lex::token_def<> ws; + lex::token_def floatlit; + lex::token_def intlit; + lex::token_def<> identifier; +}; + +/////////////////////////////////////////////////////////////////////////////// +template +struct language_grammar : qi::grammar +{ + template + language_grammar(language_tokens const& tok) + : language_grammar::base_type(declarations) + { + declarations = +number; + number = + tok.tok_float >> tok.identifier >> '=' >> tok.floatlit + | tok.tok_int >> tok.identifier >> '=' >> tok.intlit + ; + + declarations.name("declarations"); + number.name("number"); + debug(declarations); + debug(number); + } + + qi::rule declarations; + qi::rule number; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // lexer type + typedef lex::lexertl::actor_lexer< + lex::lexertl::token< + base_iterator_type, boost::mpl::vector2 + > > lexer_type; + + // iterator type exposed by the lexer + typedef language_tokens::iterator_type iterator_type; + + // now we use the types defined above to create the lexer and 
grammar + // object instances needed to invoke the parsing process + language_tokens tokenizer; // Our lexer + language_grammar g (tokenizer); // Our parser + + // Parsing is done based on the token stream, not the character + // stream read from the input. + std::string str ("float f = 3.4\nint i = 6\n"); + base_iterator_type first = str.begin(); + + bool r = lex::tokenize_and_parse(first, str.end(), tokenizer, g); + + if (r) { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else { + std::string rest(first, str.end()); + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "stopped at: \"" << rest << "\"\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/print_number_tokenids.cpp b/src/boost/libs/spirit/example/lex/print_number_tokenids.cpp new file mode 100644 index 00000000..3789f0eb --- /dev/null +++ b/src/boost/libs/spirit/example/lex/print_number_tokenids.cpp @@ -0,0 +1,121 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +// +// %{ +// #include +// %} +// %% +// [0-9]+ { printf("%s\n", yytext); } +// .|\n ; +// %% +// main() +// { +// yylex(); +// } +// +// Its purpose is to print all the (integer) numbers found in a file + +#include +#include +#include +#include + +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. 
+/////////////////////////////////////////////////////////////////////////////// +template +struct print_numbers_tokenids : lex::lexer +{ + // define tokens and associate them with the lexer, we set the lexer flags + // not to match newlines while matching a dot, so we need to add the + // '\n' explicitly below + print_numbers_tokenids() + : print_numbers_tokenids::base_type(lex::match_flags::match_not_dot_newline) + { + this->self = lex::token_def("[0-9]+") | ".|\n"; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct print_numbers_grammar : qi::grammar +{ + print_numbers_grammar() + : print_numbers_grammar::base_type(start) + { + // we just know that the token ids get assigned starting at min_token_id + // so, "[0-9]+" gets the id 'min_token_id' and ".|\n" gets the id + // 'min_token_id+1'. + + // this prints the token ids of the matched tokens + start = *( qi::tokenid(lex::min_token_id) + | qi::tokenid(lex::min_token_id+1) + ) + [ std::cout << _1 << "\n" ] + ; + } + + qi::rule start; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char* argv[]) +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // the token type to be used, 'int' is available as the type of the token + // attribute and no lexer state is supported + typedef lex::lexertl::token + , boost::mpl::false_> token_type; + + // lexer type + typedef lex::lexertl::lexer lexer_type; + + // iterator type exposed by the lexer + typedef print_numbers_tokenids::iterator_type iterator_type; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + print_numbers_tokenids print_tokens; // Our lexer + print_numbers_grammar print; // Our parser + + // Parsing 
is done based on the token stream, not the character + // stream read from the input. + std::string str (read_from_file(1 == argc ? "print_numbers.input" : argv[1])); + base_iterator_type first = str.begin(); + bool r = lex::tokenize_and_parse(first, str.end(), print_tokens, print); + + if (r) { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else { + std::string rest(first, str.end()); + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "stopped at: \"" << rest << "\"\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} + + + diff --git a/src/boost/libs/spirit/example/lex/print_numbers.cpp b/src/boost/libs/spirit/example/lex/print_numbers.cpp new file mode 100644 index 00000000..e128af97 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/print_numbers.cpp @@ -0,0 +1,118 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +// +// %{ +// #include +// %} +// %% +// [0-9]+ { printf("%s\n", yytext); } +// .|\n ; +// %% +// main() +// { +// yylex(); +// } +// +// Its purpose is to print all the (integer) numbers found in a file + +#include +#include +#include +#include + +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. 
+/////////////////////////////////////////////////////////////////////////////// +template +struct print_numbers_tokens : lex::lexer +{ + // define tokens and associate them with the lexer, we set the lexer flags + // not to match newlines while matching a dot, so we need to add the + // '\n' explicitly below + print_numbers_tokens() + : print_numbers_tokens::base_type(lex::match_flags::match_not_dot_newline) + { + this->self = lex::token_def("[0-9]+") | ".|\n"; + } +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct print_numbers_grammar : qi::grammar +{ + print_numbers_grammar() + : print_numbers_grammar::base_type(start) + { + // we just know that the token ids get assigned starting at min_token_id + // so, "[0-9]+" gets the id 'min_token_id' and ".|\n" gets the id + // 'min_token_id+1'. + start = *( qi::token(lex::min_token_id) [ std::cout << _1 << "\n" ] + | qi::token(lex::min_token_id+1) + ) + ; + } + + qi::rule start; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char* argv[]) +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // the token type to be used, 'int' is available as the type of the token + // attribute and no lexer state is supported + typedef lex::lexertl::token + , boost::mpl::false_> token_type; + + // lexer type + typedef lex::lexertl::lexer lexer_type; + + // iterator type exposed by the lexer + typedef print_numbers_tokens::iterator_type iterator_type; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + print_numbers_tokens print_tokens; // Our lexer + print_numbers_grammar print; // Our parser + + // Parsing is done based on the token stream, not the character + // stream read 
from the input. + std::string str (read_from_file(1 == argc ? "print_numbers.input" : argv[1])); + base_iterator_type first = str.begin(); + bool r = lex::tokenize_and_parse(first, str.end(), print_tokens, print); + + if (r) { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else { + std::string rest(first, str.end()); + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "stopped at: \"" << rest << "\"\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} + + + diff --git a/src/boost/libs/spirit/example/lex/print_numbers.input b/src/boost/libs/spirit/example/lex/print_numbers.input new file mode 100644 index 00000000..992cbeb3 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/print_numbers.input @@ -0,0 +1,17 @@ +/* example6.input */ +{ + + if (variable) { a = b ; } + + while (10) { + + if (2) { b = 10 ; } + if (x) { c = x ; } else { d = 10 ; } + + } + + variable = 42; + +} + + diff --git a/src/boost/libs/spirit/example/lex/reference.cpp b/src/boost/libs/spirit/example/lex/reference.cpp new file mode 100644 index 00000000..0a878e24 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/reference.cpp @@ -0,0 +1,30 @@ +/*============================================================================= + Copyright (c) 2001-2011 Hartmut Kaiser + http://spirit.sourceforge.net/ + + Distributed under the Boost Software License, Version 1.0. 
(See accompanying + file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +=============================================================================*/ +//[reference_lex_includes +#include +#include +#include +#include +#include +//] + +//[reference_lex_test +//] + +int main() +{ + { + //[reference_lex_using_declarations_char + //] + + //[reference_lex_char + //] + } + + return 0; +} diff --git a/src/boost/libs/spirit/example/lex/static_lexer/Jamfile b/src/boost/libs/spirit/example/lex/static_lexer/Jamfile new file mode 100644 index 00000000..25c30d0d --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/Jamfile @@ -0,0 +1,15 @@ +#============================================================================== +# Copyright (c) 2001-2009 Hartmut Kaiser +# +# Distributed under the Boost Software License, Version 1.0. (See accompanying +# file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) +#============================================================================== + +project spirit-static-lexer-example ; + +exe word_count_generate : word_count_generate.cpp ; +exe word_count_static : word_count_static.cpp ; + +exe word_count_lexer_generate : word_count_lexer_generate.cpp ; +exe word_count_lexer_static : word_count_lexer_static.cpp ; + diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count.input b/src/boost/libs/spirit/example/lex/static_lexer/word_count.input new file mode 100644 index 00000000..2f768330 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count.input @@ -0,0 +1,7 @@ +Our hiking boots are ready. So, let's pack! + +Have you the plane tickets for there and back? + +I do, I do. We're all ready to go. Grab my hand and be my beau. 
+ + diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp new file mode 100644 index 00000000..87f0527f --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_generate.cpp @@ -0,0 +1,45 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens (this file) +// . To integrate the generated C++ lexer into the /Spirit/ framework. +// (see the file: word_count_static.cpp) + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include + +#include + +#include "word_count_tokens.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_generate_main +int main(int argc, char* argv[]) +{ + // create the lexer object instance needed to invoke the generator + word_count_tokens > word_count; // the token definition + + // open the output file, where the generated tokenizer function will be + // written to + std::ofstream out(argc < 2 ? "word_count_static.hpp" : argv[1]); + + // invoke the generator, passing the token definition, the output stream + // and the name suffix of the tables and functions to be generated + // + // The suffix "wc" used below results in a type lexertl::static_::lexer_wc + // to be generated, which needs to be passed as a template parameter to the + // lexertl::static_lexer template (see word_count_static.cpp). + return lex::lexertl::generate_static_dfa(word_count, out, "wc") ? 
0 : -1; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp new file mode 100644 index 00000000..65593cda --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_generate.cpp @@ -0,0 +1,45 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens (this file) +// . To integrate the generated C++ lexer into the /Spirit/ framework. +// (see the file: word_count_lexer_static.cpp) + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include + +#include + +#include "word_count_lexer_tokens.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +//[wcl_static_generate_main +int main(int argc, char* argv[]) +{ + // create the lexer object instance needed to invoke the generator + word_count_lexer_tokens > word_count; // the token definition + + // open the output file, where the generated tokenizer function will be + // written to + std::ofstream out(argc < 2 ? "word_count_lexer_static.hpp" : argv[1]); + + // invoke the generator, passing the token definition, the output stream + // and the name prefix of the tokenizing function to be generated + // + // The suffix "wcl" used below results in a type lexertl::static_::lexer_wcl + // to be generated, which needs to be passed as a template parameter to the + // lexertl::static_lexer template (see word_count_lexer_static.cpp). + return lex::lexertl::generate_static_dfa(word_count, out, "wcl") ? 
0 : -1; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp new file mode 100644 index 00000000..2bda5adc --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.cpp @@ -0,0 +1,84 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens +// . To integrate the generated C++ lexer into the /Spirit/ framework. +// + +// #define BOOST_SPIRIT_DEBUG +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include + +#include +#include + +#include "../example.hpp" +#include "word_count_lexer_tokens.hpp" // token definition +#include "word_count_lexer_static.hpp" // generated tokenizer + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +//[wcl_static_main +int main(int argc, char* argv[]) +{ + // read input from the given file + std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + + // Specifying 'omit' as the token attribute type generates a token class + // notholding any token attribute at all (not even the iterator_range of the + // matched input sequence), therefor optimizing the token, the lexer, and + // possibly the parser implementation as much as possible. + // + // Specifying mpl::false_ as the 3rd template parameter generates a token + // type and an iterator, both holding no lexer state, allowing for even more + // aggressive optimizations. + // + // As a result the token instances contain the token ids as the only data + // member. 
+ typedef lex::lexertl::token token_type; + + // Define the lexer type to be used as the base class for our token + // definition. + // + // This is the only place where the code is different from an equivalent + // dynamic lexical analyzer. We use the `lexertl::static_lexer<>` instead of + // the `lexertl::lexer<>` as the base class for our token defintion type. + // + // As we specified the suffix "wcl" while generating the static tables we + // need to pass the type lexertl::static_::lexer_wcl as the second template + // parameter below (see word_count_lexer_generate.cpp). + typedef lex::lexertl::static_actor_lexer< + token_type, lex::lexertl::static_::lexer_wcl + > lexer_type; + + // create the lexer object instance needed to invoke the lexical analysis + word_count_lexer_tokens word_count_lexer; + + // tokenize the given string, all generated tokens are discarded + char const* first = str.c_str(); + char const* last = &first[str.size()]; + bool r = lex::tokenize(first, last, word_count_lexer); + + if (r) { + std::cout << "lines: " << word_count_lexer.l + << ", words: " << word_count_lexer.w + << ", characters: " << word_count_lexer.c + << "\n"; + } + else { + std::string rest(first, last); + std::cout << "Lexical analysis failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp new file mode 100644 index 00000000..e69b936e --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_static.hpp @@ -0,0 +1,164 @@ +// Copyright (c) 2008-2009 Ben Hanson +// Copyright (c) 2008-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Auto-generated by boost::lexer, do not edit + +#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_WCL_NOV_10_2009_17_20_29) +#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_WCL_NOV_10_2009_17_20_29 + +#include + +//////////////////////////////////////////////////////////////////////////////// +// the generated table of state names and the tokenizer have to be +// defined in the boost::spirit::lex::lexertl::static_ namespace +namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace static_ { + +//////////////////////////////////////////////////////////////////////////////// +// this table defines the names of the lexer states +char const* const lexer_state_names_wcl[1] = +{ + "INITIAL" +}; + +//////////////////////////////////////////////////////////////////////////////// +// this variable defines the number of lexer states +std::size_t const lexer_state_count_wcl = 1; + +//////////////////////////////////////////////////////////////////////////////// +// this function returns the next matched token +template +std::size_t next_token_wcl (std::size_t& /*start_state_*/, bool& /*bol_*/, + Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) +{ + enum {end_state_index, id_index, unique_id_index, state_index, bol_index, + eol_index, dead_state_index, dfa_offset}; + + static const std::size_t npos = static_cast(~0); + static const std::size_t lookup_[256] = { + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 8, 7, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 8, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 
9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9, + 9, 9, 9, 9, 9, 9, 9, 9 }; + static const std::size_t dfa_alphabet_ = 10; + static const std::size_t dfa_[50] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 4, 2, 1, 65536, 0, 0, + 0, 0, 0, 0, 0, 2, 1, 65537, + 1, 0, 0, 0, 0, 0, 0, 0, + 1, 65538, 2, 0, 0, 0, 0, 0, + 0, 0 }; + + if (start_token_ == end_) + { + unique_id_ = npos; + return 0; + } + + std::size_t const* ptr_ = dfa_ + dfa_alphabet_; + Iterator curr_ = start_token_; + bool end_state_ = *ptr_ != 0; + std::size_t id_ = *(ptr_ + id_index); + std::size_t uid_ = *(ptr_ + unique_id_index); + Iterator end_token_ = start_token_; + + while (curr_ != end_) + { + std::size_t const state_ = + ptr_[lookup_[static_cast(*curr_++)]]; + + if (state_ == 0) break; + + ptr_ = &dfa_[state_ * dfa_alphabet_]; + + if (*ptr_) + { + end_state_ = true; + id_ = *(ptr_ + id_index); + uid_ = *(ptr_ + unique_id_index); + end_token_ = curr_; + } + } + + if (end_state_) + { + // return longest match + start_token_ = end_token_; + } + else + { + id_ = npos; + uid_ = npos; + } + + unique_id_ = uid_; + return id_; +} + +//////////////////////////////////////////////////////////////////////////////// +// this defines a generic accessors for the information above +struct lexer_wcl +{ + // version number and feature-set of compatible static lexer engine + enum + { + static_version = 65536, + supports_bol = false, + supports_eol = false + }; + + // return the number of lexer states + static std::size_t state_count() + { + return lexer_state_count_wcl; + } + + // return the name of the lexer state as given by 'idx' + static char const* state_name(std::size_t idx) + { + return lexer_state_names_wcl[idx]; + } + + // return the 
next matched token + template + static std::size_t next(std::size_t &start_state_, bool& bol_ + , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) + { + return next_token_wcl(start_state_, bol_, start_token_, end_, unique_id_); + } +}; + +}}}}} // namespace boost::spirit::lex::lexertl::static_ + +#endif diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp new file mode 100644 index 00000000..af52a209 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_lexer_tokens.hpp @@ -0,0 +1,62 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_LEXER_TOKENS_FEB_10_2008_0739PM) +#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_LEXER_TOKENS_FEB_10_2008_0739PM + +#include +#include +#include +#include + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. +// +// Note, the token definition type is derived from the 'lexertl_actor_lexer' +// template, which is a necessary to being able to use lexer semantic actions. 
+/////////////////////////////////////////////////////////////////////////////// +struct distance_func +{ + template + struct result : boost::iterator_difference {}; + + template + typename result::type + operator()(Iterator1& begin, Iterator2& end) const + { + return std::distance(begin, end); + } +}; +boost::phoenix::function const distance = distance_func(); + +//[wcl_static_token_definition +template +struct word_count_lexer_tokens : boost::spirit::lex::lexer +{ + word_count_lexer_tokens() + : c(0), w(0), l(0) + , word("[^ \t\n]+") // define tokens + , eol("\n") + , any(".") + { + using boost::spirit::lex::_start; + using boost::spirit::lex::_end; + using boost::phoenix::ref; + + // associate tokens with the lexer + this->self + = word [++ref(w), ref(c) += distance(_start, _end)] + | eol [++ref(c), ++ref(l)] + | any [++ref(c)] + ; + } + + std::size_t c, w, l; + boost::spirit::lex::token_def<> word, eol, any; +}; +//] + +#endif diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp new file mode 100644 index 00000000..4ba3bb53 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.cpp @@ -0,0 +1,120 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// The purpose of this example is to show, how it is possible to use a lexer +// token definition for two purposes: +// +// . To generate C++ code implementing a static lexical analyzer allowing +// to recognize all defined tokens +// . To integrate the generated C++ lexer into the /Spirit/ framework. 
+// + +// #define BOOST_SPIRIT_LEXERTL_DEBUG +#define BOOST_VARIANT_MINIMIZE_SIZE + +#include +#include +//[wc_static_include +#include +//] +#include +#include +#include + +#include +#include + +#include "../example.hpp" +#include "word_count_tokens.hpp" // token definition +#include "word_count_static.hpp" // generated tokenizer + +using namespace boost::spirit; +using namespace boost::spirit::ascii; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_grammar +// This is an ordinary grammar definition following the rules defined by +// Spirit.Qi. There is nothing specific about it, except it gets the token +// definition class instance passed to the constructor to allow accessing the +// embedded token_def<> instances. +template +struct word_count_grammar : qi::grammar +{ + template + word_count_grammar(TokenDef const& tok) + : word_count_grammar::base_type(start) + , c(0), w(0), l(0) + { + using boost::phoenix::ref; + using boost::phoenix::size; + + // associate the defined tokens with the lexer, at the same time + // defining the actions to be executed + start = *( tok.word [ ++ref(w), ref(c) += size(_1) ] + | lit('\n') [ ++ref(l), ++ref(c) ] + | qi::token(IDANY) [ ++ref(c) ] + ) + ; + } + + std::size_t c, w, l; // counter for characters, words, and lines + qi::rule start; +}; +//] + +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_main +int main(int argc, char* argv[]) +{ + // Define the token type to be used: 'std::string' is available as the type + // of the token value. + typedef lex::lexertl::token< + char const*, boost::mpl::vector + > token_type; + + // Define the lexer type to be used as the base class for our token + // definition. + // + // This is the only place where the code is different from an equivalent + // dynamic lexical analyzer. 
We use the `lexertl::static_lexer<>` instead of + // the `lexertl::lexer<>` as the base class for our token defintion type. + // + // As we specified the suffix "wc" while generating the static tables we + // need to pass the type lexertl::static_::lexer_wc as the second template + // parameter below (see word_count_generate.cpp). + typedef lex::lexertl::static_lexer< + token_type, lex::lexertl::static_::lexer_wc + > lexer_type; + + // Define the iterator type exposed by the lexer. + typedef word_count_tokens::iterator_type iterator_type; + + // Now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process. + word_count_tokens word_count; // Our lexer + word_count_grammar g (word_count); // Our parser + + // Read in the file into memory. + std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + char const* first = str.c_str(); + char const* last = &first[str.size()]; + + // Parsing is done based on the token stream, not the character stream. + bool r = lex::tokenize_and_parse(first, last, word_count, g); + + if (r) { // success + std::cout << "lines: " << g.l << ", words: " << g.w + << ", characters: " << g.c << "\n"; + } + else { + std::string rest(first, last); + std::cerr << "Parsing failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp new file mode 100644 index 00000000..4a7aa3c6 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_static.hpp @@ -0,0 +1,164 @@ +// Copyright (c) 2008-2009 Ben Hanson +// Copyright (c) 2008-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file licence_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// Auto-generated by boost::lexer, do not edit + +#if !defined(BOOST_SPIRIT_LEXER_NEXT_TOKEN_WC_NOV_10_2009_17_20_04) +#define BOOST_SPIRIT_LEXER_NEXT_TOKEN_WC_NOV_10_2009_17_20_04 + +#include + +//////////////////////////////////////////////////////////////////////////////// +// the generated table of state names and the tokenizer have to be +// defined in the boost::spirit::lex::lexertl::static_ namespace +namespace boost { namespace spirit { namespace lex { namespace lexertl { namespace static_ { + +//////////////////////////////////////////////////////////////////////////////// +// this table defines the names of the lexer states +char const* const lexer_state_names_wc[1] = +{ + "INITIAL" +}; + +//////////////////////////////////////////////////////////////////////////////// +// this variable defines the number of lexer states +std::size_t const lexer_state_count_wc = 1; + +//////////////////////////////////////////////////////////////////////////////// +// this function returns the next matched token +template +std::size_t next_token_wc (std::size_t& /*start_state_*/, bool& /*bol_*/, + Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) +{ + enum {end_state_index, id_index, unique_id_index, state_index, bol_index, + eol_index, dead_state_index, dfa_offset}; + + static const std::size_t npos = static_cast(~0); + static const std::size_t lookup_[256] = { + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 9, 7, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 9, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 
8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8, + 8, 8, 8, 8, 8, 8, 8, 8 }; + static const std::size_t dfa_alphabet_ = 10; + static const std::size_t dfa_[50] = { + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, + 0, 3, 2, 4, 1, 65536, 0, 0, + 0, 0, 0, 0, 2, 0, 1, 10, + 1, 0, 0, 0, 0, 0, 0, 0, + 1, 65537, 2, 0, 0, 0, 0, 0, + 0, 0 }; + + if (start_token_ == end_) + { + unique_id_ = npos; + return 0; + } + + std::size_t const* ptr_ = dfa_ + dfa_alphabet_; + Iterator curr_ = start_token_; + bool end_state_ = *ptr_ != 0; + std::size_t id_ = *(ptr_ + id_index); + std::size_t uid_ = *(ptr_ + unique_id_index); + Iterator end_token_ = start_token_; + + while (curr_ != end_) + { + std::size_t const state_ = + ptr_[lookup_[static_cast(*curr_++)]]; + + if (state_ == 0) break; + + ptr_ = &dfa_[state_ * dfa_alphabet_]; + + if (*ptr_) + { + end_state_ = true; + id_ = *(ptr_ + id_index); + uid_ = *(ptr_ + unique_id_index); + end_token_ = curr_; + } + } + + if (end_state_) + { + // return longest match + start_token_ = end_token_; + } + else + { + id_ = npos; + uid_ = npos; + } + + unique_id_ = uid_; + return id_; +} + +//////////////////////////////////////////////////////////////////////////////// +// this defines a generic accessors for the information above +struct lexer_wc +{ + // version number and feature-set of compatible static lexer engine + enum + { + static_version = 65536, + supports_bol = false, + supports_eol = false + }; + + // return the number of lexer states + static std::size_t state_count() + { + return lexer_state_count_wc; + } + + // return the name of the lexer state as given by 'idx' + static char const* state_name(std::size_t idx) + { + return lexer_state_names_wc[idx]; + } + + // return the next matched 
token + template + static std::size_t next(std::size_t &start_state_, bool& bol_ + , Iterator &start_token_, Iterator const& end_, std::size_t& unique_id_) + { + return next_token_wc(start_state_, bol_, start_token_, end_, unique_id_); + } +}; + +}}}}} // namespace boost::spirit::lex::lexertl::static_ + +#endif diff --git a/src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp b/src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp new file mode 100644 index 00000000..5828adde --- /dev/null +++ b/src/boost/libs/spirit/example/lex/static_lexer/word_count_tokens.hpp @@ -0,0 +1,41 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#if !defined(SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM) +#define SPIRIT_LEXER_EXAMPLE_WORD_COUNT_TOKENS_FEB_10_2008_0739PM + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We keep the base class for the token definition as a +// template parameter to allow this class to be used for +// both: the code generation and the lexical analysis +/////////////////////////////////////////////////////////////////////////////// +//[wc_static_tokenids +enum tokenids +{ + IDANY = boost::spirit::lex::min_token_id + 1, +}; +//] + +//[wc_static_tokendef +// This token definition class can be used without any change for all three +// possible use cases: a dynamic lexical analyzer, a code generator, and a +// static lexical analyzer. 
+template +struct word_count_tokens : boost::spirit::lex::lexer +{ + word_count_tokens() + : word_count_tokens::base_type( + boost::spirit::lex::match_flags::match_not_dot_newline) + { + // define tokens and associate them with the lexer + word = "[^ \t\n]+"; + this->self = word | '\n' | boost::spirit::lex::token_def<>(".", IDANY); + } + + boost::spirit::lex::token_def word; +}; +//] + +#endif diff --git a/src/boost/libs/spirit/example/lex/strip_comments.cpp b/src/boost/libs/spirit/example/lex/strip_comments.cpp new file mode 100644 index 00000000..c4e0913a --- /dev/null +++ b/src/boost/libs/spirit/example/lex/strip_comments.cpp @@ -0,0 +1,163 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +// +// %{ +// /* INITIAL is the default start state. COMMENT is our new */ +// /* state where we remove comments. */ +// %} +// +// %s COMMENT +// %% +// "//".* ; +// "/*" BEGIN COMMENT; +// . ECHO; +// [\n] ECHO; +// "*/" BEGIN INITIAL; +// . ; +// [\n] ; +// %% +// +// main() +// { +// yylex(); +// } +// +// Its purpose is to strip comments out of C code. +// +// Additionally this example demonstrates the use of lexer states to structure +// the lexer definition. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include +#include +#include + +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. 
+/////////////////////////////////////////////////////////////////////////////// +enum tokenids +{ + IDANY = lex::min_token_id + 10 +}; + +template +struct strip_comments_tokens : lex::lexer +{ + strip_comments_tokens() + : strip_comments_tokens::base_type(lex::match_flags::match_default) + { + // define tokens and associate them with the lexer + cppcomment = "\"//\"[^\n]*"; // '//[^\n]*' + ccomment = "\"/*\""; // '/*' + endcomment = "\"*/\""; // '*/' + + // The following tokens are associated with the default lexer state + // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is + // strictly optional. + this->self.add + (cppcomment) // no explicit token id is associated + (ccomment) + (".", IDANY) // IDANY is the token id associated with this token + // definition + ; + + // The following tokens are associated with the lexer state "COMMENT". + // We switch lexer states from inside the parsing process using the + // in_state("COMMENT")[] parser component as shown below. + this->self("COMMENT").add + (endcomment) + (".", IDANY) + ; + } + + lex::token_def<> cppcomment, ccomment, endcomment; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct strip_comments_grammar : qi::grammar +{ + template + strip_comments_grammar(TokenDef const& tok) + : strip_comments_grammar::base_type(start) + { + // The in_state("COMMENT")[...] parser component switches the lexer + // state to be 'COMMENT' during the matching of the embedded parser. 
+ start = *( tok.ccomment + >> qi::in_state("COMMENT") + [ + // the lexer is in the 'COMMENT' state during + // matching of the following parser components + *token(IDANY) >> tok.endcomment + ] + | tok.cppcomment + | qi::token(IDANY) [ std::cout << _1 ] + ) + ; + } + + qi::rule start; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char* argv[]) +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // lexer type + typedef + lex::lexertl::lexer > + lexer_type; + + // iterator type exposed by the lexer + typedef strip_comments_tokens::iterator_type iterator_type; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + strip_comments_tokens strip_comments; // Our lexer + strip_comments_grammar g (strip_comments); // Our parser + + // Parsing is done based on the token stream, not the character + // stream read from the input. + std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1])); + base_iterator_type first = str.begin(); + + bool r = lex::tokenize_and_parse(first, str.end(), strip_comments, g); + + if (r) { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else { + std::string rest(first, str.end()); + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "stopped at: \"" << rest << "\"\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... 
:-) \n\n"; + return 0; +} + + + diff --git a/src/boost/libs/spirit/example/lex/strip_comments.input b/src/boost/libs/spirit/example/lex/strip_comments.input new file mode 100644 index 00000000..bed0f071 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/strip_comments.input @@ -0,0 +1,162 @@ +// Copyright (c) 2001-2009 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +// +// %{ +// /* INITIAL is the default start state. COMMENT is our new */ +// /* state where we remove comments. */ +// %} +// +// %s COMMENT +// %% +// "//".* ; +// "/*" BEGIN COMMENT; +// . ECHO; +// [\n] ECHO; +// "*/" BEGIN INITIAL; +// . ; +// [\n] ; +// %% +// +// main() +// { +// yylex(); +// } +// +// Its purpose is to strip comments out of C code. +// +// Additionally this example demonstrates the use of lexer states to structure +// the lexer definition. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include +#include +#include + +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; +using namespace boost::spirit::qi; +using namespace boost::spirit::lex; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. +/////////////////////////////////////////////////////////////////////////////// +enum tokenids +{ + IDANY = lex::min_token_id + 10 +}; + +template +struct strip_comments_tokens : lexer +{ + strip_comments_tokens() + { + // define tokens and associate them with the lexer + cppcomment = "//[^\n]*"; + ccomment = "/\\*"; + endcomment = "\\*/"; + + // The following tokens are associated with the default lexer state + // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is + // strictly optional. 
+ this->self.add + (cppcomment) // no explicit token id is associated + (ccomment) + (".", IDANY) // IDANY is the token id associated with this token + // definition + ; + + // The following tokens are associated with the lexer state "COMMENT". + // We switch lexer states from inside the parsing process using the + // in_state("COMMENT")[] parser component as shown below. + this->self("COMMENT").add + (endcomment) + (".", IDANY) + ; + } + + token_def<> cppcomment, ccomment, endcomment; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template +struct strip_comments_grammar : grammar +{ + template + strip_comments_grammar(TokenDef const& tok) + : strip_comments_grammar::base_type(start) + { + // The in_state("COMMENT")[...] parser component switches the lexer + // state to be 'COMMENT' during the matching of the embedded parser. + start = *( tok.ccomment + >> in_state("COMMENT") + [ + // the lexer is in the 'COMMENT' state during + // matching of the following parser components + *token(IDANY) >> tok.endcomment + ] + | tok.cppcomment + | token(IDANY) [ std::cout << _1 ] + ) + ; + } + + rule start; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main(int argc, char* argv[]) +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // lexer type + typedef lexertl::lexer > lexer_type; + + // iterator type exposed by the lexer + typedef strip_comments_tokens::iterator_type iterator_type; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + strip_comments_tokens strip_comments; // Our lexer + strip_comments_grammar g (strip_comments); // Our grammar + + // Parsing is done based on the token stream, not the character + // stream read from the 
input. + std::string str (read_from_file(1 == argc ? "strip_comments.input" : argv[1])); + base_iterator_type first = str.begin(); + + bool r = tokenize_and_parse(first, str.end(), strip_comments, g); + + if (r) { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else { + std::string rest(first, str.end()); + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "stopped at: \"" << rest << "\"\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} + + + diff --git a/src/boost/libs/spirit/example/lex/strip_comments_lexer.cpp b/src/boost/libs/spirit/example/lex/strip_comments_lexer.cpp new file mode 100644 index 00000000..30e0b34b --- /dev/null +++ b/src/boost/libs/spirit/example/lex/strip_comments_lexer.cpp @@ -0,0 +1,172 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +// +// %{ +// /* INITIAL is the default start state. COMMENT is our new */ +// /* state where we remove comments. */ +// %} +// +// %s COMMENT +// %% +// "//".* ; +// "/*" BEGIN COMMENT; +// . ECHO; +// [\n] ECHO; +// "*/" BEGIN INITIAL; +// . ; +// [\n] ; +// %% +// +// main() +// { +// yylex(); +// } +// +// Its purpose is to strip comments out of C code. +// +// Additionally this example demonstrates the use of lexer states to structure +// the lexer definition. 
+ +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +#include +#include +#include +#include + +#include +#include + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. +/////////////////////////////////////////////////////////////////////////////// +enum tokenids +{ + IDANY = lex::min_token_id + 10, + IDEOL = lex::min_token_id + 11 +}; + +/////////////////////////////////////////////////////////////////////////////// +// Simple custom semantic action function object used to print the matched +// input sequence for a particular token +template +struct echo_input_functor +{ + echo_input_functor (std::basic_ostream& os_) + : os(os_) {} + + // This is called by the semantic action handling code during the lexing + template + void operator()(Iterator const& b, Iterator const& e + , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)& + , std::size_t&, Context&) const + { + os << std::string(b, e); + } + + std::basic_ostream& os; +}; + +template +inline echo_input_functor +echo_input(std::basic_ostream& os) +{ + return echo_input_functor(os); +} + +/////////////////////////////////////////////////////////////////////////////// +// Another simple custom semantic action function object used to switch the +// state of the lexer +struct set_lexer_state +{ + set_lexer_state(char const* state_) + : state(state_) {} + + // This is called by the semantic action handling code during the lexing + template + void operator()(Iterator const&, Iterator const& + , BOOST_SCOPED_ENUM(boost::spirit::lex::pass_flags)& + , std::size_t&, Context& ctx) const + { + ctx.set_state_name(state.c_str()); + } + + std::string state; +}; + +/////////////////////////////////////////////////////////////////////////////// +template +struct strip_comments_tokens : lex::lexer +{ + strip_comments_tokens() + : 
strip_comments_tokens::base_type(lex::match_flags::match_default) + { + // define tokens and associate them with the lexer + cppcomment = "\"//\"[^\n]*"; // '//[^\n]*' + ccomment = "\"/*\""; // '/*' + endcomment = "\"*/\""; // '*/' + any = std::string("."); + eol = "\n"; + + // The following tokens are associated with the default lexer state + // (the "INITIAL" state). Specifying 'INITIAL' as a lexer state is + // strictly optional. + this->self + = cppcomment + | ccomment [ set_lexer_state("COMMENT") ] + | eol [ echo_input(std::cout) ] + | any [ echo_input(std::cout) ] + ; + + // The following tokens are associated with the lexer state 'COMMENT'. + this->self("COMMENT") + = endcomment [ set_lexer_state("INITIAL") ] + | "\n" + | std::string(".") + ; + } + + lex::token_def<> cppcomment, ccomment, endcomment, any, eol; +}; + + /////////////////////////////////////////////////////////////////////////////// +int main(int argc, char* argv[]) +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // lexer type + typedef + lex::lexertl::actor_lexer > + lexer_type; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + strip_comments_tokens strip_comments; // Our lexer + + // No parsing is done alltogether, everything happens in the lexer semantic + // actions. + std::string str (read_from_file(1 == argc ? 
"strip_comments.input" : argv[1])); + base_iterator_type first = str.begin(); + bool r = lex::tokenize(first, str.end(), strip_comments); + + if (!r) { + std::string rest(first, str.end()); + std::cerr << "Lexical analysis failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} + + + diff --git a/src/boost/libs/spirit/example/lex/word_count.cpp b/src/boost/libs/spirit/example/lex/word_count.cpp new file mode 100644 index 00000000..c6b21d95 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/word_count.cpp @@ -0,0 +1,166 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +/* +//[wcp_flex_version + %{ + int c = 0, w = 0, l = 0; + %} + word [^ \t\n]+ + eol \n + %% + {word} { ++w; c += yyleng; } + {eol} { ++c; ++l; } + . { ++c; } + %% + main() + { + yylex(); + printf("%d %d %d\n", l, w, c); + } +//] +*/ +// Its purpose is to do the word count function of the wc command in UNIX. It +// prints the number of lines, words and characters in a file. +// +// The example additionally demonstrates how to use the add_pattern(...)(...) +// syntax to define lexer patterns. These patterns are essentially parameter- +// less 'macros' for regular expressions, allowing to simplify their +// definition. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG +#define BOOST_VARIANT_MINIMIZE_SIZE + +#include +//[wcp_includes +#include +#include +#include +#include +#include +//] + +#include +#include + +#include "example.hpp" + +//[wcp_namespaces +using namespace boost::spirit; +using namespace boost::spirit::ascii; +//] + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. 
+/////////////////////////////////////////////////////////////////////////////// +//[wcp_token_ids +enum tokenids +{ + IDANY = lex::min_token_id + 10 +}; +//] + +//[wcp_token_definition +template +struct word_count_tokens : lex::lexer +{ + word_count_tokens() + { + // define patterns (lexer macros) to be used during token definition + // below + this->self.add_pattern + ("WORD", "[^ \t\n]+") + ; + + // define tokens and associate them with the lexer + word = "{WORD}"; // reference the pattern 'WORD' as defined above + + // this lexer will recognize 3 token types: words, newlines, and + // everything else + this->self.add + (word) // no token id is needed here + ('\n') // characters are usable as tokens as well + (".", IDANY) // string literals will not be escaped by the library + ; + } + + // the token 'word' exposes the matched string as its parser attribute + lex::token_def word; +}; +//] + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +//[wcp_grammar_definition +template +struct word_count_grammar : qi::grammar +{ + template + word_count_grammar(TokenDef const& tok) + : word_count_grammar::base_type(start) + , c(0), w(0), l(0) + { + using boost::phoenix::ref; + using boost::phoenix::size; + + start = *( tok.word [++ref(w), ref(c) += size(_1)] + | lit('\n') [++ref(c), ++ref(l)] + | qi::token(IDANY) [++ref(c)] + ) + ; + } + + std::size_t c, w, l; + qi::rule start; +}; +//] + +/////////////////////////////////////////////////////////////////////////////// +//[wcp_main +int main(int argc, char* argv[]) +{ +/*< Define the token type to be used: `std::string` is available as the + type of the token attribute +>*/ typedef lex::lexertl::token< + char const*, boost::mpl::vector + > token_type; + +/*< Define the lexer type to use implementing the state machine +>*/ typedef lex::lexertl::lexer lexer_type; + +/*< Define the iterator type 
exposed by the lexer type +>*/ typedef word_count_tokens::iterator_type iterator_type; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + word_count_tokens word_count; // Our lexer + word_count_grammar g (word_count); // Our parser + + // read in the file int memory + std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + char const* first = str.c_str(); + char const* last = &first[str.size()]; + +/*< Parsing is done based on the token stream, not the character + stream read from the input. The function `tokenize_and_parse()` wraps + the passed iterator range `[first, last)` by the lexical analyzer and + uses its exposed iterators to parse the token stream. +>*/ bool r = lex::tokenize_and_parse(first, last, word_count, g); + + if (r) { + std::cout << "lines: " << g.l << ", words: " << g.w + << ", characters: " << g.c << "\n"; + } + else { + std::string rest(first, last); + std::cerr << "Parsing failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] diff --git a/src/boost/libs/spirit/example/lex/word_count.input b/src/boost/libs/spirit/example/lex/word_count.input new file mode 100644 index 00000000..2f768330 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/word_count.input @@ -0,0 +1,7 @@ +Our hiking boots are ready. So, let's pack! + +Have you the plane tickets for there and back? + +I do, I do. We're all ready to go. Grab my hand and be my beau. + + diff --git a/src/boost/libs/spirit/example/lex/word_count_functor.cpp b/src/boost/libs/spirit/example/lex/word_count_functor.cpp new file mode 100644 index 00000000..f1969aac --- /dev/null +++ b/src/boost/libs/spirit/example/lex/word_count_functor.cpp @@ -0,0 +1,183 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following flex program: +/* +//[wcf_flex_version + %{ + #define ID_WORD 1000 + #define ID_EOL 1001 + #define ID_CHAR 1002 + int c = 0, w = 0, l = 0; + %} + %% + [^ \t\n]+ { return ID_WORD; } + \n { return ID_EOL; } + . { return ID_CHAR; } + %% + bool count(int tok) + { + switch (tok) { + case ID_WORD: ++w; c += yyleng; break; + case ID_EOL: ++l; ++c; break; + case ID_CHAR: ++c; break; + default: + return false; + } + return true; + } + void main() + { + int tok = EOF; + do { + tok = yylex(); + if (!count(tok)) + break; + } while (EOF != tok); + printf("%d %d %d\n", l, w, c); + } +//] +*/ +// Its purpose is to do the word count function of the wc command in UNIX. It +// prints the number of lines, words and characters in a file. +// +// This examples shows how to use the tokenize() function together with a +// simple functor, which gets executed whenever a token got matched in the +// input sequence. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +//[wcf_includes +#include +#include +#include +//] + +#include +#include + +#include "example.hpp" + +//[wcf_namespaces +namespace lex = boost::spirit::lex; +//] + +/////////////////////////////////////////////////////////////////////////////// +// Token id definitions +/////////////////////////////////////////////////////////////////////////////// +//[wcf_token_ids +enum token_ids +{ + ID_WORD = 1000, + ID_EOL, + ID_CHAR +}; +//] + +//[wcf_token_definition +/*` The template `word_count_tokens` defines three different tokens: + `ID_WORD`, `ID_EOL`, and `ID_CHAR`, representing a word (anything except + a whitespace or a newline), a newline character, and any other character + (`ID_WORD`, `ID_EOL`, and `ID_CHAR` are enum values representing the token + ids, but could be anything else convertible to an integer as well). 
+ The direct base class of any token definition class needs to be the + template `lex::lexer<>`, where the corresponding template parameter (here: + `lex::lexertl::lexer`) defines which underlying lexer engine has + to be used to provide the required state machine functionality. In this + example we use the Lexertl based lexer engine as the underlying lexer type. +*/ +template +struct word_count_tokens : lex::lexer +{ + word_count_tokens() + { + // define tokens (the regular expression to match and the corresponding + // token id) and add them to the lexer + this->self.add + ("[^ \t\n]+", ID_WORD) // words (anything except ' ', '\t' or '\n') + ("\n", ID_EOL) // newline characters + (".", ID_CHAR) // anything else is a plain character + ; + } +}; +//] + +//[wcf_functor +/*` In this example the struct 'counter' is used as a functor counting the + characters, words and lines in the analyzed input sequence by identifying + the matched tokens as passed from the /Spirit.Lex/ library. +*/ +struct counter +{ +//<- this is an implementation detail specific to boost::bind and doesn't show +// up in the documentation + typedef bool result_type; +//-> + // the function operator gets called for each of the matched tokens + // c, l, w are references to the counters used to keep track of the numbers + template + bool operator()(Token const& t, std::size_t& c, std::size_t& w, std::size_t& l) const + { + switch (t.id()) { + case ID_WORD: // matched a word + // since we're using a default token type in this example, every + // token instance contains a `iterator_range` as its token + // attribute pointing to the matched character sequence in the input + ++w; c += t.value().size(); + break; + case ID_EOL: // matched a newline character + ++l; ++c; + break; + case ID_CHAR: // matched something else + ++c; + break; + } + return true; // always continue to tokenize + } +}; +//] + +/////////////////////////////////////////////////////////////////////////////// +//[wcf_main +/*` The main 
function simply loads the given file into memory (as a + `std::string`), instantiates an instance of the token definition template + using the correct iterator type (`word_count_tokens`), + and finally calls `lex::tokenize`, passing an instance of the counter function + object. The return value of `lex::tokenize()` will be `true` if the + whole input sequence has been successfully tokenized, and `false` otherwise. +*/ +int main(int argc, char* argv[]) +{ + // these variables are used to count characters, words and lines + std::size_t c = 0, w = 0, l = 0; + + // read input from the given file + std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + + // create the token definition instance needed to invoke the lexical analyzer + word_count_tokens > word_count_functor; + + // tokenize the given string, the bound functor gets invoked for each of + // the matched tokens + char const* first = str.c_str(); + char const* last = &first[str.size()]; + bool r = lex::tokenize(first, last, word_count_functor, + boost::bind(counter(), _1, boost::ref(c), boost::ref(w), boost::ref(l))); + + // print results + if (r) { + std::cout << "lines: " << l << ", words: " << w + << ", characters: " << c << "\n"; + } + else { + std::string rest(first, last); + std::cout << "Lexical analysis failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] + diff --git a/src/boost/libs/spirit/example/lex/word_count_functor.flex b/src/boost/libs/spirit/example/lex/word_count_functor.flex new file mode 100644 index 00000000..d9c00503 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/word_count_functor.flex @@ -0,0 +1,59 @@ +%{ +// Copyright (c) 2001-2009 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. 
(See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#if defined(_WIN32) + #include +#endif + #define ID_WORD 1000 + #define ID_EOL 1001 + #define ID_CHAR 1002 +%} + +%% +[^ \t\n]+ { return ID_WORD; } +\n { return ID_EOL; } +. { return ID_CHAR; } +%% + +bool count(int tok, int* c, int* w, int* l) +{ + switch (tok) { + case ID_WORD: ++*w; *c += yyleng; break; + case ID_EOL: ++*l; ++*c; break; + case ID_CHAR: ++*c; break; + default: + return false; + } + return true; +} + +int main(int argc, char* argv[]) +{ + int tok = EOF; + int c = 0, w = 0, l = 0; + yyin = fopen(1 == argc ? "word_count.input" : argv[1], "r"); + if (NULL == yyin) { + fprintf(stderr, "Couldn't open input file!\n"); + exit(-1); + } + + boost::timer tim; + do { + tok = yylex(); + if (!count(tok, &c, &w, &l)) + break; + } while (EOF != tok); + printf("lines: %d, words: %d, characters: %d\n", l, w, c); + fclose(yyin); + return 0; +} + +extern "C" int yywrap() +{ + return 1; +} + diff --git a/src/boost/libs/spirit/example/lex/word_count_functor_flex.cpp b/src/boost/libs/spirit/example/lex/word_count_functor_flex.cpp new file mode 100644 index 00000000..5fc9e734 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/word_count_functor_flex.cpp @@ -0,0 +1,1576 @@ +#line 2 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor_flex.cpp" +/* A lexical scanner generated by flex */ + +/* Scanner skeleton version: + * $Header: /home/daffy/u0/vern/flex/RCS/flex.skl,v 2.91 96/09/10 16:58:48 vern Exp $ + */ + +#define FLEX_SCANNER +#define YY_FLEX_MAJOR_VERSION 2 +#define YY_FLEX_MINOR_VERSION 5 + +#include +#include + +/* cfront 1.2 defines "c_plusplus" instead of "__cplusplus" */ +#ifdef c_plusplus +#ifndef __cplusplus +#define __cplusplus +#endif +#endif + + +#ifdef __cplusplus + +#include +#ifndef _WIN32 +#include +#endif + +/* Use prototypes in function declarations. */ +#define YY_USE_PROTOS + +/* The "const" storage-class-modifier is valid. 
*/ +#define YY_USE_CONST + +#else /* ! __cplusplus */ + +#if __STDC__ + +#define YY_USE_PROTOS +#define YY_USE_CONST + +#endif /* __STDC__ */ +#endif /* ! __cplusplus */ + +#ifdef __TURBOC__ + #pragma warn -rch + #pragma warn -use +#include +#include +#define YY_USE_CONST +#define YY_USE_PROTOS +#endif + +#ifdef YY_USE_CONST +#define yyconst const +#else +#define yyconst +#endif + + +#ifdef YY_USE_PROTOS +#define YY_PROTO(proto) proto +#else +#define YY_PROTO(proto) () +#endif + + +/* Returned upon end-of-file. */ +#define YY_NULL 0 + +/* Promotes a possibly negative, possibly signed char to an unsigned + * integer for use as an array index. If the signed char is negative, + * we want to instead treat it as an 8-bit unsigned char, hence the + * double cast. + */ +#define YY_SC_TO_UI(c) ((unsigned int) (unsigned char) c) + +/* Enter a start condition. This macro really ought to take a parameter, + * but we do it the disgusting crufty way forced on us by the ()-less + * definition of BEGIN. + */ +#define BEGIN yy_start = 1 + 2 * + +/* Translate the current start state into a value that can be later handed + * to BEGIN to return to the state. The YYSTATE alias is for lex + * compatibility. + */ +#define YY_START ((yy_start - 1) / 2) +#define YYSTATE YY_START + +/* Action number for EOF rule of a given start state. */ +#define YY_STATE_EOF(state) (YY_END_OF_BUFFER + state + 1) + +/* Special action meaning "start processing a new file". */ +#define YY_NEW_FILE yyrestart( yyin ) + +#define YY_END_OF_BUFFER_CHAR 0 + +/* Size of default input buffer. */ +#define YY_BUF_SIZE 16384 + +typedef struct yy_buffer_state *YY_BUFFER_STATE; + +extern int yyleng; +extern FILE *yyin, *yyout; + +#define EOB_ACT_CONTINUE_SCAN 0 +#define EOB_ACT_END_OF_FILE 1 +#define EOB_ACT_LAST_MATCH 2 + +/* The funky do-while in the following #define is used to turn the definition + * int a single C statement (which needs a semi-colon terminator). 
This + * avoids problems with code like: + * + * if ( condition_holds ) + * yyless( 5 ); + * else + * do_something_else(); + * + * Prior to using the do-while the compiler would get upset at the + * "else" because it interpreted the "if" statement as being all + * done when it reached the ';' after the yyless() call. + */ + +/* Return all but the first 'n' matched characters back to the input stream. */ + +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. */ \ + *yy_cp = yy_hold_char; \ + YY_RESTORE_YY_MORE_OFFSET \ + yy_c_buf_p = yy_cp = yy_bp + n - YY_MORE_ADJ; \ + YY_DO_BEFORE_ACTION; /* set up yytext again */ \ + } \ + while ( 0 ) + +#define unput(c) yyunput( c, yytext_ptr ) + +/* The following is because we cannot portably get our hands on size_t + * (without autoconf's help, which isn't available because we want + * flex-generated scanners to compile on their own). + */ +typedef unsigned int yy_size_t; + + +struct yy_buffer_state + { + FILE *yy_input_file; + + char *yy_ch_buf; /* input buffer */ + char *yy_buf_pos; /* current position in input buffer */ + + /* Size of input buffer in bytes, not including room for EOB + * characters. + */ + yy_size_t yy_buf_size; + + /* Number of characters read into yy_ch_buf, not including EOB + * characters. + */ + int yy_n_chars; + + /* Whether we "own" the buffer - i.e., we know we created it, + * and can realloc() it to grow it, and should free() it to + * delete it. + */ + int yy_is_our_buffer; + + /* Whether this is an "interactive" input source; if so, and + * if we're using stdio for input, then we want to use getc() + * instead of fread(), to make sure we stop fetching input after + * each newline. + */ + int yy_is_interactive; + + /* Whether we're considered to be at the beginning of a line. + * If so, '^' rules will be active on the next match, otherwise + * not. + */ + int yy_at_bol; + + /* Whether to try to fill the input buffer when we reach the + * end of it. 
+ */ + int yy_fill_buffer; + + int yy_buffer_status; +#define YY_BUFFER_NEW 0 +#define YY_BUFFER_NORMAL 1 + /* When an EOF's been seen but there's still some text to process + * then we mark the buffer as YY_EOF_PENDING, to indicate that we + * shouldn't try reading from the input source any more. We might + * still have a bunch of tokens to match, though, because of + * possible backing-up. + * + * When we actually see the EOF, we change the status to "new" + * (via yyrestart()), so that the user can continue scanning by + * just pointing yyin at a new input file. + */ +#define YY_BUFFER_EOF_PENDING 2 + }; + +static YY_BUFFER_STATE yy_current_buffer = 0; + +/* We provide macros for accessing buffer states in case in the + * future we want to put the buffer states in a more general + * "scanner state". + */ +#define YY_CURRENT_BUFFER yy_current_buffer + + +/* yy_hold_char holds the character lost when yytext is formed. */ +static char yy_hold_char; + +static int yy_n_chars; /* number of characters read into yy_ch_buf */ + + +int yyleng; + +/* Points to current character in buffer. */ +static char *yy_c_buf_p = (char *) 0; +static int yy_init = 1; /* whether we need to initialize */ +static int yy_start = 0; /* start state number */ + +/* Flag which is used to allow yywrap()'s to do buffer switches + * instead of setting up a fresh yyin. A bit of a hack ... 
+ */ +static int yy_did_buffer_switch_on_eof; + +void yyrestart YY_PROTO(( FILE *input_file )); + +void yy_switch_to_buffer YY_PROTO(( YY_BUFFER_STATE new_buffer )); +void yy_load_buffer_state YY_PROTO(( void )); +YY_BUFFER_STATE yy_create_buffer YY_PROTO(( FILE *file, int size )); +void yy_delete_buffer YY_PROTO(( YY_BUFFER_STATE b )); +void yy_init_buffer YY_PROTO(( YY_BUFFER_STATE b, FILE *file )); +void yy_flush_buffer YY_PROTO(( YY_BUFFER_STATE b )); +#define YY_FLUSH_BUFFER yy_flush_buffer( yy_current_buffer ) + +YY_BUFFER_STATE yy_scan_buffer YY_PROTO(( char *base, yy_size_t size )); +YY_BUFFER_STATE yy_scan_string YY_PROTO(( yyconst char *yy_str )); +YY_BUFFER_STATE yy_scan_bytes YY_PROTO(( yyconst char *bytes, int len )); + +static void *yy_flex_alloc YY_PROTO(( yy_size_t )); +static void *yy_flex_realloc YY_PROTO(( void *, yy_size_t )); +static void yy_flex_free YY_PROTO(( void * )); + +#define yy_new_buffer yy_create_buffer + +#define yy_set_interactive(is_interactive) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_is_interactive = is_interactive; \ + } + +#define yy_set_bol(at_bol) \ + { \ + if ( ! yy_current_buffer ) \ + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); \ + yy_current_buffer->yy_at_bol = at_bol; \ + } + +#define YY_AT_BOL() (yy_current_buffer->yy_at_bol) + +typedef unsigned char YY_CHAR; +FILE *yyin = (FILE *) 0, *yyout = (FILE *) 0; +typedef int yy_state_type; +extern char *yytext; +#define yytext_ptr yytext + +static yy_state_type yy_get_previous_state YY_PROTO(( void )); +static yy_state_type yy_try_NUL_trans YY_PROTO(( yy_state_type current_state )); +static int yy_get_next_buffer YY_PROTO(( void )); +static void yy_fatal_error YY_PROTO(( yyconst char msg[] )); + +/* Done after the current pattern has been matched and before the + * corresponding action - sets up yytext. 
+ */ +#define YY_DO_BEFORE_ACTION \ + yytext_ptr = yy_bp; \ + yyleng = (int) (yy_cp - yy_bp); \ + yy_hold_char = *yy_cp; \ + *yy_cp = '\0'; \ + yy_c_buf_p = yy_cp; + +#define YY_NUM_RULES 4 +#define YY_END_OF_BUFFER 5 +static yyconst short int yy_accept[9] = + { 0, + 0, 0, 5, 1, 3, 2, 1, 0 + } ; + +static yyconst int yy_ec[256] = + { 0, + 1, 1, 1, 1, 1, 1, 1, 1, 2, 3, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1 + } ; + +static yyconst int yy_meta[4] = + { 0, + 1, 2, 2 + } ; + +static yyconst short int yy_base[10] = + { 0, + 0, 0, 5, 0, 6, 6, 0, 6, 3 + } ; + +static yyconst short int yy_def[10] = + { 0, + 8, 1, 8, 9, 8, 8, 9, 0, 8 + } ; + +static yyconst short int yy_nxt[10] = + { 0, + 4, 5, 6, 7, 8, 3, 8, 8, 8 + } ; + +static yyconst short int yy_chk[10] = + { 0, + 1, 1, 1, 9, 3, 8, 8, 8, 8 + } ; + +static yy_state_type yy_last_accepting_state; +static char *yy_last_accepting_cpos; + +/* The intent behind this definition is that it'll catch + * any uses of REJECT which flex missed. 
+ */ +#define REJECT reject_used_but_not_detected +#define yymore() yymore_used_but_not_detected +#define YY_MORE_ADJ 0 +#define YY_RESTORE_YY_MORE_OFFSET +char *yytext; +#line 1 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" +#define INITIAL 0 +#line 2 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include +#if defined(_WIN32) + #include +#endif + #define ID_WORD 1000 + #define ID_EOL 1001 + #define ID_CHAR 1002 +#line 380 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor_flex.cpp" + +/* Macros after this point can all be overridden by user definitions in + * section 1. + */ + +#ifndef YY_SKIP_YYWRAP +#ifdef __cplusplus +extern "C" int yywrap YY_PROTO(( void )); +#else +extern int yywrap YY_PROTO(( void )); +#endif +#endif + +#ifndef YY_NO_UNPUT +static void yyunput YY_PROTO(( int c, char *buf_ptr )); +#endif + +#ifndef yytext_ptr +static void yy_flex_strncpy YY_PROTO(( char *, yyconst char *, int )); +#endif + +#ifdef YY_NEED_STRLEN +static int yy_flex_strlen YY_PROTO(( yyconst char * )); +#endif + +#ifndef YY_NO_INPUT +#ifdef __cplusplus +static int yyinput YY_PROTO(( void )); +#else +static int input YY_PROTO(( void )); +#endif +#endif + +#if YY_STACK_USED +static int yy_start_stack_ptr = 0; +static int yy_start_stack_depth = 0; +static int *yy_start_stack = 0; +#ifndef YY_NO_PUSH_STATE +static void yy_push_state YY_PROTO(( int new_state )); +#endif +#ifndef YY_NO_POP_STATE +static void yy_pop_state YY_PROTO(( void )); +#endif +#ifndef YY_NO_TOP_STATE +static int yy_top_state YY_PROTO(( void )); +#endif + +#else +#define YY_NO_PUSH_STATE 1 +#define YY_NO_POP_STATE 1 +#define YY_NO_TOP_STATE 1 +#endif + +#ifdef YY_MALLOC_DECL +YY_MALLOC_DECL +#else +#if __STDC__ +#ifndef __cplusplus +#include +#endif 
+#else +/* Just try to get by without declaring the routines. This will fail + * miserably on non-ANSI systems for which sizeof(size_t) != sizeof(int) + * or sizeof(void*) != sizeof(int). + */ +#endif +#endif + +/* Amount of stuff to slurp up with each read. */ +#ifndef YY_READ_BUF_SIZE +#define YY_READ_BUF_SIZE 8192 +#endif + +/* Copy whatever the last rule matched to the standard output. */ + +#ifndef ECHO +/* This used to be an fputs(), but since the string might contain NUL's, + * we now use fwrite(). + */ +#define ECHO (void) fwrite( yytext, yyleng, 1, yyout ) +#endif + +/* Gets input and stuffs it into "buf". number of characters read, or YY_NULL, + * is returned in "result". + */ +#ifndef YY_INPUT +#define YY_INPUT(buf,result,max_size) \ + if ( yy_current_buffer->yy_is_interactive ) \ + { \ + int c = '*', n; \ + for ( n = 0; n < max_size && \ + (c = getc( yyin )) != EOF && c != '\n'; ++n ) \ + buf[n] = (char) c; \ + if ( c == '\n' ) \ + buf[n++] = (char) c; \ + if ( c == EOF && ferror( yyin ) ) \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + result = n; \ + } \ + else \ + { \ + errno=0; \ + while ( (result = fread(buf, 1, max_size, yyin))==0 && ferror(yyin)) \ + { \ + if( errno != EINTR) \ + { \ + YY_FATAL_ERROR( "input in flex scanner failed" ); \ + break; \ + } \ + errno=0; \ + clearerr(yyin); \ + } \ + } +#endif + +/* No semi-colon after return; correct usage is to write "yyterminate();" - + * we don't want an extra ';' after the "return" because that will cause + * some compilers to complain about unreachable statements. + */ +#ifndef yyterminate +#define yyterminate() return YY_NULL +#endif + +/* Number of entries by which start-condition stack grows. */ +#ifndef YY_START_STACK_INCR +#define YY_START_STACK_INCR 25 +#endif + +/* Report a fatal error. */ +#ifndef YY_FATAL_ERROR +#define YY_FATAL_ERROR(msg) yy_fatal_error( msg ) +#endif + +/* Default declaration of generated scanner - a define so the user can + * easily add parameters. 
+ */ +#ifndef YY_DECL +#define YY_DECL int yylex YY_PROTO(( void )) +#endif + +/* Code executed at the beginning of each rule, after yytext and yyleng + * have been set up. + */ +#ifndef YY_USER_ACTION +#define YY_USER_ACTION +#endif + +/* Code executed at the end of each rule. */ +#ifndef YY_BREAK +#define YY_BREAK break; +#endif + +#define YY_RULE_SETUP \ + YY_USER_ACTION + +YY_DECL + { + register yy_state_type yy_current_state; + register char *yy_cp, *yy_bp; + register int yy_act; + +#line 16 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" + +#line 544 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor_flex.cpp" + + if ( yy_init ) + { + yy_init = 0; + +#ifdef YY_USER_INIT + YY_USER_INIT; +#endif + + if ( ! yy_start ) + yy_start = 1; /* first start state */ + + if ( ! yyin ) + yyin = stdin; + + if ( ! yyout ) + yyout = stdout; + + if ( ! yy_current_buffer ) + yy_current_buffer = + yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_load_buffer_state(); + } + + while ( 1 ) /* loops until end-of-file is reached */ + { + yy_cp = yy_c_buf_p; + + /* Support of yytext. */ + *yy_cp = yy_hold_char; + + /* yy_bp points to the position in yy_ch_buf of the start of + * the current run. 
+ */ + yy_bp = yy_cp; + + yy_current_state = yy_start; +yy_match: + do + { + register YY_CHAR yy_c = yy_ec[YY_SC_TO_UI(*yy_cp)]; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 9 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + ++yy_cp; + } + while ( yy_base[yy_current_state] != 6 ); + +yy_find_action: + yy_act = yy_accept[yy_current_state]; + if ( yy_act == 0 ) + { /* have to back up */ + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + yy_act = yy_accept[yy_current_state]; + } + + YY_DO_BEFORE_ACTION; + + +do_action: /* This label is used only to access EOF actions. */ + + + switch ( yy_act ) + { /* beginning of action switch */ + case 0: /* must back up */ + /* undo the effects of YY_DO_BEFORE_ACTION */ + *yy_cp = yy_hold_char; + yy_cp = yy_last_accepting_cpos; + yy_current_state = yy_last_accepting_state; + goto yy_find_action; + +case 1: +YY_RULE_SETUP +#line 17 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" +{ return ID_WORD; } + YY_BREAK +case 2: +YY_RULE_SETUP +#line 18 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" +{ return ID_EOL; } + YY_BREAK +case 3: +YY_RULE_SETUP +#line 19 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" +{ return ID_CHAR; } + YY_BREAK +case 4: +YY_RULE_SETUP +#line 20 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" +ECHO; + YY_BREAK +#line 647 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor_flex.cpp" +case YY_STATE_EOF(INITIAL): + yyterminate(); + + case YY_END_OF_BUFFER: + { + /* Amount of text matched not including the EOB char. 
*/ + int yy_amount_of_matched_text = (int) (yy_cp - yytext_ptr) - 1; + + /* Undo the effects of YY_DO_BEFORE_ACTION. */ + *yy_cp = yy_hold_char; + YY_RESTORE_YY_MORE_OFFSET + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_NEW ) + { + /* We're scanning a new file or input source. It's + * possible that this happened because the user + * just pointed yyin at a new source and called + * yylex(). If so, then we have to assure + * consistency between yy_current_buffer and our + * globals. Here is the right place to do so, because + * this is the first action (other than possibly a + * back-up) that will match for the new input source. + */ + yy_n_chars = yy_current_buffer->yy_n_chars; + yy_current_buffer->yy_input_file = yyin; + yy_current_buffer->yy_buffer_status = YY_BUFFER_NORMAL; + } + + /* Note that here we test for yy_c_buf_p "<=" to the position + * of the first EOB in the buffer, since yy_c_buf_p will + * already have been incremented past the NUL character + * (since all states make transitions on EOB to the + * end-of-buffer state). Contrast this with the test + * in input(). + */ + if ( yy_c_buf_p <= &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + { /* This was really a NUL. */ + yy_state_type yy_next_state; + + yy_c_buf_p = yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + /* Okay, we're now positioned to make the NUL + * transition. We couldn't have + * yy_get_previous_state() go ahead and do it + * for us because it doesn't know how to deal + * with the possibility of jamming (and we don't + * want to build jamming into it because then it + * will run more slowly). + */ + + yy_next_state = yy_try_NUL_trans( yy_current_state ); + + yy_bp = yytext_ptr + YY_MORE_ADJ; + + if ( yy_next_state ) + { + /* Consume the NUL. 
*/ + yy_cp = ++yy_c_buf_p; + yy_current_state = yy_next_state; + goto yy_match; + } + + else + { + yy_cp = yy_c_buf_p; + goto yy_find_action; + } + } + + else switch ( yy_get_next_buffer() ) + { + case EOB_ACT_END_OF_FILE: + { + yy_did_buffer_switch_on_eof = 0; + + if ( yywrap() ) + { + /* Note: because we've taken care in + * yy_get_next_buffer() to have set up + * yytext, we can now set up + * yy_c_buf_p so that if some total + * hoser (like flex itself) wants to + * call the scanner after we return the + * YY_NULL, it'll still work - another + * YY_NULL will get returned. + */ + yy_c_buf_p = yytext_ptr + YY_MORE_ADJ; + + yy_act = YY_STATE_EOF(YY_START); + goto do_action; + } + + else + { + if ( ! yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; + } + break; + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = + yytext_ptr + yy_amount_of_matched_text; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_match; + + case EOB_ACT_LAST_MATCH: + yy_c_buf_p = + &yy_current_buffer->yy_ch_buf[yy_n_chars]; + + yy_current_state = yy_get_previous_state(); + + yy_cp = yy_c_buf_p; + yy_bp = yytext_ptr + YY_MORE_ADJ; + goto yy_find_action; + } + break; + } + + default: + YY_FATAL_ERROR( + "fatal flex scanner internal error--no action found" ); + } /* end of action switch */ + } /* end of scanning one token */ + } /* end of yylex */ + + +/* yy_get_next_buffer - try to read in a new buffer + * + * Returns a code representing an action: + * EOB_ACT_LAST_MATCH - + * EOB_ACT_CONTINUE_SCAN - continue scanning from current position + * EOB_ACT_END_OF_FILE - end of file + */ + +static int yy_get_next_buffer() + { + register char *dest = yy_current_buffer->yy_ch_buf; + register char *source = yytext_ptr; + register int number_to_move, i; + int ret_val; + + if ( yy_c_buf_p > &yy_current_buffer->yy_ch_buf[yy_n_chars + 1] ) + YY_FATAL_ERROR( + "fatal flex scanner internal error--end of buffer missed" ); + + if ( 
yy_current_buffer->yy_fill_buffer == 0 ) + { /* Don't try to fill the buffer, so this is an EOF. */ + if ( yy_c_buf_p - yytext_ptr - YY_MORE_ADJ == 1 ) + { + /* We matched a single character, the EOB, so + * treat this as a final EOF. + */ + return EOB_ACT_END_OF_FILE; + } + + else + { + /* We matched some text prior to the EOB, first + * process it. + */ + return EOB_ACT_LAST_MATCH; + } + } + + /* Try to read more data. */ + + /* First move last chars to start of buffer. */ + number_to_move = (int) (yy_c_buf_p - yytext_ptr) - 1; + + for ( i = 0; i < number_to_move; ++i ) + *(dest++) = *(source++); + + if ( yy_current_buffer->yy_buffer_status == YY_BUFFER_EOF_PENDING ) + /* don't do the read, it's not guaranteed to return an EOF, + * just force an EOF + */ + yy_current_buffer->yy_n_chars = yy_n_chars = 0; + + else + { + int num_to_read = + yy_current_buffer->yy_buf_size - number_to_move - 1; + + while ( num_to_read <= 0 ) + { /* Not enough room in the buffer - grow it. */ +#ifdef YY_USES_REJECT + YY_FATAL_ERROR( +"input buffer overflow, can't enlarge buffer because scanner uses REJECT" ); +#else + + /* just a shorter name for the current buffer */ + YY_BUFFER_STATE b = yy_current_buffer; + + int yy_c_buf_p_offset = + (int) (yy_c_buf_p - b->yy_ch_buf); + + if ( b->yy_is_our_buffer ) + { + int new_size = b->yy_buf_size * 2; + + if ( new_size <= 0 ) + b->yy_buf_size += b->yy_buf_size / 8; + else + b->yy_buf_size *= 2; + + b->yy_ch_buf = (char *) + /* Include room in for 2 EOB chars. */ + yy_flex_realloc( (void *) b->yy_ch_buf, + b->yy_buf_size + 2 ); + } + else + /* Can't grow it, we don't own it. */ + b->yy_ch_buf = 0; + + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( + "fatal error - scanner input buffer overflow" ); + + yy_c_buf_p = &b->yy_ch_buf[yy_c_buf_p_offset]; + + num_to_read = yy_current_buffer->yy_buf_size - + number_to_move - 1; +#endif + } + + if ( num_to_read > YY_READ_BUF_SIZE ) + num_to_read = YY_READ_BUF_SIZE; + + /* Read in more data. 
*/ + YY_INPUT( (&yy_current_buffer->yy_ch_buf[number_to_move]), + yy_n_chars, num_to_read ); + + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + if ( yy_n_chars == 0 ) + { + if ( number_to_move == YY_MORE_ADJ ) + { + ret_val = EOB_ACT_END_OF_FILE; + yyrestart( yyin ); + } + + else + { + ret_val = EOB_ACT_LAST_MATCH; + yy_current_buffer->yy_buffer_status = + YY_BUFFER_EOF_PENDING; + } + } + + else + ret_val = EOB_ACT_CONTINUE_SCAN; + + yy_n_chars += number_to_move; + yy_current_buffer->yy_ch_buf[yy_n_chars] = YY_END_OF_BUFFER_CHAR; + yy_current_buffer->yy_ch_buf[yy_n_chars + 1] = YY_END_OF_BUFFER_CHAR; + + yytext_ptr = &yy_current_buffer->yy_ch_buf[0]; + + return ret_val; + } + + +/* yy_get_previous_state - get the state just before the EOB char was reached */ + +static yy_state_type yy_get_previous_state() + { + register yy_state_type yy_current_state; + register char *yy_cp; + + yy_current_state = yy_start; + + for ( yy_cp = yytext_ptr + YY_MORE_ADJ; yy_cp < yy_c_buf_p; ++yy_cp ) + { + register YY_CHAR yy_c = (*yy_cp ? 
yy_ec[YY_SC_TO_UI(*yy_cp)] : 1); + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 9 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + } + + return yy_current_state; + } + + +/* yy_try_NUL_trans - try to make a transition on the NUL character + * + * synopsis + * next_state = yy_try_NUL_trans( current_state ); + */ + +#ifdef YY_USE_PROTOS +static yy_state_type yy_try_NUL_trans( yy_state_type yy_current_state ) +#else +static yy_state_type yy_try_NUL_trans( yy_current_state ) +yy_state_type yy_current_state; +#endif + { + register int yy_is_jam; + register char *yy_cp = yy_c_buf_p; + + register YY_CHAR yy_c = 1; + if ( yy_accept[yy_current_state] ) + { + yy_last_accepting_state = yy_current_state; + yy_last_accepting_cpos = yy_cp; + } + while ( yy_chk[yy_base[yy_current_state] + yy_c] != yy_current_state ) + { + yy_current_state = (int) yy_def[yy_current_state]; + if ( yy_current_state >= 9 ) + yy_c = yy_meta[(unsigned int) yy_c]; + } + yy_current_state = yy_nxt[yy_base[yy_current_state] + (unsigned int) yy_c]; + yy_is_jam = (yy_current_state == 8); + + return yy_is_jam ? 0 : yy_current_state; + } + + +#ifndef YY_NO_UNPUT +#ifdef YY_USE_PROTOS +static void yyunput( int c, register char *yy_bp ) +#else +static void yyunput( c, yy_bp ) +int c; +register char *yy_bp; +#endif + { + register char *yy_cp = yy_c_buf_p; + + /* undo effects of setting up yytext */ + *yy_cp = yy_hold_char; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + { /* need to shift things up to make room */ + /* +2 for EOB chars. 
*/ + register int number_to_move = yy_n_chars + 2; + register char *dest = &yy_current_buffer->yy_ch_buf[ + yy_current_buffer->yy_buf_size + 2]; + register char *source = + &yy_current_buffer->yy_ch_buf[number_to_move]; + + while ( source > yy_current_buffer->yy_ch_buf ) + *--dest = *--source; + + yy_cp += (int) (dest - source); + yy_bp += (int) (dest - source); + yy_current_buffer->yy_n_chars = + yy_n_chars = yy_current_buffer->yy_buf_size; + + if ( yy_cp < yy_current_buffer->yy_ch_buf + 2 ) + YY_FATAL_ERROR( "flex scanner push-back overflow" ); + } + + *--yy_cp = (char) c; + + + yytext_ptr = yy_bp; + yy_hold_char = *yy_cp; + yy_c_buf_p = yy_cp; + } +#endif /* ifndef YY_NO_UNPUT */ + + +#ifdef __cplusplus +static int yyinput() +#else +static int input() +#endif + { + int c; + + *yy_c_buf_p = yy_hold_char; + + if ( *yy_c_buf_p == YY_END_OF_BUFFER_CHAR ) + { + /* yy_c_buf_p now points to the character we want to return. + * If this occurs *before* the EOB characters, then it's a + * valid NUL; if not, then we've hit the end of the buffer. + */ + if ( yy_c_buf_p < &yy_current_buffer->yy_ch_buf[yy_n_chars] ) + /* This was really a NUL. */ + *yy_c_buf_p = '\0'; + + else + { /* need more input */ + int offset = yy_c_buf_p - yytext_ptr; + ++yy_c_buf_p; + + switch ( yy_get_next_buffer() ) + { + case EOB_ACT_LAST_MATCH: + /* This happens because yy_g_n_b() + * sees that we've accumulated a + * token and flags that we need to + * try matching the token before + * proceeding. But for input(), + * there's no matching to consider. + * So convert the EOB_ACT_LAST_MATCH + * to EOB_ACT_END_OF_FILE. + */ + + /* Reset buffer status. */ + yyrestart( yyin ); + + /* fall through */ + + case EOB_ACT_END_OF_FILE: + { + if ( yywrap() ) + return EOF; + + if ( ! 
yy_did_buffer_switch_on_eof ) + YY_NEW_FILE; +#ifdef __cplusplus + return yyinput(); +#else + return input(); +#endif + } + + case EOB_ACT_CONTINUE_SCAN: + yy_c_buf_p = yytext_ptr + offset; + break; + } + } + } + + c = *(unsigned char *) yy_c_buf_p; /* cast for 8-bit char's */ + *yy_c_buf_p = '\0'; /* preserve yytext */ + yy_hold_char = *++yy_c_buf_p; + + + return c; + } + + +#ifdef YY_USE_PROTOS +void yyrestart( FILE *input_file ) +#else +void yyrestart( input_file ) +FILE *input_file; +#endif + { + if ( ! yy_current_buffer ) + yy_current_buffer = yy_create_buffer( yyin, YY_BUF_SIZE ); + + yy_init_buffer( yy_current_buffer, input_file ); + yy_load_buffer_state(); + } + + +#ifdef YY_USE_PROTOS +void yy_switch_to_buffer( YY_BUFFER_STATE new_buffer ) +#else +void yy_switch_to_buffer( new_buffer ) +YY_BUFFER_STATE new_buffer; +#endif + { + if ( yy_current_buffer == new_buffer ) + return; + + if ( yy_current_buffer ) + { + /* Flush out information for old buffer. */ + *yy_c_buf_p = yy_hold_char; + yy_current_buffer->yy_buf_pos = yy_c_buf_p; + yy_current_buffer->yy_n_chars = yy_n_chars; + } + + yy_current_buffer = new_buffer; + yy_load_buffer_state(); + + /* We don't actually know whether we did this switch during + * EOF (yywrap()) processing, but the only time this flag + * is looked at is after yywrap() is called, so it's safe + * to go ahead and always set it. + */ + yy_did_buffer_switch_on_eof = 1; + } + + +#ifdef YY_USE_PROTOS +void yy_load_buffer_state( void ) +#else +void yy_load_buffer_state() +#endif + { + yy_n_chars = yy_current_buffer->yy_n_chars; + yytext_ptr = yy_c_buf_p = yy_current_buffer->yy_buf_pos; + yyin = yy_current_buffer->yy_input_file; + yy_hold_char = *yy_c_buf_p; + } + + +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_create_buffer( FILE *file, int size ) +#else +YY_BUFFER_STATE yy_create_buffer( file, size ) +FILE *file; +int size; +#endif + { + YY_BUFFER_STATE b; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! 
b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_buf_size = size; + + /* yy_ch_buf has to be 2 characters longer than the size given because + * we need to put in 2 end-of-buffer characters. + */ + b->yy_ch_buf = (char *) yy_flex_alloc( b->yy_buf_size + 2 ); + if ( ! b->yy_ch_buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_create_buffer()" ); + + b->yy_is_our_buffer = 1; + + yy_init_buffer( b, file ); + + return b; + } + + +#ifdef YY_USE_PROTOS +void yy_delete_buffer( YY_BUFFER_STATE b ) +#else +void yy_delete_buffer( b ) +YY_BUFFER_STATE b; +#endif + { + if ( ! b ) + return; + + if ( b == yy_current_buffer ) + yy_current_buffer = (YY_BUFFER_STATE) 0; + + if ( b->yy_is_our_buffer ) + yy_flex_free( (void *) b->yy_ch_buf ); + + yy_flex_free( (void *) b ); + } + + +#ifndef _WIN32 +#include +#else +#ifndef YY_ALWAYS_INTERACTIVE +#ifndef YY_NEVER_INTERACTIVE +extern int isatty YY_PROTO(( int )); +#endif +#endif +#endif + +#ifdef YY_USE_PROTOS +void yy_init_buffer( YY_BUFFER_STATE b, FILE *file ) +#else +void yy_init_buffer( b, file ) +YY_BUFFER_STATE b; +FILE *file; +#endif + + + { + yy_flush_buffer( b ); + + b->yy_input_file = file; + b->yy_fill_buffer = 1; + +#if YY_ALWAYS_INTERACTIVE + b->yy_is_interactive = 1; +#else +#if YY_NEVER_INTERACTIVE + b->yy_is_interactive = 0; +#else + b->yy_is_interactive = file ? (isatty( fileno(file) ) > 0) : 0; +#endif +#endif + } + + +#ifdef YY_USE_PROTOS +void yy_flush_buffer( YY_BUFFER_STATE b ) +#else +void yy_flush_buffer( b ) +YY_BUFFER_STATE b; +#endif + + { + if ( ! b ) + return; + + b->yy_n_chars = 0; + + /* We always need two end-of-buffer characters. The first causes + * a transition to the end-of-buffer state. The second causes + * a jam in that state. 
+ */ + b->yy_ch_buf[0] = YY_END_OF_BUFFER_CHAR; + b->yy_ch_buf[1] = YY_END_OF_BUFFER_CHAR; + + b->yy_buf_pos = &b->yy_ch_buf[0]; + + b->yy_at_bol = 1; + b->yy_buffer_status = YY_BUFFER_NEW; + + if ( b == yy_current_buffer ) + yy_load_buffer_state(); + } + + +#ifndef YY_NO_SCAN_BUFFER +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_buffer( char *base, yy_size_t size ) +#else +YY_BUFFER_STATE yy_scan_buffer( base, size ) +char *base; +yy_size_t size; +#endif + { + YY_BUFFER_STATE b; + + if ( size < 2 || + base[size-2] != YY_END_OF_BUFFER_CHAR || + base[size-1] != YY_END_OF_BUFFER_CHAR ) + /* They forgot to leave room for the EOB's. */ + return 0; + + b = (YY_BUFFER_STATE) yy_flex_alloc( sizeof( struct yy_buffer_state ) ); + if ( ! b ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_buffer()" ); + + b->yy_buf_size = size - 2; /* "- 2" to take care of EOB's */ + b->yy_buf_pos = b->yy_ch_buf = base; + b->yy_is_our_buffer = 0; + b->yy_input_file = 0; + b->yy_n_chars = b->yy_buf_size; + b->yy_is_interactive = 0; + b->yy_at_bol = 1; + b->yy_fill_buffer = 0; + b->yy_buffer_status = YY_BUFFER_NEW; + + yy_switch_to_buffer( b ); + + return b; + } +#endif + + +#ifndef YY_NO_SCAN_STRING +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_string( yyconst char *yy_str ) +#else +YY_BUFFER_STATE yy_scan_string( yy_str ) +yyconst char *yy_str; +#endif + { + int len; + for ( len = 0; yy_str[len]; ++len ) + ; + + return yy_scan_bytes( yy_str, len ); + } +#endif + + +#ifndef YY_NO_SCAN_BYTES +#ifdef YY_USE_PROTOS +YY_BUFFER_STATE yy_scan_bytes( yyconst char *bytes, int len ) +#else +YY_BUFFER_STATE yy_scan_bytes( bytes, len ) +yyconst char *bytes; +int len; +#endif + { + YY_BUFFER_STATE b; + char *buf; + yy_size_t n; + int i; + + /* Get memory for full buffer, including space for trailing EOB's. */ + n = len + 2; + buf = (char *) yy_flex_alloc( n ); + if ( ! 
buf ) + YY_FATAL_ERROR( "out of dynamic memory in yy_scan_bytes()" ); + + for ( i = 0; i < len; ++i ) + buf[i] = bytes[i]; + + buf[len] = buf[len+1] = YY_END_OF_BUFFER_CHAR; + + b = yy_scan_buffer( buf, n ); + if ( ! b ) + YY_FATAL_ERROR( "bad buffer in yy_scan_bytes()" ); + + /* It's okay to grow etc. this buffer, and we should throw it + * away when we're done. + */ + b->yy_is_our_buffer = 1; + + return b; + } +#endif + + +#ifndef YY_NO_PUSH_STATE +#ifdef YY_USE_PROTOS +static void yy_push_state( int new_state ) +#else +static void yy_push_state( new_state ) +int new_state; +#endif + { + if ( yy_start_stack_ptr >= yy_start_stack_depth ) + { + yy_size_t new_size; + + yy_start_stack_depth += YY_START_STACK_INCR; + new_size = yy_start_stack_depth * sizeof( int ); + + if ( ! yy_start_stack ) + yy_start_stack = (int *) yy_flex_alloc( new_size ); + + else + yy_start_stack = (int *) yy_flex_realloc( + (void *) yy_start_stack, new_size ); + + if ( ! yy_start_stack ) + YY_FATAL_ERROR( + "out of memory expanding start-condition stack" ); + } + + yy_start_stack[yy_start_stack_ptr++] = YY_START; + + BEGIN(new_state); + } +#endif + + +#ifndef YY_NO_POP_STATE +static void yy_pop_state() + { + if ( --yy_start_stack_ptr < 0 ) + YY_FATAL_ERROR( "start-condition stack underflow" ); + + BEGIN(yy_start_stack[yy_start_stack_ptr]); + } +#endif + + +#ifndef YY_NO_TOP_STATE +static int yy_top_state() + { + return yy_start_stack[yy_start_stack_ptr - 1]; + } +#endif + +#ifndef YY_EXIT_FAILURE +#define YY_EXIT_FAILURE 2 +#endif + +#ifdef YY_USE_PROTOS +static void yy_fatal_error( yyconst char msg[] ) +#else +static void yy_fatal_error( msg ) +char msg[]; +#endif + { + (void) fprintf( stderr, "%s\n", msg ); + exit( YY_EXIT_FAILURE ); + } + + + +/* Redefine yyless() so it works in section 3 code. */ + +#undef yyless +#define yyless(n) \ + do \ + { \ + /* Undo effects of setting up yytext. 
*/ \ + yytext[yyleng] = yy_hold_char; \ + yy_c_buf_p = yytext + n; \ + yy_hold_char = *yy_c_buf_p; \ + *yy_c_buf_p = '\0'; \ + yyleng = n; \ + } \ + while ( 0 ) + + +/* Internal utility routines. */ + +#ifndef yytext_ptr +#ifdef YY_USE_PROTOS +static void yy_flex_strncpy( char *s1, yyconst char *s2, int n ) +#else +static void yy_flex_strncpy( s1, s2, n ) +char *s1; +yyconst char *s2; +int n; +#endif + { + register int i; + for ( i = 0; i < n; ++i ) + s1[i] = s2[i]; + } +#endif + +#ifdef YY_NEED_STRLEN +#ifdef YY_USE_PROTOS +static int yy_flex_strlen( yyconst char *s ) +#else +static int yy_flex_strlen( s ) +yyconst char *s; +#endif + { + register int n; + for ( n = 0; s[n]; ++n ) + ; + + return n; + } +#endif + + +#ifdef YY_USE_PROTOS +static void *yy_flex_alloc( yy_size_t size ) +#else +static void *yy_flex_alloc( size ) +yy_size_t size; +#endif + { + return (void *) malloc( size ); + } + +#ifdef YY_USE_PROTOS +static void *yy_flex_realloc( void *ptr, yy_size_t size ) +#else +static void *yy_flex_realloc( ptr, size ) +void *ptr; +yy_size_t size; +#endif + { + /* The cast to (char *) in the following accommodates both + * implementations that use char* generic pointers, and those + * that use void* generic pointers. It works with the latter + * because both ANSI C and C++ allow castless assignment from + * any pointer type to void*, and deal with argument conversions + * as though doing an assignment. 
+ */ + return (void *) realloc( (char *) ptr, size ); + } + +#ifdef YY_USE_PROTOS +static void yy_flex_free( void *ptr ) +#else +static void yy_flex_free( ptr ) +void *ptr; +#endif + { + free( ptr ); + } + +#if YY_MAIN +int main() + { + yylex(); + return 0; + } +#endif +#line 20 "c:\\CVS\\boost\\libs\\spirit\\example\\lex\\word_count_functor.flex" + + +bool count(int tok, int* c, int* w, int* l) +{ + switch (tok) { + case ID_WORD: ++*w; *c += yyleng; break; + case ID_EOL: ++*l; ++*c; break; + case ID_CHAR: ++*c; break; + default: + return false; + } + return true; +} + +int main(int argc, char* argv[]) +{ + int tok = EOF; + int c = 0, w = 0, l = 0; + yyin = fopen(1 == argc ? "word_count.input" : argv[1], "r"); + if (NULL == yyin) { + fprintf(stderr, "Couldn't open input file!\n"); + exit(-1); + } + + boost::timer tim; + do { + tok = yylex(); + if (!count(tok, &c, &w, &l)) + break; + } while (EOF != tok); + printf("lines: %d, words: %d, characters: %d\n", l, w, c); + fclose(yyin); + return 0; +} + +extern "C" int yywrap() +{ + return 1; +} + diff --git a/src/boost/libs/spirit/example/lex/word_count_lexer.cpp b/src/boost/libs/spirit/example/lex/word_count_lexer.cpp new file mode 100644 index 00000000..57225e40 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/word_count_lexer.cpp @@ -0,0 +1,152 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example is the equivalent to the following lex program: +/* +//[wcl_flex_version + %{ + int c = 0, w = 0, l = 0; + %} + %% + [^ \t\n]+ { ++w; c += yyleng; } + \n { ++c; ++l; } + . { ++c; } + %% + main() + { + yylex(); + printf("%d %d %d\n", l, w, c); + } +//] +*/ +// Its purpose is to do the word count function of the wc command in UNIX. It +// prints the number of lines, words and characters in a file. 
+// +// This examples shows how to use semantic actions associated with token +// definitions to directly attach actions to tokens. These get executed +// whenever the corresponding token got matched in the input sequence. Note, +// how this example implements all functionality directly in the lexer +// definition without any need for a parser. + +// #define BOOST_SPIRIT_LEXERTL_DEBUG + +#include +//[wcl_includes +#include +#include +#include +#include +#include +//] + +#include +#include + +#include "example.hpp" + +//[wcl_namespaces +namespace lex = boost::spirit::lex; +//] + +/////////////////////////////////////////////////////////////////////////////// +// Token definition: We use the lexertl based lexer engine as the underlying +// lexer type. +// +// Note, the token definition type is derived from the 'lexertl_actor_lexer' +// template, which is a necessary to being able to use lexer semantic actions. +/////////////////////////////////////////////////////////////////////////////// +struct distance_func +{ + template + struct result : boost::iterator_difference {}; + + template + typename result::type + operator()(Iterator1 const& begin, Iterator2 const& end) const + { + return std::distance(begin, end); + } +}; +boost::phoenix::function const distance = distance_func(); + +//[wcl_token_definition +template +struct word_count_tokens : lex::lexer +{ + word_count_tokens() + : c(0), w(0), l(0) + , word("[^ \t\n]+") // define tokens + , eol("\n") + , any(".") + { + using boost::spirit::lex::_start; + using boost::spirit::lex::_end; + using boost::phoenix::ref; + + // associate tokens with the lexer + this->self + = word [++ref(w), ref(c) += distance(_start, _end)] + | eol [++ref(c), ++ref(l)] + | any [++ref(c)] + ; + } + + std::size_t c, w, l; + lex::token_def<> word, eol, any; +}; +//] + +/////////////////////////////////////////////////////////////////////////////// +//[wcl_main +int main(int argc, char* argv[]) +{ + +/*< Specifying `omit` as the token 
attribute type generates a token class + not holding any token attribute at all (not even the iterator range of the + matched input sequence), therefore optimizing the token, the lexer, and + possibly the parser implementation as much as possible. Specifying + `mpl::false_` as the 3rd template parameter generates a token + type and an iterator, both holding no lexer state, allowing for even more + aggressive optimizations. As a result the token instances contain the token + ids as the only data member. +>*/ typedef + lex::lexertl::token + token_type; + +/*< This defines the lexer type to use +>*/ typedef lex::lexertl::actor_lexer lexer_type; + +/*< Create the lexer object instance needed to invoke the lexical analysis +>*/ word_count_tokens word_count_lexer; + +/*< Read input from the given file, tokenize all the input, while discarding + all generated tokens +>*/ std::string str (read_from_file(1 == argc ? "word_count.input" : argv[1])); + char const* first = str.c_str(); + char const* last = &first[str.size()]; + +/*< Create a pair of iterators returning the sequence of generated tokens +>*/ lexer_type::iterator_type iter = word_count_lexer.begin(first, last); + lexer_type::iterator_type end = word_count_lexer.end(); + +/*< Here we simply iterate over all tokens, making sure to break the loop + if an invalid token gets returned from the lexer +>*/ while (iter != end && token_is_valid(*iter)) + ++iter; + + if (iter == end) { + std::cout << "lines: " << word_count_lexer.l + << ", words: " << word_count_lexer.w + << ", characters: " << word_count_lexer.c + << "\n"; + } + else { + std::string rest(first, last); + std::cout << "Lexical analysis failed\n" << "stopped at: \"" + << rest << "\"\n"; + } + return 0; +} +//] -- cgit v1.2.3