diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-27 18:24:20 +0000 |
commit | 483eb2f56657e8e7f419ab1a4fab8dce9ade8609 (patch) | |
tree | e5d88d25d870d5dedacb6bbdbe2a966086a0a5cf /src/boost/libs/spirit/example/lex/example3.cpp | |
parent | Initial commit. (diff) | |
download | ceph-upstream.tar.xz ceph-upstream.zip |
Adding upstream version 14.2.21. (refs: upstream/14.2.21, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/boost/libs/spirit/example/lex/example3.cpp')
-rw-r--r-- | src/boost/libs/spirit/example/lex/example3.cpp | 150 |
1 files changed, 150 insertions, 0 deletions
diff --git a/src/boost/libs/spirit/example/lex/example3.cpp b/src/boost/libs/spirit/example/lex/example3.cpp new file mode 100644 index 00000000..83800331 --- /dev/null +++ b/src/boost/libs/spirit/example/lex/example3.cpp @@ -0,0 +1,150 @@ +// Copyright (c) 2001-2010 Hartmut Kaiser +// +// Distributed under the Boost Software License, Version 1.0. (See accompanying +// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +// This example shows how to create a simple lexer recognizing a couple of +// different tokens and how to use this with a grammar. This example has a +// heavily backtracking grammar which makes it a candidate for lexer based +// parsing (all tokens are scanned and generated only once, even if +// backtracking is required) which speeds up the overall parsing process +// considerably, outweighing the overhead needed for setting up the lexer. +// +// Additionally, this example demonstrates how to define a token set usable +// as the skip parser during parsing, which allows several tokens to be +// ignored. +// +// This example recognizes couplets, which are sequences of numbers enclosed +// in matching pairs of parentheses. See the comments below for details +// and examples. 
+ +// #define BOOST_SPIRIT_LEXERTL_DEBUG +// #define BOOST_SPIRIT_DEBUG + +#include <boost/config/warning_disable.hpp> +#include <boost/spirit/include/qi.hpp> +#include <boost/spirit/include/lex_lexertl.hpp> + +#include <iostream> +#include <fstream> +#include <string> + +#include "example.hpp" + +using namespace boost::spirit; + +/////////////////////////////////////////////////////////////////////////////// +// Token definition +/////////////////////////////////////////////////////////////////////////////// +template <typename Lexer> +struct example3_tokens : lex::lexer<Lexer> +{ + example3_tokens() + { + // define the tokens to match + ellipses = "\\.\\.\\."; + number = "[0-9]+"; + + // associate the tokens and the token set with the lexer + this->self = ellipses | '(' | ')' | number; + + // define the whitespace to ignore (spaces, tabs, newlines and C-style + // comments) + this->self("WS") + = lex::token_def<>("[ \\t\\n]+") // whitespace + | "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/" // C style comments + ; + } + + // these tokens expose the iterator_range of the matched input sequence + lex::token_def<> ellipses, identifier, number; +}; + +/////////////////////////////////////////////////////////////////////////////// +// Grammar definition +/////////////////////////////////////////////////////////////////////////////// +template <typename Iterator, typename Lexer> +struct example3_grammar + : qi::grammar<Iterator, qi::in_state_skipper<Lexer> > +{ + template <typename TokenDef> + example3_grammar(TokenDef const& tok) + : example3_grammar::base_type(start) + { + start + = +(couplet | tok.ellipses) + ; + + // A couplet matches nested left and right parenthesis. + // For example: + // (1) (1 2) (1 2 3) ... + // ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ... + // (((1))) ... 
+ couplet + = tok.number + | '(' >> +couplet >> ')' + ; + + BOOST_SPIRIT_DEBUG_NODE(start); + BOOST_SPIRIT_DEBUG_NODE(couplet); + } + + qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet; +}; + +/////////////////////////////////////////////////////////////////////////////// +int main() +{ + // iterator type used to expose the underlying input stream + typedef std::string::iterator base_iterator_type; + + // This is the token type to return from the lexer iterator + typedef lex::lexertl::token<base_iterator_type> token_type; + + // This is the lexer type to use to tokenize the input. + // Here we use the lexertl based lexer engine. + typedef lex::lexertl::lexer<token_type> lexer_type; + + // This is the token definition type (derived from the given lexer type). + typedef example3_tokens<lexer_type> example3_tokens; + + // this is the iterator type exposed by the lexer + typedef example3_tokens::iterator_type iterator_type; + + // this is the type of the grammar to parse + typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar; + + // now we use the types defined above to create the lexer and grammar + // object instances needed to invoke the parsing process + example3_tokens tokens; // Our lexer + example3_grammar calc(tokens); // Our parser + + std::string str (read_from_file("example3.input")); + + // At this point we generate the iterator pair used to expose the + // tokenized input stream. + std::string::iterator it = str.begin(); + iterator_type iter = tokens.begin(it, str.end()); + iterator_type end = tokens.end(); + + // Parsing is done based on the token stream, not the character + // stream read from the input. + // Note how we use the lexer defined above as the skip parser. 
+ bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]); + + if (r && iter == end) + { + std::cout << "-------------------------\n"; + std::cout << "Parsing succeeded\n"; + std::cout << "-------------------------\n"; + } + else + { + std::cout << "-------------------------\n"; + std::cout << "Parsing failed\n"; + std::cout << "-------------------------\n"; + } + + std::cout << "Bye... :-) \n\n"; + return 0; +} |