path: root/src/boost/libs/spirit/example/lex/example3.cpp
//  Copyright (c) 2001-2010 Hartmut Kaiser
// 
//  Distributed under the Boost Software License, Version 1.0. (See accompanying 
//  file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

//  This example shows how to create a simple lexer recognizing a couple of
//  different tokens, and how to use it with a grammar. The grammar backtracks
//  heavily, which makes it a good candidate for lexer-based parsing: all
//  tokens are scanned and generated only once, even if backtracking is
//  required, which speeds up the overall parsing process considerably and
//  outweighs the overhead of setting up the lexer.
//
//  Additionally, this example demonstrates how to define a token set usable
//  as the skip parser during parsing, allowing several tokens to be ignored.
//
//  This example recognizes couplets, which are sequences of numbers enclosed
//  in matching pairs of parentheses. See the comments below for details and
//  examples.
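//
//  For instance, an input along these lines (illustrative only; not
//  necessarily the contents of the accompanying example3.input file) parses
//  successfully:
//
//      (1) (1 2) ((3 4) (5 6 7)) ... (((8)))
//
//  while input with unbalanced parentheses, such as "(1 2", is rejected.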

// #define BOOST_SPIRIT_LEXERTL_DEBUG
// #define BOOST_SPIRIT_DEBUG

#include <boost/config/warning_disable.hpp>
#include <boost/spirit/include/qi.hpp>
#include <boost/spirit/include/lex_lexertl.hpp>

#include <iostream>
#include <fstream>
#include <string>

#include "example.hpp"

using namespace boost::spirit;

///////////////////////////////////////////////////////////////////////////////
//  Token definition
///////////////////////////////////////////////////////////////////////////////
template <typename Lexer>
struct example3_tokens : lex::lexer<Lexer>
{
    example3_tokens()
    {
        // define the tokens to match
        ellipses = "\\.\\.\\.";
        number = "[0-9]+";

        // associate the tokens and the token set with the lexer
        this->self = ellipses | '(' | ')' | number;

        // define the whitespace to ignore (spaces, tabs, newlines and C-style 
        // comments)
        this->self("WS") 
            =   lex::token_def<>("[ \\t\\n]+")          // whitespace
            |   "\\/\\*[^*]*\\*+([^/*][^*]*\\*+)*\\/"   // C style comments
            ;
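
        // Note: the comment pattern above is the classic regular expression
        // for C-style comments: "/*", any number of non-'*' characters, one
        // or more '*'s (the group repeating as needed), closed by the final
        // "*/".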
    }

    // these tokens expose the iterator_range of the matched input sequence
    lex::token_def<> ellipses, number;
};

///////////////////////////////////////////////////////////////////////////////
//  Grammar definition
///////////////////////////////////////////////////////////////////////////////
template <typename Iterator, typename Lexer>
struct example3_grammar 
  : qi::grammar<Iterator, qi::in_state_skipper<Lexer> >
{
    template <typename TokenDef>
    example3_grammar(TokenDef const& tok)
      : example3_grammar::base_type(start)
    {
        start 
            =  +(couplet | tok.ellipses)
            ;

        //  A couplet matches nested left and right parentheses.
        //  For example:
        //    (1) (1 2) (1 2 3) ...
        //    ((1)) ((1 2)(3 4)) (((1) (2 3) (1 2 (3) 4))) ...
        //    (((1))) ...
        couplet
            =   tok.number
            |   '(' >> +couplet >> ')'
            ;

        BOOST_SPIRIT_DEBUG_NODE(start);
        BOOST_SPIRIT_DEBUG_NODE(couplet);
    }

    qi::rule<Iterator, qi::in_state_skipper<Lexer> > start, couplet;
};

///////////////////////////////////////////////////////////////////////////////
int main()
{
    // iterator type used to expose the underlying input stream
    typedef std::string::iterator base_iterator_type;

    // This is the token type to return from the lexer iterator
    typedef lex::lexertl::token<base_iterator_type> token_type;
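    // Since we do not list any token attribute types here, each token
    // exposes the default attribute: the iterator_range of the matched
    // input sequence.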

    // This is the lexer type to use to tokenize the input.
    // Here we use the lexertl based lexer engine.
    typedef lex::lexertl::lexer<token_type> lexer_type;

    // This is the token definition type (derived from the given lexer type).
    typedef example3_tokens<lexer_type> example3_tokens;

    // this is the iterator type exposed by the lexer 
    typedef example3_tokens::iterator_type iterator_type;

    // this is the type of the grammar to parse
    typedef example3_grammar<iterator_type, example3_tokens::lexer_def> example3_grammar;

    // now we use the types defined above to create the lexer and grammar
    // object instances needed to invoke the parsing process
    example3_tokens tokens;                         // Our lexer
    example3_grammar calc(tokens);                  // Our parser

    std::string str (read_from_file("example3.input"));

    // At this point we generate the iterator pair used to expose the
    // tokenized input stream.
    std::string::iterator it = str.begin();
    iterator_type iter = tokens.begin(it, str.end());
    iterator_type end = tokens.end();

    // Parsing is done based on the token stream, not the character 
    // stream read from the input.
    // Note how we use the lexer defined above as the skip parser.
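    // The in_state("WS")[tokens.self] component temporarily switches the
    // lexer into the "WS" state while skipping, so only the token
    // definitions associated with that state (whitespace and C-style
    // comments) are matched and ignored.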
    bool r = qi::phrase_parse(iter, end, calc, qi::in_state("WS")[tokens.self]);

    if (r && iter == end)
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing succeeded\n";
        std::cout << "-------------------------\n";
    }
    else
    {
        std::cout << "-------------------------\n";
        std::cout << "Parsing failed\n";
        std::cout << "-------------------------\n";
    }

    std::cout << "Bye... :-) \n\n";
    return 0;
}