diff options
Diffstat (limited to 'src/boost/libs/xpressive/tools/perl2xpr.cpp')
-rw-r--r-- | src/boost/libs/xpressive/tools/perl2xpr.cpp | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/src/boost/libs/xpressive/tools/perl2xpr.cpp b/src/boost/libs/xpressive/tools/perl2xpr.cpp new file mode 100644 index 00000000..66331761 --- /dev/null +++ b/src/boost/libs/xpressive/tools/perl2xpr.cpp @@ -0,0 +1,188 @@ +/////////////////////////////////////////////////////////////////////////////// +// perl2xpr.cpp +// A utility for translating a Perl regular expression into an +// xpressive static regular expression. +// +// Copyright 2007 Eric Niebler. Distributed under the Boost +// Software License, Version 1.0. (See accompanying file +// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + +#include <stack> +#include <string> +#include <iostream> +#include <boost/xpressive/xpressive_static.hpp> +#include <boost/xpressive/regex_actions.hpp> + +namespace x = boost::xpressive; +using namespace x; + +int main(int argc, char *argv[]) +{ + int i = 1, j = 1; + bool nocase = false; + char const *dot = " ~_n "; + char const *bos = " bos "; + char const *eos = " eos "; + + for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i)) + { + switch(argv[i][j]) + { + case 'i': // perl /i modifier + nocase = true; + break; + case 's': // perl /s modifier + dot = " _ "; + break; + case 'm': // perl /m modifier + bos = " bol "; + eos = " eol "; + break; + default: + std::cerr << "Unknown option : " << argv[i] << std::endl; + return -1; + } + } + + if(i == argc) + { + std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n"; + return -1; + } + + // Local variables used by the semantic actions below + local<int> mark_nbr; + local<std::string> tmp; + local<std::stack<std::string> > strings; + + // The rules in the dynamic regex grammar + cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem; + + lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|') + ; + + escape = as_xpr('b') [top(strings) += " _b "] + | as_xpr('B') [top(strings) += " ~_b "] + | as_xpr('d') [top(strings) += " _d "] + | as_xpr('D') [top(strings) += " ~_d "] + | as_xpr('s') [top(strings) += " _s "] + | as_xpr('S') [top(strings) += " ~_s "] + | as_xpr('w') [top(strings) += " _w "] + | as_xpr('W') [top(strings) += " ~_w "] + | _d [top(strings) += " s" + _ + " "] + | _ [top(strings) += " as_xpr('" + _ + "') "] + ; + + group = ( + as_xpr("?:") [top(strings) += " ( "] + | as_xpr("?i:") [top(strings) += " icase( "] + | as_xpr("?>") [top(strings) += " keep( "] + | as_xpr("?=") [top(strings) += " before( "] + | as_xpr("?!") [top(strings) += " ~before( "] + | as_xpr("?<=") [top(strings) += " after( "] + | as_xpr("?<!") [top(strings) += " ~after( "] + | nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "] + ) + >> x::ref(regex) + >> as_xpr(')') [top(strings) += " ) "] + ; + + setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "] + | "[:" >> !as_xpr('^') [top(strings) += "~"] + >> (+_w) [top(strings) += _ ] + >> ":]" + | ( + (s1=~as_xpr(']')) + >> '-' + >> (s2=~as_xpr(']')) + ) [top(strings) += "range('" + s1 + "','" + s2 + "')"] + ; + + charset = !as_xpr('^') [top(strings) += " ~ "] + >> nil [top(strings) += " set[ "] + >> ( + setelem + | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "] + ) + >>*( + nil [top(strings) += " | "] + >> ( + setelem + | (~as_xpr(']')) [top(strings) += "'" + _ + "'"] + ) + ) + >> as_xpr(']') [top(strings) += " ] "] + ; + + atom = ( + +(lit >> ~before((set='*','+','?','{'))) + | lit + ) [top(strings) += " as_xpr(\"" + _ + "\") "] + | as_xpr('.') [top(strings) += dot] + | as_xpr('^') [top(strings) += bos] + | as_xpr('$') [top(strings) += eos] + | '\\' >> escape + | '(' >> group + | '[' >> charset + ; + + repeat = as_xpr('{') [tmp = " repeat<"] + >> (+_d) [tmp += _] + >> !( + as_xpr(',') [tmp += ","] + >> ( + (+_d) [tmp += _] + | nil [tmp += "inf"] + ) + ) + >> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "] + ; + + quant = nil [push(strings, "")] + >> atom + >> !( + ( + as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")] + | as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")] + | as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")] + | repeat + ) + >> !as_xpr('?') [insert(top(strings), 0, " - ")] + ) + >> nil [tmp = top(strings), pop(strings), top(strings) += tmp] + ; + + seq = quant + >> *( + nil [top(strings) += " >> "] + >> quant + ) + ; + + alts = seq + >> *( + as_xpr('|') [top(strings) += " | "] + >> seq + ) + ; + + regex = alts + ; + + strings.get().push(""); + if(!regex_match(argv[i], regex)) + { + std::cerr << "ERROR: unrecognized regular expression" << std::endl; + return -1; + } + else if(nocase) + { + std::cout << "icase( " << strings.get().top() << " )" << std::endl; + } + else + { + std::cout << strings.get().top() << std::endl; + } + + return 0; +} |