summary | refs | log | tree | commit | diff | stats
path: root/src/boost/libs/xpressive/tools/perl2xpr.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/boost/libs/xpressive/tools/perl2xpr.cpp')
-rw-r--r-- src/boost/libs/xpressive/tools/perl2xpr.cpp 188
1 files changed, 188 insertions, 0 deletions
diff --git a/src/boost/libs/xpressive/tools/perl2xpr.cpp b/src/boost/libs/xpressive/tools/perl2xpr.cpp
new file mode 100644
index 00000000..66331761
--- /dev/null
+++ b/src/boost/libs/xpressive/tools/perl2xpr.cpp
@@ -0,0 +1,188 @@
+///////////////////////////////////////////////////////////////////////////////
+// perl2xpr.cpp
+// A utility for translating a Perl regular expression into an
+// xpressive static regular expression.
+//
+// Copyright 2007 Eric Niebler. Distributed under the Boost
+// Software License, Version 1.0. (See accompanying file
+// LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+
+#include <stack>
+#include <string>
+#include <iostream>
+#include <boost/xpressive/xpressive_static.hpp>
+#include <boost/xpressive/regex_actions.hpp>
+
+namespace x = boost::xpressive;
+using namespace x;
+
+int main(int argc, char *argv[]) // Parses the -i/-s/-m flags, then prints the xpressive static-regex text for the Perl regex in the next argument.
+{ // Returns 0 on success and -1 on an unknown option, a missing regex argument, or a regex the grammar cannot match.
+ int i = 1, j = 1; // i: index into argv; j: index of the current option letter within argv[i]
+ bool nocase = false; // set by -i; the final output is then wrapped in icase( ... )
+ char const *dot = " ~_n "; // text emitted for '.'; -s replaces it with " _ "
+ char const *bos = " bos "; // text emitted for '^'; -m replaces it with " bol "
+ char const *eos = " eos "; // text emitted for '$'; -m replaces it with " eol "
+ // Walk every leading argument that starts with '-', one letter at a time: the increment expression steps j to the next letter and, on reaching the terminating NUL, resets j to 1 and advances i.
+ for(; i < argc && '-' == *argv[i]; argv[i][++j]? 0: (j=1,++i))
+ {
+ switch(argv[i][j])
+ {
+ case 'i': // perl /i modifier
+ nocase = true;
+ break;
+ case 's': // perl /s modifier
+ dot = " _ ";
+ break;
+ case 'm': // perl /m modifier
+ bos = " bol ";
+ eos = " eol ";
+ break;
+ default: // also reached for a bare "-", where argv[i][1] is the terminating NUL
+ std::cerr << "Unknown option : " << argv[i] << std::endl; // prints the whole argument, not only the offending letter
+ return -1;
+ }
+ }
+ // argv[i] must now exist: it is the regex to translate. Any arguments after it are not looked at.
+ if(i == argc)
+ {
+ std::cerr << "Usage:\n perl2xpr [-i] [-s] [-m] 're'\n";
+ return -1;
+ }
+
+ // Local variables used by the semantic actions below
+ local<int> mark_nbr; // pre-incremented for each plain '(' to number the capture (NOTE(review): relies on local<int> starting at 0 - confirm)
+ local<std::string> tmp; // scratch string: holds the repeat<...> prefix, and carries a finished fragment to its parent in quant
+ local<std::stack<std::string> > strings; // stack of output fragments under construction; quant pushes one per atom and pops it into the entry below
+
+ // The rules in the dynamic regex grammar (assigned bottom-up below; group refers back to regex via x::ref)
+ cregex regex, alts, seq, quant, repeat, atom, escape, group, lit, charset, setelem;
+ // lit: any single character that is not one of the listed regex metacharacters.
+ lit = ~(set='.','^','$','*','+','?','(',')','{','}','[',']','\\','|')
+ ;
+ // escape: what follows a backslash (see atom). The listed class letters emit the matching xpressive token, a digit emits a back-reference sN, and any other character is emitted as a literal.
+ escape = as_xpr('b') [top(strings) += " _b "]
+ | as_xpr('B') [top(strings) += " ~_b "]
+ | as_xpr('d') [top(strings) += " _d "]
+ | as_xpr('D') [top(strings) += " ~_d "]
+ | as_xpr('s') [top(strings) += " _s "]
+ | as_xpr('S') [top(strings) += " ~_s "]
+ | as_xpr('w') [top(strings) += " _w "]
+ | as_xpr('W') [top(strings) += " ~_w "]
+ | _d [top(strings) += " s" + _ + " "]
+ | _ [top(strings) += " as_xpr('" + _ + "') "]
+ ;
+ // group: what follows '(' (see atom). A recognized "?..." prefix selects the wrapper text; with no prefix (nil) the group is emitted as a numbered capture. The body recurses into regex by reference.
+ group = (
+ as_xpr("?:") [top(strings) += " ( "]
+ | as_xpr("?i:") [top(strings) += " icase( "]
+ | as_xpr("?>") [top(strings) += " keep( "]
+ | as_xpr("?=") [top(strings) += " before( "]
+ | as_xpr("?!") [top(strings) += " ~before( "]
+ | as_xpr("?<=") [top(strings) += " after( "]
+ | as_xpr("?<!") [top(strings) += " ~after( "]
+ | nil [top(strings) += " ( s" + as<std::string>(++mark_nbr) + "= "]
+ )
+ >> x::ref(regex)
+ >> as_xpr(')') [top(strings) += " ) "]
+ ;
+ // setelem: one composite element of a bracket set: a backslash-escaped character, a [:name:] or [:^name:] class (name emitted verbatim, '^' emitted as "~"), or a two-ended range x-y.
+ setelem = as_xpr('\\') >> _ [top(strings) += " as_xpr('" + _ + "') "]
+ | "[:" >> !as_xpr('^') [top(strings) += "~"]
+ >> (+_w) [top(strings) += _ ]
+ >> ":]"
+ | (
+ (s1=~as_xpr(']'))
+ >> '-'
+ >> (s2=~as_xpr(']'))
+ ) [top(strings) += "range('" + s1 + "','" + s2 + "')"]
+ ;
+ // charset: what follows '[' (see atom). An optional leading '^' emits " ~ "; elements are joined with " | " inside set[ ... ]. Only a plain character in first position is wrapped in as_xpr(); later ones are bare character literals (presumably so the '|' chain starts with an xpressive operand - confirm).
+ charset = !as_xpr('^') [top(strings) += " ~ "]
+ >> nil [top(strings) += " set[ "]
+ >> (
+ setelem
+ | (~as_xpr(']')) [top(strings) += " as_xpr('" + _ + "') "]
+ )
+ >>*(
+ nil [top(strings) += " | "]
+ >> (
+ setelem
+ | (~as_xpr(']')) [top(strings) += "'" + _ + "'"]
+ )
+ )
+ >> as_xpr(']') [top(strings) += " ] "]
+ ;
+ // atom: a run of literal characters, one of . ^ $, or a backslash escape, a group, or a bracket set. A literal run never includes a character that is directly followed by * + ? or {, so that character is matched alone by the second alternative.
+ atom = (
+ +(lit >> ~before((set='*','+','?','{')))
+ | lit
+ ) [top(strings) += " as_xpr(\"" + _ + "\") "]
+ | as_xpr('.') [top(strings) += dot]
+ | as_xpr('^') [top(strings) += bos]
+ | as_xpr('$') [top(strings) += eos]
+ | '\\' >> escape
+ | '(' >> group
+ | '[' >> charset
+ ;
+ // repeat: a {n}, {n,} or {n,m} quantifier. The " repeat<...>" prefix is assembled in tmp (a missing upper bound is written as inf) and then wrapped around the current fragment.
+ repeat = as_xpr('{') [tmp = " repeat<"]
+ >> (+_d) [tmp += _]
+ >> !(
+ as_xpr(',') [tmp += ","]
+ >> (
+ (+_d) [tmp += _]
+ | nil [tmp += "inf"]
+ )
+ )
+ >> as_xpr('}') [top(strings) = tmp + ">( " + top(strings) + " ) "]
+ ;
+ // quant: one atom plus an optional quantifier. A fresh fragment is pushed for the atom so that the quantifier text can be prepended to that atom alone; a '?' after the quantifier prepends " - ".
+ quant = nil [push(strings, "")]
+ >> atom
+ >> !(
+ (
+ as_xpr("*") [insert(top(strings), 0, " * ")] // [strings->*top()->*insert(0, " * ")]
+ | as_xpr("+") [insert(top(strings), 0, " + ")] // [strings->*top()->*insert(0, " + ")]
+ | as_xpr("?") [insert(top(strings), 0, " ! ")] // [strings->*top()->*insert(0, " ! ")]
+ | repeat
+ )
+ >> !as_xpr('?') [insert(top(strings), 0, " - ")]
+ )
+ >> nil [tmp = top(strings), pop(strings), top(strings) += tmp] // pop the finished fragment and append it to the enclosing one
+ ;
+ // seq: one or more quants, joined with " >> " in the output.
+ seq = quant
+ >> *(
+ nil [top(strings) += " >> "]
+ >> quant
+ )
+ ;
+ // alts: one or more seqs separated by '|' in the input, joined with " | " in the output.
+ alts = seq
+ >> *(
+ as_xpr('|') [top(strings) += " | "]
+ >> seq
+ )
+ ;
+ // regex: the start rule, and the rule that group recurses into.
+ regex = alts
+ ;
+ // Seed the stack with the fragment that collects the whole translation, then match the grammar against the regex argument.
+ strings.get().push("");
+ if(!regex_match(argv[i], regex))
+ {
+ std::cerr << "ERROR: unrecognized regular expression" << std::endl;
+ return -1;
+ }
+ else if(nocase) // -i was given: wrap the whole translation in icase( ... )
+ {
+ std::cout << "icase( " << strings.get().top() << " )" << std::endl;
+ }
+ else
+ {
+ std::cout << strings.get().top() << std::endl;
+ }
+
+ return 0;
+}