summaryrefslogtreecommitdiffstats
path: root/src/boost/libs/regex/example/snippets/icu_example.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/boost/libs/regex/example/snippets/icu_example.cpp')
-rw-r--r--src/boost/libs/regex/example/snippets/icu_example.cpp188
1 files changed, 188 insertions, 0 deletions
diff --git a/src/boost/libs/regex/example/snippets/icu_example.cpp b/src/boost/libs/regex/example/snippets/icu_example.cpp
new file mode 100644
index 000000000..6d1fc3bec
--- /dev/null
+++ b/src/boost/libs/regex/example/snippets/icu_example.cpp
@@ -0,0 +1,188 @@
+/*
+ *
+ * Copyright (c) 2004
+ * John Maddock
+ *
+ * Use, modification and distribution are subject to the
+ * Boost Software License, Version 1.0. (See accompanying file
+ * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)
+ *
+ */
+
+ /*
+ * LOCATION: see http://www.boost.org for most recent version.
+ * FILE mfc_example.cpp
+ * VERSION see <boost/version.hpp>
+ * DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types.
+ */
+
+#include <boost/regex/config.hpp>
+
+#ifdef BOOST_HAS_ICU
+
+#include <boost/regex/icu.hpp>
+#include <iostream>
+#include <assert.h>
+
+//
+// Find out if *password* meets our password requirements,
+// as defined by the regular expression *requirements*.
+//
+bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements)
+{
+ return boost::u32regex_match(password, boost::make_u32regex(requirements));
+}
+
+//
+// Extract filename part of a path from a UTF-8 encoded std::string and return the result
+// as another std::string:
+//
+std::string get_filename(const std::string& path)
+{
+ boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)");
+ boost::smatch what;
+ if(boost::u32regex_match(path, what, r))
+ {
+ // extract $1 as a std::string:
+ return what.str(1);
+ }
+ else
+ {
+ throw std::runtime_error("Invalid pathname");
+ }
+}
+
+U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text)
+{
+ // searches through some UTF-16 encoded text for a block encoded in Greek,
+ // this expression is imperfect, but the best we can do for now - searching
+ // for specific scripts is actually pretty hard to do right.
+ boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*");
+ boost::u16match what;
+ if(boost::u32regex_search(text, what, r))
+ {
+ // extract $0 as a UnicodeString:
+ return U_NAMESPACE_QUALIFIER UnicodeString(what[0].first, what.length(0));
+ }
+ else
+ {
+ throw std::runtime_error("No Greek found!");
+ }
+}
+
+void enumerate_currencies(const std::string& text)
+{
+ // enumerate and print all the currency symbols, along
+ // with any associated numeric values:
+ const char* re =
+ "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
+ "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
+ "(?(1)"
+ "|(?(2)"
+ "[[:Cf:][:Cc:][:Z*:]]*"
+ ")"
+ "[[:Sc:]]"
+ ")";
+ boost::u32regex r = boost::make_u32regex(re);
+ boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j;
+ while(i != j)
+ {
+ std::cout << (*i)[0] << std::endl;
+ ++i;
+ }
+}
+
+void enumerate_currencies2(const std::string& text)
+{
+ // enumerate and print all the currency symbols, along
+ // with any associated numeric values:
+ const char* re =
+ "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?"
+ "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?"
+ "(?(1)"
+ "|(?(2)"
+ "[[:Cf:][:Cc:][:Z*:]]*"
+ ")"
+ "[[:Sc:]]"
+ ")";
+ boost::u32regex r = boost::make_u32regex(re);
+ boost::u32regex_token_iterator<std::string::const_iterator>
+ i(boost::make_u32regex_token_iterator(text, r, 1)), j;
+ while(i != j)
+ {
+ std::cout << *i << std::endl;
+ ++i;
+ }
+}
+
+
+//
+// Take a credit card number as a string of digits,
+// and reformat it as a human readable string with "-"
+// separating each group of four digit;,
+// note that we're mixing a UTF-32 regex, with a UTF-16
+// string and a UTF-8 format specifier, and it still all
+// just works:
+//
+const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z");
+const char* human_format = "$1-$2-$3-$4";
+
+U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s)
+{
+ return boost::u32regex_replace(s, e, human_format);
+}
+
+
+int main()
+{
+ // password checks using u32regex_match:
+ U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---";
+ U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}";
+ bool b = is_valid_password(pwd, pwd_check);
+ assert(b);
+ pwd = "abcD-";
+ b = is_valid_password(pwd, pwd_check);
+ assert(!b);
+ // filename extraction with u32regex_match:
+ std::string file = "abc.hpp";
+ file = get_filename(file);
+ assert(file == "abc.hpp");
+ file = "c:\\a\\b\\c\\d.h";
+ file = get_filename(file);
+ assert(file == "d.h");
+
+ // Greek text extraction with u32regex_search:
+ const UChar t[] = {
+ 'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
+ };
+ const UChar g[] = {
+ 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0
+ };
+ U_NAMESPACE_QUALIFIER UnicodeString text = t;
+ U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text);
+ assert(greek == g);
+
+ // extract currency symbols with associated value, use iterator interface:
+ std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8
+ enumerate_currencies(text2);
+ enumerate_currencies2(text2);
+
+ U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321";
+ credit_card_number = human_readable_card_number(credit_card_number);
+ assert(credit_card_number == "1234-5678-8765-4321");
+ return 0;
+}
+
+#else
+
+#include <iostream>
+
+int main()
+{
+ std::cout << "<NOTE>ICU support not enabled, feature unavailable</NOTE>";
+ return 0;
+}
+
+
+#endif
+