diff options
Diffstat (limited to 'src/boost/libs/regex/example/snippets/icu_example.cpp')
-rw-r--r-- | src/boost/libs/regex/example/snippets/icu_example.cpp | 188 |
1 files changed, 188 insertions, 0 deletions
diff --git a/src/boost/libs/regex/example/snippets/icu_example.cpp b/src/boost/libs/regex/example/snippets/icu_example.cpp new file mode 100644 index 000000000..6d1fc3bec --- /dev/null +++ b/src/boost/libs/regex/example/snippets/icu_example.cpp @@ -0,0 +1,188 @@ +/* + * + * Copyright (c) 2004 + * John Maddock + * + * Use, modification and distribution are subject to the + * Boost Software License, Version 1.0. (See accompanying file + * LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt) + * + */ + + /* + * LOCATION: see http://www.boost.org for most recent version. + * FILE mfc_example.cpp + * VERSION see <boost/version.hpp> + * DESCRIPTION: examples of using Boost.Regex with MFC and ATL string types. + */ + +#include <boost/regex/config.hpp> + +#ifdef BOOST_HAS_ICU + +#include <boost/regex/icu.hpp> +#include <iostream> +#include <assert.h> + +// +// Find out if *password* meets our password requirements, +// as defined by the regular expression *requirements*. +// +bool is_valid_password(const U_NAMESPACE_QUALIFIER UnicodeString& password, const U_NAMESPACE_QUALIFIER UnicodeString& requirements) +{ + return boost::u32regex_match(password, boost::make_u32regex(requirements)); +} + +// +// Extract filename part of a path from a UTF-8 encoded std::string and return the result +// as another std::string: +// +std::string get_filename(const std::string& path) +{ + boost::u32regex r = boost::make_u32regex("(?:\\A|.*\\\\)([^\\\\]+)"); + boost::smatch what; + if(boost::u32regex_match(path, what, r)) + { + // extract $1 as a std::string: + return what.str(1); + } + else + { + throw std::runtime_error("Invalid pathname"); + } +} + +U_NAMESPACE_QUALIFIER UnicodeString extract_greek(const U_NAMESPACE_QUALIFIER UnicodeString& text) +{ + // searches through some UTF-16 encoded text for a block encoded in Greek, + // this expression is imperfect, but the best we can do for now - searching + // for specific scripts is actually pretty hard to do right. + boost::u32regex r = boost::make_u32regex(L"[\\x{370}-\\x{3FF}](?:[^[:L*:]]|[\\x{370}-\\x{3FF}])*"); + boost::u16match what; + if(boost::u32regex_search(text, what, r)) + { + // extract $0 as a UnicodeString: + return U_NAMESPACE_QUALIFIER UnicodeString(what[0].first, what.length(0)); + } + else + { + throw std::runtime_error("No Greek found!"); + } +} + +void enumerate_currencies(const std::string& text) +{ + // enumerate and print all the currency symbols, along + // with any associated numeric values: + const char* re = + "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" + "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" + "(?(1)" + "|(?(2)" + "[[:Cf:][:Cc:][:Z*:]]*" + ")" + "[[:Sc:]]" + ")"; + boost::u32regex r = boost::make_u32regex(re); + boost::u32regex_iterator<std::string::const_iterator> i(boost::make_u32regex_iterator(text, r)), j; + while(i != j) + { + std::cout << (*i)[0] << std::endl; + ++i; + } +} + +void enumerate_currencies2(const std::string& text) +{ + // enumerate and print all the currency symbols, along + // with any associated numeric values: + const char* re = + "([[:Sc:]][[:Cf:][:Cc:][:Z*:]]*)?" + "([[:Nd:]]+(?:[[:Po:]][[:Nd:]]+)?)?" + "(?(1)" + "|(?(2)" + "[[:Cf:][:Cc:][:Z*:]]*" + ")" + "[[:Sc:]]" + ")"; + boost::u32regex r = boost::make_u32regex(re); + boost::u32regex_token_iterator<std::string::const_iterator> + i(boost::make_u32regex_token_iterator(text, r, 1)), j; + while(i != j) + { + std::cout << *i << std::endl; + ++i; + } +} + + +// +// Take a credit card number as a string of digits, +// and reformat it as a human readable string with "-" +// separating each group of four digit;, +// note that we're mixing a UTF-32 regex, with a UTF-16 +// string and a UTF-8 format specifier, and it still all +// just works: +// +const boost::u32regex e = boost::make_u32regex("\\A(\\d{3,4})[- ]?(\\d{4})[- ]?(\\d{4})[- ]?(\\d{4})\\z"); +const char* human_format = "$1-$2-$3-$4"; + +U_NAMESPACE_QUALIFIER UnicodeString human_readable_card_number(const U_NAMESPACE_QUALIFIER UnicodeString& s) +{ + return boost::u32regex_replace(s, e, human_format); +} + + +int main() +{ + // password checks using u32regex_match: + U_NAMESPACE_QUALIFIER UnicodeString pwd = "abcDEF---"; + U_NAMESPACE_QUALIFIER UnicodeString pwd_check = "(?=.*[[:lower:]])(?=.*[[:upper:]])(?=.*[[:punct:]]).{6,}"; + bool b = is_valid_password(pwd, pwd_check); + assert(b); + pwd = "abcD-"; + b = is_valid_password(pwd, pwd_check); + assert(!b); + // filename extraction with u32regex_match: + std::string file = "abc.hpp"; + file = get_filename(file); + assert(file == "abc.hpp"); + file = "c:\\a\\b\\c\\d.h"; + file = get_filename(file); + assert(file == "d.h"); + + // Greek text extraction with u32regex_search: + const UChar t[] = { + 'S', 'o', 'm', 'e', ' ', 'w', 'h', 'e', 'r', 'e', ' ', 'i', 'n', 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0 + }; + const UChar g[] = { + 0x0391, 0x039D, 0x0395, 0x0398, 0x0391, 0 + }; + U_NAMESPACE_QUALIFIER UnicodeString text = t; + U_NAMESPACE_QUALIFIER UnicodeString greek = extract_greek(text); + assert(greek == g); + + // extract currency symbols with associated value, use iterator interface: + std::string text2 = " $100.23 or \xC2\xA3""198.12 "; // \xC2\xA3 is the pound sign encoded in UTF-8 + enumerate_currencies(text2); + enumerate_currencies2(text2); + + U_NAMESPACE_QUALIFIER UnicodeString credit_card_number = "1234567887654321"; + credit_card_number = human_readable_card_number(credit_card_number); + assert(credit_card_number == "1234-5678-8765-4321"); + return 0; +} + +#else + +#include <iostream> + +int main() +{ + std::cout << "<NOTE>ICU support not enabled, feature unavailable</NOTE>"; + return 0; +} + + +#endif + |