summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/unicode
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 19:33:14 +0000
commit36d22d82aa202bb199967e9512281e9a53db42c9 (patch)
tree105e8c98ddea1c1e4784a60a5a6410fa416be2de /intl/icu/source/common/unicode
parentInitial commit. (diff)
downloadfirefox-esr-upstream.tar.xz
firefox-esr-upstream.zip
Adding upstream version 115.7.0esr.upstream/115.7.0esrupstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r--intl/icu/source/common/unicode/appendable.h239
-rw-r--r--intl/icu/source/common/unicode/brkiter.h670
-rw-r--r--intl/icu/source/common/unicode/bytestream.h307
-rw-r--r--intl/icu/source/common/unicode/bytestrie.h568
-rw-r--r--intl/icu/source/common/unicode/bytestriebuilder.h193
-rw-r--r--intl/icu/source/common/unicode/caniter.h214
-rw-r--r--intl/icu/source/common/unicode/casemap.h497
-rw-r--r--intl/icu/source/common/unicode/char16ptr.h313
-rw-r--r--intl/icu/source/common/unicode/chariter.h734
-rw-r--r--intl/icu/source/common/unicode/dbbi.h48
-rw-r--r--intl/icu/source/common/unicode/docmain.h237
-rw-r--r--intl/icu/source/common/unicode/dtintrv.h164
-rw-r--r--intl/icu/source/common/unicode/edits.h531
-rw-r--r--intl/icu/source/common/unicode/enumset.h69
-rw-r--r--intl/icu/source/common/unicode/errorcode.h144
-rw-r--r--intl/icu/source/common/unicode/filteredbrk.h152
-rw-r--r--intl/icu/source/common/unicode/icudataver.h43
-rw-r--r--intl/icu/source/common/unicode/icuplug.h391
-rw-r--r--intl/icu/source/common/unicode/idna.h330
-rw-r--r--intl/icu/source/common/unicode/localebuilder.h309
-rw-r--r--intl/icu/source/common/unicode/localematcher.h708
-rw-r--r--intl/icu/source/common/unicode/localpointer.h595
-rw-r--r--intl/icu/source/common/unicode/locdspnm.h211
-rw-r--r--intl/icu/source/common/unicode/locid.h1272
-rw-r--r--intl/icu/source/common/unicode/messagepattern.h949
-rw-r--r--intl/icu/source/common/unicode/normalizer2.h771
-rw-r--r--intl/icu/source/common/unicode/normlzr.h816
-rw-r--r--intl/icu/source/common/unicode/parseerr.h94
-rw-r--r--intl/icu/source/common/unicode/parsepos.h237
-rw-r--r--intl/icu/source/common/unicode/platform.h881
-rw-r--r--intl/icu/source/common/unicode/ptypes.h130
-rw-r--r--intl/icu/source/common/unicode/putil.h183
-rw-r--r--intl/icu/source/common/unicode/rbbi.h745
-rw-r--r--intl/icu/source/common/unicode/rep.h266
-rw-r--r--intl/icu/source/common/unicode/resbund.h498
-rw-r--r--intl/icu/source/common/unicode/schriter.h187
-rw-r--r--intl/icu/source/common/unicode/simpleformatter.h341
-rw-r--r--intl/icu/source/common/unicode/std_string.h41
-rw-r--r--intl/icu/source/common/unicode/strenum.h281
-rw-r--r--intl/icu/source/common/unicode/stringoptions.h190
-rw-r--r--intl/icu/source/common/unicode/stringpiece.h343
-rw-r--r--intl/icu/source/common/unicode/stringtriebuilder.h426
-rw-r--r--intl/icu/source/common/unicode/symtable.h119
-rw-r--r--intl/icu/source/common/unicode/ubidi.h2211
-rw-r--r--intl/icu/source/common/unicode/ubiditransform.h326
-rw-r--r--intl/icu/source/common/unicode/ubrk.h647
-rw-r--r--intl/icu/source/common/unicode/ucasemap.h388
-rw-r--r--intl/icu/source/common/unicode/ucat.h160
-rw-r--r--intl/icu/source/common/unicode/uchar.h4169
-rw-r--r--intl/icu/source/common/unicode/ucharstrie.h623
-rw-r--r--intl/icu/source/common/unicode/ucharstriebuilder.h193
-rw-r--r--intl/icu/source/common/unicode/uchriter.h393
-rw-r--r--intl/icu/source/common/unicode/uclean.h262
-rw-r--r--intl/icu/source/common/unicode/ucnv.h2056
-rw-r--r--intl/icu/source/common/unicode/ucnv_cb.h164
-rw-r--r--intl/icu/source/common/unicode/ucnv_err.h465
-rw-r--r--intl/icu/source/common/unicode/ucnvsel.h193
-rw-r--r--intl/icu/source/common/unicode/uconfig.h466
-rw-r--r--intl/icu/source/common/unicode/ucpmap.h158
-rw-r--r--intl/icu/source/common/unicode/ucptrie.h645
-rw-r--r--intl/icu/source/common/unicode/ucurr.h466
-rw-r--r--intl/icu/source/common/unicode/udata.h440
-rw-r--r--intl/icu/source/common/unicode/udisplaycontext.h173
-rw-r--r--intl/icu/source/common/unicode/uenum.h209
-rw-r--r--intl/icu/source/common/unicode/uidna.h776
-rw-r--r--intl/icu/source/common/unicode/uiter.h709
-rw-r--r--intl/icu/source/common/unicode/uldnames.h307
-rw-r--r--intl/icu/source/common/unicode/uloc.h1393
-rw-r--r--intl/icu/source/common/unicode/umachine.h459
-rw-r--r--intl/icu/source/common/unicode/umisc.h62
-rw-r--r--intl/icu/source/common/unicode/umutablecptrie.h240
-rw-r--r--intl/icu/source/common/unicode/unifilt.h136
-rw-r--r--intl/icu/source/common/unicode/unifunct.h132
-rw-r--r--intl/icu/source/common/unicode/unimatch.h168
-rw-r--r--intl/icu/source/common/unicode/uniset.h1793
-rw-r--r--intl/icu/source/common/unicode/unistr.h4784
-rw-r--r--intl/icu/source/common/unicode/unorm.h476
-rw-r--r--intl/icu/source/common/unicode/unorm2.h606
-rw-r--r--intl/icu/source/common/unicode/uobject.h324
-rw-r--r--intl/icu/source/common/unicode/urename.h1975
-rw-r--r--intl/icu/source/common/unicode/urep.h157
-rw-r--r--intl/icu/source/common/unicode/ures.h911
-rw-r--r--intl/icu/source/common/unicode/uscript.h724
-rw-r--r--intl/icu/source/common/unicode/uset.h1290
-rw-r--r--intl/icu/source/common/unicode/usetiter.h323
-rw-r--r--intl/icu/source/common/unicode/ushape.h476
-rw-r--r--intl/icu/source/common/unicode/usprep.h274
-rw-r--r--intl/icu/source/common/unicode/ustring.h1685
-rw-r--r--intl/icu/source/common/unicode/ustringtrie.h97
-rw-r--r--intl/icu/source/common/unicode/utext.h1603
-rw-r--r--intl/icu/source/common/unicode/utf.h225
-rw-r--r--intl/icu/source/common/unicode/utf16.h734
-rw-r--r--intl/icu/source/common/unicode/utf32.h25
-rw-r--r--intl/icu/source/common/unicode/utf8.h882
-rw-r--r--intl/icu/source/common/unicode/utf_old.h1201
-rw-r--r--intl/icu/source/common/unicode/utrace.h506
-rw-r--r--intl/icu/source/common/unicode/utypes.h730
-rw-r--r--intl/icu/source/common/unicode/uvernum.h191
-rw-r--r--intl/icu/source/common/unicode/uversion.h187
99 files changed, 57805 insertions, 0 deletions
diff --git a/intl/icu/source/common/unicode/appendable.h b/intl/icu/source/common/unicode/appendable.h
new file mode 100644
index 0000000000..0e37f4562a
--- /dev/null
+++ b/intl/icu/source/common/unicode/appendable.h
@@ -0,0 +1,239 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: appendable.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __APPENDABLE_H__
+#define __APPENDABLE_H__
+
+/**
+ * \file
+ * \brief C++ API: Appendable class: Sink for Unicode code points and 16-bit code units (char16_ts).
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * Base class for objects to which Unicode characters and strings can be appended.
+ * Combines elements of Java Appendable and ICU4C ByteSink.
+ *
+ * This class can be used in APIs where it does not matter whether the actual destination is
+ * a UnicodeString, a char16_t[] array, a UnicodeSet, or any other object
+ * that receives and processes characters and/or strings.
+ *
+ * Implementation classes must implement at least appendCodeUnit(char16_t).
+ * The base class provides default implementations for the other methods.
+ *
+ * The methods do not take UErrorCode parameters.
+ * If an error occurs (e.g., out-of-memory),
+ * in addition to returning false from failing operations,
+ * the implementation must prevent unexpected behavior (e.g., crashes)
+ * from further calls and should make the error condition available separately
+ * (e.g., store a UErrorCode, make/keep a UnicodeString bogus).
+ * @stable ICU 4.8
+ */
+class U_COMMON_API Appendable : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Appendable();
+
+ /**
+ * Appends a 16-bit code unit.
+ * @param c code unit
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodeUnit(char16_t c) = 0;
+
+ /**
+ * Appends a code point.
+ * The default implementation calls appendCodeUnit(char16_t) once or twice.
+ * @param c code point 0..0x10ffff
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodePoint(UChar32 c);
+
+ /**
+ * Appends a string.
+ * The default implementation calls appendCodeUnit(char16_t) for each code unit.
+ * @param s string, must not be nullptr if length!=0
+ * @param length string length, or -1 if NUL-terminated
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendString(const char16_t *s, int32_t length);
+
+ /**
+ * Tells the object that the caller is going to append roughly
+ * appendCapacity char16_ts. A subclass might use this to pre-allocate
+ * a larger buffer if necessary.
+ * The default implementation does nothing. (It always returns true.)
+ * @param appendCapacity estimated number of char16_ts that will be appended
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool reserveAppendCapacity(int32_t appendCapacity);
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratchCapacity>=minCapacity.
+ * The returned buffer is only valid until the next operation
+ * on this Appendable.
+ *
+ * After writing at most *resultCapacity char16_ts, call appendString() with the
+ * pointer returned from this function and the number of char16_ts written.
+ * Many appendString() implementations will avoid copying char16_ts if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * \code
+ * int32_t capacity;
+ * char16_t* buffer = app.getAppendBuffer(..., &capacity);
+ * ... Write n char16_ts into buffer, with n <= capacity.
+ * app.appendString(buffer, n);
+ * \endcode
+ * In many implementations, that call to append will avoid copying char16_ts.
+ *
+ * If the Appendable allocates or reallocates an internal buffer, it should use
+ * the desiredCapacityHint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desiredCapacityHint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix to it to appendString().
+ * That is, it is not correct to pass an interior pointer to appendString().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratchCapacity capacity of the scratch buffer
+ * @param resultCapacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *resultCapacity>=minCapacity
+ * @stable ICU 4.8
+ */
+ virtual char16_t *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ char16_t *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity);
+};
+
+/**
+ * An Appendable implementation which writes to a UnicodeString.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UnicodeStringAppendable : public Appendable {
+public:
+ /**
+ * Aliases the UnicodeString (keeps its reference) for writing.
+ * @param s The UnicodeString to which this Appendable will write.
+ * @stable ICU 4.8
+ */
+ explicit UnicodeStringAppendable(UnicodeString &s) : str(s) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~UnicodeStringAppendable();
+
+ /**
+ * Appends a 16-bit code unit to the string.
+ * @param c code unit
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodeUnit(char16_t c) override;
+
+ /**
+ * Appends a code point to the string.
+ * @param c code point 0..0x10ffff
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendCodePoint(UChar32 c) override;
+
+ /**
+ * Appends a string to the UnicodeString.
+ * @param s string, must not be nullptr if length!=0
+ * @param length string length, or -1 if NUL-terminated
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool appendString(const char16_t *s, int32_t length) override;
+
+ /**
+ * Tells the UnicodeString that the caller is going to append roughly
+ * appendCapacity char16_ts.
+ * @param appendCapacity estimated number of char16_ts that will be appended
+ * @return true if the operation succeeded
+ * @stable ICU 4.8
+ */
+ virtual UBool reserveAppendCapacity(int32_t appendCapacity) override;
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *resultCapacity. Guarantees *resultCapacity>=minCapacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratchCapacity>=minCapacity.
+ * The returned buffer is only valid until the next write operation
+ * on the UnicodeString.
+ *
+ * For details see Appendable::getAppendBuffer().
+ *
+ * @param minCapacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desiredCapacityHint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratchCapacity capacity of the scratch buffer
+ * @param resultCapacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *resultCapacity>=minCapacity
+ * @stable ICU 4.8
+ */
+ virtual char16_t *getAppendBuffer(int32_t minCapacity,
+ int32_t desiredCapacityHint,
+ char16_t *scratch, int32_t scratchCapacity,
+ int32_t *resultCapacity) override;
+
+private:
+ UnicodeString &str;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __APPENDABLE_H__
diff --git a/intl/icu/source/common/unicode/brkiter.h b/intl/icu/source/common/unicode/brkiter.h
new file mode 100644
index 0000000000..108652799e
--- /dev/null
+++ b/intl/icu/source/common/unicode/brkiter.h
@@ -0,0 +1,670 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*
+* File brkiter.h
+*
+* Modification History:
+*
+* Date Name Description
+* 02/18/97 aliu Added typedef for TextCount. Made DONE const.
+* 05/07/97 aliu Fixed DLL declaration.
+* 07/09/97 jfitz Renamed BreakIterator and interface synced with JDK
+* 08/11/98 helena Sync-up JDK1.2.
+* 01/13/2000 helena Added UErrorCode parameter to createXXXInstance methods.
+********************************************************************************
+*/
+
+#ifndef BRKITER_H
+#define BRKITER_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Break Iterator.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if UCONFIG_NO_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+
+/*
+ * Allow the declaration of APIs with pointers to BreakIterator
+ * even when break iteration is removed from the build.
+ */
+class BreakIterator;
+
+U_NAMESPACE_END
+
+#else
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#include "unicode/locid.h"
+#include "unicode/ubrk.h"
+#include "unicode/strenum.h"
+#include "unicode/utext.h"
+#include "unicode/umisc.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * The BreakIterator class implements methods for finding the location
+ * of boundaries in text. BreakIterator is an abstract base class.
+ * Instances of BreakIterator maintain a current position and scan over
+ * text returning the index of characters where boundaries occur.
+ * <p>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <p>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <p>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <p>
+ * Character boundary analysis allows users to interact with
+ * characters as they expect to, for example, when moving the cursor
+ * through a text string. Character boundary analysis provides correct
+ * navigation of through character strings, regardless of how the
+ * character is stored. For example, an accented character might be
+ * stored as a base character and a diacritical mark. What users
+ * consider to be a character can differ between languages.
+ * <p>
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ * <p>
+ * In addition to the C++ API defined in this header file, a
+ * plain C API with equivalent functionality is defined in the
+ * file ubrk.h
+ * <p>
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * https://unicode-org.github.io/icu/userguide/boundaryanalysis/
+ * and in the sample program icu/source/samples/break/break.cpp
+ *
+ */
+class U_COMMON_API BreakIterator : public UObject {
+public:
+ /**
+ * destructor
+ * @stable ICU 2.0
+ */
+ virtual ~BreakIterator();
+
+ /**
+ * Return true if another object is semantically equal to this
+ * one. The other object should be an instance of the same subclass of
+ * BreakIterator. Objects of different subclasses are considered
+ * unequal.
+ * <P>
+ * Return true if this BreakIterator is at the same position in the
+ * same text, and is the same class and type (word, line, etc.) of
+ * BreakIterator, as the argument. Text is considered the same if
+ * it contains the same characters, it need not be the same
+ * object, and styles are not considered.
+ * @stable ICU 2.0
+ */
+ virtual bool operator==(const BreakIterator&) const = 0;
+
+ /**
+ * Returns the complement of the result of operator==
+ * @param rhs The BreakIterator to be compared for inequality
+ * @return the complement of the result of operator==
+ * @stable ICU 2.0
+ */
+ bool operator!=(const BreakIterator& rhs) const { return !operator==(rhs); }
+
+ /**
+ * Return a polymorphic copy of this object. This is an abstract
+ * method which subclasses implement.
+ * @stable ICU 2.0
+ */
+ virtual BreakIterator* clone() const = 0;
+
+ /**
+ * Return a polymorphic class ID for this object. Different subclasses
+ * will return distinct unequal values.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const override = 0;
+
+ /**
+ * Return a CharacterIterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator& getText(void) const = 0;
+
+
+ /**
+ * Get a UText for the text being analyzed.
+ * The returned UText is a shallow clone of the UText used internally
+ * by the break iterator implementation. It can safely be used to
+ * access the text without impacting any break iterator operations,
+ * but the underlying text itself must not be altered.
+ *
+ * @param fillIn A UText to be filled in. If nullptr, a new UText will be
+ * allocated to hold the result.
+ * @param status receives any error codes.
+ * @return The current UText for this break iterator. If an input
+ * UText was provided, it will always be returned.
+ * @stable ICU 3.4
+ */
+ virtual UText *getUText(UText *fillIn, UErrorCode &status) const = 0;
+
+ /**
+ * Change the text over which this operates. The text boundary is
+ * reset to the start.
+ *
+ * The BreakIterator will retain a reference to the supplied string.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param text The UnicodeString used to change the text.
+ * @stable ICU 2.0
+ */
+ virtual void setText(const UnicodeString &text) = 0;
+
+ /**
+ * Reset the break iterator to operate over the text represented by
+ * the UText. The iterator position is reset to the start.
+ *
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * Utext that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ *
+ * All index positions returned by break iterator functions are
+ * native indices from the UText. For example, when breaking UTF-8
+ * encoded text, the break positions returned by next(), previous(), etc.
+ * will be UTF-8 string indices, not UTF-16 positions.
+ *
+ * @param text The UText used to change the text.
+ * @param status receives any error codes.
+ * @stable ICU 3.4
+ */
+ virtual void setText(UText *text, UErrorCode &status) = 0;
+
+ /**
+ * Change the text over which this operates. The text boundary is
+ * reset to the start.
+ * Note that setText(UText *) provides similar functionality to this function,
+ * and is more efficient.
+ * @param it The CharacterIterator used to change the text.
+ * @stable ICU 2.0
+ */
+ virtual void adoptText(CharacterIterator* it) = 0;
+
+ enum {
+ /**
+ * DONE is returned by previous() and next() after all valid
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ DONE = (int32_t)-1
+ };
+
+ /**
+ * Sets the current iteration position to the beginning of the text, position zero.
+ * @return The offset of the beginning of the text, zero.
+ * @stable ICU 2.0
+ */
+ virtual int32_t first(void) = 0;
+
+ /**
+ * Set the iterator position to the index immediately BEYOND the last character in the text being scanned.
+ * @return The index immediately BEYOND the last character in the text being scanned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t last(void) = 0;
+
+ /**
+ * Set the iterator position to the boundary preceding the current boundary.
+ * @return The character index of the previous text boundary or DONE if all
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t previous(void) = 0;
+
+ /**
+ * Advance the iterator to the boundary following the current boundary.
+ * @return The character index of the next text boundary or DONE if all
+ * boundaries have been returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(void) = 0;
+
+ /**
+ * Return character index of the current iterator position within the text.
+ * @return The boundary most recently returned.
+ * @stable ICU 2.0
+ */
+ virtual int32_t current(void) const = 0;
+
+ /**
+ * Advance the iterator to the first boundary following the specified offset.
+ * The value returned is always greater than the offset or
+ * the value BreakIterator.DONE
+ * @param offset the offset to begin scanning.
+ * @return The first boundary after the specified offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t following(int32_t offset) = 0;
+
+ /**
+ * Set the iterator position to the first boundary preceding the specified offset.
+ * The value returned is always smaller than the offset or
+ * the value BreakIterator.DONE
+ * @param offset the offset to begin scanning.
+ * @return The first boundary before the specified offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t preceding(int32_t offset) = 0;
+
+ /**
+ * Return true if the specified position is a boundary position.
+ * As a side effect, the current position of the iterator is set
+ * to the first boundary position at or following the specified offset.
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+ virtual UBool isBoundary(int32_t offset) = 0;
+
+ /**
+ * Set the iterator position to the nth boundary from the current boundary
+ * @param n the number of boundaries to move by. A value of 0
+ * does nothing. Negative values move to previous boundaries
+ * and positive values move to later boundaries.
+ * @return The new iterator position, or
+ * DONE if there are fewer than |n| boundaries in the specified direction.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(int32_t n) = 0;
+
+ /**
+ * For RuleBasedBreakIterators, return the status tag from the break rule
+ * that determined the boundary at the current iteration position.
+ * <p>
+ * For break iterator types that do not support a rule status,
+ * a default value of 0 is returned.
+ * <p>
+ * @return the status from the break rule that determined the boundary at
+ * the current iteration position.
+ * @see RuleBaseBreakIterator::getRuleStatus()
+ * @see UWordBreak
+ * @stable ICU 52
+ */
+ virtual int32_t getRuleStatus() const;
+
+ /**
+ * For RuleBasedBreakIterators, get the status (tag) values from the break rule(s)
+ * that determined the boundary at the current iteration position.
+ * <p>
+ * For break iterator types that do not support rule status,
+ * no values are returned.
+ * <p>
+ * The returned status value(s) are stored into an array provided by the caller.
+ * The values are stored in sorted (ascending) order.
+ * If the capacity of the output array is insufficient to hold the data,
+ * the output will be truncated to the available length, and a
+ * U_BUFFER_OVERFLOW_ERROR will be signaled.
+ * <p>
+ * @see RuleBaseBreakIterator::getRuleStatusVec
+ *
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the boundary at the current iteration position.
+ * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
+ * is the total number of status values that were available,
+ * not the reduced number that were actually returned.
+ * @see getRuleStatus
+ * @stable ICU 52
+ */
+ virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status);
+
+ /**
+ * Create BreakIterator for word-breaks using the given locale.
+ * Returns an instance of a BreakIterator implementing word breaks.
+ * WordBreak is useful for word selection (ex. double click)
+ * @param where the locale.
+ * @param status the error code
+ * @return A BreakIterator for word-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createWordInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for line-breaks using specified locale.
+ * Returns an instance of a BreakIterator implementing line breaks. Line
+ * breaks are logically possible line breaks, actual line breaks are
+ * usually determined based on display width.
+ * LineBreak is useful for word wrapping text.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for line-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createLineInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for character-breaks using specified locale
+ * Returns an instance of a BreakIterator implementing character breaks.
+ * Character breaks are boundaries of combining character sequences.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for character-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createCharacterInstance(const Locale& where, UErrorCode& status);
+
+ /**
+ * Create BreakIterator for sentence-breaks using specified locale
+ * Returns an instance of a BreakIterator implementing sentence breaks.
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for sentence-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @stable ICU 2.0
+ */
+ static BreakIterator* U_EXPORT2
+ createSentenceInstance(const Locale& where, UErrorCode& status);
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Create BreakIterator for title-casing breaks using the specified locale
+ * Returns an instance of a BreakIterator implementing title breaks.
+ * The iterator returned locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use a word boundary iterator. See {@link #createWordInstance }.
+ *
+ * @param where the locale.
+ * @param status The error code.
+ * @return A BreakIterator for title-breaks. The UErrorCode& status
+ * parameter is used to return status information to the user.
+ * To check whether the construction succeeded or not, you should check
+ * the value of U_SUCCESS(err). If you wish more detailed information, you
+ * can check for informational error results which still indicate success.
+ * U_USING_FALLBACK_WARNING indicates that a fall back locale was used. For
+ * example, 'de_CH' was requested, but nothing was found there, so 'de' was
+ * used. U_USING_DEFAULT_WARNING indicates that the default locale data was
+ * used; neither the requested locale nor any of its fall back locales
+ * could be found.
+ * The caller owns the returned object and is responsible for deleting it.
+ * @deprecated ICU 64 Use createWordInstance instead.
+ */
+ static BreakIterator* U_EXPORT2
+ createTitleInstance(const Locale& where, UErrorCode& status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Get the set of Locales for which TextBoundaries are installed.
+ * <p><b>Note:</b> this will not return locales added through the register
+ * call. To see the registered locales too, use the getAvailableLocales
+ * function that returns a StringEnumeration object </p>
+ * @param count the output parameter of number of elements in the locale list
+ * @return available locales
+ * @stable ICU 2.0
+ */
+ static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+ /**
+ * Get name of the object for the desired Locale, in the desired language.
+ * @param objectLocale must be from getAvailableLocales.
+ * @param displayLocale specifies the desired locale for output.
+ * @param name the fill-in parameter of the return value
+ * Uses best match.
+ * @return user-displayable name
+ * @stable ICU 2.0
+ */
+ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+ const Locale& displayLocale,
+ UnicodeString& name);
+
+ /**
+ * Get name of the object for the desired Locale, in the language of the
+ * default locale.
+ * @param objectLocale must be from getMatchingLocales
+ * @param name the fill-in parameter of the return value
+ * @return user-displayable name
+ * @stable ICU 2.0
+ */
+ static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
+ UnicodeString& name);
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * Deprecated functionality. Use clone() instead.
+ *
+ * Thread safe client-buffer-based cloning operation
+ * Do NOT call delete on a safeclone, since 'new' is not used to create it.
+ * @param stackBuffer user allocated space for the new clone. If nullptr new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * @param BufferSize reference to size of allocated space.
+ * If BufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If BufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used if any allocations were
+ * necessary.
+ * @return pointer to the new clone
+ *
+ * @deprecated ICU 52. Use clone() instead.
+ */
+ virtual BreakIterator * createBufferClone(void *stackBuffer,
+ int32_t &BufferSize,
+ UErrorCode &status) = 0;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+#ifndef U_HIDE_DEPRECATED_API
+
+ /**
+ * Determine whether the BreakIterator was created in user memory by
+ * createBufferClone(), and thus should not be deleted. Such objects
+ * must be closed by an explicit call to the destructor (not delete).
+ * @deprecated ICU 52. Always delete the BreakIterator.
+ */
+ inline UBool isBufferClone(void);
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#if !UCONFIG_NO_SERVICE
+ /**
+ * Register a new break iterator of the indicated kind, to use in the given locale.
+ * The break iterator will be adopted. Clones of the iterator will be returned
+ * if a request for a break iterator of the given kind matches or falls back to
+ * this locale.
+ * Because ICU may choose to cache BreakIterators internally, this must
+ * be called at application startup, prior to any calls to
+ * BreakIterator::createXXXInstance to avoid undefined behavior.
+ * @param toAdopt the BreakIterator instance to be adopted
+ * @param locale the Locale for which this instance is to be registered
+ * @param kind the type of iterator for which this instance is to be registered
+ * @param status the in/out status code, no special meanings are assigned
+ * @return a registry key that can be used to unregister this instance
+ * @stable ICU 2.4
+ */
+ static URegistryKey U_EXPORT2 registerInstance(BreakIterator* toAdopt,
+ const Locale& locale,
+ UBreakIteratorType kind,
+ UErrorCode& status);
+
+ /**
+ * Unregister a previously-registered BreakIterator using the key returned from the
+ * register call. Key becomes invalid after a successful call and should not be used again.
+ * The BreakIterator corresponding to the key will be deleted.
+ * Because ICU may choose to cache BreakIterators internally, this should
+ * be called during application shutdown, after all calls to
+ * BreakIterator::createXXXInstance to avoid undefined behavior.
+ * @param key the registry key returned by a previous call to registerInstance
+ * @param status the in/out status code, no special meanings are assigned
+ * @return true if the iterator for the key was successfully unregistered
+ * @stable ICU 2.4
+ */
+ static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
+
+ /**
+ * Return a StringEnumeration over the locales available at the time of the call,
+ * including registered locales.
+ * @return a StringEnumeration over the locales available at the time of the call
+ * @stable ICU 2.4
+ */
+ static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
+#endif
+
+ /**
+ * Returns the locale for this break iterator. Two flavors are available: valid and
+ * actual locale.
+ * @stable ICU 2.8
+ */
+ Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const;
+
+#ifndef U_HIDE_INTERNAL_API
+ /** Get the locale for this break iterator object. You can choose between valid and actual locale.
+ * @param type type of the locale we're looking for (valid or actual)
+ * @param status error code for the operation
+ * @return the locale
+ * @internal
+ */
+ const char *getLocaleID(ULocDataLocaleType type, UErrorCode& status) const;
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the matching state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator implementation never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized,
+ * system-level code. One example use case is with garbage collection that moves
+ * the text in memory.
+ *
+ * @param input The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ * @return *this
+ *
+ * @stable ICU 49
+ */
+ virtual BreakIterator &refreshInputText(UText *input, UErrorCode &status) = 0;
+
+ private:
+ static BreakIterator* buildInstance(const Locale& loc, const char *type, UErrorCode& status);
+ static BreakIterator* createInstance(const Locale& loc, int32_t kind, UErrorCode& status);
+ static BreakIterator* makeInstance(const Locale& loc, int32_t kind, UErrorCode& status);
+
+ friend class ICUBreakIteratorFactory;
+ friend class ICUBreakIteratorService;
+
+protected:
+ // Do not enclose protected default/copy constructors with #ifndef U_HIDE_INTERNAL_API
+ // or else the compiler will create a public ones.
+ /** @internal */
+ BreakIterator();
+ /** @internal */
+ BreakIterator (const BreakIterator &other);
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ BreakIterator (const Locale& valid, const Locale &actual);
+ /** @internal. Assignment Operator, used by RuleBasedBreakIterator. */
+ BreakIterator &operator = (const BreakIterator &other);
+#endif /* U_HIDE_INTERNAL_API */
+
+private:
+
+ /** @internal (private) */
+ char actualLocale[ULOC_FULLNAME_CAPACITY];
+ char validLocale[ULOC_FULLNAME_CAPACITY];
+};
+
+#ifndef U_HIDE_DEPRECATED_API
+
+inline UBool BreakIterator::isBufferClone()
+{
+ return false;
+}
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // BRKITER_H
+//eof
diff --git a/intl/icu/source/common/unicode/bytestream.h b/intl/icu/source/common/unicode/bytestream.h
new file mode 100644
index 0000000000..997746e428
--- /dev/null
+++ b/intl/icu/source/common/unicode/bytestream.h
@@ -0,0 +1,307 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2012, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2007 Google Inc. All Rights Reserved.
+// Author: sanjay@google.com (Sanjay Ghemawat)
+//
+// Abstract interface that consumes a sequence of bytes (ByteSink).
+//
+// Used so that we can write a single piece of code that can operate
+// on a variety of output string types.
+//
+// Various implementations of this interface are provided:
+// ByteSink:
+// CheckedArrayByteSink Write to a flat array, with bounds checking
+// StringByteSink Write to an STL string
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __BYTESTREAM_H__
+#define __BYTESTREAM_H__
+
+/**
+ * \file
+ * \brief C++ API: Interface for writing bytes, and implementation classes.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A ByteSink can be filled with bytes.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ByteSink : public UMemory {
+public:
+ /**
+ * Default constructor.
+ * @stable ICU 4.2
+ */
+ ByteSink() { }
+ /**
+ * Virtual destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~ByteSink();
+
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n) = 0;
+
+ /**
+ * Appends n bytes to this. Same as Append().
+ * Call AppendU8() with u8"string literals" which are const char * in C++11
+ * but const char8_t * in C++20.
+ * If the compiler does support char8_t as a distinct type,
+ * then an AppendU8() overload for that is defined and will be chosen.
+ *
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 67
+ */
+ inline void AppendU8(const char* bytes, int32_t n) {
+ Append(bytes, n);
+ }
+
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Appends n bytes to this. Same as Append() but for a const char8_t * pointer.
+ * Call AppendU8() with u8"string literals" which are const char * in C++11
+ * but const char8_t * in C++20.
+ * If the compiler does support char8_t as a distinct type,
+ * then this AppendU8() overload for that is defined and will be chosen.
+ *
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 67
+ */
+ inline void AppendU8(const char8_t* bytes, int32_t n) {
+ Append(reinterpret_cast<const char*>(bytes), n);
+ }
+#endif
+
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. Guarantees *result_capacity>=min_capacity.
+ * May return a pointer to the caller-owned scratch buffer which must have
+ * scratch_capacity>=min_capacity.
+ * The returned buffer is only valid until the next operation
+ * on this ByteSink.
+ *
+ * After writing at most *result_capacity bytes, call Append() with the
+ * pointer returned from this function and the number of bytes written.
+ * Many Append() implementations will avoid copying bytes if this function
+ * returned an internal buffer.
+ *
+ * Partial usage example:
+ * int32_t capacity;
+ * char* buffer = sink->GetAppendBuffer(..., &capacity);
+ * ... Write n bytes into buffer, with n <= capacity.
+ * sink->Append(buffer, n);
+ * In many implementations, that call to Append will avoid copying bytes.
+ *
+ * If the ByteSink allocates or reallocates an internal buffer, it should use
+ * the desired_capacity_hint if appropriate.
+ * If a caller cannot provide a reasonable guess at the desired capacity,
+ * it should pass desired_capacity_hint=0.
+ *
+ * If a non-scratch buffer is returned, the caller may only pass
+ * a prefix to it to Append().
+ * That is, it is not correct to pass an interior pointer to Append().
+ *
+ * The default implementation always returns the scratch buffer.
+ *
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity);
+
+ /**
+ * Flush internal buffers.
+ * Some byte sinks use internal buffers or provide buffering
+ * and require calling Flush() at the end of the stream.
+ * The ByteSink should be ready for further Append() calls after Flush().
+ * The default implementation of Flush() does nothing.
+ * @stable ICU 4.2
+ */
+ virtual void Flush();
+
+private:
+ ByteSink(const ByteSink &) = delete;
+ ByteSink &operator=(const ByteSink &) = delete;
+};
+
+// -------------------------------------------------------------
+// Some standard implementations
+
+/**
+ * Implementation of ByteSink that writes to a flat byte array,
+ * with bounds-checking:
+ * This sink will not write more than capacity bytes to outbuf.
+ * If more than capacity bytes are Append()ed, then excess bytes are ignored,
+ * and Overflowed() will return true.
+ * Overflow does not cause a runtime error.
+ * @stable ICU 4.2
+ */
+class U_COMMON_API CheckedArrayByteSink : public ByteSink {
+public:
+ /**
+ * Constructs a ByteSink that will write to outbuf[0..capacity-1].
+ * @param outbuf buffer to write to
+ * @param capacity size of the buffer
+ * @stable ICU 4.2
+ */
+ CheckedArrayByteSink(char* outbuf, int32_t capacity);
+ /**
+ * Destructor.
+ * @stable ICU 4.2
+ */
+ virtual ~CheckedArrayByteSink();
+ /**
+ * Returns the sink to its original state, without modifying the buffer.
+ * Useful for reusing both the buffer and the sink for multiple streams.
+ * Resets the state to NumberOfBytesWritten()=NumberOfBytesAppended()=0
+ * and Overflowed()=false.
+ * @return *this
+ * @stable ICU 4.6
+ */
+ virtual CheckedArrayByteSink& Reset();
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param bytes the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* bytes, int32_t n) override;
+ /**
+ * Returns a writable buffer for appending and writes the buffer's capacity to
+ * *result_capacity. For details see the base class documentation.
+ * @param min_capacity required minimum capacity of the returned buffer;
+ * must be non-negative
+ * @param desired_capacity_hint desired capacity of the returned buffer;
+ * must be non-negative
+ * @param scratch default caller-owned buffer
+ * @param scratch_capacity capacity of the scratch buffer
+ * @param result_capacity pointer to an integer which will be set to the
+ * capacity of the returned buffer
+ * @return a buffer with *result_capacity>=min_capacity
+ * @stable ICU 4.2
+ */
+ virtual char* GetAppendBuffer(int32_t min_capacity,
+ int32_t desired_capacity_hint,
+ char* scratch, int32_t scratch_capacity,
+ int32_t* result_capacity) override;
+ /**
+ * Returns the number of bytes actually written to the sink.
+ * @return number of bytes written to the buffer
+ * @stable ICU 4.2
+ */
+ int32_t NumberOfBytesWritten() const { return size_; }
+ /**
+ * Returns true if any bytes were discarded, i.e., if there was an
+ * attempt to write more than 'capacity' bytes.
+ * @return true if more than 'capacity' bytes were Append()ed
+ * @stable ICU 4.2
+ */
+ UBool Overflowed() const { return overflowed_; }
+ /**
+ * Returns the number of bytes appended to the sink.
+ * If Overflowed() then NumberOfBytesAppended()>NumberOfBytesWritten()
+ * else they return the same number.
+ * @return number of bytes written to the buffer
+ * @stable ICU 4.6
+ */
+ int32_t NumberOfBytesAppended() const { return appended_; }
+private:
+ char* outbuf_;
+ const int32_t capacity_;
+ int32_t size_;
+ int32_t appended_;
+ UBool overflowed_;
+
+ CheckedArrayByteSink() = delete;
+ CheckedArrayByteSink(const CheckedArrayByteSink &) = delete;
+ CheckedArrayByteSink &operator=(const CheckedArrayByteSink &) = delete;
+};
+
+/**
+ * Implementation of ByteSink that writes to a "string".
+ * The StringClass is usually instantiated with a std::string.
+ * @stable ICU 4.2
+ */
+template<typename StringClass>
+class StringByteSink : public ByteSink {
+ public:
+ /**
+ * Constructs a ByteSink that will append bytes to the dest string.
+ * @param dest pointer to string object to append to
+ * @stable ICU 4.2
+ */
+ StringByteSink(StringClass* dest) : dest_(dest) { }
+ /**
+ * Constructs a ByteSink that reserves append capacity and will append bytes to the dest string.
+ *
+ * @param dest pointer to string object to append to
+ * @param initialAppendCapacity capacity beyond dest->length() to be reserve()d
+ * @stable ICU 60
+ */
+ StringByteSink(StringClass* dest, int32_t initialAppendCapacity) : dest_(dest) {
+ if (initialAppendCapacity > 0 &&
+ (uint32_t)initialAppendCapacity > (dest->capacity() - dest->length())) {
+ dest->reserve(dest->length() + initialAppendCapacity);
+ }
+ }
+ /**
+ * Append "bytes[0,n-1]" to this.
+ * @param data the pointer to the bytes
+ * @param n the number of bytes; must be non-negative
+ * @stable ICU 4.2
+ */
+ virtual void Append(const char* data, int32_t n) override { dest_->append(data, n); }
+ private:
+ StringClass* dest_;
+
+ StringByteSink() = delete;
+ StringByteSink(const StringByteSink &) = delete;
+ StringByteSink &operator=(const StringByteSink &) = delete;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __BYTESTREAM_H__
diff --git a/intl/icu/source/common/unicode/bytestrie.h b/intl/icu/source/common/unicode/bytestrie.h
new file mode 100644
index 0000000000..1719a6bb83
--- /dev/null
+++ b/intl/icu/source/common/unicode/bytestrie.h
@@ -0,0 +1,568 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __BYTESTRIE_H__
+#define __BYTESTRIE_H__
+
+/**
+ * \file
+ * \brief C++ API: Trie for mapping byte sequences to integer values.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/ustringtrie.h"
+
+class BytesTrieTest;
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+class BytesTrieBuilder;
+class CharString;
+class UVector32;
+
+/**
+ * Light-weight, non-const reader class for a BytesTrie.
+ * Traverses a byte-serialized data structure with minimal state,
+ * for mapping byte sequences to non-negative integer values.
+ *
+ * This class owns the serialized trie data only if it was constructed by
+ * the builder's build() method.
+ * The public constructor and the copy constructor only alias the data (only copy the pointer).
+ * There is no assignment operator.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API BytesTrie : public UMemory {
+public:
+ /**
+ * Constructs a BytesTrie reader instance.
+ *
+ * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder,
+ * starting with the first byte of that sequence.
+ * The BytesTrie object will not read more bytes than
+ * the BytesTrieBuilder generated in the corresponding build() call.
+ *
+ * The array is not copied/cloned and must not be modified while
+ * the BytesTrie object is in use.
+ *
+ * @param trieBytes The byte array that contains the serialized trie.
+ * @stable ICU 4.8
+ */
+ BytesTrie(const void *trieBytes)
+ : ownedArray_(nullptr), bytes_(static_cast<const uint8_t *>(trieBytes)),
+ pos_(bytes_), remainingMatchLength_(-1) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~BytesTrie();
+
+ /**
+ * Copy constructor, copies the other trie reader object and its state,
+ * but not the byte array which will be shared. (Shallow copy.)
+ * @param other Another BytesTrie object.
+ * @stable ICU 4.8
+ */
+ BytesTrie(const BytesTrie &other)
+ : ownedArray_(nullptr), bytes_(other.bytes_),
+ pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
+
+ /**
+ * Resets this trie to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ BytesTrie &reset() {
+ pos_=bytes_;
+ remainingMatchLength_=-1;
+ return *this;
+ }
+
+ /**
+ * Returns the state of this trie as a 64-bit integer.
+ * The state value is never 0.
+ *
+ * @return opaque state value
+ * @see resetToState64
+ * @stable ICU 65
+ */
+ uint64_t getState64() const {
+ return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
+ (uint64_t)(pos_ - bytes_);
+ }
+
+ /**
+ * Resets this trie to the saved state.
+ * Unlike resetToState(State), the 64-bit state value
+ * must be from getState64() from the same trie object or
+ * from one initialized the exact same way.
+ * Because of no validation, this method is faster.
+ *
+ * @param state The opaque trie state value from getState64().
+ * @return *this
+ * @see getState64
+ * @see resetToState
+ * @see reset
+ * @stable ICU 65
+ */
+ BytesTrie &resetToState64(uint64_t state) {
+ remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
+ pos_ = bytes_ + (state & kState64PosMask);
+ return *this;
+ }
+
+ /**
+ * BytesTrie state object, for saving a trie's current state
+ * and resetting the trie back to this state later.
+ * @stable ICU 4.8
+ */
+ class State : public UMemory {
+ public:
+ /**
+ * Constructs an empty State.
+ * @stable ICU 4.8
+ */
+ State() { bytes=nullptr; }
+ private:
+ friend class BytesTrie;
+
+ const uint8_t *bytes;
+ const uint8_t *pos;
+ int32_t remainingMatchLength;
+ };
+
+ /**
+ * Saves the state of this trie.
+ * @param state The State object to hold the trie's state.
+ * @return *this
+ * @see resetToState
+ * @stable ICU 4.8
+ */
+ const BytesTrie &saveState(State &state) const {
+ state.bytes=bytes_;
+ state.pos=pos_;
+ state.remainingMatchLength=remainingMatchLength_;
+ return *this;
+ }
+
+ /**
+ * Resets this trie to the saved state.
+ * If the state object contains no state, or the state of a different trie,
+ * then this trie remains unchanged.
+ * @param state The State object which holds a saved trie state.
+ * @return *this
+ * @see saveState
+ * @see reset
+ * @stable ICU 4.8
+ */
+ BytesTrie &resetToState(const State &state) {
+ if(bytes_==state.bytes && bytes_!=nullptr) {
+ pos_=state.pos;
+ remainingMatchLength_=state.remainingMatchLength;
+ }
+ return *this;
+ }
+
+ /**
+ * Determines whether the byte sequence so far matches, whether it has a value,
+ * and whether another input byte can continue a matching byte sequence.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult current() const;
+
+ /**
+ * Traverses the trie from the initial state for this input byte.
+ * Equivalent to reset().next(inByte).
+ * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
+ * Values below -0x100 and above 0xff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ inline UStringTrieResult first(int32_t inByte) {
+ remainingMatchLength_=-1;
+ if(inByte<0) {
+ inByte+=0x100;
+ }
+ return nextImpl(bytes_, inByte);
+ }
+
+ /**
+ * Traverses the trie from the current state for this input byte.
+ * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
+ * Values below -0x100 and above 0xff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(int32_t inByte);
+
+ /**
+ * Traverses the trie from the current state for this byte sequence.
+ * Equivalent to
+ * \code
+ * Result result=current();
+ * for(each c in s)
+ * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
+ * result=next(c);
+ * return result;
+ * \endcode
+ * @param s A string or byte sequence. Can be nullptr if length is 0.
+ * @param length The length of the byte sequence. Can be -1 if NUL-terminated.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(const char *s, int32_t length);
+
+ /**
+ * Returns a matching byte sequence's value if called immediately after
+ * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
+ * getValue() can be called multiple times.
+ *
+ * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
+ * @return The value for the byte sequence so far.
+ * @stable ICU 4.8
+ */
+ inline int32_t getValue() const {
+ const uint8_t *pos=pos_;
+ int32_t leadByte=*pos++;
+ // U_ASSERT(leadByte>=kMinValueLead);
+ return readValue(pos, leadByte>>1);
+ }
+
+ /**
+ * Determines whether all byte sequences reachable from the current state
+ * map to the same value.
+ * @param uniqueValue Receives the unique value, if this function returns true.
+ * (output-only)
+ * @return true if all byte sequences reachable from the current state
+ * map to the same value.
+ * @stable ICU 4.8
+ */
+ inline UBool hasUniqueValue(int32_t &uniqueValue) const {
+ const uint8_t *pos=pos_;
+ // Skip the rest of a pending linear-match node.
+ return pos!=nullptr && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
+ }
+
+ /**
+ * Finds each byte which continues the byte sequence from the current state.
+ * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
+ * @param out Each next byte is appended to this object.
+ * (Only uses the out.Append(s, length) method.)
+ * @return the number of bytes which continue the byte sequence from here
+ * @stable ICU 4.8
+ */
+ int32_t getNextBytes(ByteSink &out) const;
+
+ /**
+ * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
+ * @stable ICU 4.8
+ */
+ class U_COMMON_API Iterator : public UMemory {
+ public:
+ /**
+ * Iterates from the root of a byte-serialized BytesTrie.
+ * @param trieBytes The trie bytes.
+ * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Iterates from the current state of the specified BytesTrie.
+ * @param trie The trie whose state will be copied for iteration.
+ * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Iterator();
+
+ /**
+ * Resets this iterator to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ Iterator &reset();
+
+ /**
+ * @return true if there are more elements.
+ * @stable ICU 4.8
+ */
+ UBool hasNext() const;
+
+ /**
+ * Finds the next (byte sequence, value) pair if there is one.
+ *
+ * If the byte sequence is truncated to the maximum length and does not
+ * have a real value, then the value is set to -1.
+ * In this case, this "not a real value" is indistinguishable from
+ * a real value of -1.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if there is another element.
+ * @stable ICU 4.8
+ */
+ UBool next(UErrorCode &errorCode);
+
+ /**
+ * @return The NUL-terminated byte sequence for the last successful next().
+ * @stable ICU 4.8
+ */
+ StringPiece getString() const;
+ /**
+ * @return The value for the last successful next().
+ * @stable ICU 4.8
+ */
+ int32_t getValue() const { return value_; }
+
+ private:
+ UBool truncateAndStop();
+
+ const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
+
+ const uint8_t *bytes_;
+ const uint8_t *pos_;
+ const uint8_t *initialPos_;
+ int32_t remainingMatchLength_;
+ int32_t initialRemainingMatchLength_;
+
+ CharString *str_;
+ int32_t maxLength_;
+ int32_t value_;
+
+ // The stack stores pairs of integers for backtracking to another
+ // outbound edge of a branch node.
+ // The first integer is an offset from bytes_.
+ // The second integer has the str_->length() from before the node in bits 15..0,
+ // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
+ // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
+ // but the code looks more confusing that way.)
+ UVector32 *stack_;
+ };
+
+private:
+ friend class BytesTrieBuilder;
+ friend class ::BytesTrieTest;
+
+ /**
+ * Constructs a BytesTrie reader instance.
+ * Unlike the public constructor which just aliases an array,
+ * this constructor adopts the builder's array.
+ * This constructor is only called by the builder.
+ */
+ BytesTrie(void *adoptBytes, const void *trieBytes)
+ : ownedArray_(static_cast<uint8_t *>(adoptBytes)),
+ bytes_(static_cast<const uint8_t *>(trieBytes)),
+ pos_(bytes_), remainingMatchLength_(-1) {}
+
+ // No assignment operator.
+ BytesTrie &operator=(const BytesTrie &other) = delete;
+
+ inline void stop() {
+ pos_=nullptr;
+ }
+
+ // Reads a compact 32-bit integer.
+ // pos is already after the leadByte, and the lead byte is already shifted right by 1.
+ static int32_t readValue(const uint8_t *pos, int32_t leadByte);
+ static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {
+ // U_ASSERT(leadByte>=kMinValueLead);
+ if(leadByte>=(kMinTwoByteValueLead<<1)) {
+ if(leadByte<(kMinThreeByteValueLead<<1)) {
+ ++pos;
+ } else if(leadByte<(kFourByteValueLead<<1)) {
+ pos+=2;
+ } else {
+ pos+=3+((leadByte>>1)&1);
+ }
+ }
+ return pos;
+ }
+ static inline const uint8_t *skipValue(const uint8_t *pos) {
+ int32_t leadByte=*pos++;
+ return skipValue(pos, leadByte);
+ }
+
+ // Reads a jump delta and jumps.
+ static const uint8_t *jumpByDelta(const uint8_t *pos);
+
+ static inline const uint8_t *skipDelta(const uint8_t *pos) {
+ int32_t delta=*pos++;
+ if(delta>=kMinTwoByteDeltaLead) {
+ if(delta<kMinThreeByteDeltaLead) {
+ ++pos;
+ } else if(delta<kFourByteDeltaLead) {
+ pos+=2;
+ } else {
+ pos+=3+(delta&1);
+ }
+ }
+ return pos;
+ }
+
+ static inline UStringTrieResult valueResult(int32_t node) {
+ return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));
+ }
+
+ // Handles a branch node for both next(byte) and next(string).
+ UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
+
+ // Requires remainingLength_<0.
+ UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
+
+ // Helper functions for hasUniqueValue().
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // from a branch.
+ static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
+ UBool haveUniqueValue, int32_t &uniqueValue);
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // starting from a position on a node lead byte.
+ static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+ // Helper functions for getNextBytes().
+ // getNextBytes() when pos is on a branch node.
+ static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
+ static void append(ByteSink &out, int c);
+
+ // BytesTrie data structure
+ //
+ // The trie consists of a series of byte-serialized nodes for incremental
+ // string/byte sequence matching. The root node is at the beginning of the trie data.
+ //
+ // Types of nodes are distinguished by their node lead byte ranges.
+ // After each node, except a final-value node, another node follows to
+ // encode match values or continue matching further bytes.
+ //
+ // Node types:
+ // - Value node: Stores a 32-bit integer in a compact, variable-length format.
+ // The value is for the string/byte sequence so far.
+ // One node bit indicates whether the value is final or whether
+ // matching continues with the next node.
+ // - Linear-match node: Matches a number of bytes.
+ // - Branch node: Branches to other nodes according to the current input byte.
+ // The node byte is the length of the branch (number of bytes to select from)
+ // minus 1. It is followed by a sub-node:
+ // - If the length is at most kMaxBranchLinearSubNodeLength, then
+ // there are length-1 (key, value) pairs and then one more comparison byte.
+ // If one of the key bytes matches, then the value is either a final value for
+ // the string/byte sequence so far, or a "jump" delta to the next node.
+ // If the last byte matches, then matching continues with the next node.
+ // (Values have the same encoding as value nodes.)
+ // - If the length is greater than kMaxBranchLinearSubNodeLength, then
+ // there is one byte and one "jump" delta.
+ // If the input byte is less than the sub-node byte, then "jump" by delta to
+ // the next sub-node which will have a length of length/2.
+ // (The delta has its own compact encoding.)
+ // Otherwise, skip the "jump" delta to the next sub-node
+ // which will have a length of length-length/2.
+
+ // Node lead byte values.
+
+ // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
+ // the length is one more than the next byte.
+
+ // For a branch sub-node with at most this many entries, we drop down
+ // to a linear search.
+ static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+ // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
+ static const int32_t kMinLinearMatch=0x10;
+ static const int32_t kMaxLinearMatchLength=0x10;
+
+ // 20..ff: Variable-length value node.
+ // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
+ // Then shift-right by 1 bit.
+ // The remaining lead byte value indicates the number of following bytes (0..4)
+ // and contains the value's top bits.
+ static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20
+ // It is a final value if bit 0 is set.
+ static const int32_t kValueIsFinal=1;
+
+ // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
+ static const int32_t kMinOneByteValueLead=kMinValueLead/2; // 0x10
+ static const int32_t kMaxOneByteValue=0x40; // At least 6 bits in the first byte.
+
+ static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51
+ static const int32_t kMaxTwoByteValue=0x1aff;
+
+ static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c
+ static const int32_t kFourByteValueLead=0x7e;
+
+ // A little more than Unicode code points. (0x11ffff)
+ static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
+
+ static const int32_t kFiveByteValueLead=0x7f;
+
+ // Compact delta integers.
+ static const int32_t kMaxOneByteDelta=0xbf;
+ static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0
+ static const int32_t kMinThreeByteDeltaLead=0xf0;
+ static const int32_t kFourByteDeltaLead=0xfe;
+ static const int32_t kFiveByteDeltaLead=0xff;
+
+ static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff
+ static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff
+
+ // For getState64():
+ // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
+ // so we need at least 5 bits for that.
+ // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
+ static constexpr int32_t kState64RemainingShift = 59;
+ static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
+
+ uint8_t *ownedArray_;
+
+ // Fixed value referencing the BytesTrie bytes.
+ const uint8_t *bytes_;
+
+ // Iterator variables.
+
+ // Pointer to next trie byte to read. nullptr if no more matches.
+ const uint8_t *pos_;
+ // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
+ int32_t remainingMatchLength_;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __BYTESTRIE_H__
diff --git a/intl/icu/source/common/unicode/bytestriebuilder.h b/intl/icu/source/common/unicode/bytestriebuilder.h
new file mode 100644
index 0000000000..ec9c625473
--- /dev/null
+++ b/intl/icu/source/common/unicode/bytestriebuilder.h
@@ -0,0 +1,193 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestriebuilder.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010sep25
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C++ API: Builder for icu::BytesTrie
+ */
+
+#ifndef __BYTESTRIEBUILDER_H__
+#define __BYTESTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/bytestrie.h"
+#include "unicode/stringpiece.h"
+#include "unicode/stringtriebuilder.h"
+
+class BytesTrieTest;
+
+U_NAMESPACE_BEGIN
+
+class BytesTrieElement;
+class CharString;
+/**
+ * Builder class for BytesTrie.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
+public:
+ /**
+ * Constructs an empty builder.
+ * @param errorCode Standard ICU error code.
+ * @stable ICU 4.8
+ */
+ BytesTrieBuilder(UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ virtual ~BytesTrieBuilder();
+
+ /**
+ * Adds a (byte sequence, value) pair.
+ * The byte sequence must be unique.
+ * The bytes will be copied; the builder does not keep
+ * a reference to the input StringPiece or its data().
+ * @param s The input byte sequence.
+ * @param value The value associated with this byte sequence.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @stable ICU 4.8
+ */
+ BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode);
+
+ /**
+ * Builds a BytesTrie for the add()ed data.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
+ * must have been add()ed.
+ *
+ * This method passes ownership of the builder's internal result array to the new trie object.
+ * Another call to any build() variant will re-serialize the trie.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A new BytesTrie for the add()ed data.
+ * @stable ICU 4.8
+ */
+ BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ /**
+ * Builds a BytesTrie for the add()ed data and byte-serializes it.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
+ * must have been add()ed.
+ *
+ * Multiple calls to buildStringPiece() return StringPieces referring to the
+ * builder's same byte array, without rebuilding.
+ * If buildStringPiece() is called after build(), the trie will be
+ * re-serialized into a new array (because build() passes on ownership).
+ * If build() is called after buildStringPiece(), the trie object returned
+ * by build() will become the owner of the underlying string for the
+ * previously returned StringPiece.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
+ * @stable ICU 4.8
+ */
+ StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ /**
+ * Removes all (byte sequence, value) pairs.
+ * New data can then be add()ed and a new trie can be built.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ BytesTrieBuilder &clear();
+
+private:
+ friend class ::BytesTrieTest;
+
+ BytesTrieBuilder(const BytesTrieBuilder &other) = delete; // no copy constructor
+ BytesTrieBuilder &operator=(const BytesTrieBuilder &other) = delete; // no assignment operator
+
+ void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ virtual int32_t getElementStringLength(int32_t i) const override;
+ virtual char16_t getElementUnit(int32_t i, int32_t byteIndex) const override;
+ virtual int32_t getElementValue(int32_t i) const override;
+
+ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const override;
+
+ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const override;
+ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const override;
+ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, char16_t byte) const override;
+
+ virtual UBool matchNodesCanHaveValues() const override { return false; }
+
+ virtual int32_t getMaxBranchLinearSubNodeLength() const override { return BytesTrie::kMaxBranchLinearSubNodeLength; }
+ virtual int32_t getMinLinearMatch() const override { return BytesTrie::kMinLinearMatch; }
+ virtual int32_t getMaxLinearMatchLength() const override { return BytesTrie::kMaxLinearMatchLength; }
+
+ /**
+ * @internal (private)
+ */
+ class BTLinearMatchNode : public LinearMatchNode {
+ public:
+ BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
+ virtual bool operator==(const Node &other) const override;
+ virtual void write(StringTrieBuilder &builder) override;
+ private:
+ const char *s;
+ };
+
+ virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
+ Node *nextNode) const override;
+
+ UBool ensureCapacity(int32_t length);
+ virtual int32_t write(int32_t byte) override;
+ int32_t write(const char *b, int32_t length);
+ virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) override;
+ virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override;
+ virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override;
+ virtual int32_t writeDeltaTo(int32_t jumpTarget) override;
+ static int32_t internalEncodeDelta(int32_t i, char intBytes[]);
+
+ CharString *strings; // Pointer not object so we need not #include internal charstr.h.
+ BytesTrieElement *elements;
+ int32_t elementsCapacity;
+ int32_t elementsLength;
+
+ // Byte serialization of the trie.
+ // Grows from the back: bytesLength measures from the end of the buffer!
+ char *bytes;
+ int32_t bytesCapacity;
+ int32_t bytesLength;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __BYTESTRIEBUILDER_H__
diff --git a/intl/icu/source/common/unicode/caniter.h b/intl/icu/source/common/unicode/caniter.h
new file mode 100644
index 0000000000..035bd0e64e
--- /dev/null
+++ b/intl/icu/source/common/unicode/caniter.h
@@ -0,0 +1,214 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ * Copyright (C) 1996-2014, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *******************************************************************************
+ */
+
+#ifndef CANITER_H
+#define CANITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: Canonical Iterator
+ */
+
+/** Should permutation skip characters with combining class zero
+ * Should be either true or false. This is a compile time option
+ * @stable ICU 2.4
+ */
+#ifndef CANITER_SKIP_ZEROES
+#define CANITER_SKIP_ZEROES true
+#endif
+
+U_NAMESPACE_BEGIN
+
+class Hashtable;
+class Normalizer2;
+class Normalizer2Impl;
+
+/**
+ * This class allows one to iterate through all the strings that are canonically equivalent to a given
+ * string. For example, here are some sample results:
+Results for: {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+1: \\u0041\\u030A\\u0064\\u0307\\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+2: \\u0041\\u030A\\u0064\\u0327\\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+3: \\u0041\\u030A\\u1E0B\\u0327
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+4: \\u0041\\u030A\\u1E11\\u0307
+ = {LATIN CAPITAL LETTER A}{COMBINING RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+5: \\u00C5\\u0064\\u0307\\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+6: \\u00C5\\u0064\\u0327\\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+7: \\u00C5\\u1E0B\\u0327
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+8: \\u00C5\\u1E11\\u0307
+ = {LATIN CAPITAL LETTER A WITH RING ABOVE}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+9: \\u212B\\u0064\\u0307\\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING DOT ABOVE}{COMBINING CEDILLA}
+10: \\u212B\\u0064\\u0327\\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D}{COMBINING CEDILLA}{COMBINING DOT ABOVE}
+11: \\u212B\\u1E0B\\u0327
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH DOT ABOVE}{COMBINING CEDILLA}
+12: \\u212B\\u1E11\\u0307
+ = {ANGSTROM SIGN}{LATIN SMALL LETTER D WITH CEDILLA}{COMBINING DOT ABOVE}
+ *<br>Note: the code is intended for use with small strings, and is not suitable for larger ones,
+ * since it has not been optimized for that situation.
+ * Note, CanonicalIterator is not intended to be subclassed.
+ * @author M. Davis
+ * @author C++ port by V. Weinstein
+ * @stable ICU 2.4
+ */
+class U_COMMON_API CanonicalIterator final : public UObject {
+public:
+ /**
+ * Construct a CanonicalIterator object
+ * @param source string to get results for
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @stable ICU 2.4
+ */
+ CanonicalIterator(const UnicodeString &source, UErrorCode &status);
+
+ /** Destructor
+ * Cleans pieces
+ * @stable ICU 2.4
+ */
+ virtual ~CanonicalIterator();
+
+ /**
+ * Gets the NFD form of the current source we are iterating over.
+ * @return gets the source: NOTE: it is the NFD form of source
+ * @stable ICU 2.4
+ */
+ UnicodeString getSource();
+
+ /**
+ * Resets the iterator so that one can start again from the beginning.
+ * @stable ICU 2.4
+ */
+ void reset();
+
+ /**
+ * Get the next canonically equivalent string.
+ * <br><b>Warning: The strings are not guaranteed to be in any particular order.</b>
+ * @return the next string that is canonically equivalent. A bogus string is returned when
+ * the iteration is done.
+ * @stable ICU 2.4
+ */
+ UnicodeString next();
+
+ /**
+ * Set a new source for this iterator. Allows object reuse.
+ * @param newSource the source string to iterate against. This allows the same iterator to be used
+ * while changing the source string, saving object creation.
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @stable ICU 2.4
+ */
+ void setSource(const UnicodeString &newSource, UErrorCode &status);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Dumb recursive implementation of permutation.
+ * TODO: optimize
+ * @param source the string to find permutations for
+ * @param skipZeros determine if skip zeros
+ * @param result the results in a set.
+ * @param status Fill-in parameter which receives the status of this operation.
+ * @internal
+ */
+ static void U_EXPORT2 permute(UnicodeString &source, UBool skipZeros, Hashtable *result, UErrorCode &status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const override;
+
+private:
+ // ===================== PRIVATES ==============================
+ // private default constructor
+ CanonicalIterator() = delete;
+
+
+ /**
+ * Copy constructor. Private for now.
+ * @internal (private)
+ */
+ CanonicalIterator(const CanonicalIterator& other) = delete;
+
+ /**
+ * Assignment operator. Private for now.
+ * @internal (private)
+ */
+ CanonicalIterator& operator=(const CanonicalIterator& other) = delete;
+
+ // fields
+ UnicodeString source;
+ UBool done;
+
+ // 2 dimensional array holds the pieces of the string with
+ // their different canonically equivalent representations
+ UnicodeString **pieces;
+ int32_t pieces_length;
+ int32_t *pieces_lengths;
+
+ // current is used in iterating to combine pieces
+ int32_t *current;
+ int32_t current_length;
+
+ // transient fields
+ UnicodeString buffer;
+
+ const Normalizer2 &nfd;
+ const Normalizer2Impl &nfcImpl;
+
+ // we have a segment, in NFD. Find all the strings that are canonically equivalent to it.
+ UnicodeString *getEquivalents(const UnicodeString &segment, int32_t &result_len, UErrorCode &status); //private String[] getEquivalents(String segment)
+
+ //Set getEquivalents2(String segment);
+ Hashtable *getEquivalents2(Hashtable *fillinResult, const char16_t *segment, int32_t segLen, UErrorCode &status);
+ //Hashtable *getEquivalents2(const UnicodeString &segment, int32_t segLen, UErrorCode &status);
+
+ /**
+ * See if the decomposition of cp2 is at segment starting at segmentPos
+ * (with canonical rearrangement!)
+ * If so, take the remainder, and return the equivalents
+ */
+ //Set extract(int comp, String segment, int segmentPos, StringBuffer buffer);
+ Hashtable *extract(Hashtable *fillinResult, UChar32 comp, const char16_t *segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+ //Hashtable *extract(UChar32 comp, const UnicodeString &segment, int32_t segLen, int32_t segmentPos, UErrorCode &status);
+
+ void cleanPieces();
+
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/casemap.h b/intl/icu/source/common/unicode/casemap.h
new file mode 100644
index 0000000000..eca7cbf80a
--- /dev/null
+++ b/intl/icu/source/common/unicode/casemap.h
@@ -0,0 +1,497 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// casemap.h
+// created: 2017jan12 Markus W. Scherer
+
+#ifndef __CASEMAP_H__
+#define __CASEMAP_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Low-level C++ case mapping functions.
+ */
+
+U_NAMESPACE_BEGIN
+
+class BreakIterator;
+class ByteSink;
+class Edits;
+
+/**
+ * Low-level C++ case mapping functions.
+ *
+ * @stable ICU 59
+ */
+class U_COMMON_API CaseMap final : public UMemory {
+public:
+ /**
+ * Lowercases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strToLower
+ * @stable ICU 59
+ */
+ static int32_t toLower(
+ const char *locale, uint32_t options,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strToUpper
+ * @stable ICU 59
+ */
+ static int32_t toUpper(
+ const char *locale, uint32_t options,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-16 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setText())
+ * and used one or more times for iteration (first() and next()).
+ * If nullptr, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strToTitle
+ * @see ucasemap_toTitle
+ * @stable ICU 59
+ */
+ static int32_t toTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-16 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of char16_ts). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see u_strFoldCase
+ * @stable ICU 59
+ */
+ static int32_t fold(
+ uint32_t options,
+ const char16_t *src, int32_t srcLength,
+ char16_t *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Lowercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToLower
+ * @stable ICU 60
+ */
+ static void utf8ToLower(
+ const char *locale, uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToUpper
+ * @stable ICU 60
+ */
+ static void utf8ToUpper(
+ const char *locale, uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setUText())
+ * and used one or more times for iteration (first() and next()).
+ * If nullptr, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 60
+ */
+ static void utf8ToTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-8 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param sink A ByteSink to which the result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_utf8FoldCase
+ * @stable ICU 60
+ */
+ static void utf8Fold(
+ uint32_t options,
+ StringPiece src, ByteSink &sink, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Lowercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8ToLower
+ * @stable ICU 59
+ */
+ static int32_t utf8ToLower(
+ const char *locale, uint32_t options,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+ /**
+ * Uppercases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8ToUpper
+ * @stable ICU 59
+ */
+ static int32_t utf8ToUpper(
+ const char *locale, uint32_t options,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecases a UTF-8 string and optionally records edits.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options bits.)
+ *
+ * @param locale The locale ID. ("" = root locale, nullptr = default locale.)
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @param iter A break iterator to find the first characters of words that are to be titlecased.
+ * It is set to the source string (setUText())
+ * and used one or more times for iteration (first() and next()).
+ * If nullptr, then a word break iterator for the locale is used
+ * (or something equivalent).
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 59
+ */
+ static int32_t utf8ToTitle(
+ const char *locale, uint32_t options, BreakIterator *iter,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Case-folds a UTF-8 string and optionally records edits.
+ *
+ * Case folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT, U_EDITS_NO_RESET,
+ * U_FOLD_CASE_DEFAULT, U_FOLD_CASE_EXCLUDE_SPECIAL_I.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be nullptr and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Reference to an in/out error code value
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful.
+ * When the result would be longer than destCapacity,
+ * the full length is returned and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucasemap_utf8FoldCase
+ * @stable ICU 59
+ */
+ static int32_t utf8Fold(
+ uint32_t options,
+ const char *src, int32_t srcLength,
+ char *dest, int32_t destCapacity, Edits *edits,
+ UErrorCode &errorCode);
+
+private:
+ CaseMap() = delete;
+ CaseMap(const CaseMap &other) = delete;
+ CaseMap &operator=(const CaseMap &other) = delete;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __CASEMAP_H__
diff --git a/intl/icu/source/common/unicode/char16ptr.h b/intl/icu/source/common/unicode/char16ptr.h
new file mode 100644
index 0000000000..de8182c7ad
--- /dev/null
+++ b/intl/icu/source/common/unicode/char16ptr.h
@@ -0,0 +1,313 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// char16ptr.h
+// created: 2017feb28 Markus W. Scherer
+
+#ifndef __CHAR16PTR_H__
+#define __CHAR16PTR_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <cstddef>
+
+/**
+ * \file
+ * \brief C++ API: char16_t pointer wrappers with
+ * implicit conversion from bit-compatible raw pointer types.
+ * Also conversion functions from char16_t * to UChar * and OldUChar *.
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \def U_ALIASING_BARRIER
+ * Barrier for pointer anti-aliasing optimizations even across function boundaries.
+ * @internal
+ */
+#ifdef U_ALIASING_BARRIER
+ // Use the predefined value.
+#elif (defined(__clang__) || defined(__GNUC__)) && U_PLATFORM != U_PF_BROWSER_NATIVE_CLIENT
+# define U_ALIASING_BARRIER(ptr) asm volatile("" : : "rm"(ptr) : "memory")
+#elif defined(U_IN_DOXYGEN)
+# define U_ALIASING_BARRIER(ptr)
+#endif
+
+/**
+ * char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
+ * @stable ICU 59
+ */
+class U_COMMON_API Char16Ptr final {
+public:
+ /**
+ * Copies the pointer.
+ * @param p pointer
+ * @stable ICU 59
+ */
+ inline Char16Ptr(char16_t *p);
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * Converts the pointer to char16_t *.
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline Char16Ptr(uint16_t *p);
+#endif
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Converts the pointer to char16_t *.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline Char16Ptr(wchar_t *p);
+#endif
+ /**
+ * nullptr constructor.
+ * @param p nullptr
+ * @stable ICU 59
+ */
+ inline Char16Ptr(std::nullptr_t p);
+ /**
+ * Destructor.
+ * @stable ICU 59
+ */
+ inline ~Char16Ptr();
+
+ /**
+ * Pointer access.
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline char16_t *get() const;
+ /**
+ * char16_t pointer access via type conversion (e.g., static_cast).
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline operator char16_t *() const { return get(); }
+
+private:
+ Char16Ptr() = delete;
+
+#ifdef U_ALIASING_BARRIER
+ template<typename T> static char16_t *cast(T *t) {
+ U_ALIASING_BARRIER(t);
+ return reinterpret_cast<char16_t *>(t);
+ }
+
+ char16_t *p_;
+#else
+ union {
+ char16_t *cp;
+ uint16_t *up;
+ wchar_t *wp;
+ } u_;
+#endif
+};
+
+/// \cond
+#ifdef U_ALIASING_BARRIER
+
+Char16Ptr::Char16Ptr(char16_t *p) : p_(p) {}
+#if !U_CHAR16_IS_TYPEDEF
+Char16Ptr::Char16Ptr(uint16_t *p) : p_(cast(p)) {}
+#endif
+#if U_SIZEOF_WCHAR_T==2
+Char16Ptr::Char16Ptr(wchar_t *p) : p_(cast(p)) {}
+#endif
+Char16Ptr::Char16Ptr(std::nullptr_t p) : p_(p) {}
+Char16Ptr::~Char16Ptr() {
+ U_ALIASING_BARRIER(p_);
+}
+
+char16_t *Char16Ptr::get() const { return p_; }
+
+#else
+
+Char16Ptr::Char16Ptr(char16_t *p) { u_.cp = p; }
+#if !U_CHAR16_IS_TYPEDEF
+Char16Ptr::Char16Ptr(uint16_t *p) { u_.up = p; }
+#endif
+#if U_SIZEOF_WCHAR_T==2
+Char16Ptr::Char16Ptr(wchar_t *p) { u_.wp = p; }
+#endif
+Char16Ptr::Char16Ptr(std::nullptr_t p) { u_.cp = p; }
+Char16Ptr::~Char16Ptr() {}
+
+char16_t *Char16Ptr::get() const { return u_.cp; }
+
+#endif
+/// \endcond
+
+/**
+ * const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
+ * @stable ICU 59
+ */
+class U_COMMON_API ConstChar16Ptr final {
+public:
+ /**
+ * Copies the pointer.
+ * @param p pointer
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const char16_t *p);
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * Converts the pointer to char16_t *.
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const uint16_t *p);
+#endif
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Converts the pointer to char16_t *.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * @param p pointer to be converted
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const wchar_t *p);
+#endif
+ /**
+ * nullptr constructor.
+ * @param p nullptr
+ * @stable ICU 59
+ */
+ inline ConstChar16Ptr(const std::nullptr_t p);
+
+ /**
+ * Destructor.
+ * @stable ICU 59
+ */
+ inline ~ConstChar16Ptr();
+
+ /**
+ * Pointer access.
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline const char16_t *get() const;
+ /**
+ * char16_t pointer access via type conversion (e.g., static_cast).
+ * @return the wrapped pointer
+ * @stable ICU 59
+ */
+ inline operator const char16_t *() const { return get(); }
+
+private:
+ ConstChar16Ptr() = delete;
+
+#ifdef U_ALIASING_BARRIER
+ template<typename T> static const char16_t *cast(const T *t) {
+ U_ALIASING_BARRIER(t);
+ return reinterpret_cast<const char16_t *>(t);
+ }
+
+ const char16_t *p_;
+#else
+ union {
+ const char16_t *cp;
+ const uint16_t *up;
+ const wchar_t *wp;
+ } u_;
+#endif
+};
+
+/// \cond
+#ifdef U_ALIASING_BARRIER
+
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) : p_(p) {}
+#if !U_CHAR16_IS_TYPEDEF
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) : p_(cast(p)) {}
+#endif
+#if U_SIZEOF_WCHAR_T==2
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) : p_(cast(p)) {}
+#endif
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) : p_(p) {}
+ConstChar16Ptr::~ConstChar16Ptr() {
+ U_ALIASING_BARRIER(p_);
+}
+
+const char16_t *ConstChar16Ptr::get() const { return p_; }
+
+#else
+
+ConstChar16Ptr::ConstChar16Ptr(const char16_t *p) { u_.cp = p; }
+#if !U_CHAR16_IS_TYPEDEF
+ConstChar16Ptr::ConstChar16Ptr(const uint16_t *p) { u_.up = p; }
+#endif
+#if U_SIZEOF_WCHAR_T==2
+ConstChar16Ptr::ConstChar16Ptr(const wchar_t *p) { u_.wp = p; }
+#endif
+ConstChar16Ptr::ConstChar16Ptr(const std::nullptr_t p) { u_.cp = p; }
+ConstChar16Ptr::~ConstChar16Ptr() {}
+
+const char16_t *ConstChar16Ptr::get() const { return u_.cp; }
+
+#endif
+/// \endcond
+
+/**
+ * Converts from const char16_t * to const UChar *.
+ * Includes an aliasing barrier if available.
+ * @param p pointer
+ * @return p as const UChar *
+ * @stable ICU 59
+ */
+inline const UChar *toUCharPtr(const char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p);
+#endif
+ return reinterpret_cast<const UChar *>(p);
+}
+
+/**
+ * Converts from char16_t * to UChar *.
+ * Includes an aliasing barrier if available.
+ * @param p pointer
+ * @return p as UChar *
+ * @stable ICU 59
+ */
+inline UChar *toUCharPtr(char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p);
+#endif
+ return reinterpret_cast<UChar *>(p);
+}
+
+/**
+ * Converts from const char16_t * to const OldUChar *.
+ * Includes an aliasing barrier if available.
+ * @param p pointer
+ * @return p as const OldUChar *
+ * @stable ICU 59
+ */
+inline const OldUChar *toOldUCharPtr(const char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p);
+#endif
+ return reinterpret_cast<const OldUChar *>(p);
+}
+
+/**
+ * Converts from char16_t * to OldUChar *.
+ * Includes an aliasing barrier if available.
+ * @param p pointer
+ * @return p as OldUChar *
+ * @stable ICU 59
+ */
+inline OldUChar *toOldUCharPtr(char16_t *p) {
+#ifdef U_ALIASING_BARRIER
+ U_ALIASING_BARRIER(p);
+#endif
+ return reinterpret_cast<OldUChar *>(p);
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __CHAR16PTR_H__
diff --git a/intl/icu/source/common/unicode/chariter.h b/intl/icu/source/common/unicode/chariter.h
new file mode 100644
index 0000000000..45f4d984c7
--- /dev/null
+++ b/intl/icu/source/common/unicode/chariter.h
@@ -0,0 +1,734 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************
+*
+* Copyright (C) 1997-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+********************************************************************
+*/
+
+#ifndef CHARITER_H
+#define CHARITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+/**
+ * \file
+ * \brief C++ API: Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * Abstract class that defines an API for forward-only iteration
+ * on text objects.
+ * This is a minimal interface for iteration without random access
+ * or backwards iteration. It is especially useful for wrapping
+ * streams with converters into an object for collation or
+ * normalization.
+ *
+ * <p>Characters can be accessed in two ways: as code units or as
+ * code points.
+ * Unicode code points are 21-bit integers and are the scalar values
+ * of Unicode characters. ICU uses the type UChar32 for them.
+ * Unicode code units are the storage units of a given
+ * Unicode/UCS Transformation Format (a character encoding scheme).
+ * With UTF-16, all code points can be represented with either one
+ * or two code units ("surrogates").
+ * String storage is typically based on code units, while properties
+ * of characters are typically determined using code point values.
+ * Some processes may be designed to work with sequences of code units,
+ * or it may be known that all characters that are important to an
+ * algorithm can be represented with single code units.
+ * Other processes will need to use the code point access functions.</p>
+ *
+ * <p>ForwardCharacterIterator provides nextPostInc() to access
+ * a code unit and advance an internal position into the text object,
+ * similar to a <code>return text[position++]</code>.<br>
+ * It provides next32PostInc() to access a code point and advance an internal
+ * position.</p>
+ *
+ * <p>next32PostInc() assumes that the current position is that of
+ * the beginning of a code point, i.e., of its first code unit.
+ * After next32PostInc(), this will be true again.
+ * In general, access to code units and code points in the same
+ * iteration loop should not be mixed. In UTF-16, if the current position
+ * is on a second code unit (Low Surrogate), then only that code unit
+ * is returned even by next32PostInc().</p>
+ *
+ * <p>For iteration with either function, there are two ways to
+ * check for the end of the iteration. When there are no more
+ * characters in the text object:
+ * <ul>
+ * <li>The hasNext() function returns false.</li>
+ * <li>nextPostInc() and next32PostInc() return DONE
+ * when one attempts to read beyond the end of the text object.</li>
+ * </ul>
+ *
+ * Example:
+ * \code
+ * void function1(ForwardCharacterIterator &it) {
+ * UChar32 c;
+ * while(it.hasNext()) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ *
+ * void function1(ForwardCharacterIterator &it) {
+ * char16_t c;
+ * while((c=it.nextPostInc())!=ForwardCharacterIterator::DONE) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ * </p>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ForwardCharacterIterator : public UObject {
+public:
+ /**
+ * Value returned by most of ForwardCharacterIterator's functions
+ * when the iterator has reached the limits of its iteration.
+ * @stable ICU 2.0
+ */
+ enum { DONE = 0xffff };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ForwardCharacterIterator();
+
+ /**
+ * Returns true when both iterators refer to the same
+ * character in the same character-storage object.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true when both iterators refer to the same
+ * character in the same character-storage object
+ * @stable ICU 2.0
+ */
+ virtual bool operator==(const ForwardCharacterIterator& that) const = 0;
+
+ /**
+ * Returns true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object.
+ * @param that The ForwardCharacterIterator to be compared for inequality
+ * @return true when the iterators refer to different
+ * text-storage objects, or to different characters in the
+ * same text-storage object
+ * @stable ICU 2.0
+ */
+ inline bool operator!=(const ForwardCharacterIterator& that) const;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const = 0;
+
+ /**
+ * Returns a UClassID for this ForwardCharacterIterator ("poor man's
+ * RTTI").<P> Despite the fact that this function is public,
+ * DO NOT CONSIDER IT PART OF CHARACTERITERATOR'S API!
+ * @return a UClassID for this ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const override = 0;
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t nextPostInc(void) = 0;
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void) = 0;
+
+ /**
+ * Returns false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @returns false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext() = 0;
+
+protected:
+ /** Default constructor to be overridden in the implementing class. @stable ICU 2.0*/
+ ForwardCharacterIterator();
+
+ /** Copy constructor to be overridden in the implementing class. @stable ICU 2.0*/
+ ForwardCharacterIterator(const ForwardCharacterIterator &other);
+
+ /**
+ * Assignment operator to be overridden in the implementing class.
+ * @stable ICU 2.0
+ */
+ ForwardCharacterIterator &operator=(const ForwardCharacterIterator&) { return *this; }
+};
+
+/**
+ * Abstract class that defines an API for iteration
+ * on text objects.
+ * This is an interface for forward and backward iteration
+ * and random access into a text object.
+ *
+ * <p>The API provides backward compatibility to the Java and older ICU
+ * CharacterIterator classes but extends them significantly:
+ * <ol>
+ * <li>CharacterIterator is now a subclass of ForwardCharacterIterator.</li>
+ * <li>While the old API functions provided forward iteration with
+ * "pre-increment" semantics, the new one also provides functions
+ * with "post-increment" semantics. They are more efficient and should
+ * be the preferred iterator functions for new implementations.
+ * The backward iteration always had "pre-decrement" semantics, which
+ * are efficient.</li>
+ * <li>Just like ForwardCharacterIterator, it provides access to
+ * both code units and code points. Code point access versions are available
+ * for the old and the new iteration semantics.</li>
+ * <li>There are new functions for setting and moving the current position
+ * without returning a character, for efficiency.</li>
+ * </ol>
+ *
+ * See ForwardCharacterIterator for examples for using the new forward iteration
+ * functions. For backward iteration, there is also a hasPrevious() function
+ * that can be used analogously to hasNext().
+ * The old functions work as before and are shown below.</p>
+ *
+ * <p>Examples for some of the new functions:</p>
+ *
+ * Forward iteration with hasNext():
+ * \code
+ * void forward1(CharacterIterator &it) {
+ * UChar32 c;
+ * for(it.setToStart(); it.hasNext();) {
+ * c=it.next32PostInc();
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Forward iteration more similar to loops with the old forward iteration,
+ * showing a way to convert simple for() loops:
+ * \code
+ * void forward2(CharacterIterator &it) {
+ * char16_t c;
+ * for(c=it.firstPostInc(); c!=CharacterIterator::DONE; c=it.nextPostInc()) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Backward iteration with setToEnd() and hasPrevious():
+ * \code
+ * void backward1(CharacterIterator &it) {
+ * UChar32 c;
+ * for(it.setToEnd(); it.hasPrevious();) {
+ * c=it.previous32();
+ * // use c
+ * }
+ * }
+ * \endcode
+ * Backward iteration with a more traditional for() loop:
+ * \code
+ * void backward2(CharacterIterator &it) {
+ * char16_t c;
+ * for(c=it.last(); c!=CharacterIterator::DONE; c=it.previous()) {
+ * // use c
+ * }
+ * }
+ * \endcode
+ *
+ * Example for random access:
+ * \code
+ * void random(CharacterIterator &it) {
+ * // set to the third code point from the beginning
+ * it.move32(3, CharacterIterator::kStart);
+ * // get a code point from here without moving the position
+ * UChar32 c=it.current32();
+ * // get the position
+ * int32_t pos=it.getIndex();
+ * // get the previous code unit
+ * char16_t u=it.previous();
+ * // move back one more code unit
+ * it.move(-1, CharacterIterator::kCurrent);
+ * // set the position back to where it was
+ * // and read the same code point c and move beyond it
+ * it.setIndex(pos);
+ * if(c!=it.next32PostInc()) {
+ * exit(1); // CharacterIterator inconsistent
+ * }
+ * }
+ * \endcode
+ *
+ * <p>Examples, especially for the old API:</p>
+ *
+ * Function processing characters, in this example simple output
+ * <pre>
+ * \code
+ * void processChar( char16_t c )
+ * {
+ * cout << " " << c;
+ * }
+ * \endcode
+ * </pre>
+ * Traverse the text from start to finish
+ * <pre>
+ * \code
+ * void traverseForward(CharacterIterator& iter)
+ * {
+ * for(char16_t c = iter.first(); c != CharacterIterator::DONE; c = iter.next()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Traverse the text backwards, from end to start
+ * <pre>
+ * \code
+ * void traverseBackward(CharacterIterator& iter)
+ * {
+ * for(char16_t c = iter.last(); c != CharacterIterator::DONE; c = iter.previous()) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Traverse both forward and backward from a given position in the text.
+ * Calls to notBoundary() in this example represents some additional stopping criteria.
+ * <pre>
+ * \code
+ * void traverseOut(CharacterIterator& iter, int32_t pos)
+ * {
+ * char16_t c;
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.next()) {}
+ * int32_t end = iter.getIndex();
+ * for (c = iter.setIndex(pos);
+ * c != CharacterIterator::DONE && (Unicode::isLetter(c) || Unicode::isDigit(c));
+ * c = iter.previous()) {}
+ * int32_t start = iter.getIndex() + 1;
+ *
+ * cout << "start: " << start << " end: " << end << endl;
+ * for (c = iter.setIndex(start); iter.getIndex() < end; c = iter.next() ) {
+ * processChar(c);
+ * }
+ * }
+ * \endcode
+ * </pre>
+ * Creating a StringCharacterIterator and calling the test functions
+ * <pre>
+ * \code
+ * void CharacterIterator_Example( void )
+ * {
+ * cout << endl << "===== CharacterIterator_Example: =====" << endl;
+ * UnicodeString text("Ein kleiner Satz.");
+ * StringCharacterIterator iterator(text);
+ * cout << "----- traverseForward: -----------" << endl;
+ * traverseForward( iterator );
+ * cout << endl << endl << "----- traverseBackward: ----------" << endl;
+ * traverseBackward( iterator );
+ * cout << endl << endl << "----- traverseOut: ---------------" << endl;
+ * traverseOut( iterator, 7 );
+ * cout << endl << endl << "-----" << endl;
+ * }
+ * \endcode
+ * </pre>
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API CharacterIterator : public ForwardCharacterIterator {
+public:
+ /**
+ * Origin enumeration for the move() and move32() functions.
+ * @stable ICU 2.0
+ */
+ enum EOrigin { kStart, kCurrent, kEnd };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~CharacterIterator();
+
+ /**
+ * Returns a pointer to a new CharacterIterator of the same
+ * concrete class as this one, and referring to the same
+ * character in the same text-storage object as this one. The
+ * caller is responsible for deleting the new clone.
+ * @return a pointer to a new CharacterIterator
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator* clone() const = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t first(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t firstPostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code unit,
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void);
+
+ /**
+ * Sets the iterator to refer to the first code unit or code point in its
+ * iteration range. This can be used to begin a forward
+ * iteration with nextPostInc() or next32PostInc().
+ * @return the start position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToStart();
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t last(void) = 0;
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void) = 0;
+
+ /**
+ * Sets the iterator to the end of its iteration range, just behind
+ * the last code unit or code point. This can be used to begin a backward
+ * iteration with previous() or previous32().
+ * @return the end position of the iteration range
+ * @stable ICU 2.0
+ */
+ inline int32_t setToEnd();
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t setIndex(int32_t position) = 0;
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the "position"-th code unit in the text-storage object
+ * @return the "position"-th code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position) = 0;
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t current(void) const = 0;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the current code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const = 0;
+
+ /**
+ * Advances to the next code unit in the iteration range
+ * (toward endIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the next code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t next(void) = 0;
+
+ /**
+ * Advances to the next code point in the iteration range
+ * (toward endIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void) = 0;
+
+ /**
+ * Advances to the previous code unit in the iteration range
+ * (toward startIndex()), and returns that code unit. If there are
+ * no more code units to return, returns DONE.
+ * @return the previous code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t previous(void) = 0;
+
+ /**
+ * Advances to the previous code point in the iteration range
+ * (toward startIndex()), and returns that code point. If there are
+ * no more code points to return, returns DONE.
+ * @return the previous code point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void) = 0;
+
+ /**
+ * Returns false if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * before the current position in the iteration range, return true otherwise.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious() = 0;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character returned by first(). Since it's
+ * possible to create an iterator that iterates across only
+ * part of a text-storage object, this number isn't
+ * necessarily 0.
+ * @returns the numeric index in the underlying text-storage
+ * object of the character returned by first().
+ * @stable ICU 2.0
+ */
+ inline int32_t startIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @return the numeric index in the underlying text-storage
+ * object of the position immediately BEYOND the character
+ * returned by last().
+ * @stable ICU 2.0
+ */
+ inline int32_t endIndex(void) const;
+
+ /**
+ * Returns the numeric index in the underlying text-storage
+ * object of the character the iterator currently refers to
+ * (i.e., the character returned by current()).
+ * @return the numeric index in the text-storage object of
+ * the character the iterator currently refers to
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
+
+ /**
+ * Returns the length of the entire text in the underlying
+ * text-storage object.
+ * @return the length of the entire text in the text-storage object
+ * @stable ICU 2.0
+ */
+ inline int32_t getLength() const;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+#ifdef move32
+ // One of the system headers right now is sometimes defining a conflicting macro we don't use
+#undef move32
+#endif
+ virtual int32_t move32(int32_t delta, EOrigin origin) = 0;
+
+ /**
+ * Copies the text under iteration into the UnicodeString
+ * referred to by "result".
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) = 0;
+
+protected:
+ /**
+ * Empty constructor.
+ * @stable ICU 2.0
+ */
+ CharacterIterator();
+
+ /**
+ * Constructor, just setting the length field in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length);
+
+ /**
+ * Constructor, just setting the length and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t position);
+
+ /**
+ * Constructor, just setting the length, start, end, and position fields in this base class.
+ * @stable ICU 2.0
+ */
+ CharacterIterator(int32_t length, int32_t textBegin, int32_t textEnd, int32_t position);
+
+ /**
+ * Copy constructor.
+ *
+ * @param that The CharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ CharacterIterator(const CharacterIterator &that);
+
+ /**
+ * Assignment operator. Sets this CharacterIterator to have the same behavior,
+ * as the one passed in.
+ * @param that The CharacterIterator passed in.
+ * @return the newly set CharacterIterator.
+ * @stable ICU 2.0
+ */
+ CharacterIterator &operator=(const CharacterIterator &that);
+
+ /**
+ * Base class text length field.
+ * Necessary this for correct getText() and hashCode().
+ * @stable ICU 2.0
+ */
+ int32_t textLength;
+
+ /**
+ * Base class field for the current position.
+ * @stable ICU 2.0
+ */
+ int32_t pos;
+
+ /**
+ * Base class field for the start of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t begin;
+
+ /**
+ * Base class field for the end of the iteration range.
+ * @stable ICU 2.0
+ */
+ int32_t end;
+};
+
+inline bool
+ForwardCharacterIterator::operator!=(const ForwardCharacterIterator& that) const {
+ return !operator==(that);
+}
+
+inline int32_t
+CharacterIterator::setToStart() {
+ return move(0, kStart);
+}
+
+inline int32_t
+CharacterIterator::setToEnd() {
+ return move(0, kEnd);
+}
+
+inline int32_t
+CharacterIterator::startIndex(void) const {
+ return begin;
+}
+
+inline int32_t
+CharacterIterator::endIndex(void) const {
+ return end;
+}
+
+inline int32_t
+CharacterIterator::getIndex(void) const {
+ return pos;
+}
+
+inline int32_t
+CharacterIterator::getLength(void) const {
+ return textLength;
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/dbbi.h b/intl/icu/source/common/unicode/dbbi.h
new file mode 100644
index 0000000000..3de9cc3814
--- /dev/null
+++ b/intl/icu/source/common/unicode/dbbi.h
@@ -0,0 +1,48 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2006,2013 IBM Corp. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 12/1/99 rgillam Complete port from Java.
+* 01/13/2000 helena Added UErrorCode to ctors.
+**********************************************************************
+*/
+
+#ifndef DBBI_H
+#define DBBI_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/rbbi.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * \file
+ * \brief C++ API: Dictionary Based Break Iterator
+ */
+
+U_NAMESPACE_BEGIN
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * An obsolete subclass of RuleBasedBreakIterator. Handling of dictionary-
+ * based break iteration has been folded into the base class. This class
+ * is deprecated as of ICU 3.6.
+ * @deprecated ICU 3.6
+ */
+typedef RuleBasedBreakIterator DictionaryBasedBreakIterator;
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/docmain.h b/intl/icu/source/common/unicode/docmain.h
new file mode 100644
index 0000000000..1349f49703
--- /dev/null
+++ b/intl/icu/source/common/unicode/docmain.h
@@ -0,0 +1,237 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1997-2012, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ *
+ * FILE NAME: DOCMAIN.h
+ *
+ * Date Name Description
+ * 12/11/2000 Ram Creation.
+ */
+
+/**
+ * \file
+ * \brief (Non API- contains Doxygen definitions)
+ *
+ * This file contains documentation for Doxygen and does not have
+ * any significance with respect to C or C++ API
+ */
+
+/*! \mainpage
+ *
+ * \section API API Reference Usage
+ *
+ * <h3>C++ Programmers:</h3>
+ * <p>Use <a href="hierarchy.html">Class Hierarchy</a> or <a href="classes.html"> Alphabetical List </a>
+ * or <a href="annotated.html"> Compound List</a>
+ * to find the class you are interested in. For example, to find BreakIterator,
+ * you can go to the <a href="classes.html"> Alphabetical List</a>, then click on
+ * "BreakIterator". Once you are at the class, you will find an inheritance
+ * chart, a list of the public members, a detailed description of the class,
+ * then detailed member descriptions.</p>
+ *
+ * <h3>C Programmers:</h3>
+ * <p>Use <a href="#Module">Module List</a> or <a href="globals_u.html">File Members</a>
+ * to find a list of all the functions and constants.
+ * For example, to find BreakIterator functions you would click on
+ * <a href="files.html"> File List</a>,
+ * then find "ubrk.h" and click on it. You will find descriptions of Defines,
+ * Typedefs, Enumerations, and Functions, with detailed descriptions below.
+ * If you want to find a specific function, such as ubrk_next(), then click
+ * first on <a href="globals.html"> File Members</a>, then use your browser
+ * Find dialog to search for "ubrk_next()".</p>
+ *
+ *
+ * <h3>API References for Previous Releases</h3>
+ * <p>The API References for each release of ICU are also available as
+ * a zip file from the ICU
+ * <a href="https://icu.unicode.org/download">download page</a>.</p>
+ *
+ * <hr>
+ *
+ * <h2>Architecture (User's Guide)</h2>
+ * <ul>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/">Introduction</a></li>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/i18n">Internationalization</a></li>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/design">Locale Model, Multithreading, Error Handling, etc.</a></li>
+ * <li><a href="https://unicode-org.github.io/icu/userguide/conversion">Conversion</a></li>
+ * </ul>
+ *
+ * <hr>
+ *\htmlonly <h2><a NAME="Module">Module List</a></h2> \endhtmlonly
+ * <table border="1" cols="3" align="center">
+ * <tr>
+ * <td><strong>Module Name</strong></td>
+ * <td><strong>C</strong></td>
+ * <td><strong>C++</strong></td>
+ * </tr>
+ * <tr>
+ * <td>Basic Types and Constants</td>
+ * <td>utypes.h</td>
+ * <td>utypes.h</td>
+ * </tr>
+ * <tr>
+ * <td>Strings and Character Iteration</td>
+ * <td>ustring.h, utf8.h, utf16.h, icu::StringPiece, UText, UCharIterator, icu::ByteSink</td>
+ * <td>icu::UnicodeString, icu::CharacterIterator, icu::Appendable, icu::StringPiece,icu::ByteSink</td>
+ * </tr>
+ * <tr>
+ * <td>Unicode Character<br/>Properties and Names</td>
+ * <td>uchar.h, uscript.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Sets of Unicode Code Points and Strings</td>
+ * <td>uset.h</td>
+ * <td>icu::UnicodeSet</td>
+ * </tr>
+ * <tr>
+ * <td>Maps from Unicode Code Points to Integer Values</td>
+ * <td>ucptrie.h, umutablecptrie.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Maps from Strings to Integer Values</td>
+ * <td>(no C API)</td>
+ * <td>icu::BytesTrie, icu::UCharsTrie</td>
+ * </tr>
+ * <tr>
+ * <td>Codepage Conversion</td>
+ * <td>ucnv.h, ucnvsel.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Codepage Detection</td>
+ * <td>ucsdet.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Unicode Text Compression</td>
+ * <td>ucnv.h<br/>(encoding name "SCSU" or "BOCU-1")</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Locales </td>
+ * <td>uloc.h</a></td>
+ * <td>icu::Locale, icu::LocaleBuilder, icu::LocaleMatcher</td>
+ * </tr>
+ * <tr>
+ * <td>Resource Bundles</td>
+ * <td>ures.h</td>
+ * <td>icu::ResourceBundle</td>
+ * </tr>
+ * <tr>
+ * <td>Normalization</td>
+ * <td>unorm2.h</td>
+ * <td>icu::Normalizer2</td>
+ * </tr>
+ * <tr>
+ * <td>Calendars and Time Zones</td>
+ * <td>ucal.h</td>
+ * <td>icu::Calendar, icu::TimeZone</td>
+ * </tr>
+ * <tr>
+ * <td>Date and Time Formatting</td>
+ * <td>udat.h</td>
+ * <td>icu::DateFormat</td>
+ * </tr>
+ * <tr>
+ * <td>Message Formatting</td>
+ * <td>umsg.h</td>
+ * <td>icu::MessageFormat</td>
+ * </tr>
+ * <tr>
+ * <td>List Formatting</td>
+ * <td>ulistformatter.h</td>
+ * <td>icu::ListFormatter</td>
+ * </tr>
+ * <tr>
+ * <td>Number Formatting<br/>(includes currency and unit formatting)</td>
+ * <td>unumberformatter.h, unum.h, usimplenumberformatter.h</td>
+ * <td>icu::number::NumberFormatter (ICU 60+) or icu::NumberFormat (older versions)<br>icu::number::SimpleNumberFormatter (ICU 73+)</td>
+ * </tr>
+ * <tr>
+ * <td>Number Range Formatting<br />(includes currency and unit ranges)</td>
+ * <td>unumberrangeformatter.h</td>
+ * <td>icu::number::NumberRangeFormatter</td>
+ * </tr>
+ * <tr>
+ * <td>Number Spellout<br/>(Rule Based Number Formatting)</td>
+ * <td>unum.h<br/>(use UNUM_SPELLOUT)</td>
+ * <td>icu::RuleBasedNumberFormat</td>
+ * </tr>
+ * <tr>
+ * <td>Text Transformation<br/>(Transliteration)</td>
+ * <td>utrans.h</td>
+ * <td>icu::Transliterator</td>
+ * </tr>
+ * <tr>
+ * <td>Bidirectional Algorithm</td>
+ * <td>ubidi.h, ubiditransform.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Arabic Shaping</td>
+ * <td>ushape.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Collation</td>
+ * <td>ucol.h</td>
+ * <td>icu::Collator</td>
+ * </tr>
+ * <tr>
+ * <td>String Searching</td>
+ * <td>usearch.h</td>
+ * <td>icu::StringSearch</td>
+ * </tr>
+ * <tr>
+ * <td>Index Characters/<br/>Bucketing for Sorted Lists</td>
+ * <td>(no C API)</td>
+ * <td>icu::AlphabeticIndex</td>
+ * </tr>
+ * <tr>
+ * <td>Text Boundary Analysis<br/>(Break Iteration)</td>
+ * <td>ubrk.h</td>
+ * <td>icu::BreakIterator</td>
+ * </tr>
+ * <tr>
+ * <td>Regular Expressions</td>
+ * <td>uregex.h</td>
+ * <td>icu::RegexPattern, icu::RegexMatcher</td>
+ * </tr>
+ * <tr>
+ * <td>StringPrep</td>
+ * <td>usprep.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>International Domain Names in Applications:<br/>
+ * UTS #46 in C/C++, IDNA2003 only via C API</td>
+ * <td>uidna.h</td>
+ * <td>idna.h</td>
+ * </tr>
+ * <tr>
+ * <td>Identifier Spoofing & Confusability</td>
+ * <td>uspoof.h</td>
+ * <td>C API</td>
+ * <tr>
+ * <td>Universal Time Scale</td>
+ * <td>utmscale.h</td>
+ * <td>C API</td>
+ * </tr>
+ * <tr>
+ * <td>Paragraph Layout / Complex Text Layout</td>
+ * <td>playout.h</td>
+ * <td>icu::ParagraphLayout</td>
+ * </tr>
+ * <tr>
+ * <td>ICU I/O</td>
+ * <td>ustdio.h</td>
+ * <td>ustream.h</td>
+ * </tr>
+ * </table>
+ * <i>This main page is generated from docmain.h</i>
+ */
diff --git a/intl/icu/source/common/unicode/dtintrv.h b/intl/icu/source/common/unicode/dtintrv.h
new file mode 100644
index 0000000000..8c172eb7a5
--- /dev/null
+++ b/intl/icu/source/common/unicode/dtintrv.h
@@ -0,0 +1,164 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2008-2009, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*
+* File DTINTRV.H
+*
+*******************************************************************************
+*/
+
+#ifndef __DTINTRV_H__
+#define __DTINTRV_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Date Interval data type
+ */
+
+U_NAMESPACE_BEGIN
+
+
+/**
+ * This class represents a date interval.
+ * It is a pair of UDate representing from UDate 1 to UDate 2.
+ * @stable ICU 4.0
+**/
+class U_COMMON_API DateInterval : public UObject {
+public:
+
+ /**
+ * Construct a DateInterval given a from date and a to date.
+ * @param fromDate The from date in date interval.
+ * @param toDate The to date in date interval.
+ * @stable ICU 4.0
+ */
+ DateInterval(UDate fromDate, UDate toDate);
+
+ /**
+ * destructor
+ * @stable ICU 4.0
+ */
+ virtual ~DateInterval();
+
+ /**
+ * Get the from date.
+ * @return the from date in dateInterval.
+ * @stable ICU 4.0
+ */
+ inline UDate getFromDate() const;
+
+ /**
+ * Get the to date.
+ * @return the to date in dateInterval.
+ * @stable ICU 4.0
+ */
+ inline UDate getToDate() const;
+
+
+ /**
+ * Return the class ID for this class. This is useful only for comparing to
+ * a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @stable ICU 4.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override. This
+ * method is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and clone()
+ * methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @stable ICU 4.0
+ */
+ virtual UClassID getDynamicClassID(void) const override;
+
+
+ /**
+ * Copy constructor.
+ * @stable ICU 4.0
+ */
+ DateInterval(const DateInterval& other);
+
+ /**
+ * Default assignment operator
+ * @stable ICU 4.0
+ */
+ DateInterval& operator=(const DateInterval&);
+
+ /**
+ * Equality operator.
+ * @return true if the two DateIntervals are the same
+ * @stable ICU 4.0
+ */
+ virtual bool operator==(const DateInterval& other) const;
+
+ /**
+ * Non-equality operator
+ * @return true if the two DateIntervals are not the same
+ * @stable ICU 4.0
+ */
+ inline bool operator!=(const DateInterval& other) const;
+
+
+ /**
+ * clone this object.
+ * The caller owns the result and should delete it when done.
+ * @return a cloned DateInterval
+ * @stable ICU 4.0
+ */
+ virtual DateInterval* clone() const;
+
+private:
+ /**
+ * Default constructor, not implemented.
+ */
+ DateInterval() = delete;
+
+ UDate fromDate;
+ UDate toDate;
+
+} ;// end class DateInterval
+
+
+inline UDate
+DateInterval::getFromDate() const {
+ return fromDate;
+}
+
+
+inline UDate
+DateInterval::getToDate() const {
+ return toDate;
+}
+
+
+inline bool
+DateInterval::operator!=(const DateInterval& other) const {
+ return ( !operator==(other) );
+}
+
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/edits.h b/intl/icu/source/common/unicode/edits.h
new file mode 100644
index 0000000000..dda9d3ca75
--- /dev/null
+++ b/intl/icu/source/common/unicode/edits.h
@@ -0,0 +1,531 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// edits.h
+// created: 2016dec30 Markus W. Scherer
+
+#ifndef __EDITS_H__
+#define __EDITS_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: C++ class Edits for low-level string transformations on styled text.
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * Records lengths of string edits but not replacement text. Supports replacements, insertions, deletions
+ * in linear progression. Does not support moving/reordering of text.
+ *
+ * There are two types of edits: <em>change edits</em> and <em>no-change edits</em>. Add edits to
+ * instances of this class using {@link #addReplace(int32_t, int32_t)} (for change edits) and
+ * {@link #addUnchanged(int32_t)} (for no-change edits). Change edits are retained with full granularity,
+ * whereas adjacent no-change edits are always merged together. In no-change edits, there is a one-to-one
+ * mapping between code points in the source and destination strings.
+ *
+ * After all edits have been added, instances of this class should be considered immutable, and an
+ * {@link Edits::Iterator} can be used for queries.
+ *
+ * There are four flavors of Edits::Iterator:
+ *
+ * <ul>
+ * <li>{@link #getFineIterator()} retains full granularity of change edits.
+ * <li>{@link #getFineChangesIterator()} retains full granularity of change edits, and when calling
+ * next() on the iterator, skips over no-change edits (unchanged regions).
+ * <li>{@link #getCoarseIterator()} treats adjacent change edits as a single edit. (Adjacent no-change
+ * edits are automatically merged during the construction phase.)
+ * <li>{@link #getCoarseChangesIterator()} treats adjacent change edits as a single edit, and when
+ * calling next() on the iterator, skips over no-change edits (unchanged regions).
+ * </ul>
+ *
+ * For example, consider the string "abcßDeF", which case-folds to "abcssdef". This string has the
+ * following fine edits:
+ * <ul>
+ * <li>abc ⇨ abc (no-change)
+ * <li>ß ⇨ ss (change)
+ * <li>D ⇨ d (change)
+ * <li>e ⇨ e (no-change)
+ * <li>F ⇨ f (change)
+ * </ul>
+ * and the following coarse edits (note how adjacent change edits get merged together):
+ * <ul>
+ * <li>abc ⇨ abc (no-change)
+ * <li>ßD ⇨ ssd (change)
+ * <li>e ⇨ e (no-change)
+ * <li>F ⇨ f (change)
+ * </ul>
+ *
+ * The "fine changes" and "coarse changes" iterators will step through only the change edits when their
+ * `Edits::Iterator::next()` methods are called. They are identical to the non-change iterators when
+ * their `Edits::Iterator::findSourceIndex()` or `Edits::Iterator::findDestinationIndex()`
+ * methods are used to walk through the string.
+ *
+ * For examples of how to use this class, see the test `TestCaseMapEditsIteratorDocs` in
+ * UCharacterCaseTest.java.
+ *
+ * An Edits object tracks a separate UErrorCode, but ICU string transformation functions
+ * (e.g., case mapping functions) merge any such errors into their API's UErrorCode.
+ *
+ * @stable ICU 59
+ */
+class U_COMMON_API Edits final : public UMemory {
+public:
+ /**
+ * Constructs an empty object.
+ * @stable ICU 59
+ */
+ Edits() :
+ array(stackArray), capacity(STACK_CAPACITY), length(0), delta(0), numChanges(0),
+ errorCode_(U_ZERO_ERROR) {}
+ /**
+ * Copy constructor.
+ * @param other source edits
+ * @stable ICU 60
+ */
+ Edits(const Edits &other) :
+ array(stackArray), capacity(STACK_CAPACITY), length(other.length),
+ delta(other.delta), numChanges(other.numChanges),
+ errorCode_(other.errorCode_) {
+ copyArray(other);
+ }
+ /**
+ * Move constructor, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * @param src source edits
+ * @stable ICU 60
+ */
+ Edits(Edits &&src) noexcept :
+ array(stackArray), capacity(STACK_CAPACITY), length(src.length),
+ delta(src.delta), numChanges(src.numChanges),
+ errorCode_(src.errorCode_) {
+ moveArray(src);
+ }
+
+ /**
+ * Destructor.
+ * @stable ICU 59
+ */
+ ~Edits();
+
+ /**
+ * Assignment operator.
+ * @param other source edits
+ * @return *this
+ * @stable ICU 60
+ */
+ Edits &operator=(const Edits &other);
+
+ /**
+ * Move assignment operator, might leave src empty.
+ * This object will have the same contents that the source object had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source edits
+ * @return *this
+ * @stable ICU 60
+ */
+ Edits &operator=(Edits &&src) noexcept;
+
+ /**
+ * Resets the data but may not release memory.
+ * @stable ICU 59
+ */
+ void reset() noexcept;
+
+ /**
+ * Adds a no-change edit: a record for an unchanged segment of text.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @stable ICU 59
+ */
+ void addUnchanged(int32_t unchangedLength);
+ /**
+ * Adds a change edit: a record for a text replacement/insertion/deletion.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @stable ICU 59
+ */
+ void addReplace(int32_t oldLength, int32_t newLength);
+ /**
+ * Sets the UErrorCode if an error occurred while recording edits.
+ * Preserves older error codes in the outErrorCode.
+ * Normally called from inside ICU string transformation functions, not user code.
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while recording edits.
+ * Otherwise unchanged.
+ * @return true if U_FAILURE(outErrorCode)
+ * @stable ICU 59
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+ /**
+ * How much longer is the new text compared with the old text?
+ * @return new length minus old length
+ * @stable ICU 59
+ */
+ int32_t lengthDelta() const { return delta; }
+ /**
+ * @return true if there are any change edits
+ * @stable ICU 59
+ */
+ UBool hasChanges() const { return numChanges != 0; }
+
+ /**
+ * @return the number of change edits
+ * @stable ICU 60
+ */
+ int32_t numberOfChanges() const { return numChanges; }
+
+ /**
+ * Access to the list of edits.
+ *
+ * At any moment in time, an instance of this class points to a single edit: a "window" into a span
+ * of the source string and the corresponding span of the destination string. The source string span
+ * starts at {@link #sourceIndex()} and runs for {@link #oldLength()} chars; the destination string
+ * span starts at {@link #destinationIndex()} and runs for {@link #newLength()} chars.
+ *
+ * The iterator can be moved between edits using the `next()`, `findSourceIndex(int32_t, UErrorCode &)`,
+ * and `findDestinationIndex(int32_t, UErrorCode &)` methods.
+ * Calling any of these methods mutates the iterator to make it point to the corresponding edit.
+ *
+ * For more information, see the documentation for {@link Edits}.
+ *
+ * @see getCoarseIterator
+ * @see getFineIterator
+ * @stable ICU 59
+ */
+ struct U_COMMON_API Iterator final : public UMemory {
+ /**
+ * Default constructor, empty iterator.
+ * @stable ICU 60
+ */
+ Iterator() :
+ array(nullptr), index(0), length(0),
+ remaining(0), onlyChanges_(false), coarse(false),
+ dir(0), changed(false), oldLength_(0), newLength_(0),
+ srcIndex(0), replIndex(0), destIndex(0) {}
+ /**
+ * Copy constructor.
+ * @stable ICU 59
+ */
+ Iterator(const Iterator &other) = default;
+ /**
+ * Assignment operator.
+ * @stable ICU 59
+ */
+ Iterator &operator=(const Iterator &other) = default;
+
+ /**
+ * Advances the iterator to the next edit.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if there is another edit
+ * @stable ICU 59
+ */
+ UBool next(UErrorCode &errorCode) { return next(onlyChanges_, errorCode); }
+
+ /**
+ * Moves the iterator to the edit that contains the source index.
+ * The source index may be found in a no-change edit
+ * even if normal iteration would skip no-change edits.
+ * Normal iteration can continue from a found edit.
+ *
+ * The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * The iterator state after this search is undefined
+ * if the source index is out of bounds for the source string.
+ *
+ * @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the edit for the source index was found
+ * @stable ICU 59
+ */
+ UBool findSourceIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, true, errorCode) == 0;
+ }
+
+ /**
+ * Moves the iterator to the edit that contains the destination index.
+ * The destination index may be found in a no-change edit
+ * even if normal iteration would skip no-change edits.
+ * Normal iteration can continue from a found edit.
+ *
+ * The iterator state before this search logically does not matter.
+ * (It may affect the performance of the search.)
+ *
+ * The iterator state after this search is undefined
+ * if the source index is out of bounds for the source string.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the edit for the destination index was found
+ * @stable ICU 60
+ */
+ UBool findDestinationIndex(int32_t i, UErrorCode &errorCode) {
+ return findIndex(i, false, errorCode) == 0;
+ }
+
+ /**
+ * Computes the destination index corresponding to the given source index.
+ * If the source index is inside a change edit (not at its start),
+ * then the destination index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i source index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return destination index; undefined if i is not 0..string length
+ * @stable ICU 60
+ */
+ int32_t destinationIndexFromSourceIndex(int32_t i, UErrorCode &errorCode);
+
+ /**
+ * Computes the source index corresponding to the given destination index.
+ * If the destination index is inside a change edit (not at its start),
+ * then the source index at the end of that edit is returned,
+ * since there is no information about index mapping inside a change edit.
+ *
+ * (This means that indexes to the start and middle of an edit,
+ * for example around a grapheme cluster, are mapped to indexes
+ * encompassing the entire edit.
+ * The alternative, mapping an interior index to the start,
+ * would map such an interval to an empty one.)
+ *
+ * This operation will usually but not always modify this object.
+ * The iterator state after this search is undefined.
+ *
+ * @param i destination index
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return source index; undefined if i is not 0..string length
+ * @stable ICU 60
+ */
+ int32_t sourceIndexFromDestinationIndex(int32_t i, UErrorCode &errorCode);
+
+ /**
+ * Returns whether the edit currently represented by the iterator is a change edit.
+ *
+ * @return true if this edit replaces oldLength() units with newLength() different ones.
+ * false if oldLength units remain unchanged.
+ * @stable ICU 59
+ */
+ UBool hasChange() const { return changed; }
+
+ /**
+ * The length of the current span in the source string, which starts at {@link #sourceIndex}.
+ *
+ * @return the number of units in the original string which are replaced or remain unchanged.
+ * @stable ICU 59
+ */
+ int32_t oldLength() const { return oldLength_; }
+
+ /**
+ * The length of the current span in the destination string, which starts at
+ * {@link #destinationIndex}, or in the replacement string, which starts at
+ * {@link #replacementIndex}.
+ *
+ * @return the number of units in the modified string, if hasChange() is true.
+ * Same as oldLength if hasChange() is false.
+ * @stable ICU 59
+ */
+ int32_t newLength() const { return newLength_; }
+
+ /**
+ * The start index of the current span in the source string; the span has length
+ * {@link #oldLength}.
+ *
+ * @return the current index into the source string
+ * @stable ICU 59
+ */
+ int32_t sourceIndex() const { return srcIndex; }
+
+ /**
+ * The start index of the current span in the replacement string; the span has length
+ * {@link #newLength}. Well-defined only if the current edit is a change edit.
+ *
+ * The *replacement string* is the concatenation of all substrings of the destination
+ * string corresponding to change edits.
+ *
+ * This method is intended to be used together with operations that write only replacement
+ * characters (e.g. operations specifying the \ref U_OMIT_UNCHANGED_TEXT option).
+ * The source string can then be modified in-place.
+ *
+ * @return the current index into the replacement-characters-only string,
+ * not counting unchanged spans
+ * @stable ICU 59
+ */
+ int32_t replacementIndex() const {
+ // TODO: Throw an exception if we aren't in a change edit?
+ return replIndex;
+ }
+
+ /**
+ * The start index of the current span in the destination string; the span has length
+ * {@link #newLength}.
+ *
+ * @return the current index into the full destination string
+ * @stable ICU 59
+ */
+ int32_t destinationIndex() const { return destIndex; }
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * A string representation of the current edit represented by the iterator for debugging. You
+ * should not depend on the contents of the return string.
+ * @internal
+ */
+ UnicodeString& toString(UnicodeString& appendTo) const;
+#endif // U_HIDE_INTERNAL_API
+
+ private:
+ friend class Edits;
+
+ Iterator(const uint16_t *a, int32_t len, UBool oc, UBool crs);
+
+ int32_t readLength(int32_t head);
+ void updateNextIndexes();
+ void updatePreviousIndexes();
+ UBool noNext();
+ UBool next(UBool onlyChanges, UErrorCode &errorCode);
+ UBool previous(UErrorCode &errorCode);
+ /** @return -1: error or i<0; 0: found; 1: i>=string length */
+ int32_t findIndex(int32_t i, UBool findSource, UErrorCode &errorCode);
+
+ const uint16_t *array;
+ int32_t index, length;
+ // 0 if we are not within compressed equal-length changes.
+ // Otherwise the number of remaining changes, including the current one.
+ int32_t remaining;
+ UBool onlyChanges_, coarse;
+
+ int8_t dir; // iteration direction: back(<0), initial(0), forward(>0)
+ UBool changed;
+ int32_t oldLength_, newLength_;
+ int32_t srcIndex, replIndex, destIndex;
+ };
+
+ /**
+ * Returns an Iterator for coarse-grained change edits
+ * (adjacent change edits are treated as one).
+ * Can be used to perform simple string updates.
+ * Skips no-change edits.
+ * @return an Iterator that merges adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getCoarseChangesIterator() const {
+ return Iterator(array, length, true, true);
+ }
+
+ /**
+ * Returns an Iterator for coarse-grained change and no-change edits
+ * (adjacent change edits are treated as one).
+ * Can be used to perform simple string updates.
+ * Adjacent change edits are treated as one edit.
+ * @return an Iterator that merges adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getCoarseIterator() const {
+ return Iterator(array, length, false, true);
+ }
+
+ /**
+ * Returns an Iterator for fine-grained change edits
+ * (full granularity of change edits is retained).
+ * Can be used for modifying styled text.
+ * Skips no-change edits.
+ * @return an Iterator that separates adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getFineChangesIterator() const {
+ return Iterator(array, length, true, false);
+ }
+
+ /**
+ * Returns an Iterator for fine-grained change and no-change edits
+ * (full granularity of change edits is retained).
+ * Can be used for modifying styled text.
+ * @return an Iterator that separates adjacent changes.
+ * @stable ICU 59
+ */
+ Iterator getFineIterator() const {
+ return Iterator(array, length, false, false);
+ }
+
+ /**
+ * Merges the two input Edits and appends the result to this object.
+ *
+ * Consider two string transformations (for example, normalization and case mapping)
+ * where each records Edits in addition to writing an output string.<br>
+ * Edits ab reflect how substrings of input string a
+ * map to substrings of intermediate string b.<br>
+ * Edits bc reflect how substrings of intermediate string b
+ * map to substrings of output string c.<br>
+ * This function merges ab and bc such that the additional edits
+ * recorded in this object reflect how substrings of input string a
+ * map to substrings of output string c.
+ *
+ * If unrelated Edits are passed in where the output string of the first
+ * has a different length than the input string of the second,
+ * then a U_ILLEGAL_ARGUMENT_ERROR is reported.
+ *
+ * @param ab reflects how substrings of input string a
+ * map to substrings of intermediate string b.
+ * @param bc reflects how substrings of intermediate string b
+ * map to substrings of output string c.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return *this, with the merged edits appended
+ * @stable ICU 60
+ */
+ Edits &mergeAndAppend(const Edits &ab, const Edits &bc, UErrorCode &errorCode);
+
+private:
+ void releaseArray() noexcept;
+ Edits &copyArray(const Edits &other);
+ Edits &moveArray(Edits &src) noexcept;
+
+ void setLastUnit(int32_t last) { array[length - 1] = (uint16_t)last; }
+ int32_t lastUnit() const { return length > 0 ? array[length - 1] : 0xffff; }
+
+ void append(int32_t r);
+ UBool growArray();
+
+ static const int32_t STACK_CAPACITY = 100;
+ uint16_t *array;
+ int32_t capacity;
+ int32_t length;
+ int32_t delta;
+ int32_t numChanges;
+ UErrorCode errorCode_;
+ uint16_t stackArray[STACK_CAPACITY];
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __EDITS_H__
diff --git a/intl/icu/source/common/unicode/enumset.h b/intl/icu/source/common/unicode/enumset.h
new file mode 100644
index 0000000000..bde8c455c0
--- /dev/null
+++ b/intl/icu/source/common/unicode/enumset.h
@@ -0,0 +1,69 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2012,2014 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C++: internal template EnumSet<>
+ */
+
+#ifndef ENUMSET_H
+#define ENUMSET_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/* Can't use #ifndef U_HIDE_INTERNAL_API for the entire EnumSet class, needed in .h file declarations */
+/**
+ * enum bitset for boolean fields. Similar to Java EnumSet<>.
+ * Needs to range check. Used for private instance variables.
+ * @internal
+ * \cond
+ */
+template<typename T, uint32_t minValue, uint32_t limitValue>
+class EnumSet {
+public:
+ inline EnumSet() : fBools(0) {}
+ inline EnumSet(const EnumSet<T,minValue,limitValue>& other) : fBools(other.fBools) {}
+ inline ~EnumSet() {}
+#ifndef U_HIDE_INTERNAL_API
+ inline void clear() { fBools=0; }
+ inline void add(T toAdd) { set(toAdd, 1); }
+ inline void remove(T toRemove) { set(toRemove, 0); }
+ inline int32_t contains(T toCheck) const { return get(toCheck); }
+ inline void set(T toSet, int32_t v) { fBools=(fBools&(~flag(toSet)))|(v?(flag(toSet)):0); }
+ inline int32_t get(T toCheck) const { return (fBools & flag(toCheck))?1:0; }
+ inline UBool isValidEnum(T toCheck) const { return (toCheck>=minValue&&toCheck<limitValue); }
+ inline UBool isValidValue(int32_t v) const { return (v==0||v==1); }
+ inline const EnumSet<T,minValue,limitValue>& operator=(const EnumSet<T,minValue,limitValue>& other) {
+ fBools = other.fBools;
+ return *this;
+ }
+
+ inline uint32_t getAll() const {
+ return fBools;
+ }
+#endif /* U_HIDE_INTERNAL_API */
+
+private:
+ inline uint32_t flag(T toCheck) const { return (1<<(toCheck-minValue)); }
+private:
+ uint32_t fBools;
+};
+
+/** \endcond */
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+#endif /* ENUMSET_H */
diff --git a/intl/icu/source/common/unicode/errorcode.h b/intl/icu/source/common/unicode/errorcode.h
new file mode 100644
index 0000000000..fe7b518323
--- /dev/null
+++ b/intl/icu/source/common/unicode/errorcode.h
@@ -0,0 +1,144 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: errorcode.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009mar10
+* created by: Markus W. Scherer
+*/
+
+#ifndef __ERRORCODE_H__
+#define __ERRORCODE_H__
+
+/**
+ * \file
+ * \brief C++ API: ErrorCode class intended to make it easier to use
+ * ICU C and C++ APIs from C++ user code.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Wrapper class for UErrorCode, with conversion operators for direct use
+ * in ICU C and C++ APIs.
+ * Intended to be used as a base class, where a subclass overrides
+ * the handleFailure() function so that it throws an exception,
+ * does an assert(), logs an error, etc.
+ * This is not an abstract base class. This class can be used and instantiated
+ * by itself, although it will be more useful when subclassed.
+ *
+ * Features:
+ * - The constructor initializes the internal UErrorCode to U_ZERO_ERROR,
+ * removing one common source of errors.
+ * - Same use in C APIs taking a UErrorCode * (pointer)
+ * and C++ taking UErrorCode & (reference) via conversion operators.
+ * - Possible automatic checking for success when it goes out of scope.
+ *
+ * Note: For automatic checking for success in the destructor, a subclass
+ * must implement such logic in its own destructor because the base class
+ * destructor cannot call a subclass function (like handleFailure()).
+ * The ErrorCode base class destructor does nothing.
+ *
+ * Note also: While it is possible for a destructor to throw an exception,
+ * it is generally unsafe to do so. This means that in a subclass the destructor
+ * and the handleFailure() function may need to take different actions.
+ *
+ * Sample code:
+ * \code
+ * class IcuErrorCode: public icu::ErrorCode {
+ * public:
+ * virtual ~IcuErrorCode() { // should be defined in .cpp as "key function"
+ * // Safe because our handleFailure() does not throw exceptions.
+ * if(isFailure()) { handleFailure(); }
+ * }
+ * protected:
+ * virtual void handleFailure() const {
+ * log_failure(u_errorName(errorCode));
+ * exit(errorCode);
+ * }
+ * };
+ * IcuErrorCode error_code;
+ * UConverter *cnv = ucnv_open("Shift-JIS", error_code);
+ * length = ucnv_fromUChars(dest, capacity, src, length, error_code);
+ * ucnv_close(cnv);
+ * // IcuErrorCode destructor checks for success.
+ * \endcode
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API ErrorCode: public UMemory {
+public:
+ /**
+ * Default constructor. Initializes its UErrorCode to U_ZERO_ERROR.
+ * @stable ICU 4.2
+ */
+ ErrorCode() : errorCode(U_ZERO_ERROR) {}
+ /** Destructor, does nothing. See class documentation for details. @stable ICU 4.2 */
+ virtual ~ErrorCode();
+ /** Conversion operator, returns a reference. @stable ICU 4.2 */
+ operator UErrorCode & () { return errorCode; }
+ /** Conversion operator, returns a pointer. @stable ICU 4.2 */
+ operator UErrorCode * () { return &errorCode; }
+ /** Tests for U_SUCCESS(). @stable ICU 4.2 */
+ UBool isSuccess() const { return U_SUCCESS(errorCode); }
+ /** Tests for U_FAILURE(). @stable ICU 4.2 */
+ UBool isFailure() const { return U_FAILURE(errorCode); }
+ /** Returns the UErrorCode value. @stable ICU 4.2 */
+ UErrorCode get() const { return errorCode; }
+ /** Sets the UErrorCode value. @stable ICU 4.2 */
+ void set(UErrorCode value) { errorCode=value; }
+ /** Returns the UErrorCode value and resets it to U_ZERO_ERROR. @stable ICU 4.2 */
+ UErrorCode reset();
+ /**
+ * Asserts isSuccess().
+ * In other words, this method checks for a failure code,
+ * and the base class handles it like this:
+ * \code
+ * if(isFailure()) { handleFailure(); }
+ * \endcode
+ * @stable ICU 4.4
+ */
+ void assertSuccess() const;
+ /**
+ * Return a string for the UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum.
+ * @stable ICU 4.4
+ */
+ const char* errorName() const;
+
+protected:
+ /**
+ * Internal UErrorCode, accessible to subclasses.
+ * @stable ICU 4.2
+ */
+ UErrorCode errorCode;
+ /**
+ * Called by assertSuccess() if isFailure() is true.
+ * A subclass should override this function to deal with a failure code:
+ * Throw an exception, log an error, terminate the program, or similar.
+ * @stable ICU 4.2
+ */
+ virtual void handleFailure() const {}
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __ERRORCODE_H__
diff --git a/intl/icu/source/common/unicode/filteredbrk.h b/intl/icu/source/common/unicode/filteredbrk.h
new file mode 100644
index 0000000000..8b07e39ae1
--- /dev/null
+++ b/intl/icu/source/common/unicode/filteredbrk.h
@@ -0,0 +1,152 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+********************************************************************************
+* Copyright (C) 1997-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+********************************************************************************
+*/
+
+#ifndef FILTEREDBRK_H
+#define FILTEREDBRK_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/brkiter.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: FilteredBreakIteratorBuilder
+ */
+
+/**
+ * The BreakIteratorFilter is used to modify the behavior of a BreakIterator
+ * by constructing a new BreakIterator which suppresses certain segment boundaries.
+ * See http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions .
+ * For example, a typical English Sentence Break Iterator would break on the space
+ * in the string "Mr. Smith" (resulting in two segments),
+ * but with "Mr." as an exception, a filtered break iterator
+ * would consider the string "Mr. Smith" to be a single segment.
+ *
+ * @stable ICU 56
+ */
+class U_COMMON_API FilteredBreakIteratorBuilder : public UObject {
+ public:
+ /**
+ * destructor.
+ * @stable ICU 56
+ */
+ virtual ~FilteredBreakIteratorBuilder();
+
+ /**
+ * Construct a FilteredBreakIteratorBuilder based on rules in a locale.
+ * The rules are taken from CLDR exception data for the locale,
+ * see http://www.unicode.org/reports/tr35/tr35-general.html#Segmentation_Exceptions
+ * This is the equivalent of calling createInstance(UErrorCode&)
+ * and then repeatedly calling addNoBreakAfter(...) with the contents
+ * of the CLDR exception data.
+ * @param where the locale.
+ * @param status The error code.
+ * @return the new builder
+ * @stable ICU 56
+ */
+ static FilteredBreakIteratorBuilder *createInstance(const Locale& where, UErrorCode& status);
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * This function has been deprecated in favor of createEmptyInstance, which has
+ * identical behavior.
+ * @param status The error code.
+ * @return the new builder
+ * @deprecated ICU 60 use createEmptyInstance instead
+ * @see createEmptyInstance()
+ */
+ static FilteredBreakIteratorBuilder *createInstance(UErrorCode &status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Construct an empty FilteredBreakIteratorBuilder.
+ * In this state, it will not suppress any segment boundaries.
+ * @param status The error code.
+ * @return the new builder
+ * @stable ICU 60
+ */
+ static FilteredBreakIteratorBuilder *createEmptyInstance(UErrorCode &status);
+
+ /**
+ * Suppress a certain string from being the end of a segment.
+ * For example, suppressing "Mr.", then segments ending in "Mr." will not be returned
+ * by the iterator.
+ * @param string the string to suppress, such as "Mr."
+ * @param status error code
+ * @return returns true if the string was not present and now added,
+ * false if the call was a no-op because the string was already being suppressed.
+ * @stable ICU 56
+ */
+ virtual UBool suppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
+
+ /**
+ * Stop suppressing a certain string from being the end of the segment.
+ * This function does not create any new segment boundaries, but only serves to un-do
+ * the effect of earlier calls to suppressBreakAfter, or to un-do the effect of
+ * locale data which may be suppressing certain strings.
+ * @param string the exception to remove
+ * @param status error code
+ * @return returns true if the string was present and now removed,
+ * false if the call was a no-op because the string was not being suppressed.
+ * @stable ICU 56
+ */
+ virtual UBool unsuppressBreakAfter(const UnicodeString& string, UErrorCode& status) = 0;
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * This function has been deprecated in favor of wrapIteratorWithFilter()
+ * The behavior is identical.
+ * @param adoptBreakIterator the break iterator to adopt
+ * @param status error code
+ * @return the new BreakIterator, owned by the caller.
+ * @deprecated ICU 60 use wrapIteratorWithFilter() instead
+ * @see wrapBreakIteratorWithFilter()
+ */
+ virtual BreakIterator *build(BreakIterator* adoptBreakIterator, UErrorCode& status) = 0;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+ /**
+ * Wrap (adopt) an existing break iterator in a new filtered instance.
+ * The resulting BreakIterator is owned by the caller.
+ * The BreakIteratorFilter may be destroyed before the BreakIterator is destroyed.
+ * Note that the adoptBreakIterator is adopted by the new BreakIterator
+ * and should no longer be used by the caller.
+ * The FilteredBreakIteratorBuilder may be reused.
+ * This function is an alias for build()
+ * @param adoptBreakIterator the break iterator to adopt
+ * @param status error code
+ * @return the new BreakIterator, owned by the caller.
+ * @stable ICU 60
+ */
+ inline BreakIterator *wrapIteratorWithFilter(BreakIterator* adoptBreakIterator, UErrorCode& status) {
+ return build(adoptBreakIterator, status);
+ }
+
+ protected:
+ /**
+ * For subclass use
+ * @stable ICU 56
+ */
+ FilteredBreakIteratorBuilder();
+};
+
+
+U_NAMESPACE_END
+
+#endif // #if !UCONFIG_NO_BREAK_ITERATION && !UCONFIG_NO_FILTERED_BREAK_ITERATION
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // #ifndef FILTEREDBRK_H
diff --git a/intl/icu/source/common/unicode/icudataver.h b/intl/icu/source/common/unicode/icudataver.h
new file mode 100644
index 0000000000..f218ed8ebc
--- /dev/null
+++ b/intl/icu/source/common/unicode/icudataver.h
@@ -0,0 +1,43 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*/
+
+
+/**
+ * \file
+ * \brief C API: access to ICU Data Version number
+ */
+
+#ifndef __ICU_DATA_VER_H__
+#define __ICU_DATA_VER_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * @stable ICU 49
+ */
+#define U_ICU_VERSION_BUNDLE "icuver"
+
+/**
+ * @stable ICU 49
+ */
+#define U_ICU_DATA_KEY "DataVersion"
+
+/**
+ * Retrieves the data version from icuver and stores it in dataVersionFillin.
+ *
+ * @param dataVersionFillin icuver data version information to be filled in if not-null
+ * @param status stores the error code from the calls to resource bundle
+ *
+ * @stable ICU 49
+ */
+U_CAPI void U_EXPORT2 u_getDataVersion(UVersionInfo dataVersionFillin, UErrorCode *status);
+
+#endif
diff --git a/intl/icu/source/common/unicode/icuplug.h b/intl/icu/source/common/unicode/icuplug.h
new file mode 100644
index 0000000000..c787fcd426
--- /dev/null
+++ b/intl/icu/source/common/unicode/icuplug.h
@@ -0,0 +1,391 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : icuplug.h
+*
+* Date Name Description
+* 10/29/2009 sl New.
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: ICU Plugin API
+ *
+ * <h2>C API: ICU Plugin API</h2>
+ *
+ * <p>C API allowing run-time loadable modules that extend or modify ICU functionality.</p>
+ *
+ * <h3>Loading and Configuration</h3>
+ *
+ * <p>At ICU startup time, the environment variable "ICU_PLUGINS" will be
+ * queried for a directory name. If it is not set, the preprocessor symbol
+ * "DEFAULT_ICU_PLUGINS" will be checked for a default value.</p>
+ *
+ * <p>Within the above-named directory, the file "icuplugins##.txt" will be
+ * opened, if present, where ## is the major+minor number of the currently
+ * running ICU (such as, 44 for ICU 4.4, thus icuplugins44.txt)</p>
+ *
+ * <p>The configuration file has this format:</p>
+ *
+ * <ul>
+ * <li>Hash (#) begins a comment line</li>
+ *
+ * <li>Non-comment lines have two or three components:
+ * LIBRARYNAME ENTRYPOINT [ CONFIGURATION .. ]</li>
+ *
+ * <li>Tabs or spaces separate the three items.</li>
+ *
+ * <li>LIBRARYNAME is the name of a shared library, either a short name if
+ * it is on the loader path, or a full pathname.</li>
+ *
+ * <li>ENTRYPOINT is the short (undecorated) symbol name of the plugin's
+ * entrypoint, as above.</li>
+ *
+ * <li>CONFIGURATION is the entire rest of the line . It's passed as-is to
+ * the plugin.</li>
+ * </ul>
+ *
+ * <p>An example configuration file is, in its entirety:</p>
+ *
+ * \code
+ * # this is icuplugins44.txt
+ * testplug.dll myPlugin hello=world
+ * \endcode
+ * <p>Plugins are categorized as "high" or "low" level. Low level are those
+ * which must be run BEFORE high level plugins, and before any operations
+ * which cause ICU to be 'initialized'. If a plugin is low level but
+ * causes ICU to allocate memory or become initialized, that plugin is said
+ * to cause a 'level change'. </p>
+ *
+ * <p>At load time, ICU first queries all plugins to determine their level,
+ * then loads all 'low' plugins first, and then loads all 'high' plugins.
+ * Plugins are otherwise loaded in the order listed in the configuration file.</p>
+ *
+ * <h3>Implementing a Plugin</h3>
+ * \code
+ * U_CAPI UPlugTokenReturn U_EXPORT2
+ * myPlugin (UPlugData *plug, UPlugReason reason, UErrorCode *status) {
+ * if(reason==UPLUG_REASON_QUERY) {
+ * uplug_setPlugName(plug, "Simple Plugin");
+ * uplug_setPlugLevel(plug, UPLUG_LEVEL_HIGH);
+ * } else if(reason==UPLUG_REASON_LOAD) {
+ * ... Set up some ICU things here....
+ * } else if(reason==UPLUG_REASON_UNLOAD) {
+ * ... unload, clean up ...
+ * }
+ * return UPLUG_TOKEN;
+ * }
+ * \endcode
+ *
+ * <p>The UPlugData* is an opaque pointer to the plugin-specific data, and is
+ * used in all other API calls.</p>
+ *
+ * <p>The API contract is:</p>
+ * <ol><li>The plugin MUST always return UPLUG_TOKEN as a return value- to
+ * indicate that it is a valid plugin.</li>
+ *
+ * <li>When the 'reason' parameter is set to UPLUG_REASON_QUERY, the
+ * plugin MUST call uplug_setPlugLevel() to indicate whether it is a high
+ * level or low level plugin.</li>
+ *
+ * <li>When the 'reason' parameter is UPLUG_REASON_QUERY, the plugin
+ * SHOULD call uplug_setPlugName to indicate a human readable plugin name.</li></ol>
+ *
+ *
+ * \internal ICU 4.4 Technology Preview
+ */
+
+
+#ifndef ICUPLUG_H
+#define ICUPLUG_H
+
+#include "unicode/utypes.h"
+
+
+#if UCONFIG_ENABLE_PLUGINS || defined(U_IN_DOXYGEN)
+
+
+
+/* === Basic types === */
+
+#ifndef U_HIDE_INTERNAL_API
+struct UPlugData;
+/**
+ * @{
+ * Typedef for opaque structure passed to/from a plugin.
+ * Use the APIs to access it.
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef struct UPlugData UPlugData;
+
+/** @} */
+
+/**
+ * Random Token to identify a valid ICU plugin. Plugins must return this
+ * from the entrypoint.
+ * @internal ICU 4.4 Technology Preview
+ */
+#define UPLUG_TOKEN 0x54762486
+
+/**
+ * Max width of names, symbols, and configuration strings
+ * @internal ICU 4.4 Technology Preview
+ */
+#define UPLUG_NAME_MAX 100
+
+
+/**
+ * Return value from a plugin entrypoint.
+ * Must always be set to UPLUG_TOKEN
+ * @see UPLUG_TOKEN
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef uint32_t UPlugTokenReturn;
+
+/**
+ * Reason code for the entrypoint's call
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef enum {
+ UPLUG_REASON_QUERY = 0, /**< The plugin is being queried for info. **/
+ UPLUG_REASON_LOAD = 1, /**< The plugin is being loaded. **/
+ UPLUG_REASON_UNLOAD = 2, /**< The plugin is being unloaded. **/
+ /**
+ * Number of known reasons.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UPLUG_REASON_COUNT
+} UPlugReason;
+
+
+/**
+ * Level of plugin loading
+ * INITIAL: UNKNOWN
+ * QUERY: INVALID -> { LOW | HIGH }
+ * ERR -> INVALID
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef enum {
+ UPLUG_LEVEL_INVALID = 0, /**< The plugin is invalid, hasn't called uplug_setLevel, or can't load. **/
+ UPLUG_LEVEL_UNKNOWN = 1, /**< The plugin is waiting to be installed. **/
+ UPLUG_LEVEL_LOW = 2, /**< The plugin must be called before u_init completes **/
+ UPLUG_LEVEL_HIGH = 3, /**< The plugin can run at any time. **/
+ /**
+ * Number of known levels.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UPLUG_LEVEL_COUNT
+} UPlugLevel;
+
+/**
+ * Entrypoint for an ICU plugin.
+ * @param plug the UPlugData handle.
+ * @param reason the reason code for the entrypoint's call.
+ * @param status Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A valid plugin must return UPLUG_TOKEN
+ * @internal ICU 4.4 Technology Preview
+ */
+typedef UPlugTokenReturn (U_EXPORT2 UPlugEntrypoint) (
+ UPlugData *plug,
+ UPlugReason reason,
+ UErrorCode *status);
+
+/* === Needed for Implementing === */
+
+/**
+ * Request that this plugin not be unloaded at cleanup time.
+ * This is appropriate for plugins which cannot be cleaned up.
+ * @see u_cleanup()
+ * @param plug plugin
+ * @param dontUnload set true if this plugin can't be unloaded
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setPlugNoUnload(UPlugData *plug, UBool dontUnload);
+
+/**
+ * Set the level of this plugin.
+ * @param plug plugin data handle
+ * @param level the level of this plugin
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setPlugLevel(UPlugData *plug, UPlugLevel level);
+
+/**
+ * Get the level of this plugin.
+ * @param plug plugin data handle
+ * @return the level of this plugin
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugLevel U_EXPORT2
+uplug_getPlugLevel(UPlugData *plug);
+
+/**
+ * Get the lowest level of plug which can currently load.
+ * For example, if UPLUG_LEVEL_LOW is returned, then low level plugins may load
+ * if UPLUG_LEVEL_HIGH is returned, then only high level plugins may load.
+ * @return the lowest level of plug which can currently load
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugLevel U_EXPORT2
+uplug_getCurrentLevel(void);
+
+
+/**
+ * Get plug load status
+ * @return The error code of this plugin's load attempt.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UErrorCode U_EXPORT2
+uplug_getPlugLoadStatus(UPlugData *plug);
+
+/**
+ * Set the human-readable name of this plugin.
+ * @param plug plugin data handle
+ * @param name the name of this plugin. The first UPLUG_NAME_MAX characters willi be copied into a new buffer.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setPlugName(UPlugData *plug, const char *name);
+
+/**
+ * Get the human-readable name of this plugin.
+ * @param plug plugin data handle
+ * @return the name of this plugin
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getPlugName(UPlugData *plug);
+
+/**
+ * Return the symbol name for this plugin, if known.
+ * @param plug plugin data handle
+ * @return the symbol name, or NULL
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getSymbolName(UPlugData *plug);
+
+/**
+ * Return the library name for this plugin, if known.
+ * @param plug plugin data handle
+ * @param status error code
+ * @return the library name, or NULL
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getLibraryName(UPlugData *plug, UErrorCode *status);
+
+/**
+ * Return the library used for this plugin, if known.
+ * Plugins could use this to load data out of their
+ * @param plug plugin data handle
+ * @return the library, or NULL
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void * U_EXPORT2
+uplug_getLibrary(UPlugData *plug);
+
+/**
+ * Return the plugin-specific context data.
+ * @param plug plugin data handle
+ * @return the context, or NULL if not set
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void * U_EXPORT2
+uplug_getContext(UPlugData *plug);
+
+/**
+ * Set the plugin-specific context data.
+ * @param plug plugin data handle
+ * @param context new context to set
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_setContext(UPlugData *plug, void *context);
+
+
+/**
+ * Get the configuration string, if available.
+ * The string is in the platform default codepage.
+ * @param plug plugin data handle
+ * @return configuration string, or else null.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI const char * U_EXPORT2
+uplug_getConfiguration(UPlugData *plug);
+
+/**
+ * Return all currently installed plugins, from newest to oldest
+ * Usage Example:
+ * \code
+ * UPlugData *plug = NULL;
+ * while(plug=uplug_nextPlug(plug)) {
+ * ... do something with 'plug' ...
+ * }
+ * \endcode
+ * Not thread safe- do not call while plugs are added or removed.
+ * @param prior pass in 'NULL' to get the first (most recent) plug,
+ * otherwise pass the value returned on a prior call to uplug_nextPlug
+ * @return the next oldest plugin, or NULL if no more.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugData* U_EXPORT2
+uplug_nextPlug(UPlugData *prior);
+
+/**
+ * Inject a plugin as if it were loaded from a library.
+ * This is useful for testing plugins.
+ * Note that it will have a 'NULL' library pointer associated
+ * with it, and therefore no llibrary will be closed at cleanup time.
+ * Low level plugins may not be able to load, as ordering can't be enforced.
+ * @param entrypoint entrypoint to install
+ * @param config user specified configuration string, if available, or NULL.
+ * @param status error result
+ * @return the new UPlugData associated with this plugin, or NULL if error.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromEntrypoint(UPlugEntrypoint *entrypoint, const char *config, UErrorCode *status);
+
+
+/**
+ * Inject a plugin from a library, as if the information came from a config file.
+ * Low level plugins may not be able to load, and ordering can't be enforced.
+ * @param libName DLL name to load
+ * @param sym symbol of plugin (UPlugEntrypoint function)
+ * @param config configuration string, or NULL
+ * @param status error result
+ * @return the new UPlugData associated with this plugin, or NULL if error.
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI UPlugData* U_EXPORT2
+uplug_loadPlugFromLibrary(const char *libName, const char *sym, const char *config, UErrorCode *status);
+
+/**
+ * Remove a plugin.
+ * Will request the plugin to be unloaded, and close the library if needed
+ * @param plug plugin handle to close
+ * @param status error result
+ * @internal ICU 4.4 Technology Preview
+ */
+U_CAPI void U_EXPORT2
+uplug_removePlug(UPlugData *plug, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+#endif /* UCONFIG_ENABLE_PLUGINS */
+
+#endif /* _ICUPLUG */
+
diff --git a/intl/icu/source/common/unicode/idna.h b/intl/icu/source/common/unicode/idna.h
new file mode 100644
index 0000000000..1c57205bae
--- /dev/null
+++ b/intl/icu/source/common/unicode/idna.h
@@ -0,0 +1,330 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: idna.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010mar05
+* created by: Markus W. Scherer
+*/
+
+#ifndef __IDNA_H__
+#define __IDNA_H__
+
+/**
+ * \file
+ * \brief C++ API: Internationalizing Domain Names in Applications (IDNA)
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/bytestream.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uidna.h"
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+
+class IDNAInfo;
+
+/**
+ * Abstract base class for IDNA processing.
+ * See http://www.unicode.org/reports/tr46/
+ * and http://www.ietf.org/rfc/rfc3490.txt
+ *
+ * The IDNA class is not intended for public subclassing.
+ *
+ * This C++ API currently only implements UTS #46.
+ * The uidna.h C API implements both UTS #46 (functions using UIDNA service object)
+ * and IDNA2003 (functions that do not use a service object).
+ * @stable ICU 4.6
+ */
+class U_COMMON_API IDNA : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.6
+ */
+ ~IDNA();
+
+ /**
+ * Returns an IDNA instance which implements UTS #46.
+ * Returns an unmodifiable instance, owned by the caller.
+ * Cache it for multiple operations, and delete it when done.
+ * The instance is thread-safe, that is, it can be used concurrently.
+ *
+ * UTS #46 defines Unicode IDNA Compatibility Processing,
+ * updated to the latest version of Unicode and compatible with both
+ * IDNA2003 and IDNA2008.
+ *
+ * The worker functions use transitional processing, including deviation mappings,
+ * unless UIDNA_NONTRANSITIONAL_TO_ASCII or UIDNA_NONTRANSITIONAL_TO_UNICODE
+ * is used in which case the deviation characters are passed through without change.
+ *
+ * Disallowed characters are mapped to U+FFFD.
+ *
+ * For available options see the uidna.h header.
+ * Operations with the UTS #46 instance do not support the
+ * UIDNA_ALLOW_UNASSIGNED option.
+ *
+ * By default, the UTS #46 implementation allows all ASCII characters (as valid or mapped).
+ * When the UIDNA_USE_STD3_RULES option is used, ASCII characters other than
+ * letters, digits, hyphen (LDH) and dot/full stop are disallowed and mapped to U+FFFD.
+ *
+ * @param options Bit set to modify the processing and error checking.
+ * See option bit set values in uidna.h.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the UTS #46 IDNA instance, if successful
+ * @stable ICU 4.6
+ */
+ static IDNA *
+ createUTS46Instance(uint32_t options, UErrorCode &errorCode);
+
+ /**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * If any processing step fails, then info.hasErrors() will be true and
+ * the result might not be an ASCII string.
+ * The label might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param label Input domain name label
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ labelToASCII(const UnicodeString &label, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * If any processing step fails, then info.hasErrors() will be true.
+ * The label might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param label Input domain name label
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ labelToUnicode(const UnicodeString &label, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * If any processing step fails, then info.hasErrors() will be true and
+ * the result might not be an ASCII string.
+ * The domain name might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param name Input domain name
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ nameToASCII(const UnicodeString &name, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * If any processing step fails, then info.hasErrors() will be true.
+ * The domain name might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param name Input domain name
+ * @param dest Destination string object
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual UnicodeString &
+ nameToUnicode(const UnicodeString &name, UnicodeString &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const = 0;
+
+ // UTF-8 versions of the processing methods ---------------------------- ***
+
+ /**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * UTF-8 version of labelToASCII(), same behavior.
+ *
+ * @param label Input domain name label
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual void
+ labelToASCII_UTF8(StringPiece label, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ /**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * UTF-8 version of labelToUnicode(), same behavior.
+ *
+ * @param label Input domain name label
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual void
+ labelToUnicodeUTF8(StringPiece label, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ /**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * UTF-8 version of nameToASCII(), same behavior.
+ *
+ * @param name Input domain name
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual void
+ nameToASCII_UTF8(StringPiece name, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+
+ /**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * UTF-8 version of nameToUnicode(), same behavior.
+ *
+ * @param name Input domain name
+ * @param dest Destination byte sink; Flush()ed if successful
+ * @param info Output container of IDNA processing details.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.6
+ */
+ virtual void
+ nameToUnicodeUTF8(StringPiece name, ByteSink &dest,
+ IDNAInfo &info, UErrorCode &errorCode) const;
+};
+
+class UTS46;
+
+/**
+ * Output container for IDNA processing errors.
+ * The IDNAInfo class is not suitable for subclassing.
+ * @stable ICU 4.6
+ */
+class U_COMMON_API IDNAInfo : public UMemory {
+public:
+ /**
+ * Constructor for stack allocation.
+ * @stable ICU 4.6
+ */
+ IDNAInfo() : errors(0), labelErrors(0), isTransDiff(false), isBiDi(false), isOkBiDi(true) {}
+ /**
+ * Were there IDNA processing errors?
+ * @return true if there were processing errors
+ * @stable ICU 4.6
+ */
+ UBool hasErrors() const { return errors!=0; }
+ /**
+ * Returns a bit set indicating IDNA processing errors.
+ * See UIDNA_ERROR_... constants in uidna.h.
+ * @return bit set of processing errors
+ * @stable ICU 4.6
+ */
+ uint32_t getErrors() const { return errors; }
+ /**
+ * Returns true if transitional and nontransitional processing produce different results.
+ * This is the case when the input label or domain name contains
+ * one or more deviation characters outside a Punycode label (see UTS #46).
+ * <ul>
+ * <li>With nontransitional processing, such characters are
+ * copied to the destination string.
+ * <li>With transitional processing, such characters are
+ * mapped (sharp s/sigma) or removed (joiner/nonjoiner).
+ * </ul>
+ * @return true if transitional and nontransitional processing produce different results
+ * @stable ICU 4.6
+ */
+ UBool isTransitionalDifferent() const { return isTransDiff; }
+
+private:
+ friend class UTS46;
+
+ IDNAInfo(const IDNAInfo &other) = delete; // no copying
+ IDNAInfo &operator=(const IDNAInfo &other) = delete; // no copying
+
+ void reset() {
+ errors=labelErrors=0;
+ isTransDiff=false;
+ isBiDi=false;
+ isOkBiDi=true;
+ }
+
+ uint32_t errors, labelErrors;
+ UBool isTransDiff;
+ UBool isBiDi;
+ UBool isOkBiDi;
+};
+
+U_NAMESPACE_END
+
+#endif // UCONFIG_NO_IDNA
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __IDNA_H__
diff --git a/intl/icu/source/common/unicode/localebuilder.h b/intl/icu/source/common/unicode/localebuilder.h
new file mode 100644
index 0000000000..f708a7ed7c
--- /dev/null
+++ b/intl/icu/source/common/unicode/localebuilder.h
@@ -0,0 +1,309 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+#ifndef __LOCALEBUILDER_H__
+#define __LOCALEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/locid.h"
+#include "unicode/localematcher.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Builder API for Locale
+ */
+
+U_NAMESPACE_BEGIN
+class CharString;
+
+/**
+ * <code>LocaleBuilder</code> is used to build instances of <code>Locale</code>
+ * from values configured by the setters. Unlike the <code>Locale</code>
+ * constructors, the <code>LocaleBuilder</code> checks if a value configured by a
+ * setter satisfies the syntax requirements defined by the <code>Locale</code>
+ * class. A <code>Locale</code> object created by a <code>LocaleBuilder</code> is
+ * well-formed and can be transformed to a well-formed IETF BCP 47 language tag
+ * without losing information.
+ *
+ * <p>The following example shows how to create a <code>Locale</code> object
+ * with the <code>LocaleBuilder</code>.
+ * <blockquote>
+ * <pre>
+ * UErrorCode status = U_ZERO_ERROR;
+ * Locale aLocale = LocaleBuilder()
+ * .setLanguage("sr")
+ * .setScript("Latn")
+ * .setRegion("RS")
+ * .build(status);
+ * if (U_SUCCESS(status)) {
+ * // ...
+ * }
+ * </pre>
+ * </blockquote>
+ *
+ * <p>LocaleBuilders can be reused; <code>clear()</code> resets all
+ * fields to their default values.
+ *
+ * <p>LocaleBuilder tracks errors in an internal UErrorCode. For all setters,
+ * except setLanguageTag and setLocale, LocaleBuilder will return immediately
+ * if the internal UErrorCode is in error state.
+ * To reset internal state and error code, call clear method.
+ * The setLanguageTag and setLocale method will first clear the internal
+ * UErrorCode, then track the error of the validation of the input parameter
+ * into the internal UErrorCode.
+ *
+ * @stable ICU 64
+ */
+class U_COMMON_API LocaleBuilder : public UObject {
+public:
+ /**
+ * Constructs an empty LocaleBuilder. The default value of all
+ * fields, extensions, and private use information is the
+ * empty string.
+ *
+ * @stable ICU 64
+ */
+ LocaleBuilder();
+
+ /**
+ * Destructor
+ * @stable ICU 64
+ */
+ virtual ~LocaleBuilder();
+
+ /**
+ * Resets the <code>LocaleBuilder</code> to match the provided
+ * <code>locale</code>. Existing state is discarded.
+ *
+ * <p>All fields of the locale must be well-formed.
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @param locale the locale
+ * @return This builder.
+ *
+ * @stable ICU 64
+ */
+ LocaleBuilder& setLocale(const Locale& locale);
+
+ /**
+ * Resets the LocaleBuilder to match the provided IETF BCP 47 language tag.
+ * Discards the existing state.
+ * The empty string causes the builder to be reset, like {@link #clear}.
+ * Legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * are converted to their canonical form before being processed.
+ * Otherwise, the <code>language tag</code> must be well-formed,
+ * or else the build() method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @param tag the language tag, defined as IETF BCP 47 language tag.
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setLanguageTag(StringPiece tag);
+
+ /**
+ * Sets the language. If <code>language</code> is the empty string, the
+ * language in this <code>LocaleBuilder</code> is removed. Otherwise, the
+ * <code>language</code> must be well-formed, or else the build() method will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The syntax of language value is defined as
+ * [unicode_language_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_language_subtag).
+ *
+ * @param language the language
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setLanguage(StringPiece language);
+
+ /**
+ * Sets the script. If <code>script</code> is the empty string, the script in
+ * this <code>LocaleBuilder</code> is removed.
+ * Otherwise, the <code>script</code> must be well-formed, or else the build()
+ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The script value is a four-letter script code as
+ * [unicode_script_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_script_subtag)
+ * defined by ISO 15924
+ *
+ * @param script the script
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setScript(StringPiece script);
+
+ /**
+ * Sets the region. If region is the empty string, the region in this
+ * <code>LocaleBuilder</code> is removed. Otherwise, the <code>region</code>
+ * must be well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>The region value is defined by
+ * [unicode_region_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_region_subtag)
+ * as a two-letter ISO 3166 code or a three-digit UN M.49 area code.
+ *
+ * <p>The region value in the <code>Locale</code> created by the
+ * <code>LocaleBuilder</code> is always normalized to upper case.
+ *
+ * @param region the region
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setRegion(StringPiece region);
+
+ /**
+ * Sets the variant. If variant is the empty string, the variant in this
+ * <code>LocaleBuilder</code> is removed. Otherwise, the <code>variant</code>
+ * must be well-formed, or else the build() method will later report an
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p><b>Note:</b> This method checks if <code>variant</code>
+ * satisfies the
+ * [unicode_variant_subtag](http://www.unicode.org/reports/tr35/tr35.html#unicode_variant_subtag)
+ * syntax requirements, and normalizes the value to lowercase letters. However,
+ * the <code>Locale</code> class does not impose any syntactic
+ * restriction on variant. To set an ill-formed variant, use a Locale constructor.
+ * If there are multiple unicode_variant_subtag, the caller must concatenate
+ * them with '-' as separator (ex: "foobar-fibar").
+ *
+ * @param variant the variant
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setVariant(StringPiece variant);
+
+ /**
+ * Sets the extension for the given key. If the value is the empty string,
+ * the extension is removed. Otherwise, the <code>key</code> and
+ * <code>value</code> must be well-formed, or else the build() method will
+ * later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p><b>Note:</b> The key ('u') is used for the Unicode locale extension.
+ * Setting a value for this key replaces any existing Unicode locale key/type
+ * pairs with those defined in the extension.
+ *
+ * <p><b>Note:</b> The key ('x') is used for the private use code. To be
+ * well-formed, the value for this key needs only to have subtags of one to
+ * eight alphanumeric characters, not two to eight as in the general case.
+ *
+ * @param key the extension key
+ * @param value the extension value
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setExtension(char key, StringPiece value);
+
+ /**
+ * Sets the Unicode locale keyword type for the given key. If the type
+ * StringPiece is constructed with a nullptr, the keyword is removed.
+ * If the type is the empty string, the keyword is set without type subtags.
+ * Otherwise, the key and type must be well-formed, or else the build()
+ * method will later report an U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * <p>Keys and types are converted to lower case.
+ *
+ * <p><b>Note</b>:Setting the 'u' extension via {@link #setExtension}
+ * replaces all Unicode locale keywords with those defined in the
+ * extension.
+ *
+ * @param key the Unicode locale key
+ * @param type the Unicode locale type
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& setUnicodeLocaleKeyword(
+ StringPiece key, StringPiece type);
+
+ /**
+ * Adds a unicode locale attribute, if not already present, otherwise
+ * has no effect. The attribute must not be empty string and must be
+ * well-formed or U_ILLEGAL_ARGUMENT_ERROR will be set to status
+ * during the build() call.
+ *
+ * @param attribute the attribute
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& addUnicodeLocaleAttribute(StringPiece attribute);
+
+ /**
+ * Removes a unicode locale attribute, if present, otherwise has no
+ * effect. The attribute must not be empty string and must be well-formed
+ * or U_ILLEGAL_ARGUMENT_ERROR will be set to status during the build() call.
+ *
+ * <p>Attribute comparison for removal is case-insensitive.
+ *
+ * @param attribute the attribute
+ * @return This builder.
+ * @stable ICU 64
+ */
+ LocaleBuilder& removeUnicodeLocaleAttribute(StringPiece attribute);
+
+ /**
+ * Resets the builder to its initial, empty state.
+ * <p>This method clears the internal UErrorCode.
+ *
+ * @return this builder
+ * @stable ICU 64
+ */
+ LocaleBuilder& clear();
+
+ /**
+ * Resets the extensions to their initial, empty state.
+ * Language, script, region and variant are unchanged.
+ *
+ * @return this builder
+ * @stable ICU 64
+ */
+ LocaleBuilder& clearExtensions();
+
+ /**
+ * Returns an instance of <code>Locale</code> created from the fields set
+ * on this builder.
+ * If any set methods or during the build() call require memory allocation
+ * but fail U_MEMORY_ALLOCATION_ERROR will be set to status.
+ * If any of the fields set by the setters are not well-formed, the status
+ * will be set to U_ILLEGAL_ARGUMENT_ERROR. The state of the builder will
+ * not change after the build() call and the caller is free to keep using
+ * the same builder to build more locales.
+ *
+ * @return a new Locale
+ * @stable ICU 64
+ */
+ Locale build(UErrorCode& status);
+
+ /**
+ * Sets the UErrorCode if an error occurred while recording sets.
+ * Preserves older error codes in the outErrorCode.
+ * @param outErrorCode Set to an error code that occurred while setting subtags.
+ * Unchanged if there is no such error or if outErrorCode
+ * already contained an error.
+ * @return true if U_FAILURE(outErrorCode)
+ * @stable ICU 65
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+private:
+ friend class LocaleMatcher::Result;
+
+ void copyExtensionsFrom(const Locale& src, UErrorCode& errorCode);
+
+ UErrorCode status_;
+ char language_[9];
+ char script_[5];
+ char region_[4];
+ CharString *variant_; // Pointer not object so we need not #include internal charstr.h.
+ icu::Locale *extensions_; // Pointer not object. Storage for all other fields.
+
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __LOCALEBUILDER_H__
diff --git a/intl/icu/source/common/unicode/localematcher.h b/intl/icu/source/common/unicode/localematcher.h
new file mode 100644
index 0000000000..603daf7231
--- /dev/null
+++ b/intl/icu/source/common/unicode/localematcher.h
@@ -0,0 +1,708 @@
+// © 2019 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// localematcher.h
+// created: 2019may08 Markus W. Scherer
+
+#ifndef __LOCALEMATCHER_H__
+#define __LOCALEMATCHER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/locid.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale matcher: User's desired locales vs. application's supported locales.
+ */
+
+/**
+ * Builder option for whether the language subtag or the script subtag is most important.
+ *
+ * @see LocaleMatcher::Builder#setFavorSubtag(ULocMatchFavorSubtag)
+ * @stable ICU 65
+ */
+enum ULocMatchFavorSubtag {
+ /**
+ * Language differences are most important, then script differences, then region differences.
+ * (This is the default behavior.)
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_FAVOR_LANGUAGE,
+ /**
+ * Makes script differences matter relatively more than language differences.
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_FAVOR_SCRIPT
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchFavorSubtag ULocMatchFavorSubtag;
+#endif
+
+/**
+ * Builder option for whether all desired locales are treated equally or
+ * earlier ones are preferred.
+ *
+ * @see LocaleMatcher::Builder#setDemotionPerDesiredLocale(ULocMatchDemotion)
+ * @stable ICU 65
+ */
+enum ULocMatchDemotion {
+ /**
+ * All desired locales are treated equally.
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_DEMOTION_NONE,
+ /**
+ * Earlier desired locales are preferred.
+ *
+ * <p>From each desired locale to the next,
+ * the distance to any supported locale is increased by an additional amount
+ * which is at least as large as most region mismatches.
+ * A later desired locale has to have a better match with some supported locale
+ * due to more than merely having the same region subtag.
+ *
+ * <p>For example: <code>Supported={en, sv} desired=[en-GB, sv]</code>
+ * yields <code>Result(en-GB, en)</code> because
+ * with the demotion of sv its perfect match is no better than
+ * the region distance between the earlier desired locale en-GB and en=en-US.
+ *
+ * <p>Notes:
+ * <ul>
+ * <li>In some cases, language and/or script differences can be as small as
+ * the typical region difference. (Example: sr-Latn vs. sr-Cyrl)
+ * <li>It is possible for certain region differences to be larger than usual,
+ * and larger than the demotion.
+ * (As of CLDR 35 there is no such case, but
+ * this is possible in future versions of the data.)
+ * </ul>
+ *
+ * @stable ICU 65
+ */
+ ULOCMATCH_DEMOTION_REGION
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchDemotion ULocMatchDemotion;
+#endif
+
+/**
+ * Builder option for whether to include or ignore one-way (fallback) match data.
+ * The LocaleMatcher uses CLDR languageMatch data which includes fallback (oneway=true) entries.
+ * Sometimes it is desirable to ignore those.
+ *
+ * <p>For example, consider a web application with the UI in a given language,
+ * with a link to another, related web app.
+ * The link should include the UI language, and the target server may also use
+ * the client’s Accept-Language header data.
+ * The target server has its own list of supported languages.
+ * One may want to favor UI language consistency, that is,
+ * if there is a decent match for the original UI language, we want to use it,
+ * but not if it is merely a fallback.
+ *
+ * @see LocaleMatcher::Builder#setDirection(ULocMatchDirection)
+ * @stable ICU 67
+ */
+enum ULocMatchDirection {
+ /**
+ * Locale matching includes one-way matches such as Breton→French. (default)
+ *
+ * @stable ICU 67
+ */
+ ULOCMATCH_DIRECTION_WITH_ONE_WAY,
+ /**
+ * Locale matching limited to two-way matches including e.g. Danish↔Norwegian
+ * but ignoring one-way matches.
+ *
+ * @stable ICU 67
+ */
+ ULOCMATCH_DIRECTION_ONLY_TWO_WAY
+};
+#ifndef U_IN_DOXYGEN
+typedef enum ULocMatchDirection ULocMatchDirection;
+#endif
+
+struct UHashtable;
+
+U_NAMESPACE_BEGIN
+
+struct LSR;
+
+class LocaleDistance;
+class LocaleLsrIterator;
+class UVector;
+class XLikelySubtags;
+
+/**
+ * Immutable class that picks the best match between a user's desired locales and
+ * an application's supported locales.
+ * Movable but not copyable.
+ *
+ * <p>Example:
+ * <pre>
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * LocaleMatcher matcher = LocaleMatcher::Builder().setSupportedLocales("fr, en-GB, en").build(errorCode);
+ * Locale *bestSupported = matcher.getBestLocale(Locale.US, errorCode); // "en"
+ * </pre>
+ *
+ * <p>A matcher takes into account when languages are close to one another,
+ * such as Danish and Norwegian,
+ * and when regional variants are close, like en-GB and en-AU as opposed to en-US.
+ *
+ * <p>If there are multiple supported locales with the same (language, script, region)
+ * likely subtags, then the current implementation returns the first of those locales.
+ * It ignores variant subtags (except for pseudolocale variants) and extensions.
+ * This may change in future versions.
+ *
+ * <p>For example, the current implementation does not distinguish between
+ * de, de-DE, de-Latn, de-1901, de-u-co-phonebk.
+ *
+ * <p>If you prefer one equivalent locale over another, then provide only the preferred one,
+ * or place it earlier in the list of supported locales.
+ *
+ * <p>Otherwise, the order of supported locales may have no effect on the best-match results.
+ * The current implementation compares each desired locale with supported locales
+ * in the following order:
+ * 1. Default locale, if supported;
+ * 2. CLDR "paradigm locales" like en-GB and es-419;
+ * 3. other supported locales.
+ * This may change in future versions.
+ *
+ * <p>Often a product will just need one matcher instance, built with the languages
+ * that it supports. However, it may want multiple instances with different
+ * default languages based on additional information, such as the domain.
+ *
+ * <p>This class is not intended for public subclassing.
+ *
+ * @stable ICU 65
+ */
+class U_COMMON_API LocaleMatcher : public UMemory {
+public:
+ /**
+ * Data for the best-matching pair of a desired and a supported locale.
+ * Movable but not copyable.
+ *
+ * @stable ICU 65
+ */
+ class U_COMMON_API Result : public UMemory {
+ public:
+ /**
+ * Move constructor; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @stable ICU 65
+ */
+ Result(Result &&src) noexcept;
+
+ /**
+ * Destructor.
+ *
+ * @stable ICU 65
+ */
+ ~Result();
+
+ /**
+ * Move assignment; might modify the source.
+ * This object will have the same contents that the source object had.
+ *
+ * @param src Result to move contents from.
+ * @stable ICU 65
+ */
+ Result &operator=(Result &&src) noexcept;
+
+ /**
+ * Returns the best-matching desired locale.
+ * nullptr if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the best-matching desired locale, or nullptr.
+ * @stable ICU 65
+ */
+ inline const Locale *getDesiredLocale() const { return desiredLocale; }
+
+ /**
+ * Returns the best-matching supported locale.
+ * If none matched well enough, this is the default locale.
+ * The default locale is nullptr if Builder::setNoDefaultLocale() was called,
+ * or if the list of supported locales is empty and no explicit default locale is set.
+ *
+ * @return the best-matching supported locale, or nullptr.
+ * @stable ICU 65
+ */
+ inline const Locale *getSupportedLocale() const { return supportedLocale; }
+
+ /**
+ * Returns the index of the best-matching desired locale in the input Iterable order.
+ * -1 if the list of desired locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching desired locale, or -1.
+ * @stable ICU 65
+ */
+ inline int32_t getDesiredIndex() const { return desiredIndex; }
+
+ /**
+ * Returns the index of the best-matching supported locale in the
+ * constructor’s or builder’s input order (“set” Collection plus “added” locales).
+ * If the matcher was built from a locale list string, then the iteration order is that
+ * of a LocalePriorityList built from the same string.
+ * -1 if the list of supported locales is empty or if none matched well enough.
+ *
+ * @return the index of the best-matching supported locale, or -1.
+ * @stable ICU 65
+ */
+ inline int32_t getSupportedIndex() const { return supportedIndex; }
+
+ /**
+ * Takes the best-matching supported locale and adds relevant fields of the
+ * best-matching desired locale, such as the -t- and -u- extensions.
+ * May replace some fields of the supported locale.
+ * The result is the locale that should be used for date and number formatting, collation, etc.
+ * Returns the root locale if getSupportedLocale() returns nullptr.
+ *
+ * <p>Example: desired=ar-SA-u-nu-latn, supported=ar-EG, resolved locale=ar-SA-u-nu-latn
+ *
+ * @return a locale combining the best-matching desired and supported locales.
+ * @stable ICU 65
+ */
+ Locale makeResolvedLocale(UErrorCode &errorCode) const;
+
+ private:
+ Result(const Locale *desired, const Locale *supported,
+ int32_t desIndex, int32_t suppIndex, UBool owned) :
+ desiredLocale(desired), supportedLocale(supported),
+ desiredIndex(desIndex), supportedIndex(suppIndex),
+ desiredIsOwned(owned) {}
+
+ Result(const Result &other) = delete;
+ Result &operator=(const Result &other) = delete;
+
+ const Locale *desiredLocale;
+ const Locale *supportedLocale;
+ int32_t desiredIndex;
+ int32_t supportedIndex;
+ UBool desiredIsOwned;
+
+ friend class LocaleMatcher;
+ };
+
+ /**
+ * LocaleMatcher builder.
+ * Movable but not copyable.
+ *
+ * @stable ICU 65
+ */
+ class U_COMMON_API Builder : public UMemory {
+ public:
+ /**
+ * Constructs a builder used in chaining parameters for building a LocaleMatcher.
+ *
+ * @return a new Builder object
+ * @stable ICU 65
+ */
+ Builder() {}
+
+ /**
+ * Move constructor; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @stable ICU 65
+ */
+ Builder(Builder &&src) noexcept;
+
+ /**
+ * Destructor.
+ *
+ * @stable ICU 65
+ */
+ ~Builder();
+
+ /**
+ * Move assignment; might modify the source.
+ * This builder will have the same contents that the source builder had.
+ *
+ * @param src Builder to move contents from.
+ * @stable ICU 65
+ */
+ Builder &operator=(Builder &&src) noexcept;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9", and sets the supported locales accordingly.
+ * Allows whitespace in more places but does not allow "*".
+ * Clears any previously set/added supported locales first.
+ *
+ * @param locales the Accept-Language string of locales to set
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setSupportedLocalesFromListString(StringPiece locales);
+
+ /**
+ * Copies the supported locales, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locales the list of locale
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setSupportedLocales(Locale::Iterator &locales);
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ template<typename Iter>
+ Builder &setSupportedLocales(Iter begin, Iter end) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ clearSupportedLocales();
+ while (begin != end) {
+ addSupportedLocale(*begin++);
+ }
+ return *this;
+ }
+
+ /**
+ * Copies the supported locales from the begin/end range, preserving iteration order.
+ * Calls the converter to convert each *begin to a Locale or const Locale &.
+ * Clears any previously set/added supported locales first.
+ * Duplicates are allowed, and are not removed.
+ *
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ template<typename Iter, typename Conv>
+ Builder &setSupportedLocalesViaConverter(Iter begin, Iter end, Conv converter) {
+ if (U_FAILURE(errorCode_)) { return *this; }
+ clearSupportedLocales();
+ while (begin != end) {
+ addSupportedLocale(converter(*begin++));
+ }
+ return *this;
+ }
+
+ /**
+ * Adds another supported locale.
+ * Duplicates are allowed, and are not removed.
+ *
+ * @param locale another locale
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &addSupportedLocale(const Locale &locale);
+
+ /**
+ * Sets no default locale.
+ * There will be no explicit or implicit default locale.
+ * If there is no good match, then the matcher will return nullptr for the
+ * best supported locale.
+ *
+ * @stable ICU 68
+ */
+ Builder &setNoDefaultLocale();
+
+ /**
+ * Sets the default locale; if nullptr, or if it is not set explicitly,
+ * then the first supported locale is used as the default locale.
+ * There is no default locale at all (nullptr will be returned instead)
+ * if setNoDefaultLocale() is called.
+ *
+ * @param defaultLocale the default locale (will be copied)
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setDefaultLocale(const Locale *defaultLocale);
+
+ /**
+ * If ULOCMATCH_FAVOR_SCRIPT, then the language differences are smaller than script
+ * differences.
+ * This is used in situations (such as maps) where
+ * it is better to fall back to the same script than a similar language.
+ *
+ * @param subtag the subtag to favor
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setFavorSubtag(ULocMatchFavorSubtag subtag);
+
+ /**
+ * Option for whether all desired locales are treated equally or
+ * earlier ones are preferred (this is the default).
+ *
+ * @param demotion the demotion per desired locale to set.
+ * @return this Builder object
+ * @stable ICU 65
+ */
+ Builder &setDemotionPerDesiredLocale(ULocMatchDemotion demotion);
+
+ /**
+ * Option for whether to include or ignore one-way (fallback) match data.
+ * By default, they are included.
+ *
+ * @param matchDirection the match direction to set.
+ * @return this Builder object
+ * @stable ICU 67
+ */
+ Builder &setDirection(ULocMatchDirection matchDirection) {
+ if (U_SUCCESS(errorCode_)) {
+ direction_ = matchDirection;
+ }
+ return *this;
+ }
+
+ /**
+ * Sets the maximum distance for an acceptable match.
+ * The matcher will return a match for a pair of locales only if
+ * they match at least as well as the pair given here.
+ *
+ * For example, setMaxDistance(en-US, en-GB) limits matches to ones where the
+ * (desired, support) locales have a distance no greater than a region subtag difference.
+ * This is much stricter than the CLDR default.
+ *
+ * The details of locale matching are subject to changes in
+ * CLDR data and in the algorithm.
+ * Specifying a maximum distance in relative terms via a sample pair of locales
+ * insulates from changes that affect all distance metrics similarly,
+ * but some changes will necessarily affect relative distances between
+ * different pairs of locales.
+ *
+ * @param desired the desired locale for distance comparison.
+ * @param supported the supported locale for distance comparison.
+ * @return this Builder object
+ * @stable ICU 68
+ */
+ Builder &setMaxDistance(const Locale &desired, const Locale &supported);
+
+ /**
+ * Sets the UErrorCode if an error occurred while setting parameters.
+ * Preserves older error codes in the outErrorCode.
+ *
+ * @param outErrorCode Set to an error code if it does not contain one already
+ * and an error occurred while setting parameters.
+ * Otherwise unchanged.
+ * @return true if U_FAILURE(outErrorCode)
+ * @stable ICU 65
+ */
+ UBool copyErrorTo(UErrorCode &outErrorCode) const;
+
+ /**
+ * Builds and returns a new locale matcher.
+ * This builder can continue to be used.
+ *
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return LocaleMatcher
+ * @stable ICU 65
+ */
+ LocaleMatcher build(UErrorCode &errorCode) const;
+
+ private:
+ friend class LocaleMatcher;
+
+ Builder(const Builder &other) = delete;
+ Builder &operator=(const Builder &other) = delete;
+
+ void clearSupportedLocales();
+ bool ensureSupportedLocaleVector();
+
+ UErrorCode errorCode_ = U_ZERO_ERROR;
+ UVector *supportedLocales_ = nullptr;
+ int32_t thresholdDistance_ = -1;
+ ULocMatchDemotion demotion_ = ULOCMATCH_DEMOTION_REGION;
+ Locale *defaultLocale_ = nullptr;
+ bool withDefault_ = true;
+ ULocMatchFavorSubtag favor_ = ULOCMATCH_FAVOR_LANGUAGE;
+ ULocMatchDirection direction_ = ULOCMATCH_DIRECTION_WITH_ONE_WAY;
+ Locale *maxDistanceDesired_ = nullptr;
+ Locale *maxDistanceSupported_ = nullptr;
+ };
+
+ // FYI No public LocaleMatcher constructors in C++; use the Builder.
+
+ /**
+ * Move copy constructor; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * @param src source matcher
+ * @stable ICU 65
+ */
+ LocaleMatcher(LocaleMatcher &&src) noexcept;
+
+ /**
+ * Destructor.
+ * @stable ICU 65
+ */
+ ~LocaleMatcher();
+
+ /**
+ * Move assignment operator; might modify the source.
+ * This matcher will have the same settings that the source matcher had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source matcher
+ * @return *this
+ * @stable ICU 65
+ */
+ LocaleMatcher &operator=(LocaleMatcher &&src) noexcept;
+
+ /**
+ * Returns the supported locale which best matches the desired locale.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @stable ICU 65
+ */
+ const Locale *getBestMatch(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the supported locale which best matches one of the desired locales.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @stable ICU 65
+ */
+ const Locale *getBestMatch(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+ /**
+ * Parses an Accept-Language string
+ * (<a href="https://tools.ietf.org/html/rfc2616#section-14.4">RFC 2616 Section 14.4</a>),
+ * such as "af, en, fr;q=0.9",
+ * and returns the supported locale which best matches one of the desired locales.
+ * Allows whitespace in more places but does not allow "*".
+ *
+ * @param desiredLocaleList Typically a user's languages, as an Accept-Language string.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching supported locale.
+ * @stable ICU 65
+ */
+ const Locale *getBestMatchForListString(StringPiece desiredLocaleList, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired locale and the supported locales.
+ * If the result's desired locale is not nullptr, then it is the address of the input locale.
+ * It has not been cloned.
+ *
+ * @param desiredLocale Typically a user's language.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of the desired and a supported locale.
+ * @stable ICU 65
+ */
+ Result getBestMatchResult(const Locale &desiredLocale, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the best match between the desired and supported locales.
+ * If the result's desired locale is not nullptr, then it is a clone of
+ * the best-matching desired locale. The Result object owns the clone.
+ *
+ * @param desiredLocales Typically a user's languages, in order of preference (descending).
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return the best-matching pair of a desired and a supported locale.
+ * @stable ICU 65
+ */
+ Result getBestMatchResult(Locale::Iterator &desiredLocales, UErrorCode &errorCode) const;
+
+ /**
+ * Returns true if the pair of locales matches acceptably.
+ * This is influenced by Builder options such as setDirection(), setFavorSubtag(),
+ * and setMaxDistance().
+ *
+ * @param desired The desired locale.
+ * @param supported The supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return true if the pair of locales matches acceptably.
+ * @stable ICU 68
+ */
+ UBool isMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Returns a fraction between 0 and 1, where 1 means that the languages are a
+ * perfect match, and 0 means that they are completely different.
+ *
+ * <p>This is mostly an implementation detail, and the precise values may change over time.
+ * The implementation may use either the maximized forms or the others ones, or both.
+ * The implementation may or may not rely on the forms to be consistent with each other.
+ *
+ * <p>Callers should construct and use a matcher rather than match pairs of locales directly.
+ *
+ * @param desired Desired locale.
+ * @param supported Supported locale.
+ * @param errorCode ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return value between 0 and 1, inclusive.
+ * @internal (has a known user)
+ */
+ double internalMatch(const Locale &desired, const Locale &supported, UErrorCode &errorCode) const;
+#endif // U_HIDE_INTERNAL_API
+
+private:
+ LocaleMatcher(const Builder &builder, UErrorCode &errorCode);
+ LocaleMatcher(const LocaleMatcher &other) = delete;
+ LocaleMatcher &operator=(const LocaleMatcher &other) = delete;
+
+ int32_t putIfAbsent(const LSR &lsr, int32_t i, int32_t suppLength, UErrorCode &errorCode);
+
+ int32_t getBestSuppIndex(LSR desiredLSR, LocaleLsrIterator *remainingIter, UErrorCode &errorCode) const;
+
+ const XLikelySubtags &likelySubtags;
+ const LocaleDistance &localeDistance;
+ int32_t thresholdDistance;
+ int32_t demotionPerDesiredLocale;
+ ULocMatchFavorSubtag favorSubtag;
+ ULocMatchDirection direction;
+
+ // These are in input order.
+ const Locale ** supportedLocales;
+ LSR *lsrs;
+ int32_t supportedLocalesLength;
+ // These are in preference order: 1. Default locale 2. paradigm locales 3. others.
+ UHashtable *supportedLsrToIndex; // Map<LSR, Integer>
+ // Array versions of the supportedLsrToIndex keys and values.
+ // The distance lookup loops over the supportedLSRs and returns the index of the best match.
+ const LSR **supportedLSRs;
+ int32_t *supportedIndexes;
+ int32_t supportedLSRsLength;
+ Locale *ownedDefaultLocale;
+ const Locale *defaultLocale;
+};
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+#endif // __LOCALEMATCHER_H__
diff --git a/intl/icu/source/common/unicode/localpointer.h b/intl/icu/source/common/unicode/localpointer.h
new file mode 100644
index 0000000000..b8be3d7942
--- /dev/null
+++ b/intl/icu/source/common/unicode/localpointer.h
@@ -0,0 +1,595 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: localpointer.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov13
+* created by: Markus W. Scherer
+*/
+
+#ifndef __LOCALPOINTER_H__
+#define __LOCALPOINTER_H__
+
+/**
+ * \file
+ * \brief C++ API: "Smart pointers" for use with and in ICU4C C++ code.
+ *
+ * These classes are inspired by
+ * - std::auto_ptr
+ * - boost::scoped_ptr & boost::scoped_array
+ * - Taligent Safe Pointers (TOnlyPointerTo)
+ *
+ * but none of those provide for all of the goals for ICU smart pointers:
+ * - Smart pointer owns the object and releases it when it goes out of scope.
+ * - No transfer of ownership via copy/assignment to reduce misuse. Simpler & more robust.
+ * - ICU-compatible: No exceptions.
+ * - Need to be able to orphan/release the pointer and its ownership.
+ * - Need variants for normal C++ object pointers, C++ arrays, and ICU C service objects.
+ *
+ * For details see https://icu.unicode.org/design/cpp/scoped_ptr
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <memory>
+
+U_NAMESPACE_BEGIN
+
+/**
+ * "Smart pointer" base class; do not use directly: use LocalPointer etc.
+ *
+ * Base class for smart pointer classes that do not throw exceptions.
+ *
+ * Do not use this base class directly, since it does not delete its pointer.
+ * A subclass must implement methods that delete the pointer:
+ * Destructor and adoptInstead().
+ *
+ * There is no operator T *() provided because the programmer must decide
+ * whether to use getAlias() (without transfer of ownership) or orphan()
+ * (with transfer of ownership and NULLing of the pointer).
+ *
+ * @see LocalPointer
+ * @see LocalArray
+ * @see U_DEFINE_LOCAL_OPEN_POINTER
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointerBase {
+public:
+ // No heap allocation. Use only on the stack.
+ static void* U_EXPORT2 operator new(size_t) = delete;
+ static void* U_EXPORT2 operator new[](size_t) = delete;
+#if U_HAVE_PLACEMENT_NEW
+ static void* U_EXPORT2 operator new(size_t, void*) = delete;
+#endif
+
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointerBase(T *p=nullptr) : ptr(p) {}
+ /**
+ * Destructor deletes the object it owns.
+ * Subclass must override: Base class does nothing.
+ * @stable ICU 4.4
+ */
+ ~LocalPointerBase() { /* delete ptr; */ }
+ /**
+ * nullptr check.
+ * @return true if ==nullptr
+ * @stable ICU 4.4
+ */
+ UBool isNull() const { return ptr==nullptr; }
+ /**
+ * nullptr check.
+ * @return true if !=nullptr
+ * @stable ICU 4.4
+ */
+ UBool isValid() const { return ptr!=nullptr; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with ==nullptr need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value equals other
+ * @stable ICU 4.4
+ */
+ bool operator==(const T *other) const { return ptr==other; }
+ /**
+ * Comparison with a simple pointer, so that existing code
+ * with !=nullptr need not be changed.
+ * @param other simple pointer for comparison
+ * @return true if this pointer value differs from other
+ * @stable ICU 4.4
+ */
+ bool operator!=(const T *other) const { return ptr!=other; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *getAlias() const { return ptr; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value as a reference
+ * @stable ICU 4.4
+ */
+ T &operator*() const { return *ptr; }
+ /**
+ * Access without ownership change.
+ * @return the pointer value
+ * @stable ICU 4.4
+ */
+ T *operator->() const { return ptr; }
+ /**
+ * Gives up ownership; the internal pointer becomes nullptr.
+ * @return the pointer value;
+ * caller becomes responsible for deleting the object
+ * @stable ICU 4.4
+ */
+ T *orphan() {
+ T *p=ptr;
+ ptr=nullptr;
+ return p;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * Subclass must override: Base class does not delete the object.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ // delete ptr;
+ ptr=p;
+ }
+protected:
+ /**
+ * Actual pointer.
+ * @internal
+ */
+ T *ptr;
+private:
+ // No comparison operators with other LocalPointerBases.
+ bool operator==(const LocalPointerBase<T> &other);
+ bool operator!=(const LocalPointerBase<T> &other);
+ // No ownership sharing: No copy constructor, no assignment operator.
+ LocalPointerBase(const LocalPointerBase<T> &other);
+ void operator=(const LocalPointerBase<T> &other);
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the standard C++ delete operator.
+ * For most methods see the LocalPointerBase base class.
+ *
+ * Usage example:
+ * \code
+ * LocalPointer<UnicodeString> s(new UnicodeString((UChar32)0x50005));
+ * int32_t length=s->length(); // 2
+ * char16_t lead=s->charAt(0); // 0xd900
+ * if(some condition) { return; } // no need to explicitly delete the pointer
+ * s.adoptInstead(new UnicodeString((char16_t)0xfffc));
+ * length=s->length(); // 1
+ * // no need to explicitly delete the pointer
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalPointer : public LocalPointerBase<T> {
+public:
+ using LocalPointerBase<T>::operator*;
+ using LocalPointerBase<T>::operator->;
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalPointer(T *p=nullptr) : LocalPointerBase<T>(p) {}
+ /**
+ * Constructor takes ownership and reports an error if nullptr.
+ *
+ * This constructor is intended to be used with other-class constructors
+ * that may report a failure UErrorCode,
+ * so that callers need to check only for U_FAILURE(errorCode)
+ * and not also separately for isNull().
+ *
+ * @param p simple pointer to an object that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==nullptr and no other failure code had been set
+ * @stable ICU 55
+ */
+ LocalPointer(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
+ if(p==nullptr && U_SUCCESS(errorCode)) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ /**
+ * Move constructor, leaves src with isNull().
+ * @param src source smart pointer
+ * @stable ICU 56
+ */
+ LocalPointer(LocalPointer<T> &&src) noexcept : LocalPointerBase<T>(src.ptr) {
+ src.ptr=nullptr;
+ }
+
+ /**
+ * Constructs a LocalPointer from a C++11 std::unique_ptr.
+ * The LocalPointer steals the object owned by the std::unique_ptr.
+ *
+ * This constructor works via move semantics. If your std::unique_ptr is
+ * in a local variable, you must use std::move.
+ *
+ * @param p The std::unique_ptr from which the pointer will be stolen.
+ * @stable ICU 64
+ */
+ explicit LocalPointer(std::unique_ptr<T> &&p)
+ : LocalPointerBase<T>(p.release()) {}
+
+ /**
+ * Destructor deletes the object it owns.
+ * @stable ICU 4.4
+ */
+ ~LocalPointer() {
+ delete LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Move assignment operator, leaves src with isNull().
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source smart pointer
+ * @return *this
+ * @stable ICU 56
+ */
+ LocalPointer<T> &operator=(LocalPointer<T> &&src) noexcept {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=src.ptr;
+ src.ptr=nullptr;
+ return *this;
+ }
+
+ /**
+ * Move-assign from an std::unique_ptr to this LocalPointer.
+ * Steals the pointer from the std::unique_ptr.
+ *
+ * @param p The std::unique_ptr from which the pointer will be stolen.
+ * @return *this
+ * @stable ICU 64
+ */
+ LocalPointer<T> &operator=(std::unique_ptr<T> &&p) noexcept {
+ adoptInstead(p.release());
+ return *this;
+ }
+
+ /**
+ * Swap pointers.
+ * @param other other smart pointer
+ * @stable ICU 56
+ */
+ void swap(LocalPointer<T> &other) noexcept {
+ T *temp=LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=other.ptr;
+ other.ptr=temp;
+ }
+ /**
+ * Non-member LocalPointer swap function.
+ * @param p1 will get p2's pointer
+ * @param p2 will get p1's pointer
+ * @stable ICU 56
+ */
+ friend inline void swap(LocalPointer<T> &p1, LocalPointer<T> &p2) noexcept {
+ p1.swap(p2);
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an object that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Deletes the object it owns,
+ * and adopts (takes ownership of) the one passed in.
+ *
+ * If U_FAILURE(errorCode), then the current object is retained and the new one deleted.
+ *
+ * If U_SUCCESS(errorCode) but the input pointer is nullptr,
+ * then U_MEMORY_ALLOCATION_ERROR is set,
+ * the current object is deleted, and nullptr is set.
+ *
+ * @param p simple pointer to an object that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==nullptr and no other failure code had been set
+ * @stable ICU 55
+ */
+ void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
+ if(U_SUCCESS(errorCode)) {
+ delete LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ if(p==nullptr) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ } else {
+ delete p;
+ }
+ }
+
+ /**
+ * Conversion operator to a C++11 std::unique_ptr.
+ * Disowns the object and gives it to the returned std::unique_ptr.
+ *
+ * This operator works via move semantics. If your LocalPointer is
+ * in a local variable, you must use std::move.
+ *
+ * @return An std::unique_ptr owning the pointer previously owned by this
+ * icu::LocalPointer.
+ * @stable ICU 64
+ */
+ operator std::unique_ptr<T> () && {
+ return std::unique_ptr<T>(LocalPointerBase<T>::orphan());
+ }
+};
+
+/**
+ * "Smart pointer" class, deletes objects via the C++ array delete[] operator.
+ * For most methods see the LocalPointerBase base class.
+ * Adds operator[] for array item access.
+ *
+ * Usage example:
+ * \code
+ * LocalArray<UnicodeString> a(new UnicodeString[2]);
+ * a[0].append((char16_t)0x61);
+ * if(some condition) { return; } // no need to explicitly delete the array
+ * a.adoptInstead(new UnicodeString[4]);
+ * a[3].append((char16_t)0x62).append((char16_t)0x63).reverse();
+ * // no need to explicitly delete the array
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @stable ICU 4.4
+ */
+template<typename T>
+class LocalArray : public LocalPointerBase<T> {
+public:
+ using LocalPointerBase<T>::operator*;
+ using LocalPointerBase<T>::operator->;
+ /**
+ * Constructor takes ownership.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ explicit LocalArray(T *p=nullptr) : LocalPointerBase<T>(p) {}
+ /**
+ * Constructor takes ownership and reports an error if nullptr.
+ *
+ * This constructor is intended to be used with other-class constructors
+ * that may report a failure UErrorCode,
+ * so that callers need to check only for U_FAILURE(errorCode)
+ * and not also separately for isNull().
+ *
+ * @param p simple pointer to an array of T objects that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==nullptr and no other failure code had been set
+ * @stable ICU 56
+ */
+ LocalArray(T *p, UErrorCode &errorCode) : LocalPointerBase<T>(p) {
+ if(p==nullptr && U_SUCCESS(errorCode)) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ }
+ /**
+ * Move constructor, leaves src with isNull().
+ * @param src source smart pointer
+ * @stable ICU 56
+ */
+ LocalArray(LocalArray<T> &&src) noexcept : LocalPointerBase<T>(src.ptr) {
+ src.ptr=nullptr;
+ }
+
+ /**
+ * Constructs a LocalArray from a C++11 std::unique_ptr of an array type.
+ * The LocalPointer steals the array owned by the std::unique_ptr.
+ *
+ * This constructor works via move semantics. If your std::unique_ptr is
+ * in a local variable, you must use std::move.
+ *
+ * @param p The std::unique_ptr from which the array will be stolen.
+ * @stable ICU 64
+ */
+ explicit LocalArray(std::unique_ptr<T[]> &&p)
+ : LocalPointerBase<T>(p.release()) {}
+
+ /**
+ * Destructor deletes the array it owns.
+ * @stable ICU 4.4
+ */
+ ~LocalArray() {
+ delete[] LocalPointerBase<T>::ptr;
+ }
+ /**
+ * Move assignment operator, leaves src with isNull().
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source smart pointer
+ * @return *this
+ * @stable ICU 56
+ */
+ LocalArray<T> &operator=(LocalArray<T> &&src) noexcept {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=src.ptr;
+ src.ptr=nullptr;
+ return *this;
+ }
+
+ /**
+ * Move-assign from an std::unique_ptr to this LocalPointer.
+ * Steals the array from the std::unique_ptr.
+ *
+ * @param p The std::unique_ptr from which the array will be stolen.
+ * @return *this
+ * @stable ICU 64
+ */
+ LocalArray<T> &operator=(std::unique_ptr<T[]> &&p) noexcept {
+ adoptInstead(p.release());
+ return *this;
+ }
+
+ /**
+ * Swap pointers.
+ * @param other other smart pointer
+ * @stable ICU 56
+ */
+ void swap(LocalArray<T> &other) noexcept {
+ T *temp=LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=other.ptr;
+ other.ptr=temp;
+ }
+ /**
+ * Non-member LocalArray swap function.
+ * @param p1 will get p2's pointer
+ * @param p2 will get p1's pointer
+ * @stable ICU 56
+ */
+ friend inline void swap(LocalArray<T> &p1, LocalArray<T> &p2) noexcept {
+ p1.swap(p2);
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ * @param p simple pointer to an array of T objects that is adopted
+ * @stable ICU 4.4
+ */
+ void adoptInstead(T *p) {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ }
+ /**
+ * Deletes the array it owns,
+ * and adopts (takes ownership of) the one passed in.
+ *
+ * If U_FAILURE(errorCode), then the current array is retained and the new one deleted.
+ *
+ * If U_SUCCESS(errorCode) but the input pointer is nullptr,
+ * then U_MEMORY_ALLOCATION_ERROR is set,
+ * the current array is deleted, and nullptr is set.
+ *
+ * @param p simple pointer to an array of T objects that is adopted
+ * @param errorCode in/out UErrorCode, set to U_MEMORY_ALLOCATION_ERROR
+ * if p==nullptr and no other failure code had been set
+ * @stable ICU 56
+ */
+ void adoptInsteadAndCheckErrorCode(T *p, UErrorCode &errorCode) {
+ if(U_SUCCESS(errorCode)) {
+ delete[] LocalPointerBase<T>::ptr;
+ LocalPointerBase<T>::ptr=p;
+ if(p==nullptr) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+ } else {
+ delete[] p;
+ }
+ }
+ /**
+ * Array item access (writable).
+ * No index bounds check.
+ * @param i array index
+ * @return reference to the array item
+ * @stable ICU 4.4
+ */
+ T &operator[](ptrdiff_t i) const { return LocalPointerBase<T>::ptr[i]; }
+
+ /**
+ * Conversion operator to a C++11 std::unique_ptr.
+ * Disowns the object and gives it to the returned std::unique_ptr.
+ *
+ * This operator works via move semantics. If your LocalPointer is
+ * in a local variable, you must use std::move.
+ *
+ * @return An std::unique_ptr owning the pointer previously owned by this
+ * icu::LocalPointer.
+ * @stable ICU 64
+ */
+ operator std::unique_ptr<T[]> () && {
+ return std::unique_ptr<T[]>(LocalPointerBase<T>::orphan());
+ }
+};
+
+/**
+ * \def U_DEFINE_LOCAL_OPEN_POINTER
+ * "Smart pointer" definition macro, deletes objects via the closeFunction.
+ * Defines a subclass of LocalPointerBase which works just
+ * like LocalPointer<Type> except that this subclass will use the closeFunction
+ * rather than the C++ delete operator.
+ *
+ * Usage example:
+ * \code
+ * LocalUCaseMapPointer csm(ucasemap_open(localeID, options, &errorCode));
+ * utf8OutLength=ucasemap_utf8ToLower(csm.getAlias(),
+ * utf8Out, (int32_t)sizeof(utf8Out),
+ * utf8In, utf8InLength, &errorCode);
+ * if(U_FAILURE(errorCode)) { return; } // no need to explicitly delete the UCaseMap
+ * \endcode
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction) \
+ class LocalPointerClassName : public LocalPointerBase<Type> { \
+ public: \
+ using LocalPointerBase<Type>::operator*; \
+ using LocalPointerBase<Type>::operator->; \
+ explicit LocalPointerClassName(Type *p=nullptr) : LocalPointerBase<Type>(p) {} \
+ LocalPointerClassName(LocalPointerClassName &&src) noexcept \
+ : LocalPointerBase<Type>(src.ptr) { \
+ src.ptr=nullptr; \
+ } \
+ /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
+ explicit LocalPointerClassName(std::unique_ptr<Type, decltype(&closeFunction)> &&p) \
+ : LocalPointerBase<Type>(p.release()) {} \
+ ~LocalPointerClassName() { if (ptr != nullptr) { closeFunction(ptr); } } \
+ LocalPointerClassName &operator=(LocalPointerClassName &&src) noexcept { \
+ if (ptr != nullptr) { closeFunction(ptr); } \
+ LocalPointerBase<Type>::ptr=src.ptr; \
+ src.ptr=nullptr; \
+ return *this; \
+ } \
+ /* TODO: Be agnostic of the deleter function signature from the user-provided std::unique_ptr? */ \
+ LocalPointerClassName &operator=(std::unique_ptr<Type, decltype(&closeFunction)> &&p) { \
+ adoptInstead(p.release()); \
+ return *this; \
+ } \
+ void swap(LocalPointerClassName &other) noexcept { \
+ Type *temp=LocalPointerBase<Type>::ptr; \
+ LocalPointerBase<Type>::ptr=other.ptr; \
+ other.ptr=temp; \
+ } \
+ friend inline void swap(LocalPointerClassName &p1, LocalPointerClassName &p2) noexcept { \
+ p1.swap(p2); \
+ } \
+ void adoptInstead(Type *p) { \
+ if (ptr != nullptr) { closeFunction(ptr); } \
+ ptr=p; \
+ } \
+ operator std::unique_ptr<Type, decltype(&closeFunction)> () && { \
+ return std::unique_ptr<Type, decltype(&closeFunction)>(LocalPointerBase<Type>::orphan(), closeFunction); \
+ } \
+ }
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+#endif /* __LOCALPOINTER_H__ */
diff --git a/intl/icu/source/common/unicode/locdspnm.h b/intl/icu/source/common/unicode/locdspnm.h
new file mode 100644
index 0000000000..4f06f85704
--- /dev/null
+++ b/intl/icu/source/common/unicode/locdspnm.h
@@ -0,0 +1,211 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef LOCDSPNM_H
+#define LOCDSPNM_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C++ API: Provides display names of Locale and its components.
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/locid.h"
+#include "unicode/strenum.h"
+#include "unicode/uscript.h"
+#include "unicode/uldnames.h"
+#include "unicode/udisplaycontext.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Returns display names of Locales and components of Locales. For
+ * more information on language, script, region, variant, key, and
+ * values, see Locale.
+ * @stable ICU 4.4
+ */
+class U_COMMON_API LocaleDisplayNames : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.4
+ */
+ virtual ~LocaleDisplayNames();
+
+ /**
+ * Convenience overload of
+ * {@link #createInstance(const Locale& locale, UDialectHandling dialectHandling)}
+ * that specifies STANDARD dialect handling.
+ * @param locale the display locale
+ * @return a LocaleDisplayNames instance
+ * @stable ICU 4.4
+ */
+ inline static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale);
+
+ /**
+ * Returns an instance of LocaleDisplayNames that returns names
+ * formatted for the provided locale, using the provided
+ * dialectHandling.
+ *
+ * @param locale the display locale
+ * @param dialectHandling how to select names for locales
+ * @return a LocaleDisplayNames instance
+ * @stable ICU 4.4
+ */
+ static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale,
+ UDialectHandling dialectHandling);
+
+ /**
+ * Returns an instance of LocaleDisplayNames that returns names formatted
+ * for the provided locale, using the provided UDisplayContext settings.
+ *
+ * @param locale the display locale
+ * @param contexts List of one or more context settings (e.g. for dialect
+ * handling, capitalization, etc.
+ * @param length Number of items in the contexts list
+ * @return a LocaleDisplayNames instance
+ * @stable ICU 51
+ */
+ static LocaleDisplayNames* U_EXPORT2 createInstance(const Locale& locale,
+ UDisplayContext *contexts, int32_t length);
+
+ // getters for state
+ /**
+ * Returns the locale used to determine the display names. This is
+ * not necessarily the same locale passed to {@link #createInstance}.
+ * @return the display locale
+ * @stable ICU 4.4
+ */
+ virtual const Locale& getLocale() const = 0;
+
+ /**
+ * Returns the dialect handling used in the display names.
+ * @return the dialect handling enum
+ * @stable ICU 4.4
+ */
+ virtual UDialectHandling getDialectHandling() const = 0;
+
+ /**
+ * Returns the UDisplayContext value for the specified UDisplayContextType.
+ * @param type the UDisplayContextType whose value to return
+ * @return the UDisplayContext for the specified type.
+ * @stable ICU 51
+ */
+ virtual UDisplayContext getContext(UDisplayContextType type) const = 0;
+
+ // names for entire locales
+ /**
+ * Returns the display name of the provided locale.
+ * @param locale the locale whose display name to return
+ * @param result receives the locale's display name
+ * @return the display name of the provided locale
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& localeDisplayName(const Locale& locale,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided locale id.
+ * @param localeId the id of the locale whose display name to return
+ * @param result receives the locale's display name
+ * @return the display name of the provided locale
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& localeDisplayName(const char* localeId,
+ UnicodeString& result) const = 0;
+
+ // names for components of a locale id
+ /**
+ * Returns the display name of the provided language code.
+ * @param lang the language code
+ * @param result receives the language code's display name
+ * @return the display name of the provided language code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& languageDisplayName(const char* lang,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided script code.
+ * @param script the script code
+ * @param result receives the script code's display name
+ * @return the display name of the provided script code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& scriptDisplayName(const char* script,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided script code.
+ * @param scriptCode the script code number
+ * @param result receives the script code's display name
+ * @return the display name of the provided script code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& scriptDisplayName(UScriptCode scriptCode,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided region code.
+ * @param region the region code
+ * @param result receives the region code's display name
+ * @return the display name of the provided region code
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& regionDisplayName(const char* region,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided variant.
+ * @param variant the variant string
+ * @param result receives the variant's display name
+ * @return the display name of the provided variant
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& variantDisplayName(const char* variant,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided locale key.
+ * @param key the locale key name
+ * @param result receives the locale key's display name
+ * @return the display name of the provided locale key
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& keyDisplayName(const char* key,
+ UnicodeString& result) const = 0;
+
+ /**
+ * Returns the display name of the provided value (used with the provided key).
+ * @param key the locale key name
+ * @param value the locale key's value
+ * @param result receives the value's display name
+ * @return the display name of the provided value
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString& keyValueDisplayName(const char* key, const char* value,
+ UnicodeString& result) const = 0;
+};
+
+inline LocaleDisplayNames* LocaleDisplayNames::createInstance(const Locale& locale) {
+ return LocaleDisplayNames::createInstance(locale, ULDN_STANDARD_NAMES);
+}
+
+U_NAMESPACE_END
+
+#endif
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/locid.h b/intl/icu/source/common/unicode/locid.h
new file mode 100644
index 0000000000..a6fbbb7041
--- /dev/null
+++ b/intl/icu/source/common/unicode/locid.h
@@ -0,0 +1,1272 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File locid.h
+*
+* Created by: Helena Shih
+*
+* Modification History:
+*
+* Date Name Description
+* 02/11/97 aliu Changed gLocPath to fgLocPath and added methods to
+* get and set it.
+* 04/02/97 aliu Made operator!= inline; fixed return value of getName().
+* 04/15/97 aliu Cleanup for AIX/Win32.
+* 04/24/97 aliu Numerous changes per code review.
+* 08/18/98 stephen Added tokenizeString(),changed getDisplayName()
+* 09/08/98 stephen Moved definition of kEmptyString for Mac Port
+* 11/09/99 weiv Added const char * getName() const;
+* 04/12/00 srl removing unicodestring api's and cached hash code
+* 08/10/01 grhoten Change the static Locales to accessor functions
+******************************************************************************
+*/
+
+#ifndef LOCID_H
+#define LOCID_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/bytestream.h"
+#include "unicode/localpointer.h"
+#include "unicode/strenum.h"
+#include "unicode/stringpiece.h"
+#include "unicode/uobject.h"
+#include "unicode/putil.h"
+#include "unicode/uloc.h"
+
+/**
+ * \file
+ * \brief C++ API: Locale ID object.
+ */
+
+U_NAMESPACE_BEGIN
+
+// Forward Declarations
+void U_CALLCONV locale_available_init(); /**< @internal */
+
+class StringEnumeration;
+class UnicodeString;
+
+/**
+ * A <code>Locale</code> object represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture.
+ *
+ * The Locale class is not suitable for subclassing.
+ *
+ * <P>
+ * You can create a <code>Locale</code> object using the constructor in
+ * this class:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * Locale( const char* language,
+ * const char* country,
+ * const char* variant);
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first argument to the constructors is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at:
+ * <BR><a href ="http://www.loc.gov/standards/iso639-2/">
+ * http://www.loc.gov/standards/iso639-2/</a>
+ *
+ * <P>
+ * The second argument to the constructors is a valid <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.iso.org/iso/en/prods-services/iso3166ma/index.html">
+ * http://www.iso.org/iso/en/prods-services/iso3166ma/index.html</a>
+ *
+ * <P>
+ * The third constructor requires a third argument--the <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use REVISED for a language's revised script orthography, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_POSIX".
+ *
+ * <P>
+ * Because a <code>Locale</code> object is just an identifier for a region,
+ * no validity check is performed when you construct a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you construct, you must query those resources. For
+ * example, ask the <code>NumberFormat</code> for the locales it supports
+ * using its <code>getAvailableLocales</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>ResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> class provides a number of convenient constants
+ * that you can use to create <code>Locale</code> objects for commonly used
+ * locales. For example, the following refers to a <code>Locale</code> object
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * Locale::getUS()
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've created a <code>Locale</code> you can query it for information about
+ * itself. Use <code>getCountry</code> to get the ISO Country Code and
+ * <code>getLanguage</code> to get the ISO Language Code. You can
+ * use <code>getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * ICU provides a number of classes that perform locale-sensitive
+ * operations. For example, the <code>NumberFormat</code> class formats
+ * numbers, currency, or percentages in a locale-sensitive manner. Classes
+ * such as <code>NumberFormat</code> have a number of convenience methods
+ * for creating a default object of that type. For example, the
+ * <code>NumberFormat</code> class provides these three convenience methods
+ * for creating a default <code>NumberFormat</code> object:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * UErrorCode success = U_ZERO_ERROR;
+ * Locale myLocale;
+ * NumberFormat *nf;
+ *
+ * nf = NumberFormat::createInstance( success ); delete nf;
+ * nf = NumberFormat::createCurrencyInstance( success ); delete nf;
+ * nf = NumberFormat::createPercentInstance( success ); delete nf;
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * nf = NumberFormat::createInstance( myLocale, success ); delete nf;
+ * nf = NumberFormat::createCurrencyInstance( myLocale, success ); delete nf;
+ * nf = NumberFormat::createPercentInstance( myLocale, success ); delete nf;
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of object
+ * (<code>NumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying objects,
+ * <STRONG>not</STRONG> a container for the objects themselves.
+ *
+ * <P>
+ * Each class that performs locale-sensitive operations allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * static Locale* getAvailableLocales(int32_t& numLocales)
+ * static UnicodeString& getDisplayName(const Locale& objectLocale,
+ * const Locale& displayLocale,
+ * UnicodeString& displayName)
+ * static UnicodeString& getDisplayName(const Locale& objectLocale,
+ * UnicodeString& displayName)
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * @stable ICU 2.0
+ * @see ResourceBundle
+ */
+class U_COMMON_API Locale : public UObject {
+public:
+ /** Useful constant for the Root locale. @stable ICU 4.4 */
+ static const Locale &U_EXPORT2 getRoot(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getEnglish(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getFrench(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getGerman(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getItalian(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getJapanese(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getKorean(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getChinese(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getSimplifiedChinese(void);
+ /** Useful constant for this language. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getTraditionalChinese(void);
+
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getFrance(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getGermany(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getItaly(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getJapan(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getKorea(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getChina(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getPRC(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getTaiwan(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getUK(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getUS(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getCanada(void);
+ /** Useful constant for this country/region. @stable ICU 2.0 */
+ static const Locale &U_EXPORT2 getCanadaFrench(void);
+
+
+ /**
+ * Construct a default locale object, a Locale for the default locale ID.
+ *
+ * @see getDefault
+ * @see uloc_getDefault
+ * @stable ICU 2.0
+ */
+ Locale();
+
+ /**
+ * Construct a locale from language, country, variant.
+ * If an error occurs, then the constructed object will be "bogus"
+ * (isBogus() will return true).
+ *
+ * @param language Lowercase two-letter or three-letter ISO-639 code.
+ * This parameter can instead be an ICU style C locale (e.g. "en_US"),
+ * but the other parameters must not be used.
+ * This parameter can be nullptr; if so,
+ * the locale is initialized to match the current default locale.
+ * (This is the same as using the default constructor.)
+ * Please note: The Java Locale class does NOT accept the form
+ * 'new Locale("en_US")' but only 'new Locale("en","US")'
+ *
+ * @param country Uppercase two-letter ISO-3166 code. (optional)
+ * @param variant Uppercase vendor and browser specific code. See class
+ * description. (optional)
+ * @param keywordsAndValues A string consisting of keyword/values pairs, such as
+ * "collation=phonebook;currency=euro"
+ *
+ * @see getDefault
+ * @see uloc_getDefault
+ * @stable ICU 2.0
+ */
+ Locale( const char * language,
+ const char * country = 0,
+ const char * variant = 0,
+ const char * keywordsAndValues = 0);
+
+ /**
+ * Initializes a Locale object from another Locale object.
+ *
+ * @param other The Locale object being copied in.
+ * @stable ICU 2.0
+ */
+ Locale(const Locale& other);
+
+ /**
+ * Move constructor; might leave source in bogus state.
+ * This locale will have the same contents that the source locale had.
+ *
+ * @param other The Locale object being moved in.
+ * @stable ICU 63
+ */
+ Locale(Locale&& other) noexcept;
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~Locale() ;
+
+ /**
+ * Replaces the entire contents of *this with the specified value.
+ *
+ * @param other The Locale object being copied in.
+ * @return *this
+ * @stable ICU 2.0
+ */
+ Locale& operator=(const Locale& other);
+
+ /**
+ * Move assignment operator; might leave source in bogus state.
+ * This locale will have the same contents that the source locale had.
+ * The behavior is undefined if *this and the source are the same object.
+ *
+ * @param other The Locale object being moved in.
+ * @return *this
+ * @stable ICU 63
+ */
+ Locale& operator=(Locale&& other) noexcept;
+
+ /**
+ * Checks if two locale keys are the same.
+ *
+ * @param other The locale key object to be compared with this.
+ * @return true if the two locale keys are the same, false otherwise.
+ * @stable ICU 2.0
+ */
+ bool operator==(const Locale& other) const;
+
+ /**
+ * Checks if two locale keys are not the same.
+ *
+ * @param other The locale key object to be compared with this.
+ * @return true if the two locale keys are not the same, false
+ * otherwise.
+ * @stable ICU 2.0
+ */
+ inline bool operator!=(const Locale& other) const;
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then nullptr is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ Locale *clone() const;
+
+#ifndef U_HIDE_SYSTEM_API
+ /**
+ * Common methods of getting the current default Locale. Used for the
+ * presentation: menus, dialogs, etc. Generally set once when your applet or
+ * application is initialized, then never reset. (If you do reset the
+ * default locale, you probably want to reload your GUI, so that the change
+ * is reflected in your interface.)
+ *
+ * More advanced programs will allow users to use different locales for
+ * different fields, e.g. in a spreadsheet.
+ *
+ * Note that the initial setting will match the host system.
+ * @return a reference to the Locale object for the default locale ID
+ * @system
+ * @stable ICU 2.0
+ */
+ static const Locale& U_EXPORT2 getDefault(void);
+
+ /**
+ * Sets the default. Normally set once at the beginning of a process,
+ * then never reset.
+ * setDefault() only changes ICU's default locale ID, <strong>not</strong>
+ * the default locale ID of the runtime environment.
+ *
+ * @param newLocale Locale to set to. If nullptr, set to the value obtained
+ * from the runtime environment.
+ * @param success The error code.
+ * @system
+ * @stable ICU 2.0
+ */
+ static void U_EXPORT2 setDefault(const Locale& newLocale,
+ UErrorCode& success);
+#endif /* U_HIDE_SYSTEM_API */
+
+ /**
+ * Returns a Locale for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ *
+ * @param tag the input BCP47 language tag.
+ * @param status error information if creating the Locale failed.
+ * @return the Locale for the specified BCP47 language tag.
+ * @stable ICU 63
+ */
+ static Locale U_EXPORT2 forLanguageTag(StringPiece tag, UErrorCode& status);
+
+ /**
+ * Returns a well-formed language tag for this Locale.
+ * <p>
+ * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
+ * requirement will be silently omitted from the result.
+ *
+ * If this function fails, partial output may have been written to the sink.
+ *
+ * @param sink the output sink receiving the BCP47 language
+ * tag for this Locale.
+ * @param status error information if creating the language tag failed.
+ * @stable ICU 63
+ */
+ void toLanguageTag(ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Returns a well-formed language tag for this Locale.
+ * <p>
+ * <b>Note</b>: Any locale fields which do not satisfy the BCP47 syntax
+ * requirement will be silently omitted from the result.
+ *
+ * @param status error information if creating the language tag failed.
+ * @return the BCP47 language tag for this Locale.
+ * @stable ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass toLanguageTag(UErrorCode& status) const;
+
+ /**
+ * Creates a locale which has had minimal canonicalization
+ * as per uloc_getName().
+ * @param name The name to create from. If name is null,
+ * the default Locale is used.
+ * @return new locale object
+ * @stable ICU 2.0
+ * @see uloc_getName
+ */
+ static Locale U_EXPORT2 createFromName(const char *name);
+
+ /**
+ * Creates a locale from the given string after canonicalizing
+ * the string according to CLDR by calling uloc_canonicalize().
+ * @param name the locale ID to create from. Must not be nullptr.
+ * @return a new locale object corresponding to the given name
+ * @stable ICU 3.0
+ * @see uloc_canonicalize
+ */
+ static Locale U_EXPORT2 createCanonical(const char* name);
+
+ /**
+ * Returns the locale's ISO-639 language code.
+ * @return An alias to the code
+ * @stable ICU 2.0
+ */
+ inline const char * getLanguage( ) const;
+
+ /**
+ * Returns the locale's ISO-15924 abbreviation script code.
+ * @return An alias to the code
+ * @see uscript_getShortName
+ * @see uscript_getCode
+ * @stable ICU 2.8
+ */
+ inline const char * getScript( ) const;
+
+ /**
+ * Returns the locale's ISO-3166 country code.
+ * @return An alias to the code
+ * @stable ICU 2.0
+ */
+ inline const char * getCountry( ) const;
+
+ /**
+ * Returns the locale's variant code.
+ * @return An alias to the code
+ * @stable ICU 2.0
+ */
+ inline const char * getVariant( ) const;
+
+ /**
+ * Returns the programmatic name of the entire locale, with the language,
+ * country and variant separated by underbars. If a field is missing, up
+ * to two leading underbars will occur. Example: "en", "de_DE", "en_US_WIN",
+ * "de__POSIX", "fr__MAC", "__MAC", "_MT", "_FR_EURO"
+ * @return A pointer to "name".
+ * @stable ICU 2.0
+ */
+ inline const char * getName() const;
+
+ /**
+ * Returns the programmatic name of the entire locale as getName() would return,
+ * but without keywords.
+ * @return A pointer to "name".
+ * @see getName
+ * @stable ICU 2.8
+ */
+ const char * getBaseName() const;
+
+ /**
+ * Add the likely subtags for this Locale, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If this Locale is already in the maximal form, or not valid, or there is
+ * no data available for maximization, the Locale will be unchanged.
+ *
+ * For example, "und-Zzzz" cannot be maximized, since there is no
+ * reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_US"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param status error information if maximizing this Locale failed.
+ * If this Locale is not well-formed, the error code is
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @stable ICU 63
+ */
+ void addLikelySubtags(UErrorCode& status);
+
+ /**
+ * Minimize the subtags for this Locale, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If this Locale is already in the minimal form, or not valid, or there is
+ * no data available for minimization, the Locale will be unchanged.
+ *
+ * Since the minimization algorithm relies on proper maximization, see the
+ * comments for addLikelySubtags for reasons why there might not be any
+ * data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_US" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param status error information if maximizing this Locale failed.
+ * If this Locale is not well-formed, the error code is
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @stable ICU 63
+ */
+ void minimizeSubtags(UErrorCode& status);
+
+ /**
+ * Canonicalize the locale ID of this object according to CLDR.
+ * @param status the status code
+ * @stable ICU 67
+ * @see createCanonical
+ */
+ void canonicalize(UErrorCode& status);
+
+ /**
+ * Gets the list of keywords for the specified locale.
+ *
+ * @param status the status code
+ * @return pointer to StringEnumeration class, or nullptr if there are no keywords.
+ * Client must dispose of it by calling delete.
+ * @see getKeywords
+ * @stable ICU 2.8
+ */
+ StringEnumeration * createKeywords(UErrorCode &status) const;
+
+ /**
+ * Gets the list of Unicode keywords for the specified locale.
+ *
+ * @param status the status code
+ * @return pointer to StringEnumeration class, or nullptr if there are no keywords.
+ * Client must dispose of it by calling delete.
+ * @see getUnicodeKeywords
+ * @stable ICU 63
+ */
+ StringEnumeration * createUnicodeKeywords(UErrorCode &status) const;
+
+ /**
+ * Gets the set of keywords for this Locale.
+ *
+ * A wrapper to call createKeywords() and write the resulting
+ * keywords as standard strings (or compatible objects) into any kind of
+ * container that can be written to by an STL style output iterator.
+ *
+ * @param iterator an STL style output iterator to write the keywords to.
+ * @param status error information if creating set of keywords failed.
+ * @stable ICU 63
+ */
+ template<typename StringClass, typename OutputIterator>
+ inline void getKeywords(OutputIterator iterator, UErrorCode& status) const;
+
+ /**
+ * Gets the set of Unicode keywords for this Locale.
+ *
+ * A wrapper to call createUnicodeKeywords() and write the resulting
+ * keywords as standard strings (or compatible objects) into any kind of
+ * container that can be written to by an STL style output iterator.
+ *
+ * @param iterator an STL style output iterator to write the keywords to.
+ * @param status error information if creating set of keywords failed.
+ * @stable ICU 63
+ */
+ template<typename StringClass, typename OutputIterator>
+ inline void getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const;
+
+ /**
+ * Gets the value for a keyword.
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value. Case insensitive.
+ * @param buffer The buffer to receive the keyword value.
+ * @param bufferCapacity The capacity of receiving buffer
+ * @param status Returns any error information while performing this operation.
+ * @return the length of the keyword value
+ *
+ * @stable ICU 2.8
+ */
+ int32_t getKeywordValue(const char* keywordName, char *buffer, int32_t bufferCapacity, UErrorCode &status) const;
+
+ /**
+ * Gets the value for a keyword.
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param sink the sink to receive the keyword value.
+ * @param status error information if getting the value failed.
+ * @stable ICU 63
+ */
+ void getKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Gets the value for a keyword.
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param status error information if getting the value failed.
+ * @return the keyword value.
+ * @stable ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass getKeywordValue(StringPiece keywordName, UErrorCode& status) const;
+
+ /**
+ * Gets the Unicode value for a Unicode keyword.
+ *
+ * This uses Unicode key-value pairs, like "co-phonebk".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param sink the sink to receive the keyword value.
+ * @param status error information if getting the value failed.
+ * @stable ICU 63
+ */
+ void getUnicodeKeywordValue(StringPiece keywordName, ByteSink& sink, UErrorCode& status) const;
+
+ /**
+ * Gets the Unicode value for a Unicode keyword.
+ *
+ * This uses Unicode key-value pairs, like "co-phonebk".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword for which we want the value.
+ * @param status error information if getting the value failed.
+ * @return the keyword value.
+ * @stable ICU 63
+ */
+ template<typename StringClass>
+ inline StringClass getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const;
+
+ /**
+ * Sets or removes the value for a keyword.
+ *
+ * For removing all keywords, use getBaseName(),
+ * and construct a new Locale if it differs from getName().
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword to be set. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * nullptr, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param status Returns any error information while performing this operation.
+ *
+ * @stable ICU 49
+ */
+ void setKeywordValue(const char* keywordName, const char* keywordValue, UErrorCode &status);
+
+ /**
+ * Sets or removes the value for a keyword.
+ *
+ * For removing all keywords, use getBaseName(),
+ * and construct a new Locale if it differs from getName().
+ *
+ * This uses legacy keyword=value pairs, like "collation=phonebook".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword to be set.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * nullptr, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param status Returns any error information while performing this operation.
+ * @stable ICU 63
+ */
+ void setKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
+
+ /**
+ * Sets or removes the Unicode value for a Unicode keyword.
+ *
+ * For removing all keywords, use getBaseName(),
+ * and construct a new Locale if it differs from getName().
+ *
+ * This uses Unicode key-value pairs, like "co-phonebk".
+ *
+ * ICU4C doesn't do automatic conversion between legacy and Unicode
+ * keywords and values in getters and setters (as opposed to ICU4J).
+ *
+ * @param keywordName name of the keyword to be set.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * nullptr, will result in the keyword being removed. No error is given if
+ * that keyword does not exist.
+ * @param status Returns any error information while performing this operation.
+ * @stable ICU 63
+ */
+ void setUnicodeKeywordValue(StringPiece keywordName, StringPiece keywordValue, UErrorCode& status);
+
+ /**
+ * returns the locale's three-letter language code, as specified
+ * in ISO draft standard ISO-639-2.
+ * @return An alias to the code, or an empty string
+ * @stable ICU 2.0
+ */
+ const char * getISO3Language() const;
+
+ /**
+ * Fills in "name" with the locale's three-letter ISO-3166 country code.
+ * @return An alias to the code, or an empty string
+ * @stable ICU 2.0
+ */
+ const char * getISO3Country() const;
+
+ /**
+ * Returns the Windows LCID value corresponding to this locale.
+ * This value is stored in the resource data for the locale as a one-to-four-digit
+ * hexadecimal number. If the resource is missing, in the wrong format, or
+ * there is no Windows LCID value that corresponds to this locale, returns 0.
+ * @stable ICU 2.0
+ */
+ uint32_t getLCID(void) const;
+
+ /**
+ * Returns whether this locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then false is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".
+ *
+ * @return true if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+ UBool isRightToLeft() const;
+
+ /**
+ * Fills in "dispLang" with the name of this locale's language in a format suitable for
+ * user display in the default locale. For example, if the locale's language code is
+ * "fr" and the default locale's language code is "en", this function would set
+ * dispLang to "French".
+ * @param dispLang Receives the language's display name.
+ * @return A reference to "dispLang".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayLanguage(UnicodeString& dispLang) const;
+
+ /**
+ * Fills in "dispLang" with the name of this locale's language in a format suitable for
+ * user display in the locale specified by "displayLocale". For example, if the locale's
+ * language code is "en" and displayLocale's language code is "fr", this function would set
+ * dispLang to "Anglais".
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * displayLocale would result in "Anglais", while passing Locale::getGerman()
+ * for displayLocale would result in "Englisch".
+ * @param dispLang Receives the language's display name.
+ * @return A reference to "dispLang".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayLanguage( const Locale& displayLocale,
+ UnicodeString& dispLang) const;
+
+ /**
+ * Fills in "dispScript" with the name of this locale's script in a format suitable
+ * for user display in the default locale. For example, if the locale's script code
+ * is "LATN" and the default locale's language code is "en", this function would set
+ * dispScript to "Latin".
+ * @param dispScript Receives the scripts's display name.
+ * @return A reference to "dispScript".
+ * @stable ICU 2.8
+ */
+ UnicodeString& getDisplayScript( UnicodeString& dispScript) const;
+
+ /**
+ * Fills in "dispScript" with the name of this locale's country in a format suitable
+ * for user display in the locale specified by "displayLocale". For example, if the locale's
+ * script code is "LATN" and displayLocale's language code is "en", this function would set
+ * dispScript to "Latin".
+ * @param displayLocale Specifies the locale to be used to display the name. In other
+ * words, if the locale's script code is "LATN", passing
+ * Locale::getFrench() for displayLocale would result in "", while
+ * passing Locale::getGerman() for displayLocale would result in
+ * "".
+ * @param dispScript Receives the scripts's display name.
+ * @return A reference to "dispScript".
+ * @stable ICU 2.8
+ */
+ UnicodeString& getDisplayScript( const Locale& displayLocale,
+ UnicodeString& dispScript) const;
+
+ /**
+ * Fills in "dispCountry" with the name of this locale's country in a format suitable
+ * for user display in the default locale. For example, if the locale's country code
+ * is "FR" and the default locale's language code is "en", this function would set
+ * dispCountry to "France".
+ * @param dispCountry Receives the country's display name.
+ * @return A reference to "dispCountry".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayCountry( UnicodeString& dispCountry) const;
+
+ /**
+ * Fills in "dispCountry" with the name of this locale's country in a format suitable
+ * for user display in the locale specified by "displayLocale". For example, if the locale's
+ * country code is "US" and displayLocale's language code is "fr", this function would set
+ * dispCountry to "&Eacute;tats-Unis".
+ * @param displayLocale Specifies the locale to be used to display the name. In other
+ * words, if the locale's country code is "US", passing
+ * Locale::getFrench() for displayLocale would result in "&Eacute;tats-Unis", while
+ * passing Locale::getGerman() for displayLocale would result in
+ * "Vereinigte Staaten".
+ * @param dispCountry Receives the country's display name.
+ * @return A reference to "dispCountry".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayCountry( const Locale& displayLocale,
+ UnicodeString& dispCountry) const;
+
+ /**
+ * Fills in "dispVar" with the name of this locale's variant code in a format suitable
+ * for user display in the default locale.
+ * @param dispVar Receives the variant's name.
+ * @return A reference to "dispVar".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayVariant( UnicodeString& dispVar) const;
+
+ /**
+ * Fills in "dispVar" with the name of this locale's variant code in a format
+ * suitable for user display in the locale specified by "displayLocale".
+ * @param displayLocale Specifies the locale to be used to display the name.
+ * @param dispVar Receives the variant's display name.
+ * @return A reference to "dispVar".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayVariant( const Locale& displayLocale,
+ UnicodeString& dispVar) const;
+
+ /**
+ * Fills in "name" with the name of this locale in a format suitable for user display
+ * in the default locale. This function uses getDisplayLanguage(), getDisplayCountry(),
+ * and getDisplayVariant() to do its work, and outputs the display name in the format
+ * "language (country[,variant])". For example, if the default locale is en_US, then
+ * fr_FR's display name would be "French (France)", and es_MX_Traditional's display name
+ * would be "Spanish (Mexico,Traditional)".
+ * @param name Receives the locale's display name.
+ * @return A reference to "name".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayName( UnicodeString& name) const;
+
+ /**
+ * Fills in "name" with the name of this locale in a format suitable for user display
+ * in the locale specified by "displayLocale". This function uses getDisplayLanguage(),
+ * getDisplayCountry(), and getDisplayVariant() to do its work, and outputs the display
+ * name in the format "language (country[,variant])". For example, if displayLocale is
+ * fr_FR, then en_US's display name would be "Anglais (&Eacute;tats-Unis)", and no_NO_NY's
+ * display name would be "norv&eacute;gien (Norv&egrave;ge,NY)".
+ * @param displayLocale Specifies the locale to be used to display the name.
+ * @param name Receives the locale's display name.
+ * @return A reference to "name".
+ * @stable ICU 2.0
+ */
+ UnicodeString& getDisplayName( const Locale& displayLocale,
+ UnicodeString& name) const;
+
+ /**
+ * Generates a hash code for the locale.
+ * @stable ICU 2.0
+ */
+ int32_t hashCode(void) const;
+
+ /**
+ * Sets the locale to bogus
+ * A bogus locale represents a non-existing locale associated
+ * with services that can be instantiated from non-locale data
+ * in addition to locale (for example, collation can be
+ * instantiated from a locale and from a rule set).
+ * @stable ICU 2.1
+ */
+ void setToBogus();
+
+ /**
+ * Gets the bogus state. Locale object can be bogus if it doesn't exist
+ * @return false if it is a real locale, true if it is a bogus locale
+ * @stable ICU 2.1
+ */
+ inline UBool isBogus(void) const;
+
+ /**
+ * Returns a list of all installed locales.
+ * @param count Receives the number of locales in the list.
+ * @return A pointer to an array of Locale objects. This array is the list
+ * of all locales with installed resource files. The called does NOT
+ * get ownership of this list, and must NOT delete it.
+ * @stable ICU 2.0
+ */
+ static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
+
+ /**
+ * Gets a list of all available 2-letter country codes defined in ISO 3166. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+ static const char* const* U_EXPORT2 getISOCountries();
+
+ /**
+ * Gets a list of all available language codes defined in ISO 639. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+ static const char* const* U_EXPORT2 getISOLanguages();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const override;
+
+ /**
+ * A Locale iterator interface similar to a Java Iterator<Locale>.
+ * @stable ICU 65
+ */
+ class U_COMMON_API Iterator /* not : public UObject because this is an interface/mixin class */ {
+ public:
+ /** @stable ICU 65 */
+ virtual ~Iterator();
+
+ /**
+ * @return true if next() can be called again.
+ * @stable ICU 65
+ */
+ virtual UBool hasNext() const = 0;
+
+ /**
+ * @return the next locale.
+ * @stable ICU 65
+ */
+ virtual const Locale &next() = 0;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * @stable ICU 65
+ */
+ template<typename Iter>
+ class RangeIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value is convertible to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @stable ICU 65
+ */
+ RangeIterator(Iter begin, Iter end) : it_(begin), end_(end) {}
+
+ /**
+ * @return true if next() can be called again.
+ * @stable ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * @return the next locale.
+ * @stable ICU 65
+ */
+ const Locale &next() override { return *it_++; }
+
+ private:
+ Iter it_;
+ const Iter end_;
+ };
+
+ /**
+ * A generic Locale iterator implementation over Locale input iterators.
+ * Calls the converter to convert each *begin to a const Locale &.
+ * @stable ICU 65
+ */
+ template<typename Iter, typename Conv>
+ class ConvertingIterator : public Iterator, public UMemory {
+ public:
+ /**
+ * Constructs an iterator from a begin/end range.
+ * Each of the iterator parameter values must be an
+ * input iterator whose value the converter converts to const Locale &.
+ *
+ * @param begin Start of range.
+ * @param end Exclusive end of range.
+ * @param converter Converter from *begin to const Locale & or compatible.
+ * @stable ICU 65
+ */
+ ConvertingIterator(Iter begin, Iter end, Conv converter) :
+ it_(begin), end_(end), converter_(converter) {}
+
+ /**
+ * @return true if next() can be called again.
+ * @stable ICU 65
+ */
+ UBool hasNext() const override { return it_ != end_; }
+
+ /**
+ * @return the next locale.
+ * @stable ICU 65
+ */
+ const Locale &next() override { return converter_(*it_++); }
+
+ private:
+ Iter it_;
+ const Iter end_;
+ Conv converter_;
+ };
+
+protected: /* only protected for testing purposes. DO NOT USE. */
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Set this from a single POSIX style locale string.
+ * @internal
+ */
+ void setFromPOSIXID(const char *posixID);
+#endif /* U_HIDE_INTERNAL_API */
+
+private:
+ /**
+ * Initialize the locale object with a new name.
+ * Was deprecated - used in implementation - moved internal
+ *
+ * @param cLocaleID The new locale name.
+ * @param canonicalize whether to call uloc_canonicalize on cLocaleID
+ */
+ Locale& init(const char* cLocaleID, UBool canonicalize);
+
+ /*
+ * Internal constructor to allow construction of a locale object with
+ * NO side effects. (Default constructor tries to get
+ * the default locale.)
+ */
+ enum ELocaleType {
+ eBOGUS
+ };
+ Locale(ELocaleType);
+
+ /**
+ * Initialize the locale cache for commonly used locales
+ */
+ static Locale *getLocaleCache(void);
+
+ char language[ULOC_LANG_CAPACITY];
+ char script[ULOC_SCRIPT_CAPACITY];
+ char country[ULOC_COUNTRY_CAPACITY];
+ int32_t variantBegin;
+ char* fullName;
+ char fullNameBuffer[ULOC_FULLNAME_CAPACITY];
+ // name without keywords
+ char* baseName;
+ void initBaseName(UErrorCode& status);
+
+ UBool fIsBogus;
+
+ static const Locale &getLocale(int locid);
+
+ /**
+ * A friend to allow the default locale to be set by either the C or C++ API.
+ * @internal (private)
+ */
+ friend Locale *locale_set_default_internal(const char *, UErrorCode& status);
+
+ /**
+ * @internal (private)
+ */
+ friend void U_CALLCONV locale_available_init();
+};
+
+inline bool
+Locale::operator!=(const Locale& other) const
+{
+ return !operator==(other);
+}
+
+template<typename StringClass> inline StringClass
+Locale::toLanguageTag(UErrorCode& status) const
+{
+ StringClass result;
+ StringByteSink<StringClass> sink(&result);
+ toLanguageTag(sink, status);
+ return result;
+}
+
+inline const char *
+Locale::getCountry() const
+{
+ return country;
+}
+
+inline const char *
+Locale::getLanguage() const
+{
+ return language;
+}
+
+inline const char *
+Locale::getScript() const
+{
+ return script;
+}
+
+inline const char *
+Locale::getVariant() const
+{
+ return &baseName[variantBegin];
+}
+
+inline const char *
+Locale::getName() const
+{
+ return fullName;
+}
+
+template<typename StringClass, typename OutputIterator> inline void
+Locale::getKeywords(OutputIterator iterator, UErrorCode& status) const
+{
+ LocalPointer<StringEnumeration> keys(createKeywords(status));
+ if (U_FAILURE(status) || keys.isNull()) {
+ return;
+ }
+ for (;;) {
+ int32_t resultLength;
+ const char* buffer = keys->next(&resultLength, status);
+ if (U_FAILURE(status) || buffer == nullptr) {
+ return;
+ }
+ *iterator++ = StringClass(buffer, resultLength);
+ }
+}
+
+template<typename StringClass, typename OutputIterator> inline void
+Locale::getUnicodeKeywords(OutputIterator iterator, UErrorCode& status) const
+{
+ LocalPointer<StringEnumeration> keys(createUnicodeKeywords(status));
+ if (U_FAILURE(status) || keys.isNull()) {
+ return;
+ }
+ for (;;) {
+ int32_t resultLength;
+ const char* buffer = keys->next(&resultLength, status);
+ if (U_FAILURE(status) || buffer == nullptr) {
+ return;
+ }
+ *iterator++ = StringClass(buffer, resultLength);
+ }
+}
+
+template<typename StringClass> inline StringClass
+Locale::getKeywordValue(StringPiece keywordName, UErrorCode& status) const
+{
+ StringClass result;
+ StringByteSink<StringClass> sink(&result);
+ getKeywordValue(keywordName, sink, status);
+ return result;
+}
+
+template<typename StringClass> inline StringClass
+Locale::getUnicodeKeywordValue(StringPiece keywordName, UErrorCode& status) const
+{
+ StringClass result;
+ StringByteSink<StringClass> sink(&result);
+ getUnicodeKeywordValue(keywordName, sink, status);
+ return result;
+}
+
+inline UBool
+Locale::isBogus(void) const {
+ return fIsBogus;
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/messagepattern.h b/intl/icu/source/common/unicode/messagepattern.h
new file mode 100644
index 0000000000..55b09bfbd4
--- /dev/null
+++ b/intl/icu/source/common/unicode/messagepattern.h
@@ -0,0 +1,949 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2011-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: messagepattern.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2011mar14
+* created by: Markus W. Scherer
+*/
+
+#ifndef __MESSAGEPATTERN_H__
+#define __MESSAGEPATTERN_H__
+
+/**
+ * \file
+ * \brief C++ API: MessagePattern class: Parses and represents ICU MessageFormat patterns.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_FORMATTING
+
+#include "unicode/parseerr.h"
+#include "unicode/unistr.h"
+
+/**
+ * Mode for when an apostrophe starts quoted literal text for MessageFormat output.
+ * The default is DOUBLE_OPTIONAL unless overridden via uconfig.h
+ * (UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE).
+ * <p>
+ * A pair of adjacent apostrophes always results in a single apostrophe in the output,
+ * even when the pair is between two single, text-quoting apostrophes.
+ * <p>
+ * The following table shows examples of desired MessageFormat.format() output
+ * with the pattern strings that yield that output.
+ * <p>
+ * <table>
+ * <tr>
+ * <th>Desired output</th>
+ * <th>DOUBLE_OPTIONAL</th>
+ * <th>DOUBLE_REQUIRED</th>
+ * </tr>
+ * <tr>
+ * <td>I see {many}</td>
+ * <td>I see '{many}'</td>
+ * <td>(same)</td>
+ * </tr>
+ * <tr>
+ * <td>I said {'Wow!'}</td>
+ * <td>I said '{''Wow!''}'</td>
+ * <td>(same)</td>
+ * </tr>
+ * <tr>
+ * <td>I don't know</td>
+ * <td>I don't know OR<br> I don''t know</td>
+ * <td>I don''t know</td>
+ * </tr>
+ * </table>
+ * @stable ICU 4.8
+ * @see UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ */
+enum UMessagePatternApostropheMode {
+ /**
+ * A literal apostrophe is represented by
+ * either a single or a double apostrophe pattern character.
+ * Within a MessageFormat pattern, a single apostrophe only starts quoted literal text
+ * if it immediately precedes a curly brace {},
+ * or a pipe symbol | if inside a choice format,
+ * or a pound symbol # if inside a plural format.
+ * <p>
+ * This is the default behavior starting with ICU 4.8.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_APOS_DOUBLE_OPTIONAL,
+ /**
+ * A literal apostrophe must be represented by
+ * a double apostrophe pattern character.
+ * A single apostrophe always starts quoted literal text.
+ * <p>
+ * This is the behavior of ICU 4.6 and earlier, and of the JDK.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_APOS_DOUBLE_REQUIRED
+};
+/**
+ * @stable ICU 4.8
+ */
+typedef enum UMessagePatternApostropheMode UMessagePatternApostropheMode;
+
+/**
+ * MessagePattern::Part type constants.
+ * @stable ICU 4.8
+ */
+enum UMessagePatternPartType {
+ /**
+ * Start of a message pattern (main or nested).
+ * The length is 0 for the top-level message
+ * and for a choice argument sub-message, otherwise 1 for the '{'.
+ * The value indicates the nesting level, starting with 0 for the main message.
+ * <p>
+ * There is always a later MSG_LIMIT part.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_MSG_START,
+ /**
+ * End of a message pattern (main or nested).
+ * The length is 0 for the top-level message and
+ * the last sub-message of a choice argument,
+ * otherwise 1 for the '}' or (in a choice argument style) the '|'.
+ * The value indicates the nesting level, starting with 0 for the main message.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_MSG_LIMIT,
+ /**
+ * Indicates a substring of the pattern string which is to be skipped when formatting.
+ * For example, an apostrophe that begins or ends quoted text
+ * would be indicated with such a part.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_SKIP_SYNTAX,
+ /**
+ * Indicates that a syntax character needs to be inserted for auto-quoting.
+ * The length is 0.
+ * The value is the character code of the insertion character. (U+0027=APOSTROPHE)
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_INSERT_CHAR,
+ /**
+ * Indicates a syntactic (non-escaped) # symbol in a plural variant.
+ * When formatting, replace this part's substring with the
+ * (value-offset) for the plural argument value.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_REPLACE_NUMBER,
+ /**
+ * Start of an argument.
+ * The length is 1 for the '{'.
+ * The value is the ordinal value of the ArgType. Use getArgType().
+ * <p>
+ * This part is followed by either an ARG_NUMBER or ARG_NAME,
+ * followed by optional argument sub-parts (see UMessagePatternArgType constants)
+ * and finally an ARG_LIMIT part.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_START,
+ /**
+ * End of an argument.
+ * The length is 1 for the '}'.
+ * The value is the ordinal value of the ArgType. Use getArgType().
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_LIMIT,
+ /**
+ * The argument number, provided by the value.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_NUMBER,
+ /**
+ * The argument name.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_NAME,
+ /**
+ * The argument type.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_TYPE,
+ /**
+ * The argument style text.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_STYLE,
+ /**
+ * A selector substring in a "complex" argument style.
+ * The value is undefined and currently always 0.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_SELECTOR,
+ /**
+ * An integer value, for example the offset or an explicit selector value
+ * in a PluralFormat style.
+ * The part value is the integer value.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_INT,
+ /**
+ * A numeric value, for example the offset or an explicit selector value
+ * in a PluralFormat style.
+ * The part value is an index into an internal array of numeric values;
+ * use getNumericValue().
+ * @stable ICU 4.8
+ */
+ UMSGPAT_PART_TYPE_ARG_DOUBLE
+};
+/**
+ * @stable ICU 4.8
+ */
+typedef enum UMessagePatternPartType UMessagePatternPartType;
+
+/**
+ * Argument type constants.
+ * Returned by Part.getArgType() for ARG_START and ARG_LIMIT parts.
+ *
+ * Messages nested inside an argument are each delimited by MSG_START and MSG_LIMIT,
+ * with a nesting level one greater than the surrounding message.
+ * @stable ICU 4.8
+ */
+enum UMessagePatternArgType {
+ /**
+ * The argument has no specified type.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_NONE,
+ /**
+ * The argument has a "simple" type which is provided by the ARG_TYPE part.
+ * An ARG_STYLE part might follow that.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_SIMPLE,
+ /**
+ * The argument is a ChoiceFormat with one or more
+ * ((ARG_INT | ARG_DOUBLE), ARG_SELECTOR, message) tuples.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_CHOICE,
+ /**
+ * The argument is a cardinal-number PluralFormat with an optional ARG_INT or ARG_DOUBLE offset
+ * (e.g., offset:1)
+ * and one or more (ARG_SELECTOR [explicit-value] message) tuples.
+ * If the selector has an explicit value (e.g., =2), then
+ * that value is provided by the ARG_INT or ARG_DOUBLE part preceding the message.
+ * Otherwise the message immediately follows the ARG_SELECTOR.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_PLURAL,
+ /**
+ * The argument is a SelectFormat with one or more (ARG_SELECTOR, message) pairs.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_TYPE_SELECT,
+ /**
+ * The argument is an ordinal-number PluralFormat
+ * with the same style parts sequence and semantics as UMSGPAT_ARG_TYPE_PLURAL.
+ * @stable ICU 50
+ */
+ UMSGPAT_ARG_TYPE_SELECTORDINAL
+};
+/**
+ * @stable ICU 4.8
+ */
+typedef enum UMessagePatternArgType UMessagePatternArgType;
+
+/**
+ * \def UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE
+ * Returns true if the argument type has a plural style part sequence and semantics,
+ * for example UMSGPAT_ARG_TYPE_PLURAL and UMSGPAT_ARG_TYPE_SELECTORDINAL.
+ * @stable ICU 50
+ */
+#define UMSGPAT_ARG_TYPE_HAS_PLURAL_STYLE(argType) \
+ ((argType)==UMSGPAT_ARG_TYPE_PLURAL || (argType)==UMSGPAT_ARG_TYPE_SELECTORDINAL)
+
+enum {
+ /**
+ * Return value from MessagePattern.validateArgumentName() for when
+ * the string is a valid "pattern identifier" but not a number.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_NAME_NOT_NUMBER=-1,
+
+ /**
+ * Return value from MessagePattern.validateArgumentName() for when
+ * the string is invalid.
+ * It might not be a valid "pattern identifier",
+ * or it have only ASCII digits but there is a leading zero or the number is too large.
+ * @stable ICU 4.8
+ */
+ UMSGPAT_ARG_NAME_NOT_VALID=-2
+};
+
+/**
+ * Special value that is returned by getNumericValue(Part) when no
+ * numeric value is defined for a part.
+ * @see MessagePattern.getNumericValue()
+ * @stable ICU 4.8
+ */
+#define UMSGPAT_NO_NUMERIC_VALUE ((double)(-123456789))
+
+U_NAMESPACE_BEGIN
+
+class MessagePatternDoubleList;
+class MessagePatternPartsList;
+
+/**
+ * Parses and represents ICU MessageFormat patterns.
+ * Also handles patterns for ChoiceFormat, PluralFormat and SelectFormat.
+ * Used in the implementations of those classes as well as in tools
+ * for message validation, translation and format conversion.
+ * <p>
+ * The parser handles all syntax relevant for identifying message arguments.
+ * This includes "complex" arguments whose style strings contain
+ * nested MessageFormat pattern substrings.
+ * For "simple" arguments (with no nested MessageFormat pattern substrings),
+ * the argument style is not parsed any further.
+ * <p>
+ * The parser handles named and numbered message arguments and allows both in one message.
+ * <p>
+ * Once a pattern has been parsed successfully, iterate through the parsed data
+ * with countParts(), getPart() and related methods.
+ * <p>
+ * The data logically represents a parse tree, but is stored and accessed
+ * as a list of "parts" for fast and simple parsing and to minimize object allocations.
+ * Arguments and nested messages are best handled via recursion.
+ * For every _START "part", MessagePattern.getLimitPartIndex() efficiently returns
+ * the index of the corresponding _LIMIT "part".
+ * <p>
+ * List of "parts":
+ * <pre>
+ * message = MSG_START (SKIP_SYNTAX | INSERT_CHAR | REPLACE_NUMBER | argument)* MSG_LIMIT
+ * argument = noneArg | simpleArg | complexArg
+ * complexArg = choiceArg | pluralArg | selectArg
+ *
+ * noneArg = ARG_START.NONE (ARG_NAME | ARG_NUMBER) ARG_LIMIT.NONE
+ * simpleArg = ARG_START.SIMPLE (ARG_NAME | ARG_NUMBER) ARG_TYPE [ARG_STYLE] ARG_LIMIT.SIMPLE
+ * choiceArg = ARG_START.CHOICE (ARG_NAME | ARG_NUMBER) choiceStyle ARG_LIMIT.CHOICE
+ * pluralArg = ARG_START.PLURAL (ARG_NAME | ARG_NUMBER) pluralStyle ARG_LIMIT.PLURAL
+ * selectArg = ARG_START.SELECT (ARG_NAME | ARG_NUMBER) selectStyle ARG_LIMIT.SELECT
+ *
+ * choiceStyle = ((ARG_INT | ARG_DOUBLE) ARG_SELECTOR message)+
+ * pluralStyle = [ARG_INT | ARG_DOUBLE] (ARG_SELECTOR [ARG_INT | ARG_DOUBLE] message)+
+ * selectStyle = (ARG_SELECTOR message)+
+ * </pre>
+ * <ul>
+ * <li>Literal output text is not represented directly by "parts" but accessed
+ * between parts of a message, from one part's getLimit() to the next part's getIndex().
+ * <li><code>ARG_START.CHOICE</code> stands for an ARG_START Part with ArgType CHOICE.
+ * <li>In the choiceStyle, the ARG_SELECTOR has the '<', the '#' or
+ * the less-than-or-equal-to sign (U+2264).
+ * <li>In the pluralStyle, the first, optional numeric Part has the "offset:" value.
+ * The optional numeric Part between each (ARG_SELECTOR, message) pair
+ * is the value of an explicit-number selector like "=2",
+ * otherwise the selector is a non-numeric identifier.
+ * <li>The REPLACE_NUMBER Part can occur only in an immediate sub-message of the pluralStyle.
+ * </ul>
+ * <p>
+ * This class is not intended for public subclassing.
+ *
+ * @stable ICU 4.8
+ */
+class U_COMMON_API MessagePattern : public UObject {
+public:
+ /**
+ * Constructs an empty MessagePattern with default UMessagePatternApostropheMode.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ MessagePattern(UErrorCode &errorCode);
+
+ /**
+ * Constructs an empty MessagePattern.
+ * @param mode Explicit UMessagePatternApostropheMode.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ MessagePattern(UMessagePatternApostropheMode mode, UErrorCode &errorCode);
+
+ /**
+ * Constructs a MessagePattern with default UMessagePatternApostropheMode and
+ * parses the MessageFormat pattern string.
+ * @param pattern a MessageFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * TODO: turn @throws into UErrorCode specifics?
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Copy constructor.
+ * @param other Object to copy.
+ * @stable ICU 4.8
+ */
+ MessagePattern(const MessagePattern &other);
+
+ /**
+ * Assignment operator.
+ * @param other Object to copy.
+ * @return *this=other
+ * @stable ICU 4.8
+ */
+ MessagePattern &operator=(const MessagePattern &other);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ virtual ~MessagePattern();
+
+ /**
+ * Parses a MessageFormat pattern string.
+ * @param pattern a MessageFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parse(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Parses a ChoiceFormat pattern string.
+ * @param pattern a ChoiceFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parseChoiceStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Parses a PluralFormat pattern string.
+ * @param pattern a PluralFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parsePluralStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Parses a SelectFormat pattern string.
+ * @param pattern a SelectFormat pattern string
+ * @param parseError Struct to receive information on the position
+ * of an error within the pattern.
+ * Can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @throws IllegalArgumentException for syntax errors in the pattern string
+ * @throws IndexOutOfBoundsException if certain limits are exceeded
+ * (e.g., argument number too high, argument name too long, etc.)
+ * @throws NumberFormatException if a number could not be parsed
+ * @stable ICU 4.8
+ */
+ MessagePattern &parseSelectStyle(const UnicodeString &pattern,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Clears this MessagePattern.
+ * countParts() will return 0.
+ * @stable ICU 4.8
+ */
+ void clear();
+
+ /**
+ * Clears this MessagePattern and sets the UMessagePatternApostropheMode.
+ * countParts() will return 0.
+ * @param mode The new UMessagePatternApostropheMode.
+ * @stable ICU 4.8
+ */
+ void clearPatternAndSetApostropheMode(UMessagePatternApostropheMode mode) {
+ clear();
+ aposMode=mode;
+ }
+
+ /**
+ * @param other another object to compare with.
+ * @return true if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ bool operator==(const MessagePattern &other) const;
+
+ /**
+ * @param other another object to compare with.
+ * @return false if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ inline bool operator!=(const MessagePattern &other) const {
+ return !operator==(other);
+ }
+
+ /**
+ * @return A hash code for this object.
+ * @stable ICU 4.8
+ */
+ int32_t hashCode() const;
+
+ /**
+ * @return this instance's UMessagePatternApostropheMode.
+ * @stable ICU 4.8
+ */
+ UMessagePatternApostropheMode getApostropheMode() const {
+ return aposMode;
+ }
+
+ // Java has package-private jdkAposMode() here.
+ // In C++, this is declared in the MessageImpl class.
+
+ /**
+ * @return the parsed pattern string (null if none was parsed).
+ * @stable ICU 4.8
+ */
+ const UnicodeString &getPatternString() const {
+ return msg;
+ }
+
+ /**
+ * Does the parsed pattern have named arguments like {first_name}?
+ * @return true if the parsed pattern has at least one named argument.
+ * @stable ICU 4.8
+ */
+ UBool hasNamedArguments() const {
+ return hasArgNames;
+ }
+
+ /**
+ * Does the parsed pattern have numbered arguments like {2}?
+ * @return true if the parsed pattern has at least one numbered argument.
+ * @stable ICU 4.8
+ */
+ UBool hasNumberedArguments() const {
+ return hasArgNumbers;
+ }
+
+ /**
+ * Validates and parses an argument name or argument number string.
+ * An argument name must be a "pattern identifier", that is, it must contain
+ * no Unicode Pattern_Syntax or Pattern_White_Space characters.
+ * If it only contains ASCII digits, then it must be a small integer with no leading zero.
+ * @param name Input string.
+ * @return &gt;=0 if the name is a valid number,
+ * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
+ * ARG_NAME_NOT_VALID (-2) if it is neither.
+ * @stable ICU 4.8
+ */
+ static int32_t validateArgumentName(const UnicodeString &name);
+
+ /**
+ * Returns a version of the parsed pattern string where each ASCII apostrophe
+ * is doubled (escaped) if it is not already, and if it is not interpreted as quoting syntax.
+ * <p>
+ * For example, this turns "I don't '{know}' {gender,select,female{h''er}other{h'im}}."
+ * into "I don''t '{know}' {gender,select,female{h''er}other{h''im}}."
+ * @return the deep-auto-quoted version of the parsed pattern string.
+ * @see MessageFormat.autoQuoteApostrophe()
+ * @stable ICU 4.8
+ */
+ UnicodeString autoQuoteApostropheDeep() const;
+
+ class Part;
+
+ /**
+ * Returns the number of "parts" created by parsing the pattern string.
+ * Returns 0 if no pattern has been parsed or clear() was called.
+ * @return the number of pattern parts.
+ * @stable ICU 4.8
+ */
+ int32_t countParts() const {
+ return partsLength;
+ }
+
+ /**
+ * Gets the i-th pattern "part".
+ * @param i The index of the Part data. (0..countParts()-1)
+ * @return the i-th pattern "part".
+ * @stable ICU 4.8
+ */
+ const Part &getPart(int32_t i) const {
+ return parts[i];
+ }
+
+ /**
+ * Returns the UMessagePatternPartType of the i-th pattern "part".
+ * Convenience method for getPart(i).getType().
+ * @param i The index of the Part data. (0..countParts()-1)
+ * @return The UMessagePatternPartType of the i-th Part.
+ * @stable ICU 4.8
+ */
+ UMessagePatternPartType getPartType(int32_t i) const {
+ return getPart(i).type;
+ }
+
+ /**
+ * Returns the pattern index of the specified pattern "part".
+ * Convenience method for getPart(partIndex).getIndex().
+ * @param partIndex The index of the Part data. (0..countParts()-1)
+ * @return The pattern index of this Part.
+ * @stable ICU 4.8
+ */
+ int32_t getPatternIndex(int32_t partIndex) const {
+ return getPart(partIndex).index;
+ }
+
+ /**
+ * Returns the substring of the pattern string indicated by the Part.
+ * Convenience method for getPatternString().substring(part.getIndex(), part.getLimit()).
+ * @param part a part of this MessagePattern.
+ * @return the substring associated with part.
+ * @stable ICU 4.8
+ */
+ UnicodeString getSubstring(const Part &part) const {
+ return msg.tempSubString(part.index, part.length);
+ }
+
+ /**
+ * Compares the part's substring with the input string s.
+ * @param part a part of this MessagePattern.
+ * @param s a string.
+ * @return true if getSubstring(part).equals(s).
+ * @stable ICU 4.8
+ */
+ UBool partSubstringMatches(const Part &part, const UnicodeString &s) const {
+ return 0==msg.compare(part.index, part.length, s);
+ }
+
+ /**
+ * Returns the numeric value associated with an ARG_INT or ARG_DOUBLE.
+ * @param part a part of this MessagePattern.
+ * @return the part's numeric value, or UMSGPAT_NO_NUMERIC_VALUE if this is not a numeric part.
+ * @stable ICU 4.8
+ */
+ double getNumericValue(const Part &part) const;
+
+ /**
+ * Returns the "offset:" value of a PluralFormat argument, or 0 if none is specified.
+ * @param pluralStart the index of the first PluralFormat argument style part. (0..countParts()-1)
+ * @return the "offset:" value.
+ * @stable ICU 4.8
+ */
+ double getPluralOffset(int32_t pluralStart) const;
+
+ /**
+ * Returns the index of the ARG|MSG_LIMIT part corresponding to the ARG|MSG_START at start.
+ * @param start The index of some Part data (0..countParts()-1);
+ * this Part should be of Type ARG_START or MSG_START.
+ * @return The first i>start where getPart(i).getType()==ARG|MSG_LIMIT at the same nesting level,
+ * or start itself if getPartType(msgStart)!=ARG|MSG_START.
+ * @stable ICU 4.8
+ */
+ int32_t getLimitPartIndex(int32_t start) const {
+ int32_t limit=getPart(start).limitPartIndex;
+ if(limit<start) {
+ return start;
+ }
+ return limit;
+ }
+
+ /**
+ * A message pattern "part", representing a pattern parsing event.
+ * There is a part for the start and end of a message or argument,
+ * for quoting and escaping of and with ASCII apostrophes,
+ * and for syntax elements of "complex" arguments.
+ * @stable ICU 4.8
+ */
+ class Part : public UMemory {
+ public:
+ /**
+ * Default constructor, do not use.
+ * @internal
+ */
+ Part() {}
+
+ /**
+ * Returns the type of this part.
+ * @return the part type.
+ * @stable ICU 4.8
+ */
+ UMessagePatternPartType getType() const {
+ return type;
+ }
+
+ /**
+ * Returns the pattern string index associated with this Part.
+ * @return this part's pattern string index.
+ * @stable ICU 4.8
+ */
+ int32_t getIndex() const {
+ return index;
+ }
+
+ /**
+ * Returns the length of the pattern substring associated with this Part.
+ * This is 0 for some parts.
+ * @return this part's pattern substring length.
+ * @stable ICU 4.8
+ */
+ int32_t getLength() const {
+ return length;
+ }
+
+ /**
+ * Returns the pattern string limit (exclusive-end) index associated with this Part.
+ * Convenience method for getIndex()+getLength().
+ * @return this part's pattern string limit index, same as getIndex()+getLength().
+ * @stable ICU 4.8
+ */
+ int32_t getLimit() const {
+ return index+length;
+ }
+
+ /**
+ * Returns a value associated with this part.
+ * See the documentation of each part type for details.
+ * @return the part value.
+ * @stable ICU 4.8
+ */
+ int32_t getValue() const {
+ return value;
+ }
+
+ /**
+ * Returns the argument type if this part is of type ARG_START or ARG_LIMIT,
+ * otherwise UMSGPAT_ARG_TYPE_NONE.
+ * @return the argument type for this part.
+ * @stable ICU 4.8
+ */
+ UMessagePatternArgType getArgType() const {
+ UMessagePatternPartType msgType=getType();
+ if(msgType ==UMSGPAT_PART_TYPE_ARG_START || msgType ==UMSGPAT_PART_TYPE_ARG_LIMIT) {
+ return (UMessagePatternArgType)value;
+ } else {
+ return UMSGPAT_ARG_TYPE_NONE;
+ }
+ }
+
+ /**
+ * Indicates whether the Part type has a numeric value.
+ * If so, then that numeric value can be retrieved via MessagePattern.getNumericValue().
+ * @param type The Part type to be tested.
+ * @return true if the Part type has a numeric value.
+ * @stable ICU 4.8
+ */
+ static UBool hasNumericValue(UMessagePatternPartType type) {
+ return type==UMSGPAT_PART_TYPE_ARG_INT || type==UMSGPAT_PART_TYPE_ARG_DOUBLE;
+ }
+
+ /**
+ * @param other another object to compare with.
+ * @return true if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ bool operator==(const Part &other) const;
+
+ /**
+ * @param other another object to compare with.
+ * @return false if this object is equivalent to the other one.
+ * @stable ICU 4.8
+ */
+ inline bool operator!=(const Part &other) const {
+ return !operator==(other);
+ }
+
+ /**
+ * @return A hash code for this object.
+ * @stable ICU 4.8
+ */
+ int32_t hashCode() const {
+ return ((type*37+index)*37+length)*37+value;
+ }
+
+ private:
+ friend class MessagePattern;
+
+ static const int32_t MAX_LENGTH=0xffff;
+ static const int32_t MAX_VALUE=0x7fff;
+
+ // Some fields are not final because they are modified during pattern parsing.
+ // After pattern parsing, the parts are effectively immutable.
+ UMessagePatternPartType type;
+ int32_t index;
+ uint16_t length;
+ int16_t value;
+ int32_t limitPartIndex;
+ };
+
+private:
+ void preParse(const UnicodeString &pattern, UParseError *parseError, UErrorCode &errorCode);
+
+ void postParse();
+
+ int32_t parseMessage(int32_t index, int32_t msgStartLength,
+ int32_t nestingLevel, UMessagePatternArgType parentType,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parseArg(int32_t index, int32_t argStartLength, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parseSimpleStyle(int32_t index, UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parseChoiceStyle(int32_t index, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ int32_t parsePluralOrSelectStyle(UMessagePatternArgType argType, int32_t index, int32_t nestingLevel,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ /**
+ * Validates and parses an argument name or argument number string.
+ * This internal method assumes that the input substring is a "pattern identifier".
+ * @return &gt;=0 if the name is a valid number,
+ * ARG_NAME_NOT_NUMBER (-1) if it is a "pattern identifier" but not all ASCII digits,
+ * ARG_NAME_NOT_VALID (-2) if it is neither.
+ * @see #validateArgumentName(String)
+ */
+ static int32_t parseArgNumber(const UnicodeString &s, int32_t start, int32_t limit);
+
+ int32_t parseArgNumber(int32_t start, int32_t limit) {
+ return parseArgNumber(msg, start, limit);
+ }
+
+ /**
+ * Parses a number from the specified message substring.
+ * @param start start index into the message string
+ * @param limit limit index into the message string, must be start<limit
+ * @param allowInfinity true if U+221E is allowed (for ChoiceFormat)
+ * @param parseError
+ * @param errorCode
+ */
+ void parseDouble(int32_t start, int32_t limit, UBool allowInfinity,
+ UParseError *parseError, UErrorCode &errorCode);
+
+ // Java has package-private appendReducedApostrophes() here.
+ // In C++, this is declared in the MessageImpl class.
+
+ int32_t skipWhiteSpace(int32_t index);
+
+ int32_t skipIdentifier(int32_t index);
+
+ /**
+ * Skips a sequence of characters that could occur in a double value.
+ * Does not fully parse or validate the value.
+ */
+ int32_t skipDouble(int32_t index);
+
+ static UBool isArgTypeChar(UChar32 c);
+
+ UBool isChoice(int32_t index);
+
+ UBool isPlural(int32_t index);
+
+ UBool isSelect(int32_t index);
+
+ UBool isOrdinal(int32_t index);
+
+ /**
+ * @return true if we are inside a MessageFormat (sub-)pattern,
+ * as opposed to inside a top-level choice/plural/select pattern.
+ */
+ UBool inMessageFormatPattern(int32_t nestingLevel);
+
+ /**
+ * @return true if we are in a MessageFormat sub-pattern
+ * of a top-level ChoiceFormat pattern.
+ */
+ UBool inTopLevelChoiceMessage(int32_t nestingLevel, UMessagePatternArgType parentType);
+
+ void addPart(UMessagePatternPartType type, int32_t index, int32_t length,
+ int32_t value, UErrorCode &errorCode);
+
+ void addLimitPart(int32_t start,
+ UMessagePatternPartType type, int32_t index, int32_t length,
+ int32_t value, UErrorCode &errorCode);
+
+ void addArgDoublePart(double numericValue, int32_t start, int32_t length, UErrorCode &errorCode);
+
+ void setParseError(UParseError *parseError, int32_t index);
+
+ UBool init(UErrorCode &errorCode);
+ UBool copyStorage(const MessagePattern &other, UErrorCode &errorCode);
+
+ UMessagePatternApostropheMode aposMode;
+ UnicodeString msg;
+ // ArrayList<Part> parts=new ArrayList<Part>();
+ MessagePatternPartsList *partsList;
+ Part *parts;
+ int32_t partsLength;
+ // ArrayList<Double> numericValues;
+ MessagePatternDoubleList *numericValuesList;
+ double *numericValues;
+ int32_t numericValuesLength;
+ UBool hasArgNames;
+ UBool hasArgNumbers;
+ UBool needsAutoQuoting;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_FORMATTING
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __MESSAGEPATTERN_H__
diff --git a/intl/icu/source/common/unicode/normalizer2.h b/intl/icu/source/common/unicode/normalizer2.h
new file mode 100644
index 0000000000..972894ec42
--- /dev/null
+++ b/intl/icu/source/common/unicode/normalizer2.h
@@ -0,0 +1,771 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: normalizer2.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009nov22
+* created by: Markus W. Scherer
+*/
+
+#ifndef __NORMALIZER2_H__
+#define __NORMALIZER2_H__
+
+/**
+ * \file
+ * \brief C++ API: New API for Unicode Normalization.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/stringpiece.h"
+#include "unicode/uniset.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm2.h"
+
+U_NAMESPACE_BEGIN
+
+class ByteSink;
+
+/**
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of this class are unmodifiable/immutable.
+ * Instances returned by getInstance() are singletons that must not be deleted by the caller.
+ * The Normalizer2 class is not intended for public subclassing.
+ *
+ * The primary functions are to produce a normalized string and to detect whether
+ * a string is already normalized.
+ * The most commonly used normalization forms are those defined in
+ * http://www.unicode.org/unicode/reports/tr15/
+ * However, this API supports additional normalization forms for specialized purposes.
+ * For example, NFKC_Casefold is provided via getInstance("nfkc_cf", COMPOSE)
+ * and can be used in implementations of UTS #46.
+ *
+ * Not only are the standard compose and decompose modes supplied,
+ * but additional modes are provided as documented in the Mode enum.
+ *
+ * Some of the functions in this class identify normalization boundaries.
+ * At a normalization boundary, the portions of the string
+ * before it and starting from it do not interact and can be handled independently.
+ *
+ * The spanQuickCheckYes() stops at a normalization boundary.
+ * When the goal is a normalized string, then the text before the boundary
+ * can be copied, and the remainder can be processed with normalizeSecondAndAppend().
+ *
+ * The hasBoundaryBefore(), hasBoundaryAfter() and isInert() functions test whether
+ * a character is guaranteed to be at a normalization boundary,
+ * regardless of context.
+ * This is used for moving from one normalization boundary to the next
+ * or preceding boundary, and for performing iterative normalization.
+ *
+ * Iterative normalization is useful when only a small portion of a
+ * longer string needs to be processed.
+ * For example, in ICU, iterative normalization is used by the NormalizationTransliterator
+ * (to avoid replacing already-normalized text) and ucol_nextSortKeyPart()
+ * (to process only the substring for which sort key bytes are computed).
+ *
+ * The set of normalization boundaries returned by these functions may not be
+ * complete: There may be more boundaries that could be returned.
+ * Different functions may return different boundaries.
+ * @stable ICU 4.4
+ */
+class U_COMMON_API Normalizer2 : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 4.4
+ */
+ ~Normalizer2();
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFC normalization.
+ * Same as getInstance(nullptr, "nfc", UNORM2_COMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFCInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFD normalization.
+ * Same as getInstance(nullptr, "nfc", UNORM2_DECOMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFDInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFKC normalization.
+ * Same as getInstance(nullptr, "nfkc", UNORM2_COMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFKCInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFKD normalization.
+ * Same as getInstance(nullptr, "nfkc", UNORM2_DECOMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFKDInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Same as getInstance(nullptr, "nfkc_cf", UNORM2_COMPOSE, errorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+ static const Normalizer2 *
+ getNFKCCasefoldInstance(UErrorCode &errorCode);
+
+ /**
+ * Returns a Normalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=nullptr for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName nullptr for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 4.4
+ */
+ static const Normalizer2 *
+ getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode &errorCode);
+
+ /**
+ * Returns the normalized form of the source string.
+ * @param src source string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return normalized src
+ * @stable ICU 4.4
+ */
+ UnicodeString
+ normalize(const UnicodeString &src, UErrorCode &errorCode) const {
+ UnicodeString result;
+ normalize(src, result, errorCode);
+ return result;
+ }
+ /**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the destination string.
+ * The source and destination strings must be different objects.
+ * @param src source string
+ * @param dest destination string; its contents is replaced with normalized src
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalize(const UnicodeString &src,
+ UnicodeString &dest,
+ UErrorCode &errorCode) const = 0;
+
+ /**
+ * Normalizes a UTF-8 string and optionally records how source substrings
+ * relate to changed and unchanged result substrings.
+ *
+ * Implemented completely for all built-in modes except for FCD.
+ * The base class implementation converts to & from UTF-16 and does not support edits.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src Source UTF-8 string.
+ * @param sink A ByteSink to which the normalized UTF-8 result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 60
+ */
+ virtual void
+ normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+ Edits *edits, UErrorCode &errorCode) const;
+
+ /**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, will be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const = 0;
+ /**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, should be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ append(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const = 0;
+
+ /**
+ * Gets the decomposition mapping of c.
+ * Roughly equivalent to normalizing the String form of c
+ * on a UNORM2_DECOMPOSE Normalizer2 instance, but much faster, and except that this function
+ * returns false and does not write a string
+ * if c does not have a decomposition mapping in this instance's data.
+ * This function is independent of the mode of the Normalizer2.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 4.6
+ */
+ virtual UBool
+ getDecomposition(UChar32 c, UnicodeString &decomposition) const = 0;
+
+ /**
+ * Gets the raw decomposition mapping of c.
+ *
+ * This is similar to the getDecomposition() method but returns the
+ * raw decomposition mapping as specified in UnicodeData.txt or
+ * (for custom data) in the mapping files processed by the gennorm2 tool.
+ * By contrast, getDecomposition() returns the processed,
+ * recursively-decomposed version of this mapping.
+ *
+ * When used on a standard NFKC Normalizer2 instance,
+ * getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
+ *
+ * When used on a standard NFC Normalizer2 instance,
+ * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
+ * in this case, the result contains either one or two code points (=1..4 char16_ts).
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * The default implementation returns false.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 49
+ */
+ virtual UBool
+ getRawDecomposition(UChar32 c, UnicodeString &decomposition) const;
+
+ /**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ *
+ * Returns a composite code point c only if c has a two-way mapping to a+b.
+ * In standard Unicode normalization, this means that
+ * c has a canonical decomposition to a+b
+ * and c does not have the Full_Composition_Exclusion property.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * The default implementation returns a negative value.
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+ virtual UChar32
+ composePair(UChar32 a, UChar32 b) const;
+
+ /**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+ virtual uint8_t
+ getCombiningClass(UChar32 c) const;
+
+ /**
+ * Tests if the string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+ virtual UBool
+ isNormalized(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+ /**
+ * Tests if the UTF-8 string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ *
+ * This works for all normalization modes.
+ * It is optimized for UTF-8 for all built-in modes except for FCD.
+ * The base class implementation converts to UTF-16 and calls isNormalized().
+ *
+ * @param s UTF-8 input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 60
+ */
+ virtual UBool
+ isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const;
+
+
+ /**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
+ * combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+ virtual UNormalizationCheckResult
+ quickCheck(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
+ * the substring <code>UnicodeString(s, 0, end)</code>
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this method,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+ virtual int32_t
+ spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const = 0;
+
+ /**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * If true, then the character does not normalization-interact with
+ * preceding characters.
+ * In other words, a string containing this character can be normalized
+ * by processing portions before this character and starting from this
+ * character independently.
+ * This is used for iterative normalization. See the class documentation for details.
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryBefore(UChar32 c) const = 0;
+
+ /**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * If true, then the character does not normalization-interact with
+ * following characters.
+ * In other words, a string containing this character can be normalized
+ * by processing portions up to this character and after this
+ * character independently.
+ * This is used for iterative normalization. See the class documentation for details.
+ * Note that this operation may be significantly slower than hasBoundaryBefore().
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryAfter(UChar32 c) const = 0;
+
+ /**
+ * Tests if the character is normalization-inert.
+ * If true, then the character does not change, nor normalization-interact with
+ * preceding or following characters.
+ * In other words, a string containing this character can be normalized
+ * by processing portions before this character and after this
+ * character independently.
+ * This is used for iterative normalization. See the class documentation for details.
+ * Note that this operation may be significantly slower than hasBoundaryBefore().
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+ virtual UBool isInert(UChar32 c) const = 0;
+};
+
+/**
+ * Normalization filtered by a UnicodeSet.
+ * Normalizes portions of the text contained in the filter set and leaves
+ * portions not contained in the filter set unchanged.
+ * Filtering is done via UnicodeSet::span(..., USET_SPAN_SIMPLE).
+ * Not-in-the-filter text is treated as "is normalized" and "quick check yes".
+ * This class implements all of (and only) the Normalizer2 API.
+ * An instance of this class is unmodifiable/immutable but is constructed and
+ * must be destructed by the owner.
+ * @stable ICU 4.4
+ */
+class U_COMMON_API FilteredNormalizer2 : public Normalizer2 {
+public:
+ /**
+ * Constructs a filtered normalizer wrapping any Normalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param n2 wrapped Normalizer2 instance
+ * @param filterSet UnicodeSet which determines the characters to be normalized
+ * @stable ICU 4.4
+ */
+ FilteredNormalizer2(const Normalizer2 &n2, const UnicodeSet &filterSet) :
+ norm2(n2), set(filterSet) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.4
+ */
+ ~FilteredNormalizer2();
+
+ /**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the destination string.
+ * The source and destination strings must be different objects.
+ * @param src source string
+ * @param dest destination string; its contents is replaced with normalized src
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalize(const UnicodeString &src,
+ UnicodeString &dest,
+ UErrorCode &errorCode) const override;
+
+ /**
+ * Normalizes a UTF-8 string and optionally records how source substrings
+ * relate to changed and unchanged result substrings.
+ *
+ * Implemented completely for most built-in modes except for FCD.
+ * The base class implementation converts to & from UTF-16 and does not support edits.
+ *
+ * @param options Options bit set, usually 0. See U_OMIT_UNCHANGED_TEXT and U_EDITS_NO_RESET.
+ * @param src Source UTF-8 string.
+ * @param sink A ByteSink to which the normalized UTF-8 result string is written.
+ * sink.Flush() is called at the end.
+ * @param edits Records edits for index mapping, working with styled text,
+ * and getting only changes (if any).
+ * The Edits contents is undefined if any error occurs.
+ * This function calls edits->reset() first unless
+ * options includes U_EDITS_NO_RESET. edits can be nullptr.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 60
+ */
+ virtual void
+ normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+ Edits *edits, UErrorCode &errorCode) const override;
+
+ /**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, will be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const override;
+ /**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different objects.
+ * @param first string, should be normalized
+ * @param second string, should be normalized
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+ virtual UnicodeString &
+ append(UnicodeString &first,
+ const UnicodeString &second,
+ UErrorCode &errorCode) const override;
+
+ /**
+ * Gets the decomposition mapping of c.
+ * For details see the base class documentation.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 4.6
+ */
+ virtual UBool
+ getDecomposition(UChar32 c, UnicodeString &decomposition) const override;
+
+ /**
+ * Gets the raw decomposition mapping of c.
+ * For details see the base class documentation.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * @param c code point
+ * @param decomposition String object which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @return true if c has a decomposition, otherwise false
+ * @stable ICU 49
+ */
+ virtual UBool
+ getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override;
+
+ /**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ * For details see the base class documentation.
+ *
+ * This function is independent of the mode of the Normalizer2.
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+ virtual UChar32
+ composePair(UChar32 a, UChar32 b) const override;
+
+ /**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+ virtual uint8_t
+ getCombiningClass(UChar32 c) const override;
+
+ /**
+ * Tests if the string is normalized.
+ * For details see the Normalizer2 base class documentation.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+ virtual UBool
+ isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override;
+ /**
+ * Tests if the UTF-8 string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ *
+ * This works for all normalization modes.
+ * It is optimized for UTF-8 for all built-in modes except for FCD.
+ * The base class implementation converts to UTF-16 and calls isNormalized().
+ *
+ * @param s UTF-8 input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 60
+ */
+ virtual UBool
+ isNormalizedUTF8(StringPiece s, UErrorCode &errorCode) const override;
+ /**
+ * Tests if the string is normalized.
+ * For details see the Normalizer2 base class documentation.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+ virtual UNormalizationCheckResult
+ quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override;
+ /**
+ * Returns the end of the normalized substring of the input string.
+ * For details see the Normalizer2 base class documentation.
+ * @param s input string
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+ virtual int32_t
+ spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override;
+
+ /**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryBefore(UChar32 c) const override;
+
+ /**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+ virtual UBool hasBoundaryAfter(UChar32 c) const override;
+
+ /**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+ virtual UBool isInert(UChar32 c) const override;
+private:
+ UnicodeString &
+ normalize(const UnicodeString &src,
+ UnicodeString &dest,
+ USetSpanCondition spanCondition,
+ UErrorCode &errorCode) const;
+
+ void
+ normalizeUTF8(uint32_t options, const char *src, int32_t length,
+ ByteSink &sink, Edits *edits,
+ USetSpanCondition spanCondition,
+ UErrorCode &errorCode) const;
+
+ UnicodeString &
+ normalizeSecondAndAppend(UnicodeString &first,
+ const UnicodeString &second,
+ UBool doNormalize,
+ UErrorCode &errorCode) const;
+
+ const Normalizer2 &norm2;
+ const UnicodeSet &set;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_NORMALIZATION
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __NORMALIZER2_H__
diff --git a/intl/icu/source/common/unicode/normlzr.h b/intl/icu/source/common/unicode/normlzr.h
new file mode 100644
index 0000000000..03a7aa080d
--- /dev/null
+++ b/intl/icu/source/common/unicode/normlzr.h
@@ -0,0 +1,816 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ ********************************************************************
+ * COPYRIGHT:
+ * Copyright (c) 1996-2015, International Business Machines Corporation and
+ * others. All Rights Reserved.
+ ********************************************************************
+ */
+
+#ifndef NORMLZR_H
+#define NORMLZR_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C++ API: Unicode Normalization
+ */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/chariter.h"
+#include "unicode/normalizer2.h"
+#include "unicode/unistr.h"
+#include "unicode/unorm.h"
+#include "unicode/uobject.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * Old Unicode normalization API.
+ *
+ * This API has been replaced by the Normalizer2 class and is only available
+ * for backward compatibility. This class simply delegates to the Normalizer2 class.
+ * There is one exception: The new API does not provide a replacement for Normalizer::compare().
+ *
+ * The Normalizer class supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * The Normalizer class consists of two parts:
+ * - static functions that normalize strings or test if strings are normalized
+ * - a Normalizer object is an iterator that takes any kind of text and
+ * provides iteration over its normalized form
+ *
+ * The Normalizer class is not suitable for subclassing.
+ *
+ * For basic information about normalization forms and details about the C API
+ * please see the documentation in unorm.h.
+ *
+ * The iterator API with the Normalizer constructors and the non-static functions
+ * use a CharacterIterator as input. It is possible to pass a string which
+ * is then internally wrapped in a CharacterIterator.
+ * The input text is not normalized all at once, but incrementally where needed
+ * (providing efficient random access).
+ * This allows to pass in a large text but spend only a small amount of time
+ * normalizing a small part of that text.
+ * However, if the entire text is normalized, then the iterator will be
+ * slower than normalizing the entire text at once and iterating over the result.
+ * A possible use of the Normalizer iterator is also to report an index into the
+ * original text that is close to where the normalized characters come from.
+ *
+ * <em>Important:</em> The iterator API was cleaned up significantly for ICU 2.0.
+ * The earlier implementation reported the getIndex() inconsistently,
+ * and previous() could not be used after setIndex(), next(), first(), and current().
+ *
+ * Normalizer allows to start normalizing from anywhere in the input text by
+ * calling setIndexOnly(), first(), or last().
+ * Without calling any of these, the iterator will start at the beginning of the text.
+ *
+ * At any time, next() returns the next normalized code point (UChar32),
+ * with post-increment semantics (like CharacterIterator::next32PostInc()).
+ * previous() returns the previous normalized code point (UChar32),
+ * with pre-decrement semantics (like CharacterIterator::previous32()).
+ *
+ * current() returns the current code point
+ * (respectively the one at the newly set index) without moving
+ * the getIndex(). Note that if the text at the current position
+ * needs to be normalized, then these functions will do that.
+ * (This is why current() is not const.)
+ * It is more efficient to call setIndexOnly() instead, which does not
+ * normalize.
+ *
+ * getIndex() always refers to the position in the input text where the normalized
+ * code points are returned from. It does not always change with each returned
+ * code point.
+ * The code point that is returned from any of the functions
+ * corresponds to text at or after getIndex(), according to the
+ * function's iteration semantics (post-increment or pre-decrement).
+ *
+ * next() returns a code point from at or after the getIndex()
+ * from before the next() call. After the next() call, the getIndex()
+ * might have moved to where the next code point will be returned from
+ * (from a next() or current() call).
+ * This is semantically equivalent to array access with array[index++]
+ * (post-increment semantics).
+ *
+ * previous() returns a code point from at or after the getIndex()
+ * from after the previous() call.
+ * This is semantically equivalent to array access with array[--index]
+ * (pre-decrement semantics).
+ *
+ * Internally, the Normalizer iterator normalizes a small piece of text
+ * starting at the getIndex() and ending at a following "safe" index.
+ * The normalized results is stored in an internal string buffer, and
+ * the code points are iterated from there.
+ * With multiple iteration calls, this is repeated until the next piece
+ * of text needs to be normalized, and the getIndex() needs to be moved.
+ *
+ * The following "safe" index, the internal buffer, and the secondary
+ * iteration index into that buffer are not exposed on the API.
+ * This also means that it is currently not practical to return to
+ * a particular, arbitrary position in the text because one would need to
+ * know, and be able to set, in addition to the getIndex(), at least also the
+ * current index into the internal buffer.
+ * It is currently only possible to observe when getIndex() changes
+ * (with careful consideration of the iteration semantics),
+ * at which time the internal index will be 0.
+ * For example, if getIndex() is different after next() than before it,
+ * then the internal index is 0 and one can return to this getIndex()
+ * later with setIndexOnly().
+ *
+ * Note: While the setIndex() and getIndex() refer to indices in the
+ * underlying Unicode input text, the next() and previous() methods
+ * iterate through characters in the normalized output.
+ * This means that there is not necessarily a one-to-one correspondence
+ * between characters returned by next() and previous() and the indices
+ * passed to and returned from setIndex() and getIndex().
+ * It is for this reason that Normalizer does not implement the CharacterIterator interface.
+ *
+ * @author Laura Werner, Mark Davis, Markus Scherer
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Normalizer : public UObject {
+public:
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * If DONE is returned from an iteration function that returns a code point,
+ * then there are no more normalization results available.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ enum {
+ DONE=0xffff
+ };
+
+ // Constructors
+
+ /**
+ * Creates a new <code>Normalizer</code> object for iterating over the
+ * normalized form of a given string.
+ * <p>
+ * @param str The string to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param mode The normalization mode.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(const UnicodeString& str, UNormalizationMode mode);
+
+ /**
+ * Creates a new <code>Normalizer</code> object for iterating over the
+ * normalized form of a given string.
+ * <p>
+ * @param str The string to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param length Length of the string, or -1 if NUL-terminated.
+ * @param mode The normalization mode.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
+
+ /**
+ * Creates a new <code>Normalizer</code> object for iterating over the
+ * normalized form of the given text.
+ * <p>
+ * @param iter The input text to be normalized. The normalization
+ * will start at the beginning of the string.
+ *
+ * @param mode The normalization mode.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(const CharacterIterator& iter, UNormalizationMode mode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * Copy constructor.
+ * @param copy The object to be copied.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer(const Normalizer& copy);
+
+ /**
+ * Destructor
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ virtual ~Normalizer();
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+ //-------------------------------------------------------------------------
+ // Static utility methods
+ //-------------------------------------------------------------------------
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Normalizes a <code>UnicodeString</code> according to the specified normalization mode.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The <code>options</code> parameter specifies which optional
+ * <code>Normalizer</code> features are to be enabled for this operation.
+ *
+ * @param source the input string to be normalized.
+ * @param mode the normalization mode
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The normalized string (on output).
+ * @param status The error code.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static void U_EXPORT2 normalize(const UnicodeString& source,
+ UNormalizationMode mode, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Compose a <code>UnicodeString</code>.
+ * This is equivalent to normalize() with mode UNORM_NFC or UNORM_NFKC.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The <code>options</code> parameter specifies which optional
+ * <code>Normalizer</code> features are to be enabled for this operation.
+ *
+ * @param source the string to be composed.
+ * @param compat Perform compatibility decomposition before composition.
+ * If this argument is <code>false</code>, only canonical
+ * decomposition will be performed.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The composed string (on output).
+ * @param status The error code.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static void U_EXPORT2 compose(const UnicodeString& source,
+ UBool compat, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Static method to decompose a <code>UnicodeString</code>.
+ * This is equivalent to normalize() with mode UNORM_NFD or UNORM_NFKD.
+ * This is a wrapper for unorm_normalize(), using UnicodeString's.
+ *
+ * The <code>options</code> parameter specifies which optional
+ * <code>Normalizer</code> features are to be enabled for this operation.
+ *
+ * @param source the string to be decomposed.
+ * @param compat Perform compatibility decomposition.
+ * If this argument is <code>false</code>, only canonical
+ * decomposition will be performed.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param result The decomposed string (on output).
+ * @param status The error code.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static void U_EXPORT2 decompose(const UnicodeString& source,
+ UBool compat, int32_t options,
+ UnicodeString& result,
+ UErrorCode &status);
+
+ /**
+ * Performing quick check on a string, to quickly determine if the string is
+ * in a particular normalization format.
+ * This is a wrapper for unorm_quickCheck(), using a UnicodeString.
+ *
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO determines that
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ * @param source string for determining if it is in a normalized format
+ * @param mode normalization format
+ * @param status A reference to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see isNormalized
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static inline UNormalizationCheckResult
+ quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
+
+ /**
+ * Performing quick check on a string; same as the other version of quickCheck
+ * but takes an extra options parameter like most normalization functions.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param mode normalization format
+ * @param options the optional features to be enabled (0 for no options)
+ * @param status A reference to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see isNormalized
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UNormalizationCheckResult
+ quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
+
+ /**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a true/false result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param mode Which normalization form to test for.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see quickCheck
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static inline UBool
+ isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
+
+ /**
+ * Test if a string is in a given normalization form; same as the other version of isNormalized
+ * but takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param mode Which normalization form to test for.
+ * @param options the optional features to be enabled (0 for no options)
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see quickCheck
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UBool
+ isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
+
+ /**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * For details see unorm_concatenate in unorm.h.
+ *
+ * @param left Left source string.
+ * @param right Right source string.
+ * @param result The output string.
+ * @param mode The normalization mode.
+ * @param options A bit set of normalization options.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return result
+ *
+ * @see unorm_concatenate
+ * @see normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UnicodeString &
+ U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
+ UnicodeString &result,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode &errorCode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Compare two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(s1)), NFD(foldCase(s2)))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param s2 Second source string.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_compare
+ * @see normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+ static inline int32_t
+ compare(const UnicodeString &s1, const UnicodeString &s2,
+ uint32_t options,
+ UErrorCode &errorCode);
+
+#ifndef U_HIDE_DEPRECATED_API
+ //-------------------------------------------------------------------------
+ // Iteration API
+ //-------------------------------------------------------------------------
+
+ /**
+ * Return the current character in the normalized text.
+ * current() may need to normalize some text at getIndex().
+ * The getIndex() is not changed.
+ *
+ * @return the current normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 current(void);
+
+ /**
+ * Return the first character in the normalized text.
+ * This is equivalent to setIndexOnly(startIndex()) followed by next().
+ * (Post-increment semantics.)
+ *
+ * @return the first normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 first(void);
+
+ /**
+ * Return the last character in the normalized text.
+ * This is equivalent to setIndexOnly(endIndex()) followed by previous().
+ * (Pre-decrement semantics.)
+ *
+ * @return the last normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 last(void);
+
+ /**
+ * Return the next character in the normalized text.
+ * (Post-increment semantics.)
+ * If the end of the text has already been reached, DONE is returned.
+ * The DONE value could be confused with a U+FFFF non-character code point
+ * in the text. If this is possible, you can test getIndex()<endIndex()
+ * before calling next(), or (getIndex()<endIndex() || last()!=DONE)
+ * after calling next(). (Calling last() will change the iterator state!)
+ *
+ * The C API unorm_next() is more efficient and does not have this ambiguity.
+ *
+ * @return the next normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 next(void);
+
+ /**
+ * Return the previous character in the normalized text and decrement.
+ * (Pre-decrement semantics.)
+ * If the beginning of the text has already been reached, DONE is returned.
+ * The DONE value could be confused with a U+FFFF non-character code point
+ * in the text. If this is possible, you can test
+ * (getIndex()>startIndex() || first()!=DONE). (Calling first() will change
+ * the iterator state!)
+ *
+ * The C API unorm_previous() is more efficient and does not have this ambiguity.
+ *
+ * @return the previous normalized code point
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UChar32 previous(void);
+
+ /**
+ * Set the iteration position in the input text that is being normalized,
+ * without any immediate normalization.
+ * After setIndexOnly(), getIndex() will return the same index that is
+ * specified here.
+ *
+ * @param index the desired index in the input text.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setIndexOnly(int32_t index);
+
+ /**
+ * Reset the index to the beginning of the text.
+ * This is equivalent to setIndexOnly(startIndex)).
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void reset(void);
+
+ /**
+ * Retrieve the current iteration position in the input text that is
+ * being normalized.
+ *
+ * A following call to next() will return a normalized code point from
+ * the input text at or after this index.
+ *
+ * After a call to previous(), getIndex() will point at or before the
+ * position in the input text where the normalized code point
+ * was returned from with previous().
+ *
+ * @return the current index in the input text
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t getIndex(void) const;
+
+ /**
+ * Retrieve the index of the start of the input text. This is the begin index
+ * of the <code>CharacterIterator</code> or the start (i.e. index 0) of the string
+ * over which this <code>Normalizer</code> is iterating.
+ *
+ * @return the smallest index in the input text where the Normalizer operates
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t startIndex(void) const;
+
+ /**
+ * Retrieve the index of the end of the input text. This is the end index
+ * of the <code>CharacterIterator</code> or the length of the string
+ * over which this <code>Normalizer</code> is iterating.
+ * This end index is exclusive, i.e., the Normalizer operates only on characters
+ * before this index.
+ *
+ * @return the first index in the input text where the Normalizer does not operate
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t endIndex(void) const;
+
+ /**
+ * Returns true when both iterators refer to the same character in the same
+ * input text.
+ *
+ * @param that a Normalizer object to compare this one to
+ * @return comparison result
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ bool operator==(const Normalizer& that) const;
+
+ /**
+ * Returns false when both iterators refer to the same character in the same
+ * input text.
+ *
+ * @param that a Normalizer object to compare this one to
+ * @return comparison result
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ inline bool operator!=(const Normalizer& that) const;
+
+ /**
+ * Returns a pointer to a new Normalizer that is a clone of this one.
+ * The caller is responsible for deleting the new clone.
+ * @return a pointer to a new Normalizer
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ Normalizer* clone() const;
+
+ /**
+ * Generates a hash code for this iterator.
+ *
+ * @return the hash code
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ int32_t hashCode(void) const;
+
+ //-------------------------------------------------------------------------
+ // Property access methods
+ //-------------------------------------------------------------------------
+
+ /**
+ * Set the normalization mode for this object.
+ * <p>
+ * <b>Note:</b>If the normalization mode is changed while iterating
+ * over a string, calls to {@link #next() } and {@link #previous() } may
+ * return previously buffers characters in the old normalization mode
+ * until the iteration is able to re-sync at the next base character.
+ * It is safest to call {@link #setIndexOnly }, {@link #reset() },
+ * {@link #setText }, {@link #first() },
+ * {@link #last() }, etc. after calling <code>setMode</code>.
+ * <p>
+ * @param newMode the new mode for this <code>Normalizer</code>.
+ * @see #getUMode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setMode(UNormalizationMode newMode);
+
+ /**
+ * Return the normalization mode for this object.
+ *
+ * This is an unusual name because there used to be a getMode() that
+ * returned a different type.
+ *
+ * @return the mode for this <code>Normalizer</code>
+ * @see #setMode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UNormalizationMode getUMode(void) const;
+
+ /**
+ * Set options that affect this <code>Normalizer</code>'s operation.
+ * Options do not change the basic composition or decomposition operation
+ * that is being performed, but they control whether
+ * certain optional portions of the operation are done.
+ * Currently the only available option is obsolete.
+ *
+ * It is possible to specify multiple options that are all turned on or off.
+ *
+ * @param option the option(s) whose value is/are to be set.
+ * @param value the new setting for the option. Use <code>true</code> to
+ * turn the option(s) on and <code>false</code> to turn it/them off.
+ *
+ * @see #getOption
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setOption(int32_t option,
+ UBool value);
+
+ /**
+ * Determine whether an option is turned on or off.
+ * If multiple options are specified, then the result is true if any
+ * of them are set.
+ * <p>
+ * @param option the option(s) that are to be checked
+ * @return true if any of the option(s) are set
+ * @see #setOption
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ UBool getOption(int32_t option) const;
+
+ /**
+ * Set the input text over which this <code>Normalizer</code> will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a string that replaces the current input text
+ * @param status a UErrorCode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setText(const UnicodeString& newText,
+ UErrorCode &status);
+
+ /**
+ * Set the input text over which this <code>Normalizer</code> will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a CharacterIterator object that replaces the current input text
+ * @param status a UErrorCode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setText(const CharacterIterator& newText,
+ UErrorCode &status);
+
+ /**
+ * Set the input text over which this <code>Normalizer</code> will iterate.
+ * The iteration position is set to the beginning.
+ *
+ * @param newText a string that replaces the current input text
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @param status a UErrorCode
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void setText(ConstChar16Ptr newText,
+ int32_t length,
+ UErrorCode &status);
+ /**
+ * Copies the input text into the UnicodeString argument.
+ *
+ * @param result Receives a copy of the text under iteration.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ void getText(UnicodeString& result);
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ * @returns a UClassID for this class.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+#endif /* U_HIDE_DEPRECATED_API */
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ * @return a UClassID for the actual class.
+ * @deprecated ICU 56 Use Normalizer2 instead.
+ */
+ virtual UClassID getDynamicClassID() const override;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+private:
+ //-------------------------------------------------------------------------
+ // Private functions
+ //-------------------------------------------------------------------------
+
+ Normalizer() = delete; // default constructor not implemented
+ Normalizer &operator=(const Normalizer &that) = delete; // assignment operator not implemented
+
+ // Private utility methods for iteration
+ // For documentation, see the source code
+ UBool nextNormalize();
+ UBool previousNormalize();
+
+ void init();
+ void clearBuffer(void);
+
+ //-------------------------------------------------------------------------
+ // Private data
+ //-------------------------------------------------------------------------
+
+ FilteredNormalizer2*fFilteredNorm2; // owned if not nullptr
+ const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
+ UNormalizationMode fUMode; // deprecated
+ int32_t fOptions;
+
+ // The input text and our position in it
+ CharacterIterator *text;
+
+ // The normalization buffer is the result of normalization
+ // of the source in [currentIndex..nextIndex[ .
+ int32_t currentIndex, nextIndex;
+
+ // A buffer for holding intermediate results
+ UnicodeString buffer;
+ int32_t bufferPos;
+};
+
+//-------------------------------------------------------------------------
+// Inline implementations
+//-------------------------------------------------------------------------
+
+#ifndef U_HIDE_DEPRECATED_API
+inline bool
+Normalizer::operator!= (const Normalizer& other) const
+{ return ! operator==(other); }
+
+inline UNormalizationCheckResult
+Normalizer::quickCheck(const UnicodeString& source,
+ UNormalizationMode mode,
+ UErrorCode &status) {
+ return quickCheck(source, mode, 0, status);
+}
+
+inline UBool
+Normalizer::isNormalized(const UnicodeString& source,
+ UNormalizationMode mode,
+ UErrorCode &status) {
+ return isNormalized(source, mode, 0, status);
+}
+#endif /* U_HIDE_DEPRECATED_API */
+
+inline int32_t
+Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
+ uint32_t options,
+ UErrorCode &errorCode) {
+ // all argument checking is done in unorm_compare
+ return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
+ toUCharPtr(s2.getBuffer()), s2.length(),
+ options,
+ &errorCode);
+}
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+
+#endif // NORMLZR_H
+
+#endif /* U_SHOW_CPLUSPLUS_API */
diff --git a/intl/icu/source/common/unicode/parseerr.h b/intl/icu/source/common/unicode/parseerr.h
new file mode 100644
index 0000000000..c23cc273b8
--- /dev/null
+++ b/intl/icu/source/common/unicode/parseerr.h
@@ -0,0 +1,94 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 03/14/00 aliu Creation.
+* 06/27/00 aliu Change from C++ class to C struct
+**********************************************************************
+*/
+#ifndef PARSEERR_H
+#define PARSEERR_H
+
+#include "unicode/utypes.h"
+
+
+/**
+ * \file
+ * \brief C API: Parse Error Information
+ */
+/**
+ * The capacity of the context strings in UParseError.
+ * @stable ICU 2.0
+ */
+enum { U_PARSE_CONTEXT_LEN = 16 };
+
+/**
+ * A UParseError struct is used to returned detailed information about
+ * parsing errors. It is used by ICU parsing engines that parse long
+ * rules, patterns, or programs, where the text being parsed is long
+ * enough that more information than a UErrorCode is needed to
+ * localize the error.
+ *
+ * <p>The line, offset, and context fields are optional; parsing
+ * engines may choose not to use to use them.
+ *
+ * <p>The preContext and postContext strings include some part of the
+ * context surrounding the error. If the source text is "let for=7"
+ * and "for" is the error (e.g., because it is a reserved word), then
+ * some examples of what a parser might produce are the following:
+ *
+ * <pre>
+ * preContext postContext
+ * "" "" The parser does not support context
+ * "let " "=7" Pre- and post-context only
+ * "let " "for=7" Pre- and post-context and error text
+ * "" "for" Error text only
+ * </pre>
+ *
+ * <p>Examples of engines which use UParseError (or may use it in the
+ * future) are Transliterator, RuleBasedBreakIterator, and
+ * RegexPattern.
+ *
+ * @stable ICU 2.0
+ */
+typedef struct UParseError {
+
+ /**
+ * The line on which the error occurred. If the parser uses this
+ * field, it sets it to the line number of the source text line on
+ * which the error appears, which will be a value >= 1. If the
+ * parse does not support line numbers, the value will be <= 0.
+ * @stable ICU 2.0
+ */
+ int32_t line;
+
+ /**
+ * The character offset to the error. If the line field is >= 1,
+ * then this is the offset from the start of the line. Otherwise,
+ * this is the offset from the start of the text. If the parser
+ * does not support this field, it will have a value < 0.
+ * @stable ICU 2.0
+ */
+ int32_t offset;
+
+ /**
+ * Textual context before the error. Null-terminated. The empty
+ * string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar preContext[U_PARSE_CONTEXT_LEN];
+
+ /**
+ * The error itself and/or textual context after the error.
+ * Null-terminated. The empty string if not supported by parser.
+ * @stable ICU 2.0
+ */
+ UChar postContext[U_PARSE_CONTEXT_LEN];
+
+} UParseError;
+
+#endif
diff --git a/intl/icu/source/common/unicode/parsepos.h b/intl/icu/source/common/unicode/parsepos.h
new file mode 100644
index 0000000000..d33a812ad0
--- /dev/null
+++ b/intl/icu/source/common/unicode/parsepos.h
@@ -0,0 +1,237 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+* Copyright (C) 1997-2005, International Business Machines Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* File PARSEPOS.H
+*
+* Modification History:
+*
+* Date Name Description
+* 07/09/97 helena Converted from java.
+* 07/17/98 stephen Added errorIndex support.
+* 05/11/99 stephen Cleaned up.
+*******************************************************************************
+*/
+
+#ifndef PARSEPOS_H
+#define PARSEPOS_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \file
+ * \brief C++ API: Canonical Iterator
+ */
+/**
+ * <code>ParsePosition</code> is a simple class used by <code>Format</code>
+ * and its subclasses to keep track of the current position during parsing.
+ * The <code>parseObject</code> method in the various <code>Format</code>
+ * classes requires a <code>ParsePosition</code> object as an argument.
+ *
+ * <p>
+ * By design, as you parse through a string with different formats,
+ * you can use the same <code>ParsePosition</code>, since the index parameter
+ * records the current position.
+ *
+ * The ParsePosition class is not suitable for subclassing.
+ *
+ * @version 1.3 10/30/97
+ * @author Mark Davis, Helena Shih
+ * @see java.text.Format
+ */
+
+class U_COMMON_API ParsePosition : public UObject {
+public:
+ /**
+ * Default constructor, the index starts with 0 as default.
+ * @stable ICU 2.0
+ */
+ ParsePosition()
+ : UObject(),
+ index(0),
+ errorIndex(-1)
+ {}
+
+ /**
+ * Create a new ParsePosition with the given initial index.
+ * @param newIndex the new text offset.
+ * @stable ICU 2.0
+ */
+ ParsePosition(int32_t newIndex)
+ : UObject(),
+ index(newIndex),
+ errorIndex(-1)
+ {}
+
+ /**
+ * Copy constructor
+ * @param copy the object to be copied from.
+ * @stable ICU 2.0
+ */
+ ParsePosition(const ParsePosition& copy)
+ : UObject(copy),
+ index(copy.index),
+ errorIndex(copy.errorIndex)
+ {}
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~ParsePosition();
+
+ /**
+ * Assignment operator
+ * @stable ICU 2.0
+ */
+ inline ParsePosition& operator=(const ParsePosition& copy);
+
+ /**
+ * Equality operator.
+ * @return true if the two parse positions are equal, false otherwise.
+ * @stable ICU 2.0
+ */
+ inline bool operator==(const ParsePosition& that) const;
+
+ /**
+ * Equality operator.
+ * @return true if the two parse positions are not equal, false otherwise.
+ * @stable ICU 2.0
+ */
+ inline bool operator!=(const ParsePosition& that) const;
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then nullptr is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ ParsePosition *clone() const;
+
+ /**
+ * Retrieve the current parse position. On input to a parse method, this
+ * is the index of the character at which parsing will begin; on output, it
+ * is the index of the character following the last character parsed.
+ * @return the current index.
+ * @stable ICU 2.0
+ */
+ inline int32_t getIndex(void) const;
+
+ /**
+ * Set the current parse position.
+ * @param index the new index.
+ * @stable ICU 2.0
+ */
+ inline void setIndex(int32_t index);
+
+ /**
+ * Set the index at which a parse error occurred. Formatters
+ * should set this before returning an error code from their
+ * parseObject method. The default value is -1 if this is not
+ * set.
+ * @stable ICU 2.0
+ */
+ inline void setErrorIndex(int32_t ei);
+
+ /**
+ * Retrieve the index at which an error occurred, or -1 if the
+ * error index has not been set.
+ * @stable ICU 2.0
+ */
+ inline int32_t getErrorIndex(void) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const override;
+
+private:
+ /**
+ * Input: the place you start parsing.
+ * <br>Output: position where the parse stopped.
+ * This is designed to be used serially,
+ * with each call setting index up for the next one.
+ */
+ int32_t index;
+
+ /**
+ * The index at which a parse error occurred.
+ */
+ int32_t errorIndex;
+
+};
+
+inline ParsePosition&
+ParsePosition::operator=(const ParsePosition& copy)
+{
+ index = copy.index;
+ errorIndex = copy.errorIndex;
+ return *this;
+}
+
+inline bool
+ParsePosition::operator==(const ParsePosition& copy) const
+{
+ if(index != copy.index || errorIndex != copy.errorIndex)
+ return false;
+ else
+ return true;
+}
+
+inline bool
+ParsePosition::operator!=(const ParsePosition& copy) const
+{
+ return !operator==(copy);
+}
+
+inline int32_t
+ParsePosition::getIndex() const
+{
+ return index;
+}
+
+inline void
+ParsePosition::setIndex(int32_t offset)
+{
+ this->index = offset;
+}
+
+inline int32_t
+ParsePosition::getErrorIndex() const
+{
+ return errorIndex;
+}
+
+inline void
+ParsePosition::setErrorIndex(int32_t ei)
+{
+ this->errorIndex = ei;
+}
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/platform.h b/intl/icu/source/common/unicode/platform.h
new file mode 100644
index 0000000000..a997843660
--- /dev/null
+++ b/intl/icu/source/common/unicode/platform.h
@@ -0,0 +1,881 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : platform.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+******************************************************************************
+*/
+
+#ifndef _PLATFORM_H
+#define _PLATFORM_H
+
+#include "unicode/uconfig.h"
+#include "unicode/uvernum.h"
+
+/**
+ * \file
+ * \brief Basic types for the platform.
+ *
+ * This file used to be generated by autoconf/configure.
+ * Starting with ICU 49, platform.h is a normal source file,
+ * to simplify cross-compiling and working with non-autoconf/make build systems.
+ *
+ * When a value in this file does not work on a platform, then please
+ * try to derive it from the U_PLATFORM value
+ * (for which we might need a new value constant in rare cases)
+ * and/or from other macros that are predefined by the compiler
+ * or defined in standard (POSIX or platform or compiler) headers.
+ *
+ * As a temporary workaround, you can add an explicit \#define for some macros
+ * before it is first tested, or add an equivalent -D macro definition
+ * to the compiler's command line.
+ *
+ * Note: Some compilers provide ways to show the predefined macros.
+ * For example, with gcc you can compile an empty .c file and have the compiler
+ * print the predefined macros with
+ * \code
+ * gcc -E -dM -x c /dev/null | sort
+ * \endcode
+ * (You can provide an actual empty .c file rather than /dev/null.
+ * <code>-x c++</code> is for C++.)
+ */
+
+/**
+ * Define some things so that they can be documented.
+ * @internal
+ */
+#ifdef U_IN_DOXYGEN
+/*
+ * Problem: "platform.h:335: warning: documentation for unknown define U_HAVE_STD_STRING found." means that U_HAVE_STD_STRING is not documented.
+ * Solution: #define any defines for non @internal API here, so that they are visible in the docs. If you just set PREDEFINED in Doxyfile.in, they won't be documented.
+ */
+
+/* None for now. */
+#endif
+
+/**
+ * \def U_PLATFORM
+ * The U_PLATFORM macro defines the platform we're on.
+ *
+ * We used to define one different, value-less macro per platform.
+ * That made it hard to know the set of relevant platforms and macros,
+ * and hard to deal with variants of platforms.
+ *
+ * Starting with ICU 49, we define platforms as numeric macros,
+ * with ranges of values for related platforms and their variants.
+ * The U_PLATFORM macro is set to one of these values.
+ *
+ * Historical note from the Solaris Wikipedia article:
+ * AT&T and Sun collaborated on a project to merge the most popular Unix variants
+ * on the market at that time: BSD, System V, and Xenix.
+ * This became Unix System V Release 4 (SVR4).
+ *
+ * @internal
+ */
+
+/** Unknown platform. @internal */
+#define U_PF_UNKNOWN 0
+/** Windows @internal */
+#define U_PF_WINDOWS 1000
+/** MinGW. Windows, calls to Win32 API, but using GNU gcc and binutils. @internal */
+#define U_PF_MINGW 1800
+/**
+ * Cygwin. Windows, calls to cygwin1.dll for Posix functions,
+ * using MSVC or GNU gcc and binutils.
+ * @internal
+ */
+#define U_PF_CYGWIN 1900
+/* Reserve 2000 for U_PF_UNIX? */
+/** HP-UX is based on UNIX System V. @internal */
+#define U_PF_HPUX 2100
+/** Solaris is a Unix operating system based on SVR4. @internal */
+#define U_PF_SOLARIS 2600
+/** BSD is a UNIX operating system derivative. @internal */
+#define U_PF_BSD 3000
+/** AIX is based on UNIX System V Releases and 4.3 BSD. @internal */
+#define U_PF_AIX 3100
+/** IRIX is based on UNIX System V with BSD extensions. @internal */
+#define U_PF_IRIX 3200
+/**
+ * Darwin is a POSIX-compliant operating system, composed of code developed by Apple,
+ * as well as code derived from NeXTSTEP, BSD, and other projects,
+ * built around the Mach kernel.
+ * Darwin forms the core set of components upon which Mac OS X, Apple TV, and iOS are based.
+ * (Original description modified from WikiPedia.)
+ * @internal
+ */
+#define U_PF_DARWIN 3500
+/** iPhone OS (iOS) is a derivative of Mac OS X. @internal */
+#define U_PF_IPHONE 3550
+/** QNX is a commercial Unix-like real-time operating system related to BSD. @internal */
+#define U_PF_QNX 3700
+/** Linux is a Unix-like operating system. @internal */
+#define U_PF_LINUX 4000
+/**
+ * Native Client is pretty close to Linux.
+ * See https://developer.chrome.com/native-client and
+ * http://www.chromium.org/nativeclient
+ * @internal
+ */
+#define U_PF_BROWSER_NATIVE_CLIENT 4020
+/** Android is based on Linux. @internal */
+#define U_PF_ANDROID 4050
+/** Fuchsia is a POSIX-ish platform. @internal */
+#define U_PF_FUCHSIA 4100
+/* Maximum value for Linux-based platform is 4499 */
+/**
+ * Emscripten is a C++ transpiler for the Web that can target asm.js or
+ * WebAssembly. It provides some POSIX-compatible wrappers and stubs and
+ * some Linux-like functionality, but is not fully compatible with
+ * either.
+ * @internal
+ */
+#define U_PF_EMSCRIPTEN 5010
+/** z/OS is the successor to OS/390 which was the successor to MVS. @internal */
+#define U_PF_OS390 9000
+/** "IBM i" is the current name of what used to be i5/OS and earlier OS/400. @internal */
+#define U_PF_OS400 9400
+
+#ifdef U_PLATFORM
+ /* Use the predefined value. */
+#elif defined(__MINGW32__)
+# define U_PLATFORM U_PF_MINGW
+#elif defined(__CYGWIN__)
+# define U_PLATFORM U_PF_CYGWIN
+#elif defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
+# define U_PLATFORM U_PF_WINDOWS
+#elif defined(__ANDROID__)
+# define U_PLATFORM U_PF_ANDROID
+ /* Android wchar_t support depends on the API level. */
+# include <android/api-level.h>
+#elif defined(__pnacl__) || defined(__native_client__)
+# define U_PLATFORM U_PF_BROWSER_NATIVE_CLIENT
+#elif defined(__Fuchsia__)
+# define U_PLATFORM U_PF_FUCHSIA
+#elif defined(linux) || defined(__linux__) || defined(__linux)
+# define U_PLATFORM U_PF_LINUX
+#elif defined(__APPLE__) && defined(__MACH__)
+# include <TargetConditionals.h>
+# if (defined(TARGET_OS_IPHONE) && TARGET_OS_IPHONE) && (defined(TARGET_OS_MACCATALYST) && !TARGET_OS_MACCATALYST) /* variant of TARGET_OS_MAC */
+# define U_PLATFORM U_PF_IPHONE
+# else
+# define U_PLATFORM U_PF_DARWIN
+# endif
+#elif defined(BSD) || defined(__FreeBSD__) || defined(__FreeBSD_kernel__) || defined(__NetBSD__) || defined(__OpenBSD__) || defined(__MirBSD__)
+# if defined(__FreeBSD__)
+# include <sys/endian.h>
+# endif
+# define U_PLATFORM U_PF_BSD
+#elif defined(sun) || defined(__sun)
+ /* Check defined(__SVR4) || defined(__svr4__) to distinguish Solaris from SunOS? */
+# define U_PLATFORM U_PF_SOLARIS
+# if defined(__GNUC__)
+ /* Solaris/GCC needs this header file to get the proper endianness. Normally, this
+ * header file is included with stddef.h but on Solairs/GCC, the GCC version of stddef.h
+ * is included which does not include this header file.
+ */
+# include <sys/isa_defs.h>
+# endif
+#elif defined(_AIX) || defined(__TOS_AIX__)
+# define U_PLATFORM U_PF_AIX
+#elif defined(_hpux) || defined(hpux) || defined(__hpux)
+# define U_PLATFORM U_PF_HPUX
+#elif defined(sgi) || defined(__sgi)
+# define U_PLATFORM U_PF_IRIX
+#elif defined(__QNX__) || defined(__QNXNTO__)
+# define U_PLATFORM U_PF_QNX
+#elif defined(__TOS_MVS__)
+# define U_PLATFORM U_PF_OS390
+#elif defined(__OS400__) || defined(__TOS_OS400__)
+# define U_PLATFORM U_PF_OS400
+#elif defined(__EMSCRIPTEN__)
+# define U_PLATFORM U_PF_EMSCRIPTEN
+#else
+# define U_PLATFORM U_PF_UNKNOWN
+#endif
+
+/**
+ * \def CYGWINMSVC
+ * Defined if this is Windows with Cygwin, but using MSVC rather than gcc.
+ * Otherwise undefined.
+ * @internal
+ */
+/* Commented out because this is already set in mh-cygwin-msvc
+#if U_PLATFORM == U_PF_CYGWIN && defined(_MSC_VER)
+# define CYGWINMSVC
+#endif
+*/
+#ifdef U_IN_DOXYGEN
+# define CYGWINMSVC
+#endif
+
+/**
+ * \def U_PLATFORM_USES_ONLY_WIN32_API
+ * Defines whether the platform uses only the Win32 API.
+ * Set to 1 for Windows/MSVC and MinGW but not Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_USES_ONLY_WIN32_API
+ /* Use the predefined value. */
+#elif (U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_MINGW) || defined(CYGWINMSVC)
+# define U_PLATFORM_USES_ONLY_WIN32_API 1
+#else
+ /* Cygwin implements POSIX. */
+# define U_PLATFORM_USES_ONLY_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_HAS_WIN32_API
+ * Defines whether the Win32 API is available on the platform.
+ * Set to 1 for Windows/MSVC, MinGW and Cygwin.
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WIN32_API
+ /* Use the predefined value. */
+#elif U_PF_WINDOWS <= U_PLATFORM && U_PLATFORM <= U_PF_CYGWIN
+# define U_PLATFORM_HAS_WIN32_API 1
+#else
+# define U_PLATFORM_HAS_WIN32_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_HAS_WINUWP_API
+ * Defines whether target is intended for Universal Windows Platform API
+ * Set to 1 for Windows10 Release Solution Configuration
+ * @internal
+ */
+#ifdef U_PLATFORM_HAS_WINUWP_API
+ /* Use the predefined value. */
+#else
+# define U_PLATFORM_HAS_WINUWP_API 0
+#endif
+
+/**
+ * \def U_PLATFORM_IMPLEMENTS_POSIX
+ * Defines whether the platform implements (most of) the POSIX API.
+ * Set to 1 for Cygwin and most other platforms.
+ * @internal
+ */
+#ifdef U_PLATFORM_IMPLEMENTS_POSIX
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# define U_PLATFORM_IMPLEMENTS_POSIX 0
+#else
+# define U_PLATFORM_IMPLEMENTS_POSIX 1
+#endif
+
+/**
+ * \def U_PLATFORM_IS_LINUX_BASED
+ * Defines whether the platform is Linux or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_LINUX_BASED
+ /* Use the predefined value. */
+#elif U_PF_LINUX <= U_PLATFORM && U_PLATFORM <= 4499
+# define U_PLATFORM_IS_LINUX_BASED 1
+#else
+# define U_PLATFORM_IS_LINUX_BASED 0
+#endif
+
+/**
+ * \def U_PLATFORM_IS_DARWIN_BASED
+ * Defines whether the platform is Darwin or one of its derivatives.
+ * @internal
+ */
+#ifdef U_PLATFORM_IS_DARWIN_BASED
+ /* Use the predefined value. */
+#elif U_PF_DARWIN <= U_PLATFORM && U_PLATFORM <= U_PF_IPHONE
+# define U_PLATFORM_IS_DARWIN_BASED 1
+#else
+# define U_PLATFORM_IS_DARWIN_BASED 0
+#endif
+
+/**
+ * \def U_HAVE_STDINT_H
+ * Defines whether stdint.h is available. It is a C99 standard header.
+ * We used to include inttypes.h which includes stdint.h but we usually do not need
+ * the additional definitions from inttypes.h.
+ * @internal
+ */
+#ifdef U_HAVE_STDINT_H
+ /* Use the predefined value. */
+#elif U_PLATFORM_USES_ONLY_WIN32_API
+# if defined(__BORLANDC__) || U_PLATFORM == U_PF_MINGW || (defined(_MSC_VER) && _MSC_VER>=1600)
+ /* Windows Visual Studio 9 and below do not have stdint.h & inttypes.h, but VS 2010 adds them. */
+# define U_HAVE_STDINT_H 1
+# else
+# define U_HAVE_STDINT_H 0
+# endif
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_STDINT_H 0
+#else
+# define U_HAVE_STDINT_H 1
+#endif
+
+/**
+ * \def U_HAVE_INTTYPES_H
+ * Defines whether inttypes.h is available. It is a C99 standard header.
+ * We include inttypes.h where it is available but stdint.h is not.
+ * @internal
+ */
+#ifdef U_HAVE_INTTYPES_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_SOLARIS
+ /* Solaris has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#elif U_PLATFORM == U_PF_AIX && !defined(_AIX51) && defined(_POWER)
+ /* PPC AIX <= 4.3 has inttypes.h but not stdint.h. */
+# define U_HAVE_INTTYPES_H 1
+#else
+ /* Most platforms have both inttypes.h and stdint.h, or neither. */
+# define U_HAVE_INTTYPES_H U_HAVE_STDINT_H
+#endif
+
+/*===========================================================================*/
+/** @{ Compiler and environment features */
+/*===========================================================================*/
+
+/**
+ * \def U_GCC_MAJOR_MINOR
+ * Indicates whether the compiler is gcc (test for != 0),
+ * and if so, contains its major (times 100) and minor version numbers.
+ * If the compiler is not gcc, then U_GCC_MAJOR_MINOR == 0.
+ *
+ * For example, for testing for whether we have gcc, and whether it's 4.6 or higher,
+ * use "#if U_GCC_MAJOR_MINOR >= 406".
+ * @internal
+ */
+#ifdef __GNUC__
+# define U_GCC_MAJOR_MINOR (__GNUC__ * 100 + __GNUC_MINOR__)
+#else
+# define U_GCC_MAJOR_MINOR 0
+#endif
+
+/**
+ * \def U_IS_BIG_ENDIAN
+ * Determines the endianness of the platform.
+ * @internal
+ */
+#ifdef U_IS_BIG_ENDIAN
+ /* Use the predefined value. */
+#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN (BYTE_ORDER == BIG_ENDIAN)
+#elif defined(__BYTE_ORDER__) && defined(__ORDER_BIG_ENDIAN__)
+ /* gcc */
+# define U_IS_BIG_ENDIAN (__BYTE_ORDER__ == __ORDER_BIG_ENDIAN__)
+#elif defined(__BIG_ENDIAN__) || defined(_BIG_ENDIAN)
+# define U_IS_BIG_ENDIAN 1
+#elif defined(__LITTLE_ENDIAN__) || defined(_LITTLE_ENDIAN)
+# define U_IS_BIG_ENDIAN 0
+#elif U_PLATFORM == U_PF_OS390 || U_PLATFORM == U_PF_OS400 || defined(__s390__) || defined(__s390x__)
+ /* These platforms do not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(_PA_RISC1_0) || defined(_PA_RISC1_1) || defined(_PA_RISC2_0)
+ /* HPPA do not appear to predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#elif defined(sparc) || defined(__sparc) || defined(__sparc__)
+ /* Some sparc based systems (e.g. Linux) do not predefine any endianness macros. */
+# define U_IS_BIG_ENDIAN 1
+#else
+# define U_IS_BIG_ENDIAN 0
+#endif
+
+/**
+ * \def U_HAVE_PLACEMENT_NEW
+ * Determines whether to override placement new and delete for STL.
+ * @stable ICU 2.6
+ */
+#ifdef U_HAVE_PLACEMENT_NEW
+ /* Use the predefined value. */
+#elif defined(__BORLANDC__)
+# define U_HAVE_PLACEMENT_NEW 0
+#else
+# define U_HAVE_PLACEMENT_NEW 1
+#endif
+
+/**
+ * \def U_HAVE_DEBUG_LOCATION_NEW
+ * Define this to define the MFC debug version of the operator new.
+ *
+ * @stable ICU 3.4
+ */
+#ifdef U_HAVE_DEBUG_LOCATION_NEW
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_HAVE_DEBUG_LOCATION_NEW 1
+#else
+# define U_HAVE_DEBUG_LOCATION_NEW 0
+#endif
+
+/* Compatibility with compilers other than clang: http://clang.llvm.org/docs/LanguageExtensions.html */
+#ifdef __has_attribute
+# define UPRV_HAS_ATTRIBUTE(x) __has_attribute(x)
+#else
+# define UPRV_HAS_ATTRIBUTE(x) 0
+#endif
+#ifdef __has_cpp_attribute
+# define UPRV_HAS_CPP_ATTRIBUTE(x) __has_cpp_attribute(x)
+#else
+# define UPRV_HAS_CPP_ATTRIBUTE(x) 0
+#endif
+#ifdef __has_declspec_attribute
+# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) __has_declspec_attribute(x)
+#else
+# define UPRV_HAS_DECLSPEC_ATTRIBUTE(x) 0
+#endif
+#ifdef __has_builtin
+# define UPRV_HAS_BUILTIN(x) __has_builtin(x)
+#else
+# define UPRV_HAS_BUILTIN(x) 0
+#endif
+#ifdef __has_feature
+# define UPRV_HAS_FEATURE(x) __has_feature(x)
+#else
+# define UPRV_HAS_FEATURE(x) 0
+#endif
+#ifdef __has_extension
+# define UPRV_HAS_EXTENSION(x) __has_extension(x)
+#else
+# define UPRV_HAS_EXTENSION(x) 0
+#endif
+#ifdef __has_warning
+# define UPRV_HAS_WARNING(x) __has_warning(x)
+#else
+# define UPRV_HAS_WARNING(x) 0
+#endif
+
+
+#if defined(__clang__)
+#define UPRV_NO_SANITIZE_UNDEFINED __attribute__((no_sanitize("undefined")))
+#else
+#define UPRV_NO_SANITIZE_UNDEFINED
+#endif
+
+/**
+ * \def U_MALLOC_ATTR
+ * Attribute to mark functions as malloc-like
+ * @internal
+ */
+#if defined(__GNUC__) && __GNUC__>=3
+# define U_MALLOC_ATTR __attribute__ ((__malloc__))
+#else
+# define U_MALLOC_ATTR
+#endif
+
+/**
+ * \def U_ALLOC_SIZE_ATTR
+ * Attribute to specify the size of the allocated buffer for malloc-like functions
+ * @internal
+ */
+#if (defined(__GNUC__) && \
+ (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 3))) || \
+ UPRV_HAS_ATTRIBUTE(alloc_size)
+# define U_ALLOC_SIZE_ATTR(X) __attribute__ ((alloc_size(X)))
+# define U_ALLOC_SIZE_ATTR2(X,Y) __attribute__ ((alloc_size(X,Y)))
+#else
+# define U_ALLOC_SIZE_ATTR(X)
+# define U_ALLOC_SIZE_ATTR2(X,Y)
+#endif
+
+/**
+ * \def U_CPLUSPLUS_VERSION
+ * 0 if no C++; 1, 11, 14, ... if C++.
+ * Support for specific features cannot always be determined by the C++ version alone.
+ * @internal
+ */
+#ifdef U_CPLUSPLUS_VERSION
+# if U_CPLUSPLUS_VERSION != 0 && !defined(__cplusplus)
+# undef U_CPLUSPLUS_VERSION
+# define U_CPLUSPLUS_VERSION 0
+# endif
+ /* Otherwise use the predefined value. */
+#elif !defined(__cplusplus)
+# define U_CPLUSPLUS_VERSION 0
+#elif __cplusplus >= 201402L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201402L)
+# define U_CPLUSPLUS_VERSION 14
+#elif __cplusplus >= 201103L || (defined(_MSVC_LANG) && _MSVC_LANG >= 201103L)
+# define U_CPLUSPLUS_VERSION 11
+#else
+ // C++98 or C++03
+# define U_CPLUSPLUS_VERSION 1
+#endif
+
+/**
+ * \def U_FALLTHROUGH
+ * Annotate intentional fall-through between switch labels.
+ * http://clang.llvm.org/docs/AttributeReference.html#fallthrough-clang-fallthrough
+ * @internal
+ */
+#ifndef __cplusplus
+ // Not for C.
+#elif defined(U_FALLTHROUGH)
+ // Use the predefined value.
+#elif defined(__clang__)
+ // Test for compiler vs. feature separately.
+ // Other compilers might choke on the feature test.
+# if UPRV_HAS_CPP_ATTRIBUTE(clang::fallthrough) || \
+ (UPRV_HAS_FEATURE(cxx_attributes) && \
+ UPRV_HAS_WARNING("-Wimplicit-fallthrough"))
+# define U_FALLTHROUGH [[clang::fallthrough]]
+# endif
+#elif defined(__GNUC__) && (__GNUC__ >= 7)
+# define U_FALLTHROUGH __attribute__((fallthrough))
+#endif
+
+#ifndef U_FALLTHROUGH
+# define U_FALLTHROUGH
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Character data types */
+/*===========================================================================*/
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an ASCII based platform.
+ * @stable ICU 2.0
+ */
+#define U_ASCII_FAMILY 0
+
+/**
+ * U_CHARSET_FAMILY is equal to this value when the platform is an EBCDIC based platform.
+ * @stable ICU 2.0
+ */
+#define U_EBCDIC_FAMILY 1
+
+/**
+ * \def U_CHARSET_FAMILY
+ *
+ * <p>These definitions allow to specify the encoding of text
+ * in the char data type as defined by the platform and the compiler.
+ * It is enough to determine the code point values of "invariant characters",
+ * which are the ones shared by all encodings that are in use
+ * on a given platform.</p>
+ *
+ * <p>Those "invariant characters" should be all the uppercase and lowercase
+ * latin letters, the digits, the space, and "basic punctuation".
+ * Also, '\\n', '\\r', '\\t' should be available.</p>
+ *
+ * <p>The list of "invariant characters" is:<br>
+ * \code
+ * A-Z a-z 0-9 SPACE " % &amp; ' ( ) * + , - . / : ; < = > ? _
+ * \endcode
+ * <br>
+ * (52 letters + 10 numbers + 20 punc/sym/space = 82 total)</p>
+ *
+ * <p>This matches the IBM Syntactic Character Set (CS 640).</p>
+ *
+ * <p>In other words, all the graphic characters in 7-bit ASCII should
+ * be safely accessible except the following:</p>
+ *
+ * \code
+ * '\' <backslash>
+ * '[' <left bracket>
+ * ']' <right bracket>
+ * '{' <left brace>
+ * '}' <right brace>
+ * '^' <circumflex>
+ * '~' <tilde>
+ * '!' <exclamation mark>
+ * '#' <number sign>
+ * '|' <vertical line>
+ * '$' <dollar sign>
+ * '@' <commercial at>
+ * '`' <grave accent>
+ * \endcode
+ * @stable ICU 2.0
+ */
+#ifdef U_CHARSET_FAMILY
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_OS390 && (!defined(__CHARSET_LIB) || !__CHARSET_LIB)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#elif U_PLATFORM == U_PF_OS400 && !defined(__UTF32__)
+# define U_CHARSET_FAMILY U_EBCDIC_FAMILY
+#else
+# define U_CHARSET_FAMILY U_ASCII_FAMILY
+#endif
+
+/**
+ * \def U_CHARSET_IS_UTF8
+ *
+ * Hardcode the default charset to UTF-8.
+ *
+ * If this is set to 1, then
+ * - ICU will assume that all non-invariant char*, StringPiece, std::string etc.
+ * contain UTF-8 text, regardless of what the system API uses
+ * - some ICU code will use fast functions like u_strFromUTF8()
+ * rather than the more general and more heavy-weight conversion API (ucnv.h)
+ * - ucnv_getDefaultName() always returns "UTF-8"
+ * - ucnv_setDefaultName() is disabled and will not change the default charset
+ * - static builds of ICU are smaller
+ * - more functionality is available with the UCONFIG_NO_CONVERSION build-time
+ * configuration option (see unicode/uconfig.h)
+ * - the UCONFIG_NO_CONVERSION build option in uconfig.h is more usable
+ *
+ * @stable ICU 4.2
+ * @see UCONFIG_NO_CONVERSION
+ */
+#ifdef U_CHARSET_IS_UTF8
+ /* Use the predefined value. */
+#elif U_PLATFORM_IS_LINUX_BASED || U_PLATFORM_IS_DARWIN_BASED || \
+ U_PLATFORM == U_PF_EMSCRIPTEN
+# define U_CHARSET_IS_UTF8 1
+#else
+# define U_CHARSET_IS_UTF8 0
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Information about wchar support */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_WCHAR_H
+ * Indicates whether <wchar.h> is available (1) or not (0). Set to 1 by default.
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_HAVE_WCHAR_H
+ /* Use the predefined value. */
+#elif U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9
+ /*
+ * Android before Gingerbread (Android 2.3, API level 9) did not support wchar_t.
+ * The type and header existed, but the library functions did not work as expected.
+ * The size of wchar_t was 1 but L"xyz" string literals had 32-bit units anyway.
+ */
+# define U_HAVE_WCHAR_H 0
+#else
+# define U_HAVE_WCHAR_H 1
+#endif
+
+/**
+ * \def U_SIZEOF_WCHAR_T
+ * U_SIZEOF_WCHAR_T==sizeof(wchar_t)
+ *
+ * @stable ICU 2.0
+ */
+#ifdef U_SIZEOF_WCHAR_T
+ /* Use the predefined value. */
+#elif (U_PLATFORM == U_PF_ANDROID && __ANDROID_API__ < 9)
+ /*
+ * Classic Mac OS and Mac OS X before 10.3 (Panther) did not support wchar_t or wstring.
+ * Newer Mac OS X has size 4.
+ */
+# define U_SIZEOF_WCHAR_T 1
+#elif U_PLATFORM_HAS_WIN32_API || U_PLATFORM == U_PF_CYGWIN
+# define U_SIZEOF_WCHAR_T 2
+#elif U_PLATFORM == U_PF_AIX
+ /*
+ * AIX 6.1 information, section "Wide character data representation":
+ * "... the wchar_t datatype is 32-bit in the 64-bit environment and
+ * 16-bit in the 32-bit environment."
+ * and
+ * "All locales use Unicode for their wide character code values (process code),
+ * except the IBM-eucTW codeset."
+ */
+# ifdef __64BIT__
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS390
+ /*
+ * z/OS V1R11 information center, section "LP64 | ILP32":
+ * "In 31-bit mode, the size of long and pointers is 4 bytes and the size of wchar_t is 2 bytes.
+ * Under LP64, the size of long and pointer is 8 bytes and the size of wchar_t is 4 bytes."
+ */
+# ifdef _LP64
+# define U_SIZEOF_WCHAR_T 4
+# else
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#elif U_PLATFORM == U_PF_OS400
+# if defined(__UTF32__)
+ /*
+ * LOCALETYPE(*LOCALEUTF) is specified.
+ * Wide-character strings are in UTF-32,
+ * narrow-character strings are in UTF-8.
+ */
+# define U_SIZEOF_WCHAR_T 4
+# elif defined(__UCS2__)
+ /*
+ * LOCALETYPE(*LOCALEUCS2) is specified.
+ * Wide-character strings are in UCS-2,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+# else
+ /*
+ * LOCALETYPE(*CLD) or LOCALETYPE(*LOCALE) is specified.
+ * Wide-character strings are in 16-bit EBCDIC,
+ * narrow-character strings are in EBCDIC.
+ */
+# define U_SIZEOF_WCHAR_T 2
+# endif
+#else
+# define U_SIZEOF_WCHAR_T 4
+#endif
+
+#ifndef U_HAVE_WCSCPY
+#define U_HAVE_WCSCPY U_HAVE_WCHAR_H
+#endif
+
+/** @} */
+
+/**
+ * \def U_HAVE_CHAR16_T
+ * Defines whether the char16_t type is available for UTF-16
+ * and u"abc" UTF-16 string literals are supported.
+ * This is a new standard type and standard string literal syntax in C++11
+ * but has been available in some compilers before.
+ * @internal
+ */
+#ifdef U_HAVE_CHAR16_T
+ /* Use the predefined value. */
+#else
+ /*
+ * Notes:
+ * C++11 and C11 require support for UTF-16 literals
+ * TODO: Fix for plain C. Doesn't work on Mac.
+ */
+# if U_CPLUSPLUS_VERSION >= 11 || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 201112L)
+# define U_HAVE_CHAR16_T 1
+# else
+# define U_HAVE_CHAR16_T 0
+# endif
+#endif
+
+/**
+ * @{
+ * \def U_DECLARE_UTF16
+ * Do not use this macro because it is not defined on all platforms.
+ * Use the UNICODE_STRING or U_STRING_DECL macros instead.
+ * @internal
+ */
+#ifdef U_DECLARE_UTF16
+ /* Use the predefined value. */
+#elif U_HAVE_CHAR16_T \
+ || (defined(__xlC__) && defined(__IBM_UTF_LITERAL) && U_SIZEOF_WCHAR_T != 2) \
+ || (defined(__HP_aCC) && __HP_aCC >= 035000) \
+ || (defined(__HP_cc) && __HP_cc >= 111106) \
+ || (defined(U_IN_DOXYGEN))
+# define U_DECLARE_UTF16(string) u ## string
+#elif U_SIZEOF_WCHAR_T == 2 \
+ && (U_CHARSET_FAMILY == 0 || (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400 && defined(__UCS2__)))
+# define U_DECLARE_UTF16(string) L ## string
+#else
+ /* Leave U_DECLARE_UTF16 undefined. See unistr.h. */
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/** @{ Symbol import-export control */
+/*===========================================================================*/
+
+#ifdef U_EXPORT
+ /* Use the predefined value. */
+#elif defined(U_STATIC_IMPLEMENTATION)
+# define U_EXPORT
+#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
+ UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
+# define U_EXPORT __declspec(dllexport)
+#elif defined(__GNUC__)
+# define U_EXPORT __attribute__((visibility("default")))
+#elif (defined(__SUNPRO_CC) && __SUNPRO_CC >= 0x550) \
+ || (defined(__SUNPRO_C) && __SUNPRO_C >= 0x550)
+# define U_EXPORT __global
+/*#elif defined(__HP_aCC) || defined(__HP_cc)
+# define U_EXPORT __declspec(dllexport)*/
+#else
+# define U_EXPORT
+#endif
+
+/* U_CALLCONV is related to U_EXPORT2 */
+#ifdef U_EXPORT2
+ /* Use the predefined value. */
+#elif defined(_MSC_VER)
+# define U_EXPORT2 __cdecl
+#else
+# define U_EXPORT2
+#endif
+
+#ifdef U_IMPORT
+ /* Use the predefined value. */
+#elif defined(_MSC_VER) || (UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllexport__) && \
+ UPRV_HAS_DECLSPEC_ATTRIBUTE(__dllimport__))
+ /* Windows needs to export/import data. */
+# define U_IMPORT __declspec(dllimport)
+#else
+# define U_IMPORT
+#endif
+
+/**
+ * \def U_HIDDEN
+ * This is used to mark internal structs declared within external classes,
+ * to prevent the internal structs from having the same visibility as the
+ * class within which they are declared.
+ * @internal
+ */
+#ifdef U_HIDDEN
+ /* Use the predefined value. */
+#elif defined(__GNUC__)
+# define U_HIDDEN __attribute__((visibility("hidden")))
+#else
+# define U_HIDDEN
+#endif
+
+/**
+ * \def U_CALLCONV
+ * Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary
+ * in callback function typedefs to make sure that the calling convention
+ * is compatible.
+ *
+ * This is only used for non-ICU-API functions.
+ * When a function is a public ICU API,
+ * you must use the U_CAPI and U_EXPORT2 qualifiers.
+ *
+ * Please note, you need to use U_CALLCONV after the *.
+ *
+ * NO : "static const char U_CALLCONV *func( . . . )"
+ * YES: "static const char* U_CALLCONV func( . . . )"
+ *
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV __cdecl
+#else
+# define U_CALLCONV U_EXPORT2
+#endif
+
+/**
+ * \def U_CALLCONV_FPTR
+ * Similar to U_CALLCONV, but only used on function pointers.
+ * @internal
+ */
+#if U_PLATFORM == U_PF_OS390 && defined(__cplusplus)
+# define U_CALLCONV_FPTR U_CALLCONV
+#else
+# define U_CALLCONV_FPTR
+#endif
+/** @} */
+
+#endif // _PLATFORM_H
diff --git a/intl/icu/source/common/unicode/ptypes.h b/intl/icu/source/common/unicode/ptypes.h
new file mode 100644
index 0000000000..70324ffee3
--- /dev/null
+++ b/intl/icu/source/common/unicode/ptypes.h
@@ -0,0 +1,130 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : ptypes.h
+*
+* Date Name Description
+* 05/13/98 nos Creation (content moved here from ptypes.h).
+* 03/02/99 stephen Added AS400 support.
+* 03/30/99 stephen Added Linux support.
+* 04/13/99 stephen Reworked for autoconf.
+* 09/18/08 srl Moved basic types back to ptypes.h from platform.h
+******************************************************************************
+*/
+
+/**
+ * \file
+ * \brief C API: Definitions of integer types of various widths
+ */
+
+#ifndef _PTYPES_H
+#define _PTYPES_H
+
+/**
+ * \def __STDC_LIMIT_MACROS
+ * According to the Linux stdint.h, the ISO C99 standard specifies that in C++ implementations
+ * macros like INT32_MIN and UINTPTR_MAX should only be defined if explicitly requested.
+ * We need to define __STDC_LIMIT_MACROS before including stdint.h in C++ code
+ * that uses such limit macros.
+ * @internal
+ */
+#ifndef __STDC_LIMIT_MACROS
+#define __STDC_LIMIT_MACROS
+#endif
+
+/* NULL, size_t, wchar_t */
+#include <stddef.h>
+
+/*
+ * If all compilers provided all of the C99 headers and types,
+ * we would just unconditionally #include <stdint.h> here
+ * and not need any of the stuff after including platform.h.
+ */
+
+/* Find out if we have stdint.h etc. */
+#include "unicode/platform.h"
+
+/*===========================================================================*/
+/* Generic data types */
+/*===========================================================================*/
+
+/* If your platform does not have the <stdint.h> header, you may
+ need to edit the typedefs in the #else section below.
+ Use #if...#else...#endif with predefined compiler macros if possible. */
+#if U_HAVE_STDINT_H
+
+/*
+ * We mostly need <stdint.h> (which defines the standard integer types) but not <inttypes.h>.
+ * <inttypes.h> includes <stdint.h> and adds the printf/scanf helpers PRId32, SCNx16 etc.
+ * which we almost never use, plus stuff like imaxabs() which we never use.
+ */
+#include <stdint.h>
+
+#if U_PLATFORM == U_PF_OS390
+/* The features header is needed to get (u)int64_t sometimes. */
+#include <features.h>
+/* z/OS has <stdint.h>, but some versions are missing uint8_t (APAR PK62248). */
+#if !defined(__uint8_t)
+#define __uint8_t 1
+typedef unsigned char uint8_t;
+#endif
+#endif /* U_PLATFORM == U_PF_OS390 */
+
+#elif U_HAVE_INTTYPES_H
+
+# include <inttypes.h>
+
+#else /* neither U_HAVE_STDINT_H nor U_HAVE_INTTYPES_H */
+
+/// \cond
+#if ! U_HAVE_INT8_T
+typedef signed char int8_t;
+#endif
+
+#if ! U_HAVE_UINT8_T
+typedef unsigned char uint8_t;
+#endif
+
+#if ! U_HAVE_INT16_T
+typedef signed short int16_t;
+#endif
+
+#if ! U_HAVE_UINT16_T
+typedef unsigned short uint16_t;
+#endif
+
+#if ! U_HAVE_INT32_T
+typedef signed int int32_t;
+#endif
+
+#if ! U_HAVE_UINT32_T
+typedef unsigned int uint32_t;
+#endif
+
+#if ! U_HAVE_INT64_T
+#ifdef _MSC_VER
+ typedef signed __int64 int64_t;
+#else
+ typedef signed long long int64_t;
+#endif
+#endif
+
+#if ! U_HAVE_UINT64_T
+#ifdef _MSC_VER
+ typedef unsigned __int64 uint64_t;
+#else
+ typedef unsigned long long uint64_t;
+#endif
+#endif
+/// \endcond
+
+#endif /* U_HAVE_STDINT_H / U_HAVE_INTTYPES_H */
+
+#endif /* _PTYPES_H */
diff --git a/intl/icu/source/common/unicode/putil.h b/intl/icu/source/common/unicode/putil.h
new file mode 100644
index 0000000000..500c21252f
--- /dev/null
+++ b/intl/icu/source/common/unicode/putil.h
@@ -0,0 +1,183 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1997-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* FILE NAME : putil.h
+*
+* Date Name Description
+* 05/14/98 nos Creation (content moved here from utypes.h).
+* 06/17/99 erm Added IEEE_754
+* 07/22/98 stephen Added IEEEremainder, max, min, trunc
+* 08/13/98 stephen Added isNegativeInfinity, isPositiveInfinity
+* 08/24/98 stephen Added longBitsFromDouble
+* 03/02/99 stephen Removed openFile(). Added AS400 support.
+* 04/15/99 stephen Converted to C
+* 11/15/99 helena Integrated S/390 changes for IEEE support.
+* 01/11/00 helena Added u_getVersion.
+******************************************************************************
+*/
+
+#ifndef PUTIL_H
+#define PUTIL_H
+
+#include "unicode/utypes.h"
+ /**
+ * \file
+ * \brief C API: Platform Utilities
+ */
+
+/*==========================================================================*/
+/* Platform utilities */
+/*==========================================================================*/
+
+/**
+ * Platform utilities isolates the platform dependencies of the
+ * library. For each platform which this code is ported to, these
+ * functions may have to be re-implemented.
+ */
+
+/**
+ * Return the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * The data directory is determined as follows:
+ * If u_setDataDirectory() has been called, that is it, otherwise
+ * if the ICU_DATA environment variable is set, use that, otherwise
+ * If a data directory was specified at ICU build time
+ * <code>
+ * \code
+ * #define ICU_DATA_DIR "path"
+ * \endcode
+ * </code> use that,
+ * otherwise no data directory is available.
+ *
+ * @return the data directory, or an empty string ("") if no data directory has
+ * been specified.
+ *
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2 u_getDataDirectory(void);
+
+
+/**
+ * Set the ICU data directory.
+ * The data directory is where common format ICU data files (.dat files)
+ * are loaded from. Note that normal use of the built-in ICU
+ * facilities does not require loading of an external data file;
+ * unless you are adding custom data to ICU, the data directory
+ * does not need to be set.
+ *
+ * This function should be called at most once in a process, before the
+ * first ICU operation (e.g., u_init()) that will require the loading of an
+ * ICU data file.
+ * This function is not thread-safe. Use it before calling ICU APIs from
+ * multiple threads.
+ *
+ * @param directory The directory to be set.
+ *
+ * @see u_init
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 u_setDataDirectory(const char *directory);
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Return the time zone files override directory, or an empty string if
+ * no directory was specified. Certain time zone resources will be preferentially
+ * loaded from individual files in this directory.
+ *
+ * @return the time zone data override directory.
+ * @internal
+ */
+U_CAPI const char * U_EXPORT2 u_getTimeZoneFilesDirectory(UErrorCode *status);
+
+/**
+ * Set the time zone files override directory.
+ * This function is not thread safe; it must not be called concurrently with
+ * u_getTimeZoneFilesDirectory() or any other use of ICU time zone functions.
+ * This function should only be called before using any ICU service that
+ * will access the time zone data.
+ * @internal
+ */
+U_CAPI void U_EXPORT2 u_setTimeZoneFilesDirectory(const char *path, UErrorCode *status);
+#endif /* U_HIDE_INTERNAL_API */
+
+
+/**
+ * @{
+ * Filesystem file and path separator characters.
+ * Example: '/' and ':' on Unix, '\\' and ';' on Windows.
+ * @stable ICU 2.0
+ */
+#if U_PLATFORM_USES_ONLY_WIN32_API
+# define U_FILE_SEP_CHAR '\\'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ';'
+# define U_FILE_SEP_STRING "\\"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ";"
+#else
+# define U_FILE_SEP_CHAR '/'
+# define U_FILE_ALT_SEP_CHAR '/'
+# define U_PATH_SEP_CHAR ':'
+# define U_FILE_SEP_STRING "/"
+# define U_FILE_ALT_SEP_STRING "/"
+# define U_PATH_SEP_STRING ":"
+#endif
+
+/** @} */
+
+/**
+ * Convert char characters to UChar characters.
+ * This utility function is useful only for "invariant characters"
+ * that are encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param cs Input string, points to <code>length</code>
+ * character bytes from a subset of the platform encoding.
+ * @param us Output string, points to memory for <code>length</code>
+ * Unicode characters.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_charsToUChars(const char *cs, UChar *us, int32_t length);
+
+/**
+ * Convert UChar characters to char characters.
+ * This utility function is useful only for "invariant characters"
+ * that can be encoded in the platform default encoding.
+ * They are a small, constant subset of the encoding and include
+ * just the latin letters, digits, and some punctuation.
+ * For details, see U_CHARSET_FAMILY.
+ *
+ * @param us Input string, points to <code>length</code>
+ * Unicode characters that can be encoded with the
+ * codepage-invariant subset of the platform encoding.
+ * @param cs Output string, points to memory for <code>length</code>
+ * character bytes.
+ * @param length The number of characters to convert; this may
+ * include the terminating <code>NUL</code>.
+ *
+ * @see U_CHARSET_FAMILY
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_UCharsToChars(const UChar *us, char *cs, int32_t length);
+
+#endif
diff --git a/intl/icu/source/common/unicode/rbbi.h b/intl/icu/source/common/unicode/rbbi.h
new file mode 100644
index 0000000000..418b52e41f
--- /dev/null
+++ b/intl/icu/source/common/unicode/rbbi.h
@@ -0,0 +1,745 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 1999-2016 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+
+**********************************************************************
+* Date Name Description
+* 10/22/99 alan Creation.
+* 11/11/99 rgillam Complete port from Java.
+**********************************************************************
+*/
+
+#ifndef RBBI_H
+#define RBBI_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C++ API: Rule Based Break Iterator
+ */
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/udata.h"
+#include "unicode/parseerr.h"
+#include "unicode/schriter.h"
+
+struct UCPTrie;
+
+U_NAMESPACE_BEGIN
+
+/** @internal */
+class LanguageBreakEngine;
+struct RBBIDataHeader;
+class RBBIDataWrapper;
+class UnhandledEngine;
+class UStack;
+
+/**
+ *
+ * A subclass of BreakIterator whose behavior is specified using a list of rules.
+ * <p>Instances of this class are most commonly created by the factory methods of
+ * BreakIterator::createWordInstance(), BreakIterator::createLineInstance(), etc.,
+ * and then used via the abstract API in class BreakIterator</p>
+ *
+ * <p>See the ICU User Guide for information on Break Iterator Rules.</p>
+ *
+ * <p>This class is not intended to be subclassed.</p>
+ */
+class U_COMMON_API RuleBasedBreakIterator /*final*/ : public BreakIterator {
+
+private:
+ /**
+ * The UText through which this BreakIterator accesses the text
+ * @internal (private)
+ */
+ UText fText = UTEXT_INITIALIZER;
+
+#ifndef U_HIDE_INTERNAL_API
+public:
+#endif /* U_HIDE_INTERNAL_API */
+ /**
+ * The rule data for this BreakIterator instance.
+ * Not for general use; Public only for testing purposes.
+ * @internal
+ */
+ RBBIDataWrapper *fData = nullptr;
+
+private:
+ /**
+ * The saved error code associated with this break iterator.
+ * This is the value to be returned by copyErrorTo().
+ */
+ UErrorCode fErrorCode = U_ZERO_ERROR;
+
+ /**
+ * The current position of the iterator. Pinned, 0 < fPosition <= text.length.
+ * Never has the value UBRK_DONE (-1).
+ */
+ int32_t fPosition = 0;
+
+ /**
+ * TODO:
+ */
+ int32_t fRuleStatusIndex = 0;
+
+ /**
+ * Cache of previously determined boundary positions.
+ */
+ class BreakCache;
+ BreakCache *fBreakCache = nullptr;
+
+ /**
+ * Cache of boundary positions within a region of text that has been
+ * sub-divided by dictionary based breaking.
+ */
+ class DictionaryCache;
+ DictionaryCache *fDictionaryCache = nullptr;
+
+ /**
+ *
+ * If present, UStack of LanguageBreakEngine objects that might handle
+ * dictionary characters. Searched from top to bottom to find an object to
+ * handle a given character.
+ * @internal (private)
+ */
+ UStack *fLanguageBreakEngines = nullptr;
+
+ /**
+ *
+ * If present, the special LanguageBreakEngine used for handling
+ * characters that are in the dictionary set, but not handled by any
+ * LanguageBreakEngine.
+ * @internal (private)
+ */
+ UnhandledEngine *fUnhandledBreakEngine = nullptr;
+
+ /**
+ * Counter for the number of characters encountered with the "dictionary"
+ * flag set.
+ * @internal (private)
+ */
+ uint32_t fDictionaryCharCount = 0;
+
+ /**
+ * A character iterator that refers to the same text as the UText, above.
+ * Only included for compatibility with old API, which was based on CharacterIterators.
+ * Value may be adopted from outside, or one of fSCharIter or fDCharIter, below.
+ */
+ CharacterIterator *fCharIter = &fSCharIter;
+
+ /**
+ * When the input text is provided by a UnicodeString, this will point to
+ * a characterIterator that wraps that data. Needed only for the
+ * implementation of getText(), a backwards compatibility issue.
+ */
+ UCharCharacterIterator fSCharIter {u"", 0};
+
+ /**
+ * True when iteration has run off the end, and iterator functions should return UBRK_DONE.
+ */
+ bool fDone = false;
+
+ /**
+ * Array of look-ahead tentative results.
+ */
+ int32_t *fLookAheadMatches = nullptr;
+
+ /**
+ * A flag to indicate if phrase based breaking is enabled.
+ */
+ UBool fIsPhraseBreaking = false;
+
+ //=======================================================================
+ // constructors
+ //=======================================================================
+
+ /**
+ * Constructor from a flattened set of RBBI data in malloced memory.
+ * RulesBasedBreakIterators built from a custom set of rules
+ * are created via this constructor; the rules are compiled
+ * into memory, then the break iterator is constructed here.
+ *
+ * The break iterator adopts the memory, and will
+ * free it when done.
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(RBBIDataHeader* data, UErrorCode &status);
+
+ /**
+ * This constructor uses the udata interface to create a BreakIterator
+ * whose internal tables live in a memory-mapped file. "image" is an
+ * ICU UDataMemory handle for the pre-compiled break iterator tables.
+ * @param image handle to the memory image for the break iterator data.
+ * Ownership of the UDataMemory handle passes to the Break Iterator,
+ * which will be responsible for closing it when it is no longer needed.
+ * @param status Information on any errors encountered.
+ * @param isPhraseBreaking true if phrase based breaking is required, otherwise false.
+ * @see udata_open
+ * @see #getBinaryRules
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(UDataMemory* image, UBool isPhraseBreaking, UErrorCode &status);
+
+ /** @internal */
+ friend class RBBIRuleBuilder;
+ /** @internal */
+ friend class BreakIterator;
+
+ /**
+ * Default constructor with an error code parameter.
+ * Aside from error handling, otherwise identical to the default constructor.
+ * Internally, handles common initialization for other constructors.
+ * @internal (private)
+ */
+ RuleBasedBreakIterator(UErrorCode *status);
+
+public:
+
+ /** Default constructor. Creates an empty shell of an iterator, with no
+ * rules or text to iterate over. Object can subsequently be assigned to,
+ * but is otherwise unusable.
+ * @stable ICU 2.2
+ */
+ RuleBasedBreakIterator();
+
+ /**
+ * Copy constructor. Will produce a break iterator with the same behavior,
+ * and which iterates over the same text, as the one passed in.
+ * @param that The RuleBasedBreakIterator passed to be copied
+ * @stable ICU 2.0
+ */
+ RuleBasedBreakIterator(const RuleBasedBreakIterator& that);
+
+ /**
+ * Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
+ * @param rules The break rules to be used.
+ * @param parseError In the event of a syntax error in the rules, provides the location
+ * within the rules of the problem.
+ * @param status Information on any errors encountered.
+ * @stable ICU 2.2
+ */
+ RuleBasedBreakIterator( const UnicodeString &rules,
+ UParseError &parseError,
+ UErrorCode &status);
+
+ /**
+ * Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
+ * Binary rules are obtained from RulesBasedBreakIterator::getBinaryRules().
+ * Construction of a break iterator in this way is substantially faster than
+ * construction from source rules.
+ *
+ * Ownership of the storage containing the compiled rules remains with the
+ * caller of this function. The compiled rules must not be modified or
+ * deleted during the life of the break iterator.
+ *
+ * The compiled rules are not compatible across different major versions of ICU.
+ * The compiled rules are compatible only between machines with the same
+ * byte ordering (little or big endian) and the same base character set family
+ * (ASCII or EBCDIC).
+ *
+ * @see #getBinaryRules
+ * @param compiledRules A pointer to the compiled break rules to be used.
+ * @param ruleLength The length of the compiled break rules, in bytes. This
+ * corresponds to the length value produced by getBinaryRules().
+ * @param status Information on any errors encountered, including invalid
+ * binary rules.
+ * @stable ICU 4.8
+ */
+ RuleBasedBreakIterator(const uint8_t *compiledRules,
+ uint32_t ruleLength,
+ UErrorCode &status);
+
+ /**
+ * This constructor uses the udata interface to create a BreakIterator
+ * whose internal tables live in a memory-mapped file. "image" is an
+ * ICU UDataMemory handle for the pre-compiled break iterator tables.
+ * @param image handle to the memory image for the break iterator data.
+ * Ownership of the UDataMemory handle passes to the Break Iterator,
+ * which will be responsible for closing it when it is no longer needed.
+ * @param status Information on any errors encountered.
+ * @see udata_open
+ * @see #getBinaryRules
+ * @stable ICU 2.8
+ */
+ RuleBasedBreakIterator(UDataMemory* image, UErrorCode &status);
+
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~RuleBasedBreakIterator();
+
+ /**
+ * Assignment operator. Sets this iterator to have the same behavior,
+ * and iterate over the same text, as the one passed in.
+ * @param that The RuleBasedBreakItertor passed in
+ * @return the newly created RuleBasedBreakIterator
+ * @stable ICU 2.0
+ */
+ RuleBasedBreakIterator& operator=(const RuleBasedBreakIterator& that);
+
+ /**
+ * Equality operator. Returns true if both BreakIterators are of the
+ * same class, have the same behavior, and iterate over the same text.
+ * @param that The BreakIterator to be compared for equality
+ * @return true if both BreakIterators are of the
+ * same class, have the same behavior, and iterate over the same text.
+ * @stable ICU 2.0
+ */
+ virtual bool operator==(const BreakIterator& that) const override;
+
+ /**
+ * Not-equal operator. If operator== returns true, this returns false,
+ * and vice versa.
+ * @param that The BreakIterator to be compared for inequality
+ * @return true if both BreakIterators are not same.
+ * @stable ICU 2.0
+ */
+ inline bool operator!=(const BreakIterator& that) const {
+ return !operator==(that);
+ }
+
+ /**
+ * Returns a newly-constructed RuleBasedBreakIterator with the same
+ * behavior, and iterating over the same text, as this one.
+ * Differs from the copy constructor in that it is polymorphic, and
+ * will correctly clone (copy) a derived class.
+ * clone() is thread safe. Multiple threads may simultaneously
+ * clone the same source break iterator.
+ * @return a newly-constructed RuleBasedBreakIterator
+ * @stable ICU 2.0
+ */
+ virtual RuleBasedBreakIterator* clone() const override;
+
+ /**
+ * Compute a hash code for this BreakIterator
+ * @return A hash code
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ /**
+ * Returns the description used to create this iterator
+ * @return the description used to create this iterator
+ * @stable ICU 2.0
+ */
+ virtual const UnicodeString& getRules(void) const;
+
+ //=======================================================================
+ // BreakIterator overrides
+ //=======================================================================
+
+ /**
+ * <p>
+ * Return a CharacterIterator over the text being analyzed.
+ * The returned character iterator is owned by the break iterator, and must
+ * not be deleted by the caller. Repeated calls to this function may
+ * return the same CharacterIterator.
+ * </p>
+ * <p>
+ * The returned character iterator must not be used concurrently with
+ * the break iterator. If concurrent operation is needed, clone the
+ * returned character iterator first and operate on the clone.
+ * </p>
+ * <p>
+ * When the break iterator is operating on text supplied via a UText,
+ * this function will fail, returning a CharacterIterator containing no text.
+ * The function getUText() provides similar functionality,
+ * is reliable, and is more efficient.
+ * </p>
+ *
+ * TODO: deprecate this function?
+ *
+ * @return An iterator over the text being analyzed.
+ * @stable ICU 2.0
+ */
+ virtual CharacterIterator& getText(void) const override;
+
+
+ /**
+ * Get a UText for the text being analyzed.
+ * The returned UText is a shallow clone of the UText used internally
+ * by the break iterator implementation. It can safely be used to
+ * access the text without impacting any break iterator operations,
+ * but the underlying text itself must not be altered.
+ *
+ * @param fillIn A UText to be filled in. If nullptr, a new UText will be
+ * allocated to hold the result.
+ * @param status receives any error codes.
+ * @return The current UText for this break iterator. If an input
+ * UText was provided, it will always be returned.
+ * @stable ICU 3.4
+ */
+ virtual UText *getUText(UText *fillIn, UErrorCode &status) const override;
+
+ /**
+ * Set the iterator to analyze a new piece of text. This function resets
+ * the current iteration position to the beginning of the text.
+ * @param newText An iterator over the text to analyze. The BreakIterator
+ * takes ownership of the character iterator. The caller MUST NOT delete it!
+ * @stable ICU 2.0
+ */
+ virtual void adoptText(CharacterIterator* newText) override;
+
+ /**
+ * Set the iterator to analyze a new piece of text. This function resets
+ * the current iteration position to the beginning of the text.
+ *
+ * The BreakIterator will retain a reference to the supplied string.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param newText The text to analyze.
+ * @stable ICU 2.0
+ */
+ virtual void setText(const UnicodeString& newText) override;
+
+ /**
+ * Reset the break iterator to operate over the text represented by
+ * the UText. The iterator position is reset to the start.
+ *
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * Utext that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ *
+ * @param text The UText used to change the text.
+ * @param status Receives any error codes.
+ * @stable ICU 3.4
+ */
+ virtual void setText(UText *text, UErrorCode &status) override;
+
+ /**
+ * Sets the current iteration position to the beginning of the text, position zero.
+ * @return The offset of the beginning of the text, zero.
+ * @stable ICU 2.0
+ */
+ virtual int32_t first(void) override;
+
+ /**
+ * Sets the current iteration position to the end of the text.
+ * @return The text's past-the-end offset.
+ * @stable ICU 2.0
+ */
+ virtual int32_t last(void) override;
+
+ /**
+ * Advances the iterator either forward or backward the specified number of steps.
+ * Negative values move backward, and positive values move forward. This is
+ * equivalent to repeatedly calling next() or previous().
+ * @param n The number of steps to move. The sign indicates the direction
+ * (negative is backwards, and positive is forwards).
+ * @return The character offset of the boundary position n boundaries away from
+ * the current one.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(int32_t n) override;
+
+ /**
+ * Advances the iterator to the next boundary position.
+ * @return The position of the first boundary after this one.
+ * @stable ICU 2.0
+ */
+ virtual int32_t next(void) override;
+
+ /**
+ * Moves the iterator backwards, to the last boundary preceding this one.
+ * @return The position of the last boundary position preceding this one.
+ * @stable ICU 2.0
+ */
+ virtual int32_t previous(void) override;
+
+ /**
+ * Sets the iterator to refer to the first boundary position following
+ * the specified position.
+ * @param offset The position from which to begin searching for a break position.
+ * @return The position of the first break after the current position.
+ * @stable ICU 2.0
+ */
+ virtual int32_t following(int32_t offset) override;
+
+ /**
+ * Sets the iterator to refer to the last boundary position before the
+ * specified position.
+ * @param offset The position to begin searching for a break from.
+ * @return The position of the last boundary before the starting position.
+ * @stable ICU 2.0
+ */
+ virtual int32_t preceding(int32_t offset) override;
+
+ /**
+ * Returns true if the specified position is a boundary position. As a side
+ * effect, leaves the iterator pointing to the first boundary position at
+ * or after "offset".
+ * @param offset the offset to check.
+ * @return True if "offset" is a boundary position.
+ * @stable ICU 2.0
+ */
+ virtual UBool isBoundary(int32_t offset) override;
+
+ /**
+ * Returns the current iteration position. Note that UBRK_DONE is never
+ * returned from this function; if iteration has run to the end of a
+ * string, current() will return the length of the string while
+ * next() will return UBRK_DONE).
+ * @return The current iteration position.
+ * @stable ICU 2.0
+ */
+ virtual int32_t current(void) const override;
+
+
+ /**
+ * Return the status tag from the break rule that determined the boundary at
+ * the current iteration position. For break rules that do not specify a
+ * status, a default value of 0 is returned. If more than one break rule
+ * would cause a boundary to be located at some position in the text,
+ * the numerically largest of the applicable status values is returned.
+ * <p>
+ * Of the standard types of ICU break iterators, only word break and
+ * line break provide status values. The values are defined in
+ * the header file ubrk.h. For Word breaks, the status allows distinguishing between words
+ * that contain alphabetic letters, "words" that appear to be numbers,
+ * punctuation and spaces, words containing ideographic characters, and
+ * more. For Line Break, the status distinguishes between hard (mandatory) breaks
+ * and soft (potential) break positions.
+ * <p>
+ * <code>getRuleStatus()</code> can be called after obtaining a boundary
+ * position from <code>next()</code>, <code>previous()</code>, or
+ * any other break iterator functions that returns a boundary position.
+ * <p>
+ * Note that <code>getRuleStatus()</code> returns the value corresponding to
+ * <code>current()</code> index even after <code>next()</code> has returned DONE.
+ * <p>
+ * When creating custom break rules, one is free to define whatever
+ * status values may be convenient for the application.
+ * <p>
+ * @return the status from the break rule that determined the boundary
+ * at the current iteration position.
+ *
+ * @see UWordBreak
+ * @stable ICU 2.2
+ */
+ virtual int32_t getRuleStatus() const override;
+
+ /**
+ * Get the status (tag) values from the break rule(s) that determined the boundary
+ * at the current iteration position.
+ * <p>
+ * The returned status value(s) are stored into an array provided by the caller.
+ * The values are stored in sorted (ascending) order.
+ * If the capacity of the output array is insufficient to hold the data,
+ * the output will be truncated to the available length, and a
+ * U_BUFFER_OVERFLOW_ERROR will be signaled.
+ *
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from the rules that determined
+ * the boundary at the current iteration position.
+ * In the event of a U_BUFFER_OVERFLOW_ERROR, the return value
+ * is the total number of status values that were available,
+ * not the reduced number that were actually returned.
+ * @see getRuleStatus
+ * @stable ICU 3.0
+ */
+ virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override;
+
+ /**
+ * Returns a unique class ID POLYMORPHICALLY. Pure virtual override.
+ * This method is to implement a simple version of RTTI, since not all
+ * C++ compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * @return The class ID for this object. All objects of a
+ * given class have the same class ID. Objects of
+ * other classes have different class IDs.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const override;
+
+ /**
+ * Returns the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ *
+ * Base* polymorphic_pointer = createPolymorphicObject();
+ * if (polymorphic_pointer->getDynamicClassID() ==
+ * Derived::getStaticClassID()) ...
+ *
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+ /**
+ * Deprecated functionality. Use clone() instead.
+ *
+ * Create a clone (copy) of this break iterator in memory provided
+ * by the caller. The idea is to increase performance by avoiding
+ * a storage allocation. Use of this function is NOT RECOMMENDED.
+ * Performance gains are minimal, and correct buffer management is
+ * tricky. Use clone() instead.
+ *
+ * @param stackBuffer The pointer to the memory into which the cloned object
+ * should be placed. If nullptr, allocate heap memory
+ * for the cloned object.
+ * @param BufferSize The size of the buffer. If zero, return the required
+ * buffer size, but do not clone the object. If the
+ * size was too small (but not zero), allocate heap
+ * storage for the cloned object.
+ *
+ * @param status Error status. U_SAFECLONE_ALLOCATED_WARNING will be
+ * returned if the provided buffer was too small, and
+ * the clone was therefore put on the heap.
+ *
+ * @return Pointer to the clone object. This may differ from the stackBuffer
+ * address if the byte alignment of the stack buffer was not suitable
+ * or if the stackBuffer was too small to hold the clone.
+ * @deprecated ICU 52. Use clone() instead.
+ */
+ virtual RuleBasedBreakIterator *createBufferClone(void *stackBuffer,
+ int32_t &BufferSize,
+ UErrorCode &status) override;
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+ /**
+ * Return the binary form of compiled break rules,
+ * which can then be used to create a new break iterator at some
+ * time in the future. Creating a break iterator from pre-compiled rules
+ * is much faster than building one from the source form of the
+ * break rules.
+ *
+ * The binary data can only be used with the same version of ICU
+ * and on the same platform type (processor endian-ness)
+ *
+ * @param length Returns the length of the binary data. (Out parameter.)
+ *
+ * @return A pointer to the binary (compiled) rule data. The storage
+ * belongs to the RulesBasedBreakIterator object, not the
+ * caller, and must not be modified or deleted.
+ * @stable ICU 4.8
+ */
+ virtual const uint8_t *getBinaryRules(uint32_t &length);
+
+ /**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the matching state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator implementation never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized,
+ * system-level code. One example use case is with garbage collection that moves
+ * the text in memory.
+ *
+ * @param input The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ * @return *this
+ *
+ * @stable ICU 49
+ */
+ virtual RuleBasedBreakIterator &refreshInputText(UText *input, UErrorCode &status) override;
+
+
+private:
+ //=======================================================================
+ // implementation
+ //=======================================================================
+ /**
+ * Iterate backwards from an arbitrary position in the input text using the
+ * synthesized Safe Reverse rules.
+ * This locates a "Safe Position" from which the forward break rules
+ * will operate correctly. A Safe Position is not necessarily a boundary itself.
+ *
+ * @param fromPosition the position in the input text to begin the iteration.
+ * @internal (private)
+ */
+ int32_t handleSafePrevious(int32_t fromPosition);
+
+ /**
+ * Find a rule-based boundary by running the state machine.
+ * Input
+ * fPosition, the position in the text to begin from.
+ * Output
+ * fPosition: the boundary following the starting position.
+ * fDictionaryCharCount the number of dictionary characters encountered.
+ * If > 0, the segment will be further subdivided
+ * fRuleStatusIndex Info from the state table indicating which rules caused the boundary.
+ *
+ * @internal (private)
+ */
+ int32_t handleNext();
+
+ /*
+ * Templatized version of handleNext() and handleSafePrevious().
+ *
+ * There will be exactly four instantiations, two each for 8 and 16 bit tables,
+ * two each for 8 and 16 bit trie.
+ * Having separate instantiations for the table types keeps conditional tests of
+ * the table type out of the inner loops, at the expense of replicated code.
+ *
+ * The template parameter for the Trie access function is a value, not a type.
+ * Doing it this way, the compiler will inline the Trie function in the
+ * expanded functions. (Both the 8 and 16 bit access functions have the same type
+ * signature)
+ */
+
+ typedef uint16_t (*PTrieFunc)(const UCPTrie *, UChar32);
+
+ template<typename RowType, PTrieFunc trieFunc>
+ int32_t handleSafePrevious(int32_t fromPosition);
+
+ template<typename RowType, PTrieFunc trieFunc>
+ int32_t handleNext();
+
+
+ /**
+ * This function returns the appropriate LanguageBreakEngine for a
+ * given character c.
+ * @param c A character in the dictionary set
+ * @internal (private)
+ */
+ const LanguageBreakEngine *getLanguageBreakEngine(UChar32 c);
+
+ public:
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Debugging function only.
+ * @internal
+ */
+ void dumpCache();
+
+ /**
+ * Debugging function only.
+ * @internal
+ */
+ void dumpTables();
+#endif /* U_HIDE_INTERNAL_API */
+};
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/rep.h b/intl/icu/source/common/unicode/rep.h
new file mode 100644
index 0000000000..7115c97b82
--- /dev/null
+++ b/intl/icu/source/common/unicode/rep.h
@@ -0,0 +1,266 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**************************************************************************
+* Copyright (C) 1999-2012, International Business Machines Corporation and
+* others. All Rights Reserved.
+**************************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation. Ported from java. Modified to
+* match current UnicodeString API. Forced
+* to use name "handleReplaceBetween" because
+* of existing methods in UnicodeString.
+**************************************************************************
+*/
+
+#ifndef REP_H
+#define REP_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Replaceable String
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeString;
+
+/**
+ * <code>Replaceable</code> is an abstract base class representing a
+ * string of characters that supports the replacement of a range of
+ * itself with a new string of characters. It is used by APIs that
+ * change a piece of text while retaining metadata. Metadata is data
+ * other than the Unicode characters returned by char32At(). One
+ * example of metadata is style attributes; another is an edit
+ * history, marking each character with an author and revision number.
+ *
+ * <p>An implicit aspect of the <code>Replaceable</code> API is that
+ * during a replace operation, new characters take on the metadata of
+ * the old characters. For example, if the string "the <b>bold</b>
+ * font" has range (4, 8) replaced with "strong", then it becomes "the
+ * <b>strong</b> font".
+ *
+ * <p><code>Replaceable</code> specifies ranges using a start
+ * offset and a limit offset. The range of characters thus specified
+ * includes the characters at offset start..limit-1. That is, the
+ * start offset is inclusive, and the limit offset is exclusive.
+ *
+ * <p><code>Replaceable</code> also includes API to access characters
+ * in the string: <code>length()</code>, <code>charAt()</code>,
+ * <code>char32At()</code>, and <code>extractBetween()</code>.
+ *
+ * <p>For a subclass to support metadata, typical behavior of
+ * <code>replace()</code> is the following:
+ * <ul>
+ * <li>Set the metadata of the new text to the metadata of the first
+ * character replaced</li>
+ * <li>If no characters are replaced, use the metadata of the
+ * previous character</li>
+ * <li>If there is no previous character (i.e. start == 0), use the
+ * following character</li>
+ * <li>If there is no following character (i.e. the replaceable was
+ * empty), use default metadata.<br>
+ * <li>If the code point U+FFFF is seen, it should be interpreted as
+ * a special marker having no metadata<li>
+ * </li>
+ * </ul>
+ * If this is not the behavior, the subclass should document any differences.
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API Replaceable : public UObject {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~Replaceable();
+
+ /**
+ * Returns the number of 16-bit code units in the text.
+ * @return number of 16-bit code units in text
+ * @stable ICU 1.8
+ */
+ inline int32_t length() const;
+
+ /**
+ * Returns the 16-bit code unit at the given offset into the text.
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 16-bit code unit of text at given offset
+ * @stable ICU 1.8
+ */
+ inline char16_t charAt(int32_t offset) const;
+
+ /**
+ * Returns the 32-bit code point at the given 16-bit offset into
+ * the text. This assumes the text is stored as 16-bit code units
+ * with surrogate pairs intermixed. If the offset of a leading or
+ * trailing code unit of a surrogate pair is given, return the
+ * code point of the surrogate pair.
+ *
+ * @param offset an integer between 0 and <code>length()</code>-1
+ * inclusive
+ * @return 32-bit code point of text at given offset
+ * @stable ICU 1.8
+ */
+ inline UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Copies characters in the range [<tt>start</tt>, <tt>limit</tt>)
+ * into the UnicodeString <tt>target</tt>.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param target UnicodeString into which to copy characters.
+ * @return A reference to <TT>target</TT>
+ * @stable ICU 2.1
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const = 0;
+
+ /**
+ * Replaces a substring of this object with the given text. If the
+ * characters being replaced have metadata, the new characters
+ * that replace them should be given the same metadata.
+ *
+ * <p>Subclasses must ensure that if the text between start and
+ * limit is equal to the replacement text, that replace has no
+ * effect. That is, any metadata
+ * should be unaffected. In addition, subclasses are encouraged to
+ * check for initial and trailing identical characters, and make a
+ * smaller replacement if possible. This will preserve as much
+ * metadata as possible.
+ * @param start the beginning index, inclusive; <code>0 <= start
+ * <= limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit
+ * <= length()</code>.
+ * @param text the text to replace characters <code>start</code>
+ * to <code>limit - 1</code>
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) = 0;
+ // Note: All other methods in this class take the names of
+ // existing UnicodeString methods. This method is the exception.
+ // It is named differently because all replace methods of
+ // UnicodeString return a UnicodeString&. The 'between' is
+ // required in order to conform to the UnicodeString naming
+ // convention; API taking start/length are named <operation>, and
+ // those taking start/limit are named <operationBetween>. The
+ // 'handle' is added because 'replaceBetween' and
+ // 'doReplaceBetween' are already taken.
+
+ /**
+ * Copies a substring of this object, retaining metadata.
+ * This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; <code>0 <= start <=
+ * limit</code>.
+ * @param limit the ending index, exclusive; <code>start <= limit <=
+ * length()</code>.
+ * @param dest the destination index. The characters from
+ * <code>start..limit-1</code> will be copied to <code>dest</code>.
+ * Implementations of this method may assume that <code>dest <= start ||
+ * dest >= limit</code>.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest) = 0;
+
+ /**
+ * Returns true if this object contains metadata. If a
+ * Replaceable object has metadata, calls to the Replaceable API
+ * must be made so as to preserve metadata. If it does not, calls
+ * to the Replaceable API may be optimized to improve performance.
+ * The default implementation returns true.
+ * @return true if this object contains metadata
+ * @stable ICU 2.2
+ */
+ virtual UBool hasMetaData() const;
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then nullptr is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual Replaceable *clone() const;
+
+protected:
+
+ /**
+ * Default constructor.
+ * @stable ICU 2.4
+ */
+ inline Replaceable();
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ Replaceable &Replaceable::operator=(const Replaceable &);
+ */
+
+ /**
+ * Virtual version of length().
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const = 0;
+
+ /**
+ * Virtual version of charAt().
+ * @stable ICU 2.4
+ */
+ virtual char16_t getCharAt(int32_t offset) const = 0;
+
+ /**
+ * Virtual version of char32At().
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const = 0;
+};
+
+inline Replaceable::Replaceable() {}
+
+inline int32_t
+Replaceable::length() const {
+ return getLength();
+}
+
+inline char16_t
+Replaceable::charAt(int32_t offset) const {
+ return getCharAt(offset);
+}
+
+inline UChar32
+Replaceable::char32At(int32_t offset) const {
+ return getChar32At(offset);
+}
+
+// There is no rep.cpp, see unistr.cpp for Replaceable function implementations.
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/resbund.h b/intl/icu/source/common/unicode/resbund.h
new file mode 100644
index 0000000000..30fc2ac0ab
--- /dev/null
+++ b/intl/icu/source/common/unicode/resbund.h
@@ -0,0 +1,498 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1996-2013, International Business Machines Corporation
+* and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File resbund.h
+*
+* CREATED BY
+* Richard Gillam
+*
+* Modification History:
+*
+* Date Name Description
+* 2/5/97 aliu Added scanForLocaleInFile. Added
+* constructor which attempts to read resource bundle
+* from a specific file, without searching other files.
+* 2/11/97 aliu Added UErrorCode return values to constructors. Fixed
+* infinite loops in scanForFile and scanForLocale.
+* Modified getRawResourceData to not delete storage
+* in localeData and resourceData which it doesn't own.
+* Added Mac compatibility #ifdefs for tellp() and
+* ios::nocreate.
+* 2/18/97 helena Updated with 100% documentation coverage.
+* 3/13/97 aliu Rewrote to load in entire resource bundle and store
+* it as a Hashtable of ResourceBundleData objects.
+* Added state table to govern parsing of files.
+* Modified to load locale index out of new file
+* distinct from default.txt.
+* 3/25/97 aliu Modified to support 2-d arrays, needed for timezone
+* data. Added support for custom file suffixes. Again,
+* needed to support timezone data.
+* 4/7/97 aliu Cleaned up.
+* 03/02/99 stephen Removed dependency on FILE*.
+* 03/29/99 helena Merged Bertrand and Stephen's changes.
+* 06/11/99 stephen Removed parsing of .txt files.
+* Reworked to use new binary format.
+* Cleaned up.
+* 06/14/99 stephen Removed methods taking a filename suffix.
+* 11/09/99 weiv Added getLocale(), fRealLocale, removed fRealLocaleID
+******************************************************************************
+*/
+
+#ifndef RESBUND_H
+#define RESBUND_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/ures.h"
+#include "unicode/unistr.h"
+#include "unicode/locid.h"
+
+/**
+ * \file
+ * \brief C++ API: Resource Bundle
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A class representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale- specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * <P>
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * <a href="https://github.com/unicode-org/icu-docs/blob/main/design/bnf_rb.txt">BNF definition</a>.
+ * More on resource bundle concepts and syntax can be found in the
+ * <a href="https://unicode-org.github.io/icu/userguide/locale/resources">Users Guide</a>.
+ * <P>
+ *
+ * The ResourceBundle class is not suitable for subclassing.
+ *
+ * @stable ICU 2.0
+ */
+class U_COMMON_API ResourceBundle : public UObject {
+public:
+ /**
+ * Constructor
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * @param locale This is the locale this resource bundle is for. To get resources
+ * for the French locale, for example, you would create a
+ * ResourceBundle passing Locale::FRENCH for the "locale" parameter,
+ * and all subsequent calls to that resource bundle will return
+ * resources that pertain to the French locale. If the caller doesn't
+ * pass a locale parameter, the default locale for the system (as
+ * returned by Locale::getDefault()) will be used.
+ * @param err The Error Code.
+ * The UErrorCode& err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational error results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data was used; neither the requested locale nor any of its
+ * fall back locales could be found.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const UnicodeString& packageName,
+ const Locale& locale,
+ UErrorCode& err);
+
+ /**
+ * Construct a resource bundle for the default bundle in the specified package.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const UnicodeString& packageName,
+ UErrorCode& err);
+
+ /**
+ * Construct a resource bundle for the ICU default bundle.
+ *
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(UErrorCode &err);
+
+ /**
+ * Standard constructor, constructs a resource bundle for the locale-specific
+ * bundle in the specified package.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated.
+ * nullptr is used to refer to ICU data.
+ * @param locale The locale for which to open a resource bundle.
+ * @param err A UErrorCode value
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const char* packageName,
+ const Locale& locale,
+ UErrorCode& err);
+
+ /**
+ * Copy constructor.
+ *
+ * @param original The resource bundle to copy.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(const ResourceBundle &original);
+
+ /**
+ * Constructor from a C UResourceBundle. The resource bundle is
+ * copied and not adopted. ures_close will still need to be used on the
+ * original resource bundle.
+ *
+ * @param res A pointer to the C resource bundle.
+ * @param status A UErrorCode value.
+ * @stable ICU 2.0
+ */
+ ResourceBundle(UResourceBundle *res,
+ UErrorCode &status);
+
+ /**
+ * Assignment operator.
+ *
+ * @param other The resource bundle to copy.
+ * @stable ICU 2.0
+ */
+ ResourceBundle&
+ operator=(const ResourceBundle& other);
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~ResourceBundle();
+
+ /**
+ * Clone this object.
+ * Clones can be used concurrently in multiple threads.
+ * If an error occurs, then nullptr is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ ResourceBundle *clone() const;
+
+ /**
+ * Returns the size of a resource. Size for scalar types is always 1, and for vector/table types is
+ * the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
+ *
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+ int32_t
+ getSize(void) const;
+
+ /**
+ * returns a string from a string resource type
+ *
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a zero-terminated char16_t array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getString(UErrorCode& status) const;
+
+ /**
+ * returns a binary data from a resource. Can be used at most primitive resource types (binaries,
+ * strings, ints)
+ *
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ const uint8_t*
+ getBinary(int32_t& len, UErrorCode& status) const;
+
+
+ /**
+ * returns an integer vector from a resource.
+ *
+ * @param len fills in the length of resulting integer vector
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a vector of integers that lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+ const int32_t*
+ getIntVector(int32_t& len, UErrorCode& status) const;
+
+ /**
+ * returns an unsigned integer from a resource.
+ * This integer is originally 28 bits.
+ *
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an unsigned integer value
+ * @stable ICU 2.0
+ */
+ uint32_t
+ getUInt(UErrorCode& status) const;
+
+ /**
+ * returns a signed integer from a resource.
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a warning
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a signed integer value
+ * @stable ICU 2.0
+ */
+ int32_t
+ getInt(UErrorCode& status) const;
+
+ /**
+ * Checks whether the resource has another element to iterate over.
+ *
+ * @return true if there are more elements, false if there is no more elements
+ * @stable ICU 2.0
+ */
+ UBool
+ hasNext(void) const;
+
+ /**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @stable ICU 2.0
+ */
+ void
+ resetIterator(void);
+
+ /**
+ * Returns the key associated with this resource. Not all the resources have a key - only
+ * those that are members of a table.
+ *
+ * @return a key associated to this resource, or nullptr if it doesn't have a key
+ * @stable ICU 2.0
+ */
+ const char*
+ getKey(void) const;
+
+ /**
+ * Gets the locale ID of the resource bundle as a string.
+ * Same as getLocale().getName() .
+ *
+ * @return the locale ID of the resource bundle as a string
+ * @stable ICU 2.0
+ */
+ const char*
+ getName(void) const;
+
+
+ /**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @return type of the given resource.
+ * @stable ICU 2.0
+ */
+ UResType
+ getType(void) const;
+
+ /**
+ * Returns the next resource in a given resource or nullptr if there are no more resources
+ *
+ * @param status fills in the outgoing error code
+ * @return ResourceBundle object.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ getNext(UErrorCode& status);
+
+ /**
+ * Returns the next string in a resource or nullptr if there are no more resources
+ * to iterate over.
+ *
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getNextString(UErrorCode& status);
+
+ /**
+ * Returns the next string in a resource or nullptr if there are no more resources
+ * to iterate over.
+ *
+ * @param key fill in for key associated with this string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object.
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getNextString(const char ** key,
+ UErrorCode& status);
+
+ /**
+ * Returns the resource in a resource at the specified index.
+ *
+ * @param index an index to the wanted resource.
+ * @param status fills in the outgoing error code
+ * @return ResourceBundle object. If there is an error, resource is invalid.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ get(int32_t index,
+ UErrorCode& status) const;
+
+ /**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param index an index to the wanted string.
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getStringEx(int32_t index,
+ UErrorCode& status) const;
+
+ /**
+ * Returns a resource in a resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param key a key associated with the wanted resource
+ * @param status fills in the outgoing error code.
+ * @return ResourceBundle object. If there is an error, resource is invalid.
+ * @stable ICU 2.0
+ */
+ ResourceBundle
+ get(const char* key,
+ UErrorCode& status) const;
+
+ /**
+ * Returns a string in a resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param key a key associated with the wanted string
+ * @param status fills in the outgoing error code
+ * @return an UnicodeString object. If there is an error, string is bogus
+ * @stable ICU 2.0
+ */
+ UnicodeString
+ getStringEx(const char* key,
+ UErrorCode& status) const;
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use getVersion, as this method is going to be deprecated.
+ *
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see getVersion
+ * @deprecated ICU 2.8 Use getVersion instead.
+ */
+ const char*
+ getVersionNumber(void) const;
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Return the version number associated with this ResourceBundle as a UVersionInfo array.
+ *
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ * as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+ void
+ getVersion(UVersionInfo versionInfo) const;
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Return the Locale associated with this ResourceBundle.
+ *
+ * @return a Locale object
+ * @deprecated ICU 2.8 Use getLocale(ULocDataLocaleType type, UErrorCode &status) overload instead.
+ */
+ const Locale&
+ getLocale(void) const;
+#endif /* U_HIDE_DEPRECATED_API */
+
+ /**
+ * Return the Locale associated with this ResourceBundle.
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ *
+ * @return a Locale object
+ * @stable ICU 2.8
+ */
+ const Locale
+ getLocale(ULocDataLocaleType type, UErrorCode &status) const;
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * This API implements multilevel fallback
+ * @internal
+ */
+ ResourceBundle
+ getWithFallback(const char* key, UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const override;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+private:
+ ResourceBundle() = delete; // default constructor not implemented
+
+ UResourceBundle *fResource;
+ void constructForLocale(const UnicodeString& path, const Locale& locale, UErrorCode& error);
+ Locale *fLocale;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/schriter.h b/intl/icu/source/common/unicode/schriter.h
new file mode 100644
index 0000000000..a2ab17982d
--- /dev/null
+++ b/intl/icu/source/common/unicode/schriter.h
@@ -0,0 +1,187 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1998-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+*
+* File schriter.h
+*
+* Modification History:
+*
+* Date Name Description
+* 05/05/99 stephen Cleaned up.
+******************************************************************************
+*/
+
+#ifndef SCHRITER_H
+#define SCHRITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/chariter.h"
+#include "unicode/uchriter.h"
+
+/**
+ * \file
+ * \brief C++ API: String Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a UnicodeString.
+ * It's possible not only to create an
+ * iterator that iterates over an entire UnicodeString, but also to
+ * create one that iterates over only a subrange of a UnicodeString
+ * (iterators over different subranges of the same UnicodeString don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API StringCharacterIterator : public UCharCharacterIterator {
+public:
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The UnicodeString object is copied.
+ * The iteration range is the whole string, and the starting position is 0.
+ * @param textStr The unicode string used to create an iterator
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr);
+
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The iteration range is the whole string, and the starting
+ * position is specified by "textPos". If "textPos" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textStr The unicode string used to create an iterator
+ * @param textPos The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr,
+ int32_t textPos);
+
+ /**
+ * Create an iterator over the UnicodeString referred to by "textStr".
+ * The UnicodeString object is copied.
+ * The iteration range begins with the code unit specified by
+ * "textBegin" and ends with the code unit BEFORE the code unit specified
+ * by "textEnd". The starting position is specified by "textPos". If
+ * "textBegin" and "textEnd" don't form a valid range on "text" (i.e.,
+ * textBegin >= textEnd or either is negative or greater than text.size()),
+ * or "textPos" is outside the range defined by "textBegin" and "textEnd",
+ * the behavior of this iterator is undefined.
+ * @param textStr The unicode string used to create the StringCharacterIterator
+ * @param textBegin The begin position of the iteration range
+ * @param textEnd The end position of the iteration range
+ * @param textPos The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const UnicodeString& textStr,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t textPos);
+
+ /**
+ * Copy constructor. The new iterator iterates over the same range
+ * of the same string as "that", and its initial position is the
+ * same as "that"'s current position.
+ * The UnicodeString object in "that" is copied.
+ * @param that The StringCharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator(const StringCharacterIterator& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~StringCharacterIterator();
+
+ /**
+ * Assignment operator. *this is altered to iterate over the same
+ * range of the same string as "that", and refers to the same
+ * character within that string as "that" does.
+ * @param that The object to be copied.
+ * @return the newly created object.
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator&
+ operator=(const StringCharacterIterator& that);
+
+ /**
+ * Returns true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @param that The ForwardCharacterIterator to be compared for equality
+ * @return true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @stable ICU 2.0
+ */
+ virtual bool operator==(const ForwardCharacterIterator& that) const override;
+
+ /**
+ * Returns a new StringCharacterIterator referring to the same
+ * character in the same range of the same string as this one. The
+ * caller must delete the new iterator.
+ * @return the newly cloned object.
+ * @stable ICU 2.0
+ */
+ virtual StringCharacterIterator* clone() const override;
+
+ /**
+ * Sets the iterator to iterate over the provided string.
+ * @param newText The string to be iterated over
+ * @stable ICU 2.0
+ */
+ void setText(const UnicodeString& newText);
+
+ /**
+ * Copies the UnicodeString under iteration into the UnicodeString
+ * referred to by "result". Even if this iterator iterates across
+ * only a part of this string, the whole string is copied.
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) override;
+
+ /**
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const override;
+
+ /**
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+protected:
+ /**
+ * Default constructor, iteration over empty string.
+ * @stable ICU 2.0
+ */
+ StringCharacterIterator();
+
+ /**
+ * Copy of the iterated string object.
+ * @stable ICU 2.0
+ */
+ UnicodeString text;
+
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/simpleformatter.h b/intl/icu/source/common/unicode/simpleformatter.h
new file mode 100644
index 0000000000..7f58106fad
--- /dev/null
+++ b/intl/icu/source/common/unicode/simpleformatter.h
@@ -0,0 +1,341 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* simpleformatter.h
+*/
+
+#ifndef __SIMPLEFORMATTER_H__
+#define __SIMPLEFORMATTER_H__
+
+/**
+ * \file
+ * \brief C++ API: Simple formatter, minimal subset of MessageFormat.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+
+// Forward declaration:
+namespace number {
+namespace impl {
+class SimpleModifier;
+}
+}
+
+/**
+ * Formats simple patterns like "{1} was born in {0}".
+ * Minimal subset of MessageFormat; fast, simple, minimal dependencies.
+ * Supports only numbered arguments with no type nor style parameters,
+ * and formats only string values.
+ * Quoting via ASCII apostrophe compatible with ICU MessageFormat default behavior.
+ *
+ * Factory methods set error codes for syntax errors
+ * and for too few or too many arguments/placeholders.
+ *
+ * SimpleFormatter objects are thread-safe except for assignment and applying new patterns.
+ *
+ * Example:
+ * <pre>
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * SimpleFormatter fmt("{1} '{born}' in {0}", errorCode);
+ * UnicodeString result;
+ *
+ * // Output: "paul {born} in england"
+ * fmt.format("england", "paul", result, errorCode);
+ * </pre>
+ *
+ * This class is not intended for public subclassing.
+ *
+ * @see MessageFormat
+ * @see UMessagePatternApostropheMode
+ * @stable ICU 57
+ */
+class U_COMMON_API SimpleFormatter final : public UMemory {
+public:
+ /**
+ * Default constructor.
+ * @stable ICU 57
+ */
+ SimpleFormatter() : compiledPattern((char16_t)0) {}
+
+ /**
+ * Constructs a formatter from the pattern string.
+ *
+ * @param pattern The pattern string.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
+ * @stable ICU 57
+ */
+ SimpleFormatter(const UnicodeString& pattern, UErrorCode &errorCode) {
+ applyPattern(pattern, errorCode);
+ }
+
+ /**
+ * Constructs a formatter from the pattern string.
+ * The number of arguments checked against the given limits is the
+ * highest argument number plus one, not the number of occurrences of arguments.
+ *
+ * @param pattern The pattern string.
+ * @param min The pattern must have at least this many arguments.
+ * @param max The pattern must have at most this many arguments.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
+ * too few or too many arguments.
+ * @stable ICU 57
+ */
+ SimpleFormatter(const UnicodeString& pattern, int32_t min, int32_t max,
+ UErrorCode &errorCode) {
+ applyPatternMinMaxArguments(pattern, min, max, errorCode);
+ }
+
+ /**
+ * Copy constructor.
+ * @stable ICU 57
+ */
+ SimpleFormatter(const SimpleFormatter& other)
+ : compiledPattern(other.compiledPattern) {}
+
+ /**
+ * Assignment operator.
+ * @stable ICU 57
+ */
+ SimpleFormatter &operator=(const SimpleFormatter& other);
+
+ /**
+ * Destructor.
+ * @stable ICU 57
+ */
+ ~SimpleFormatter();
+
+ /**
+ * Changes this object according to the new pattern.
+ *
+ * @param pattern The pattern string.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax.
+ * @return true if U_SUCCESS(errorCode).
+ * @stable ICU 57
+ */
+ UBool applyPattern(const UnicodeString &pattern, UErrorCode &errorCode) {
+ return applyPatternMinMaxArguments(pattern, 0, INT32_MAX, errorCode);
+ }
+
+ /**
+ * Changes this object according to the new pattern.
+ * The number of arguments checked against the given limits is the
+ * highest argument number plus one, not the number of occurrences of arguments.
+ *
+ * @param pattern The pattern string.
+ * @param min The pattern must have at least this many arguments.
+ * @param max The pattern must have at most this many arguments.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * Set to U_ILLEGAL_ARGUMENT_ERROR for bad argument syntax and
+ * too few or too many arguments.
+ * @return true if U_SUCCESS(errorCode).
+ * @stable ICU 57
+ */
+ UBool applyPatternMinMaxArguments(const UnicodeString &pattern,
+ int32_t min, int32_t max, UErrorCode &errorCode);
+
+ /**
+ * @return The max argument number + 1.
+ * @stable ICU 57
+ */
+ int32_t getArgumentLimit() const {
+ return getArgumentLimit(compiledPattern.getBuffer(), compiledPattern.length());
+ }
+
+ /**
+ * Formats the given value, appending to the appendTo builder.
+ * The argument value must not be the same object as appendTo.
+ * getArgumentLimit() must be at most 1.
+ *
+ * @param value0 Value for argument {0}.
+ * @param appendTo Gets the formatted pattern and value appended.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &format(
+ const UnicodeString &value0,
+ UnicodeString &appendTo, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, appending to the appendTo builder.
+ * An argument value must not be the same object as appendTo.
+ * getArgumentLimit() must be at most 2.
+ *
+ * @param value0 Value for argument {0}.
+ * @param value1 Value for argument {1}.
+ * @param appendTo Gets the formatted pattern and values appended.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &format(
+ const UnicodeString &value0,
+ const UnicodeString &value1,
+ UnicodeString &appendTo, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, appending to the appendTo builder.
+ * An argument value must not be the same object as appendTo.
+ * getArgumentLimit() must be at most 3.
+ *
+ * @param value0 Value for argument {0}.
+ * @param value1 Value for argument {1}.
+ * @param value2 Value for argument {2}.
+ * @param appendTo Gets the formatted pattern and values appended.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &format(
+ const UnicodeString &value0,
+ const UnicodeString &value1,
+ const UnicodeString &value2,
+ UnicodeString &appendTo, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, appending to the appendTo string.
+ *
+ * @param values The argument values.
+ * An argument value must not be the same object as appendTo.
+ * Can be nullptr if valuesLength==getArgumentLimit()==0.
+ * @param valuesLength The length of the values array.
+ * Must be at least getArgumentLimit().
+ * @param appendTo Gets the formatted pattern and values appended.
+ * @param offsets offsets[i] receives the offset of where
+ * values[i] replaced pattern argument {i}.
+ * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
+ * If there is no {i} in the pattern, then offsets[i] is set to -1.
+ * @param offsetsLength The length of the offsets array.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return appendTo
+ * @stable ICU 57
+ */
+ UnicodeString &formatAndAppend(
+ const UnicodeString *const *values, int32_t valuesLength,
+ UnicodeString &appendTo,
+ int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
+
+ /**
+ * Formats the given values, replacing the contents of the result string.
+ * May optimize by actually appending to the result if it is the same object
+ * as the value corresponding to the initial argument in the pattern.
+ *
+ * @param values The argument values.
+ * An argument value may be the same object as result.
+ * Can be nullptr if valuesLength==getArgumentLimit()==0.
+ * @param valuesLength The length of the values array.
+ * Must be at least getArgumentLimit().
+ * @param result Gets its contents replaced by the formatted pattern and values.
+ * @param offsets offsets[i] receives the offset of where
+ * values[i] replaced pattern argument {i}.
+ * Can be shorter or longer than values. Can be nullptr if offsetsLength==0.
+ * If there is no {i} in the pattern, then offsets[i] is set to -1.
+ * @param offsetsLength The length of the offsets array.
+ * @param errorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return result
+ * @stable ICU 57
+ */
+ UnicodeString &formatAndReplace(
+ const UnicodeString *const *values, int32_t valuesLength,
+ UnicodeString &result,
+ int32_t *offsets, int32_t offsetsLength, UErrorCode &errorCode) const;
+
+ /**
+ * Returns the pattern text with none of the arguments.
+ * Like formatting with all-empty string values.
+ * @stable ICU 57
+ */
+ UnicodeString getTextWithNoArguments() const {
+ return getTextWithNoArguments(
+ compiledPattern.getBuffer(),
+ compiledPattern.length(),
+ nullptr,
+ 0);
+ }
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Returns the pattern text with none of the arguments.
+ * Like formatting with all-empty string values.
+ *
+ * TODO(ICU-20406): Replace this with an Iterator interface.
+ *
+ * @param offsets offsets[i] receives the offset of where {i} was located
+ * before it was replaced by an empty string.
+ * For example, "a{0}b{1}" produces offset 1 for i=0 and 2 for i=1.
+ * Can be nullptr if offsetsLength==0.
+ * If there is no {i} in the pattern, then offsets[i] is set to -1.
+ * @param offsetsLength The length of the offsets array.
+ *
+ * @internal
+ */
+ UnicodeString getTextWithNoArguments(int32_t *offsets, int32_t offsetsLength) const {
+ return getTextWithNoArguments(
+ compiledPattern.getBuffer(),
+ compiledPattern.length(),
+ offsets,
+ offsetsLength);
+ }
+#endif // U_HIDE_INTERNAL_API
+
+private:
+ /**
+ * Binary representation of the compiled pattern.
+ * Index 0: One more than the highest argument number.
+ * Followed by zero or more arguments or literal-text segments.
+ *
+ * An argument is stored as its number, less than ARG_NUM_LIMIT.
+ * A literal-text segment is stored as its length (at least 1) offset by ARG_NUM_LIMIT,
+ * followed by that many chars.
+ */
+ UnicodeString compiledPattern;
+
+ static inline int32_t getArgumentLimit(const char16_t *compiledPattern,
+ int32_t compiledPatternLength) {
+ return compiledPatternLength == 0 ? 0 : compiledPattern[0];
+ }
+
+ static UnicodeString getTextWithNoArguments(
+ const char16_t *compiledPattern,
+ int32_t compiledPatternLength,
+ int32_t *offsets,
+ int32_t offsetsLength);
+
+ static UnicodeString &format(
+ const char16_t *compiledPattern, int32_t compiledPatternLength,
+ const UnicodeString *const *values,
+ UnicodeString &result, const UnicodeString *resultCopy, UBool forbidResultAsValue,
+ int32_t *offsets, int32_t offsetsLength,
+ UErrorCode &errorCode);
+
+ // Give access to internals to SimpleModifier for number formatting
+ friend class number::impl::SimpleModifier;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __SIMPLEFORMATTER_H__
diff --git a/intl/icu/source/common/unicode/std_string.h b/intl/icu/source/common/unicode/std_string.h
new file mode 100644
index 0000000000..bf87230167
--- /dev/null
+++ b/intl/icu/source/common/unicode/std_string.h
@@ -0,0 +1,41 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: std_string.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009feb19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __STD_STRING_H__
+#define __STD_STRING_H__
+
+/**
+ * \file
+ * \brief C++ API: Central ICU header for including the C++ standard &lt;string&gt;
+ * header and for related definitions.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+// Workaround for a libstdc++ bug before libstdc++4.6 (2011).
+// https://bugs.llvm.org/show_bug.cgi?id=13364
+#if defined(__GLIBCXX__)
+namespace std { class type_info; }
+#endif
+#include <string>
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __STD_STRING_H__
diff --git a/intl/icu/source/common/unicode/strenum.h b/intl/icu/source/common/unicode/strenum.h
new file mode 100644
index 0000000000..fba5c9b814
--- /dev/null
+++ b/intl/icu/source/common/unicode/strenum.h
@@ -0,0 +1,281 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+
+#ifndef STRENUM_H
+#define STRENUM_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: String Enumeration
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for 'pure' C++ implementations of uenum api. Adds a
+ * method that returns the next UnicodeString since in C++ this can
+ * be a common storage format for strings.
+ *
+ * <p>The model is that the enumeration is over strings maintained by
+ * a 'service.' At any point, the service might change, invalidating
+ * the enumerator (though this is expected to be rare). The iterator
+ * returns an error if this has occurred. Lack of the error is no
+ * guarantee that the service didn't change immediately after the
+ * call, so the returned string still might not be 'valid' on
+ * subsequent use.</p>
+ *
+ * <p>Strings may take the form of const char*, const char16_t*, or const
+ * UnicodeString*. The type you get is determine by the variant of
+ * 'next' that you call. In general the StringEnumeration is
+ * optimized for one of these types, but all StringEnumerations can
+ * return all types. Returned strings are each terminated with a NUL.
+ * Depending on the service data, they might also include embedded NUL
+ * characters, so API is provided to optionally return the true
+ * length, counting the embedded NULs but not counting the terminating
+ * NUL.</p>
+ *
+ * <p>The pointers returned by next, unext, and snext become invalid
+ * upon any subsequent call to the enumeration's destructor, next,
+ * unext, snext, or reset.</p>
+ *
+ * ICU 2.8 adds some default implementations and helper functions
+ * for subclasses.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API StringEnumeration : public UObject {
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~StringEnumeration();
+
+ /**
+ * Clone this object, an instance of a subclass of StringEnumeration.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then nullptr is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see getDynamicClassID
+ * @stable ICU 2.8
+ */
+ virtual StringEnumeration *clone() const;
+
+ /**
+ * <p>Return the number of elements that the iterator traverses. If
+ * the iterator is out of sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR, and the return value is zero.</p>
+ *
+ * <p>The return value will not change except possibly as a result of
+ * a subsequent call to reset, or if the iterator becomes out of sync.</p>
+ *
+ * <p>This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched
+ * (depending on the storage format of the data being
+ * traversed).</p>
+ *
+ * @param status the error code.
+ * @return number of elements in the iterator.
+ *
+ * @stable ICU 2.4 */
+ virtual int32_t count(UErrorCode& status) const = 0;
+
+ /**
+ * <p>Returns the next element as a NUL-terminated char*. If there
+ * are no more elements, returns nullptr. If the resultLength pointer
+ * is not nullptr, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and nullptr is returned.</p>
+ *
+ * <p>If the native service string is a char16_t* string, it is
+ * converted to char* with the invariant converter. If the
+ * conversion fails (because a character cannot be converted) then
+ * status is set to U_INVARIANT_CONVERSION_ERROR and the return
+ * value is undefined (though not nullptr).</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be nullptr.
+ * @return a pointer to the string, or nullptr.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const char* next(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element as a NUL-terminated char16_t*. If there
+ * are no more elements, returns nullptr. If the resultLength pointer
+ * is not nullptr, the length of the string (not counting the
+ * terminating NUL) is returned at that address. If an error
+ * status is returned, the value at resultLength is undefined.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and nullptr is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls snext()
+ * and handles the conversion.
+ *
+ * @param status the error code.
+ * @param resultLength a pointer to receive the length, can be nullptr.
+ * @return a pointer to the string, or nullptr.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const char16_t* unext(int32_t *resultLength, UErrorCode& status);
+
+ /**
+ * <p>Returns the next element a UnicodeString*. If there are no
+ * more elements, returns nullptr.</p>
+ *
+ * <p>The returned pointer is owned by this iterator and must not be
+ * deleted by the caller. The pointer is valid until the next call
+ * to next, unext, snext, reset, or the enumerator's destructor.</p>
+ *
+ * <p>If the iterator is out of sync with its service, status is set
+ * to U_ENUM_OUT_OF_SYNC_ERROR and nullptr is returned.</p>
+ *
+ * Starting with ICU 2.8, the default implementation calls next()
+ * and handles the conversion.
+ * Either next() or snext() must be implemented differently by a subclass.
+ *
+ * @param status the error code.
+ * @return a pointer to the string, or nullptr.
+ *
+ * @stable ICU 2.4
+ */
+ virtual const UnicodeString* snext(UErrorCode& status);
+
+ /**
+ * <p>Resets the iterator. This re-establishes sync with the
+ * service and rewinds the iterator to start at the first
+ * element.</p>
+ *
+ * <p>Previous pointers returned by next, unext, or snext become
+ * invalid, and the value returned by count might change.</p>
+ *
+ * @param status the error code.
+ *
+ * @stable ICU 2.4
+ */
+ virtual void reset(UErrorCode& status) = 0;
+
+ /**
+ * Compares this enumeration to other to check if both are equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return true if the enumerations are equal. false if not.
+ * @stable ICU 3.6
+ */
+ virtual bool operator==(const StringEnumeration& that)const;
+ /**
+ * Compares this enumeration to other to check if both are not equal
+ *
+ * @param that The other string enumeration to compare this object to
+ * @return true if the enumerations are equal. false if not.
+ * @stable ICU 3.6
+ */
+ virtual bool operator!=(const StringEnumeration& that)const;
+
+protected:
+ /**
+ * UnicodeString field for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ UnicodeString unistr;
+ /**
+ * char * default buffer for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ char charsBuffer[32];
+ /**
+ * char * buffer for use with default implementations and subclasses.
+ * Allocated in constructor and in ensureCharsCapacity().
+ * @stable ICU 2.8
+ */
+ char *chars;
+ /**
+ * Capacity of chars, for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ int32_t charsCapacity;
+
+ /**
+ * Default constructor for use with default implementations and subclasses.
+ * @stable ICU 2.8
+ */
+ StringEnumeration();
+
+ /**
+ * Ensures that chars is at least as large as the requested capacity.
+ * For use with default implementations and subclasses.
+ *
+ * @param capacity Requested capacity.
+ * @param status ICU in/out error code.
+ * @stable ICU 2.8
+ */
+ void ensureCharsCapacity(int32_t capacity, UErrorCode &status);
+
+ /**
+ * Converts s to Unicode and sets unistr to the result.
+ * For use with default implementations and subclasses,
+ * especially for implementations of snext() in terms of next().
+ * This is provided with a helper function instead of a default implementation
+ * of snext() to avoid potential infinite loops between next() and snext().
+ *
+ * For example:
+ * \code
+ * const UnicodeString* snext(UErrorCode& status) {
+ * int32_t resultLength=0;
+ * const char *s=next(&resultLength, status);
+ * return setChars(s, resultLength, status);
+ * }
+ * \endcode
+ *
+ * @param s String to be converted to Unicode.
+ * @param length Length of the string.
+ * @param status ICU in/out error code.
+ * @return A pointer to unistr.
+ * @stable ICU 2.8
+ */
+ UnicodeString *setChars(const char *s, int32_t length, UErrorCode &status);
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+/* STRENUM_H */
+#endif
diff --git a/intl/icu/source/common/unicode/stringoptions.h b/intl/icu/source/common/unicode/stringoptions.h
new file mode 100644
index 0000000000..7b9f70944f
--- /dev/null
+++ b/intl/icu/source/common/unicode/stringoptions.h
@@ -0,0 +1,190 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// stringoptions.h
+// created: 2017jun08 Markus W. Scherer
+
+#ifndef __STRINGOPTIONS_H__
+#define __STRINGOPTIONS_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Bit set option bit constants for various string and character processing functions.
+ */
+
+/**
+ * Option value for case folding: Use default mappings defined in CaseFolding.txt.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_DEFAULT 0
+
+/**
+ * Option value for case folding:
+ *
+ * Use the modified set of mappings provided in CaseFolding.txt to handle dotted I
+ * and dotless i appropriately for Turkic languages (tr, az).
+ *
+ * Before Unicode 3.2, CaseFolding.txt contains mappings marked with 'I' that
+ * are to be included for default mappings and
+ * excluded for the Turkic-specific mappings.
+ *
+ * Unicode 3.2 CaseFolding.txt instead contains mappings marked with 'T' that
+ * are to be excluded for default mappings and
+ * included for the Turkic-specific mappings.
+ *
+ * @stable ICU 2.0
+ */
+#define U_FOLD_CASE_EXCLUDE_SPECIAL_I 1
+
+/**
+ * Titlecase the string as a whole rather than each word.
+ * (Titlecase only the character at index 0, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @stable ICU 60
+ */
+#define U_TITLECASE_WHOLE_STRING 0x20
+
+/**
+ * Titlecase sentences rather than words.
+ * (Titlecase only the first character of each sentence, possibly adjusted.)
+ * Option bits value for titlecasing APIs that take an options bit set.
+ *
+ * It is an error to specify multiple titlecasing iterator options together,
+ * including both an options bit and an explicit BreakIterator.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @stable ICU 60
+ */
+#define U_TITLECASE_SENTENCES 0x40
+
+/**
+ * Do not lowercase non-initial parts of words when titlecasing.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will titlecase the character at each
+ * (possibly adjusted) BreakIterator index and
+ * lowercase all other characters up to the next iterator index.
+ * With this option, the other characters will not be modified.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_LOWERCASE 0x100
+
+/**
+ * Do not adjust the titlecasing BreakIterator indexes;
+ * titlecase exactly the characters at breaks from the iterator.
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * By default, titlecasing will take each break iterator index,
+ * adjust it to the next relevant character (see U_TITLECASE_ADJUST_TO_CASED),
+ * and titlecase that one.
+ *
+ * Other characters are lowercased.
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_ADJUST_TO_CASED
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see UnicodeString::toTitle
+ * @see CaseMap::toTitle
+ * @see ucasemap_setOptions
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+#define U_TITLECASE_NO_BREAK_ADJUSTMENT 0x200
+
+/**
+ * Adjust each titlecasing BreakIterator index to the next cased character.
+ * (See the Unicode Standard, chapter 3, Default Case Conversion, R3 toTitlecase(X).)
+ * Option bit for titlecasing APIs that take an options bit set.
+ *
+ * This used to be the default index adjustment in ICU.
+ * Since ICU 60, the default index adjustment is to the next character that is
+ * a letter, number, symbol, or private use code point.
+ * (Uncased modifier letters are skipped.)
+ * The difference in behavior is small for word titlecasing,
+ * but the new adjustment is much better for whole-string and sentence titlecasing:
+ * It yields "49ers" and "«丰(abc)»" instead of "49Ers" and "«丰(Abc)»".
+ *
+ * It is an error to specify multiple titlecasing adjustment options together.
+ *
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 60
+ */
+#define U_TITLECASE_ADJUST_TO_CASED 0x400
+
+/**
+ * Option for string transformation functions to not first reset the Edits object.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @stable ICU 60
+ */
+#define U_EDITS_NO_RESET 0x2000
+
+/**
+ * Omit unchanged text when recording how source substrings
+ * relate to changed and unchanged result substrings.
+ * Used for example in some case-mapping and normalization functions.
+ *
+ * @see CaseMap
+ * @see Edits
+ * @see Normalizer2
+ * @stable ICU 60
+ */
+#define U_OMIT_UNCHANGED_TEXT 0x4000
+
+/**
+ * Option bit for u_strCaseCompare, u_strcasecmp, unorm_compare, etc:
+ * Compare strings in code point order instead of code unit order.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_CODE_POINT_ORDER 0x8000
+
+/**
+ * Option bit for unorm_compare:
+ * Perform case-insensitive comparison.
+ * @stable ICU 2.2
+ */
+#define U_COMPARE_IGNORE_CASE 0x10000
+
+/**
+ * Option bit for unorm_compare:
+ * Both input strings are assumed to fulfill FCD conditions.
+ * @stable ICU 2.2
+ */
+#define UNORM_INPUT_IS_FCD 0x20000
+
+// Related definitions elsewhere.
+// Options that are not meaningful in the same functions
+// can share the same bits.
+//
+// Public:
+// unicode/unorm.h #define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+//
+// Internal: (may change or be removed)
+// ucase.h #define _STRCASECMP_OPTIONS_MASK 0xffff
+// ucase.h #define _FOLD_CASE_OPTIONS_MASK 7
+// ucasemap_imp.h #define U_TITLECASE_ITERATOR_MASK 0xe0
+// ucasemap_imp.h #define U_TITLECASE_ADJUSTMENT_MASK 0x600
+// ustr_imp.h #define _STRNCMP_STYLE 0x1000
+// unormcmp.cpp #define _COMPARE_EQUIV 0x80000
+
+#endif // __STRINGOPTIONS_H__
diff --git a/intl/icu/source/common/unicode/stringpiece.h b/intl/icu/source/common/unicode/stringpiece.h
new file mode 100644
index 0000000000..df7f36089d
--- /dev/null
+++ b/intl/icu/source/common/unicode/stringpiece.h
@@ -0,0 +1,343 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+// Copyright (C) 2009-2013, International Business Machines
+// Corporation and others. All Rights Reserved.
+//
+// Copyright 2001 and onwards Google Inc.
+// Author: Sanjay Ghemawat
+
+// This code is a contribution of Google code, and the style used here is
+// a compromise between the original Google code and the ICU coding guidelines.
+// For example, data types are ICU-ified (size_t,int->int32_t),
+// and API comments doxygen-ified, but function names and behavior are
+// as in the original, if possible.
+// Assertion-style error handling, not available in ICU, was changed to
+// parameter "pinning" similar to UnicodeString.
+//
+// In addition, this is only a partial port of the original Google code,
+// limited to what was needed so far. The (nearly) complete original code
+// is in the ICU svn repository at icuhtml/trunk/design/strings/contrib
+// (see ICU ticket 6765, r25517).
+
+#ifndef __STRINGPIECE_H__
+#define __STRINGPIECE_H__
+
+/**
+ * \file
+ * \brief C++ API: StringPiece: Read-only byte string wrapper class.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <cstddef>
+#include <type_traits>
+
+#include "unicode/uobject.h"
+#include "unicode/std_string.h"
+
+// Arghh! I wish C++ literals were "string".
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A string-like object that points to a sized piece of memory.
+ *
+ * We provide non-explicit singleton constructors so users can pass
+ * in a "const char*" or a "string" wherever a "StringPiece" is
+ * expected.
+ *
+ * Functions or methods may use StringPiece parameters to accept either a
+ * "const char*" or a "string" value that will be implicitly converted to a
+ * StringPiece.
+ *
+ * Systematic usage of StringPiece is encouraged as it will reduce unnecessary
+ * conversions from "const char*" to "string" and back again.
+ *
+ * @stable ICU 4.2
+ */
+class U_COMMON_API StringPiece : public UMemory {
+ private:
+ const char* ptr_;
+ int32_t length_;
+
+ public:
+ /**
+ * Default constructor, creates an empty StringPiece.
+ * @stable ICU 4.2
+ */
+ StringPiece() : ptr_(nullptr), length_(0) { }
+
+ /**
+ * Constructs from a NUL-terminated const char * pointer.
+ * @param str a NUL-terminated const char * pointer
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* str);
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Constructs from a NUL-terminated const char8_t * pointer.
+ * @param str a NUL-terminated const char8_t * pointer
+ * @stable ICU 67
+ */
+ StringPiece(const char8_t* str) : StringPiece(reinterpret_cast<const char*>(str)) {}
+#endif
+ /**
+ * Constructs an empty StringPiece.
+ * Needed for type disambiguation from multiple other overloads.
+ * @param p nullptr
+ * @stable ICU 67
+ */
+ StringPiece(std::nullptr_t p) : ptr_(p), length_(0) {}
+
+ /**
+ * Constructs from a std::string.
+ * @stable ICU 4.2
+ */
+ StringPiece(const std::string& str)
+ : ptr_(str.data()), length_(static_cast<int32_t>(str.size())) { }
+#if defined(__cpp_lib_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Constructs from a std::u8string.
+ * @stable ICU 67
+ */
+ StringPiece(const std::u8string& str)
+ : ptr_(reinterpret_cast<const char*>(str.data())),
+ length_(static_cast<int32_t>(str.size())) { }
+#endif
+
+ /**
+ * Constructs from some other implementation of a string piece class, from any
+ * C++ record type that has these two methods:
+ *
+ * \code{.cpp}
+ *
+ * struct OtherStringPieceClass {
+ * const char* data(); // or const char8_t*
+ * size_t size();
+ * };
+ *
+ * \endcode
+ *
+ * The other string piece class will typically be std::string_view from C++17
+ * or absl::string_view from Abseil.
+ *
+ * Starting with C++20, data() may also return a const char8_t* pointer,
+ * as from std::u8string_view.
+ *
+ * @param str the other string piece
+ * @stable ICU 65
+ */
+ template <typename T,
+ typename = typename std::enable_if<
+ (std::is_same<decltype(T().data()), const char*>::value
+#if defined(__cpp_char8_t)
+ || std::is_same<decltype(T().data()), const char8_t*>::value
+#endif
+ ) &&
+ std::is_same<decltype(T().size()), size_t>::value>::type>
+ StringPiece(T str)
+ : ptr_(reinterpret_cast<const char*>(str.data())),
+ length_(static_cast<int32_t>(str.size())) {}
+
+ /**
+ * Constructs from a const char * pointer and a specified length.
+ * @param offset a const char * pointer (need not be terminated)
+ * @param len the length of the string; must be non-negative
+ * @stable ICU 4.2
+ */
+ StringPiece(const char* offset, int32_t len) : ptr_(offset), length_(len) { }
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Constructs from a const char8_t * pointer and a specified length.
+ * @param str a const char8_t * pointer (need not be terminated)
+ * @param len the length of the string; must be non-negative
+ * @stable ICU 67
+ */
+ StringPiece(const char8_t* str, int32_t len) :
+ StringPiece(reinterpret_cast<const char*>(str), len) {}
+#endif
+
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos);
+ /**
+ * Substring of another StringPiece.
+ * @param x the other StringPiece
+ * @param pos start position in x; must be non-negative and <= x.length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most x.length() - pos.
+ * @stable ICU 4.2
+ */
+ StringPiece(const StringPiece& x, int32_t pos, int32_t len);
+
+ /**
+ * Returns the string pointer. May be nullptr if it is empty.
+ *
+ * data() may return a pointer to a buffer with embedded NULs, and the
+ * returned buffer may or may not be null terminated. Therefore it is
+ * typically a mistake to pass data() to a routine that expects a NUL
+ * terminated string.
+ * @return the string pointer
+ * @stable ICU 4.2
+ */
+ const char* data() const { return ptr_; }
+ /**
+ * Returns the string length. Same as length().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t size() const { return length_; }
+ /**
+ * Returns the string length. Same as size().
+ * @return the string length
+ * @stable ICU 4.2
+ */
+ int32_t length() const { return length_; }
+ /**
+ * Returns whether the string is empty.
+ * @return true if the string is empty
+ * @stable ICU 4.2
+ */
+ UBool empty() const { return length_ == 0; }
+
+ /**
+ * Sets to an empty string.
+ * @stable ICU 4.2
+ */
+ void clear() { ptr_ = nullptr; length_ = 0; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param xdata pointer the new string data. Need not be nul terminated.
+ * @param len the length of the new data
+ * @stable ICU 4.8
+ */
+ void set(const char* xdata, int32_t len) { ptr_ = xdata; length_ = len; }
+
+ /**
+ * Reset the stringpiece to refer to new data.
+ * @param str a pointer to a NUL-terminated string.
+ * @stable ICU 4.8
+ */
+ void set(const char* str);
+
+#if defined(__cpp_char8_t) || defined(U_IN_DOXYGEN)
+ /**
+ * Resets the stringpiece to refer to new data.
+ * @param xdata pointer the new string data. Need not be NUL-terminated.
+ * @param len the length of the new data
+ * @stable ICU 67
+ */
+ inline void set(const char8_t* xdata, int32_t len) {
+ set(reinterpret_cast<const char*>(xdata), len);
+ }
+
+ /**
+ * Resets the stringpiece to refer to new data.
+ * @param str a pointer to a NUL-terminated string.
+ * @stable ICU 67
+ */
+ inline void set(const char8_t* str) {
+ set(reinterpret_cast<const char*>(str));
+ }
+#endif
+
+ /**
+ * Removes the first n string units.
+ * @param n prefix length, must be non-negative and <=length()
+ * @stable ICU 4.2
+ */
+ void remove_prefix(int32_t n) {
+ if (n >= 0) {
+ if (n > length_) {
+ n = length_;
+ }
+ ptr_ += n;
+ length_ -= n;
+ }
+ }
+
+ /**
+ * Removes the last n string units.
+ * @param n suffix length, must be non-negative and <=length()
+ * @stable ICU 4.2
+ */
+ void remove_suffix(int32_t n) {
+ if (n >= 0) {
+ if (n <= length_) {
+ length_ -= n;
+ } else {
+ length_ = 0;
+ }
+ }
+ }
+
+ /**
+ * Searches the StringPiece for the given search string (needle);
+ * @param needle The string for which to search.
+ * @param offset Where to start searching within this string (haystack).
+ * @return The offset of needle in haystack, or -1 if not found.
+ * @stable ICU 67
+ */
+ int32_t find(StringPiece needle, int32_t offset);
+
+ /**
+ * Compares this StringPiece with the other StringPiece, with semantics
+ * similar to std::string::compare().
+ * @param other The string to compare to.
+ * @return below zero if this < other; above zero if this > other; 0 if this == other.
+ * @stable ICU 67
+ */
+ int32_t compare(StringPiece other);
+
+ /**
+ * Maximum integer, used as a default value for substring methods.
+ * @stable ICU 4.2
+ */
+ static const int32_t npos; // = 0x7fffffff;
+
+ /**
+ * Returns a substring of this StringPiece.
+ * @param pos start position; must be non-negative and <= length().
+ * @param len length of the substring;
+ * must be non-negative and will be pinned to at most length() - pos.
+ * @return the substring StringPiece
+ * @stable ICU 4.2
+ */
+ StringPiece substr(int32_t pos, int32_t len = npos) const {
+ return StringPiece(*this, pos, len);
+ }
+};
+
+/**
+ * Global operator == for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return true if the string data is equal
+ * @stable ICU 4.8
+ */
+U_EXPORT UBool U_EXPORT2
+operator==(const StringPiece& x, const StringPiece& y);
+
+/**
+ * Global operator != for StringPiece
+ * @param x The first StringPiece to compare.
+ * @param y The second StringPiece to compare.
+ * @return true if the string data is not equal
+ * @stable ICU 4.8
+ */
+inline bool operator!=(const StringPiece& x, const StringPiece& y) {
+ return !(x == y);
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __STRINGPIECE_H__
diff --git a/intl/icu/source/common/unicode/stringtriebuilder.h b/intl/icu/source/common/unicode/stringtriebuilder.h
new file mode 100644
index 0000000000..429d7883f1
--- /dev/null
+++ b/intl/icu/source/common/unicode/stringtriebuilder.h
@@ -0,0 +1,426 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012,2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: stringtriebuilder.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec24
+* created by: Markus W. Scherer
+*/
+
+#ifndef __STRINGTRIEBUILDER_H__
+#define __STRINGTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Builder API for trie builders
+ */
+
+// Forward declaration.
+/// \cond
+struct UHashtable;
+typedef struct UHashtable UHashtable;
+/// \endcond
+
+/**
+ * Build options for BytesTrieBuilder and CharsTrieBuilder.
+ * @stable ICU 4.8
+ */
+enum UStringTrieBuildOption {
+ /**
+ * Builds a trie quickly.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_BUILD_FAST,
+ /**
+ * Builds a trie more slowly, attempting to generate
+ * a shorter but equivalent serialization.
+ * This build option also uses more memory.
+ *
+ * This option can be effective when many integer values are the same
+ * and string/byte sequence suffixes can be shared.
+ * Runtime speed is not expected to improve.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_BUILD_SMALL
+};
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Base class for string trie builder classes.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API StringTrieBuilder : public UObject {
+public:
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ static int32_t hashNode(const void *node);
+ /** @internal */
+ static UBool equalNodes(const void *left, const void *right);
+#endif /* U_HIDE_INTERNAL_API */
+
+protected:
+ // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API
+ // or else the compiler will create a public default constructor.
+ /** @internal */
+ StringTrieBuilder();
+ /** @internal */
+ virtual ~StringTrieBuilder();
+
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
+ /** @internal */
+ void deleteCompactBuilder();
+
+ /** @internal */
+ void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
+
+ /** @internal */
+ int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
+ /** @internal */
+ int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
+#endif /* U_HIDE_INTERNAL_API */
+
+ class Node;
+
+#ifndef U_HIDE_INTERNAL_API
+ /** @internal */
+ Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
+ /** @internal */
+ Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
+ int32_t length, UErrorCode &errorCode);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /** @internal */
+ virtual int32_t getElementStringLength(int32_t i) const = 0;
+ /** @internal */
+ virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const = 0;
+ /** @internal */
+ virtual int32_t getElementValue(int32_t i) const = 0;
+
+ // Finds the first unit index after this one where
+ // the first and last element have different units again.
+ /** @internal */
+ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
+
+ // Number of different units at unitIndex.
+ /** @internal */
+ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
+ /** @internal */
+ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
+ /** @internal */
+ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const = 0;
+
+ /** @internal */
+ virtual UBool matchNodesCanHaveValues() const = 0;
+
+ /** @internal */
+ virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
+ /** @internal */
+ virtual int32_t getMinLinearMatch() const = 0;
+ /** @internal */
+ virtual int32_t getMaxLinearMatchLength() const = 0;
+
+#ifndef U_HIDE_INTERNAL_API
+ // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
+ /** @internal */
+ static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+ // Maximum number of nested split-branch levels for a branch on all 2^16 possible char16_t units.
+ // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
+ /** @internal */
+ static const int32_t kMaxSplitBranchLevels=14;
+
+ /**
+ * Makes sure that there is only one unique node registered that is
+ * equivalent to newNode.
+ * @param newNode Input node. The builder takes ownership.
+ * @param errorCode ICU in/out UErrorCode.
+ Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==nullptr.
+ * @return newNode if it is the first of its kind, or
+ * an equivalent node if newNode is a duplicate.
+ * @internal
+ */
+ Node *registerNode(Node *newNode, UErrorCode &errorCode);
+ /**
+ * Makes sure that there is only one unique FinalValueNode registered
+ * with this value.
+ * Avoids creating a node if the value is a duplicate.
+ * @param value A final value.
+ * @param errorCode ICU in/out UErrorCode.
+ Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==nullptr.
+ * @return A FinalValueNode with the given value.
+ * @internal
+ */
+ Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /*
+ * C++ note:
+ * registerNode() and registerFinalValue() take ownership of their input nodes,
+ * and only return owned nodes.
+ * If they see a failure UErrorCode, they will delete the input node.
+ * If they get a nullptr pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
+ * If there is a failure, they return nullptr.
+ *
+ * nullptr Node pointers can be safely passed into other Nodes because
+ * they call the static Node::hashCode() which checks for a nullptr pointer first.
+ *
+ * Therefore, as long as builder functions register a new node,
+ * they need to check for failures only before explicitly dereferencing
+ * a Node pointer, or before setting a new UErrorCode.
+ */
+
+ // Hash set of nodes, maps from nodes to integer 1.
+ /** @internal */
+ UHashtable *nodes;
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // it is needed for layout of other objects.
+ /**
+ * @internal
+ * \cond
+ */
+ class Node : public UObject {
+ public:
+ Node(int32_t initialHash) : hash(initialHash), offset(0) {}
+ inline int32_t hashCode() const { return hash; }
+ // Handles node==nullptr.
+ static inline int32_t hashCode(const Node *node) { return node==nullptr ? 0 : node->hashCode(); }
+ // Base class operator==() compares the actual class types.
+ virtual bool operator==(const Node &other) const;
+ inline bool operator!=(const Node &other) const { return !operator==(other); }
+ /**
+ * Traverses the Node graph and numbers branch edges, with rightmost edges first.
+ * This is to avoid writing a duplicate node twice.
+ *
+ * Branch nodes in this trie data structure are not symmetric.
+ * Most branch edges "jump" to other nodes but the rightmost branch edges
+ * just continue without a jump.
+ * Therefore, write() must write the rightmost branch edge last
+ * (trie units are written backwards), and must write it at that point even if
+ * it is a duplicate of a node previously written elsewhere.
+ *
+ * This function visits and marks right branch edges first.
+ * Edges are numbered with increasingly negative values because we share the
+ * offset field which gets positive values when nodes are written.
+ * A branch edge also remembers the first number for any of its edges.
+ *
+ * When a further-left branch edge has a number in the range of the rightmost
+ * edge's numbers, then it will be written as part of the required right edge
+ * and we can avoid writing it first.
+ *
+ * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
+ * edge numbers.
+ *
+ * @param edgeNumber The first edge number for this node and its sub-nodes.
+ * @return An edge number that is at least the maximum-negative
+ * of the input edge number and the numbers of this node and all of its sub-nodes.
+ */
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
+ // write() must set the offset to a positive value.
+ virtual void write(StringTrieBuilder &builder) = 0;
+ // See markRightEdgesFirst.
+ inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
+ StringTrieBuilder &builder) {
+ // Note: Edge numbers are negative, lastRight<=firstRight.
+ // If offset>0 then this node and its sub-nodes have been written already
+ // and we need not write them again.
+ // If this node is part of the unwritten right branch edge,
+ // then we wait until that is written.
+ if(offset<0 && (offset<lastRight || firstRight<offset)) {
+ write(builder);
+ }
+ }
+ inline int32_t getOffset() const { return offset; }
+ protected:
+ int32_t hash;
+ int32_t offset;
+ };
+
+#ifndef U_HIDE_INTERNAL_API
+ // This class should not be overridden because
+ // registerFinalValue() compares a stack-allocated FinalValueNode
+ // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
+ // with the input node, and the
+ // !Node::operator==(other) used inside FinalValueNode::operator==(other)
+ // will be false if the typeid's are different.
+ /** @internal */
+ class FinalValueNode : public Node {
+ public:
+ FinalValueNode(int32_t v) : Node(0x111111u*37u+v), value(v) {}
+ virtual bool operator==(const Node &other) const override;
+ virtual void write(StringTrieBuilder &builder) override;
+ protected:
+ int32_t value;
+ };
+#endif /* U_HIDE_INTERNAL_API */
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // it is needed for layout of other objects.
+ /**
+ * @internal
+ */
+ class ValueNode : public Node {
+ public:
+ ValueNode(int32_t initialHash) : Node(initialHash), hasValue(false), value(0) {}
+ virtual bool operator==(const Node &other) const override;
+ void setValue(int32_t v) {
+ hasValue=true;
+ value=v;
+ hash=hash*37u+v;
+ }
+ protected:
+ UBool hasValue;
+ int32_t value;
+ };
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ class IntermediateValueNode : public ValueNode {
+ public:
+ IntermediateValueNode(int32_t v, Node *nextNode)
+ : ValueNode(0x222222u*37u+hashCode(nextNode)), next(nextNode) { setValue(v); }
+ virtual bool operator==(const Node &other) const override;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
+ virtual void write(StringTrieBuilder &builder) override;
+ protected:
+ Node *next;
+ };
+#endif /* U_HIDE_INTERNAL_API */
+
+ // Do not conditionalize the following with #ifndef U_HIDE_INTERNAL_API,
+ // it is needed for layout of other objects.
+ /**
+ * @internal
+ */
+ class LinearMatchNode : public ValueNode {
+ public:
+ LinearMatchNode(int32_t len, Node *nextNode)
+ : ValueNode((0x333333u*37u+len)*37u+hashCode(nextNode)),
+ length(len), next(nextNode) {}
+ virtual bool operator==(const Node &other) const override;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
+ protected:
+ int32_t length;
+ Node *next;
+ };
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ class BranchNode : public Node {
+ public:
+ BranchNode(int32_t initialHash) : Node(initialHash) {}
+ protected:
+ int32_t firstEdgeNumber;
+ };
+
+ /**
+ * @internal
+ */
+ class ListBranchNode : public BranchNode {
+ public:
+ ListBranchNode() : BranchNode(0x444444), length(0) {}
+ virtual bool operator==(const Node &other) const override;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
+ virtual void write(StringTrieBuilder &builder) override;
+ // Adds a unit with a final value.
+ void add(int32_t c, int32_t value) {
+ units[length]=(char16_t)c;
+ equal[length]=nullptr;
+ values[length]=value;
+ ++length;
+ hash=(hash*37u+c)*37u+value;
+ }
+ // Adds a unit which leads to another match node.
+ void add(int32_t c, Node *node) {
+ units[length]=(char16_t)c;
+ equal[length]=node;
+ values[length]=0;
+ ++length;
+ hash=(hash*37u+c)*37u+hashCode(node);
+ }
+ protected:
+ Node *equal[kMaxBranchLinearSubNodeLength]; // nullptr means "has final value".
+ int32_t length;
+ int32_t values[kMaxBranchLinearSubNodeLength];
+ char16_t units[kMaxBranchLinearSubNodeLength];
+ };
+
+ /**
+ * @internal
+ */
+ class SplitBranchNode : public BranchNode {
+ public:
+ SplitBranchNode(char16_t middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
+ : BranchNode(((0x555555u*37u+middleUnit)*37u+
+ hashCode(lessThanNode))*37u+hashCode(greaterOrEqualNode)),
+ unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
+ virtual bool operator==(const Node &other) const override;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
+ virtual void write(StringTrieBuilder &builder) override;
+ protected:
+ char16_t unit;
+ Node *lessThan;
+ Node *greaterOrEqual;
+ };
+
+ // Branch head node, for writing the actual node lead unit.
+ /** @internal */
+ class BranchHeadNode : public ValueNode {
+ public:
+ BranchHeadNode(int32_t len, Node *subNode)
+ : ValueNode((0x666666u*37u+len)*37u+hashCode(subNode)),
+ length(len), next(subNode) {}
+ virtual bool operator==(const Node &other) const override;
+ virtual int32_t markRightEdgesFirst(int32_t edgeNumber) override;
+ virtual void write(StringTrieBuilder &builder) override;
+ protected:
+ int32_t length;
+ Node *next; // A branch sub-node.
+ };
+
+#endif /* U_HIDE_INTERNAL_API */
+ /// \endcond
+
+ /** @internal */
+ virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
+ Node *nextNode) const = 0;
+
+ /** @internal */
+ virtual int32_t write(int32_t unit) = 0;
+ /** @internal */
+ virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
+ /** @internal */
+ virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
+ /** @internal */
+ virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
+ /** @internal */
+ virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __STRINGTRIEBUILDER_H__
diff --git a/intl/icu/source/common/unicode/symtable.h b/intl/icu/source/common/unicode/symtable.h
new file mode 100644
index 0000000000..647a3884a0
--- /dev/null
+++ b/intl/icu/source/common/unicode/symtable.h
@@ -0,0 +1,119 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2000-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 02/04/00 aliu Creation.
+**********************************************************************
+*/
+#ifndef SYMTABLE_H
+#define SYMTABLE_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ */
+
+U_NAMESPACE_BEGIN
+
+class ParsePosition;
+class UnicodeFunctor;
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ * An interface that defines both lookup protocol and parsing of
+ * symbolic names.
+ *
+ * <p>A symbol table maintains two kinds of mappings. The first is
+ * between symbolic names and their values. For example, if the
+ * variable with the name "start" is set to the value "alpha"
+ * (perhaps, though not necessarily, through an expression such as
+ * "$start=alpha"), then the call lookup("start") will return the
+ * char[] array ['a', 'l', 'p', 'h', 'a'].
+ *
+ * <p>The second kind of mapping is between character values and
+ * UnicodeMatcher objects. This is used by RuleBasedTransliterator,
+ * which uses characters in the private use area to represent objects
+ * such as UnicodeSets. If U+E015 is mapped to the UnicodeSet [a-z],
+ * then lookupMatcher(0xE015) will return the UnicodeSet [a-z].
+ *
+ * <p>Finally, a symbol table defines parsing behavior for symbolic
+ * names. All symbolic names start with the SYMBOL_REF character.
+ * When a parser encounters this character, it calls parseReference()
+ * with the position immediately following the SYMBOL_REF. The symbol
+ * table parses the name, if there is one, and returns it.
+ *
+ * @stable ICU 2.8
+ */
+class U_COMMON_API SymbolTable /* not : public UObject because this is an interface/mixin class */ {
+public:
+
+ /**
+ * The character preceding a symbol reference name.
+ * @stable ICU 2.8
+ */
+ enum { SYMBOL_REF = 0x0024 /*$*/ };
+
+ /**
+ * Destructor.
+ * @stable ICU 2.8
+ */
+ virtual ~SymbolTable();
+
+ /**
+ * Lookup the characters associated with this string and return it.
+ * Return <tt>nullptr</tt> if no such name exists. The resultant
+ * string may have length zero.
+ * @param s the symbolic name to lookup
+ * @return a string containing the name's value, or <tt>nullptr</tt> if
+ * there is no mapping for s.
+ * @stable ICU 2.8
+ */
+ virtual const UnicodeString* lookup(const UnicodeString& s) const = 0;
+
+ /**
+ * Lookup the UnicodeMatcher associated with the given character, and
+ * return it. Return <tt>nullptr</tt> if not found.
+ * @param ch a 32-bit code point from 0 to 0x10FFFF inclusive.
+ * @return the UnicodeMatcher object represented by the given
+ * character, or nullptr if there is no mapping for ch.
+ * @stable ICU 2.8
+ */
+ virtual const UnicodeFunctor* lookupMatcher(UChar32 ch) const = 0;
+
+ /**
+ * Parse a symbol reference name from the given string, starting
+ * at the given position. If no valid symbol reference name is
+ * found, return the empty string and leave pos unchanged. That is, if the
+ * character at pos cannot start a name, or if pos is at or after
+ * text.length(), then return an empty string. This indicates an
+ * isolated SYMBOL_REF character.
+ * @param text the text to parse for the name
+ * @param pos on entry, the index of the first character to parse.
+ * This is the character following the SYMBOL_REF character. On
+ * exit, the index after the last parsed character. If the parse
+ * failed, pos is unchanged on exit.
+ * @param limit the index after the last character to be parsed.
+ * @return the parsed name, or an empty string if there is no
+ * valid symbolic name at the given position.
+ * @stable ICU 2.8
+ */
+ virtual UnicodeString parseReference(const UnicodeString& text,
+ ParsePosition& pos, int32_t limit) const = 0;
+};
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/ubidi.h b/intl/icu/source/common/unicode/ubidi.h
new file mode 100644
index 0000000000..536f4172bc
--- /dev/null
+++ b/intl/icu/source/common/unicode/ubidi.h
@@ -0,0 +1,2211 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ubidi.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999jul27
+* created by: Markus W. Scherer, updated by Matitiahu Allouche
+*/
+
+#ifndef UBIDI_H
+#define UBIDI_H
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ *\file
+ * \brief C API: Bidi algorithm
+ *
+ * <h2>Bidi algorithm for ICU</h2>
+ *
+ * This is an implementation of the Unicode Bidirectional Algorithm.
+ * The algorithm is defined in the
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>.<p>
+ *
+ * Note: Libraries that perform a bidirectional algorithm and
+ * reorder strings accordingly are sometimes called "Storage Layout Engines".
+ * ICU's Bidi and shaping (u_shapeArabic()) APIs can be used at the core of such
+ * "Storage Layout Engines".
+ *
+ * <h3>General remarks about the API:</h3>
+ *
+ * In functions with an error code parameter,
+ * the <code>pErrorCode</code> pointer must be valid
+ * and the value that it points to must not indicate a failure before
+ * the function call. Otherwise, the function returns immediately.
+ * After the function call, the value indicates success or failure.<p>
+ *
+ * The &quot;limit&quot; of a sequence of characters is the position just after their
+ * last character, i.e., one more than that position.<p>
+ *
+ * Some of the API functions provide access to &quot;runs&quot;.
+ * Such a &quot;run&quot; is defined as a sequence of characters
+ * that are at the same embedding level
+ * after performing the Bidi algorithm.<p>
+ *
+ * @author Markus W. Scherer
+ * @version 1.0
+ *
+ *
+ * <h4> Sample code for the ICU Bidi API </h4>
+ *
+ * <h5>Rendering a paragraph with the ICU Bidi API</h5>
+ *
+ * This is (hypothetical) sample code that illustrates
+ * how the ICU Bidi API could be used to render a paragraph of text.
+ * Rendering code depends highly on the graphics system,
+ * therefore this sample code must make a lot of assumptions,
+ * which may or may not match any existing graphics system's properties.
+ *
+ * <p>The basic assumptions are:</p>
+ * <ul>
+ * <li>Rendering is done from left to right on a horizontal line.</li>
+ * <li>A run of single-style, unidirectional text can be rendered at once.</li>
+ * <li>Such a run of text is passed to the graphics system with
+ * characters (code units) in logical order.</li>
+ * <li>The line-breaking algorithm is very complicated
+ * and Locale-dependent -
+ * and therefore its implementation omitted from this sample code.</li>
+ * </ul>
+ *
+ * <pre>
+ * \code
+ *#include <unicode/ubidi.h>
+ *
+ *typedef enum {
+ * styleNormal=0, styleSelected=1,
+ * styleBold=2, styleItalics=4,
+ * styleSuper=8, styleSub=16
+ *} Style;
+ *
+ *typedef struct { int32_t limit; Style style; } StyleRun;
+ *
+ *int getTextWidth(const UChar *text, int32_t start, int32_t limit,
+ * const StyleRun *styleRuns, int styleRunCount);
+ *
+ * // set *pLimit and *pStyleRunLimit for a line
+ * // from text[start] and from styleRuns[styleRunStart]
+ * // using ubidi_getLogicalRun(para, ...)
+ *void getLineBreak(const UChar *text, int32_t start, int32_t *pLimit,
+ * UBiDi *para,
+ * const StyleRun *styleRuns, int styleRunStart, int *pStyleRunLimit,
+ * int *pLineWidth);
+ *
+ * // render runs on a line sequentially, always from left to right
+ *
+ * // prepare rendering a new line
+ * void startLine(UBiDiDirection textDirection, int lineWidth);
+ *
+ * // render a run of text and advance to the right by the run width
+ * // the text[start..limit-1] is always in logical order
+ * void renderRun(const UChar *text, int32_t start, int32_t limit,
+ * UBiDiDirection textDirection, Style style);
+ *
+ * // We could compute a cross-product
+ * // from the style runs with the directional runs
+ * // and then reorder it.
+ * // Instead, here we iterate over each run type
+ * // and render the intersections -
+ * // with shortcuts in simple (and common) cases.
+ * // renderParagraph() is the main function.
+ *
+ * // render a directional run with
+ * // (possibly) multiple style runs intersecting with it
+ * void renderDirectionalRun(const UChar *text,
+ * int32_t start, int32_t limit,
+ * UBiDiDirection direction,
+ * const StyleRun *styleRuns, int styleRunCount) {
+ * int i;
+ *
+ * // iterate over style runs
+ * if(direction==UBIDI_LTR) {
+ * int styleLimit;
+ *
+ * for(i=0; i<styleRunCount; ++i) {
+ * styleLimit=styleRuns[i].limit;
+ * if(start<styleLimit) {
+ * if(styleLimit>limit) { styleLimit=limit; }
+ * renderRun(text, start, styleLimit,
+ * direction, styleRuns[i].style);
+ * if(styleLimit==limit) { break; }
+ * start=styleLimit;
+ * }
+ * }
+ * } else {
+ * int styleStart;
+ *
+ * for(i=styleRunCount-1; i>=0; --i) {
+ * if(i>0) {
+ * styleStart=styleRuns[i-1].limit;
+ * } else {
+ * styleStart=0;
+ * }
+ * if(limit>=styleStart) {
+ * if(styleStart<start) { styleStart=start; }
+ * renderRun(text, styleStart, limit,
+ * direction, styleRuns[i].style);
+ * if(styleStart==start) { break; }
+ * limit=styleStart;
+ * }
+ * }
+ * }
+ * }
+ *
+ * // the line object represents text[start..limit-1]
+ * void renderLine(UBiDi *line, const UChar *text,
+ * int32_t start, int32_t limit,
+ * const StyleRun *styleRuns, int styleRunCount,
+ * UErrorCode *pErrorCode) {
+ * UBiDiDirection direction=ubidi_getDirection(line);
+ * if(direction!=UBIDI_MIXED) {
+ * // unidirectional
+ * if(styleRunCount<=1) {
+ * renderRun(text, start, limit, direction, styleRuns[0].style);
+ * } else {
+ * renderDirectionalRun(text, start, limit,
+ * direction, styleRuns, styleRunCount);
+ * }
+ * } else {
+ * // mixed-directional
+ * int32_t count, i, length;
+ * UBiDiLevel level;
+ *
+ * count=ubidi_countRuns(line, pErrorCode);
+ * if(U_SUCCESS(*pErrorCode)) {
+ * if(styleRunCount<=1) {
+ * Style style=styleRuns[0].style;
+ *
+ * // iterate over directional runs
+ * for(i=0; i<count; ++i) {
+ * direction=ubidi_getVisualRun(line, i, &start, &length);
+ * renderRun(text, start, start+length, direction, style);
+ * }
+ * } else {
+ * int32_t j;
+ *
+ * // iterate over both directional and style runs
+ * for(i=0; i<count; ++i) {
+ * direction=ubidi_getVisualRun(line, i, &start, &length);
+ * renderDirectionalRun(text, start, start+length,
+ * direction, styleRuns, styleRunCount);
+ * }
+ * }
+ * }
+ * }
+ * }
+ *
+ *void renderParagraph(const UChar *text, int32_t length,
+ * UBiDiDirection textDirection,
+ * const StyleRun *styleRuns, int styleRunCount,
+ * int lineWidth,
+ * UErrorCode *pErrorCode) {
+ * UBiDi *para;
+ *
+ * if(pErrorCode==NULL || U_FAILURE(*pErrorCode) || length<=0) {
+ * return;
+ * }
+ *
+ * para=ubidi_openSized(length, 0, pErrorCode);
+ * if(para==NULL) { return; }
+ *
+ * ubidi_setPara(para, text, length,
+ * textDirection ? UBIDI_DEFAULT_RTL : UBIDI_DEFAULT_LTR,
+ * NULL, pErrorCode);
+ * if(U_SUCCESS(*pErrorCode)) {
+ * UBiDiLevel paraLevel=1&ubidi_getParaLevel(para);
+ * StyleRun styleRun={ length, styleNormal };
+ * int width;
+ *
+ * if(styleRuns==NULL || styleRunCount<=0) {
+ * styleRunCount=1;
+ * styleRuns=&styleRun;
+ * }
+ *
+ * // assume styleRuns[styleRunCount-1].limit>=length
+ *
+ * width=getTextWidth(text, 0, length, styleRuns, styleRunCount);
+ * if(width<=lineWidth) {
+ * // everything fits onto one line
+ *
+ * // prepare rendering a new line from either left or right
+ * startLine(paraLevel, width);
+ *
+ * renderLine(para, text, 0, length,
+ * styleRuns, styleRunCount, pErrorCode);
+ * } else {
+ * UBiDi *line;
+ *
+ * // we need to render several lines
+ * line=ubidi_openSized(length, 0, pErrorCode);
+ * if(line!=NULL) {
+ * int32_t start=0, limit;
+ * int styleRunStart=0, styleRunLimit;
+ *
+ * for(;;) {
+ * limit=length;
+ * styleRunLimit=styleRunCount;
+ * getLineBreak(text, start, &limit, para,
+ * styleRuns, styleRunStart, &styleRunLimit,
+ * &width);
+ * ubidi_setLine(para, start, limit, line, pErrorCode);
+ * if(U_SUCCESS(*pErrorCode)) {
+ * // prepare rendering a new line
+ * // from either left or right
+ * startLine(paraLevel, width);
+ *
+ * renderLine(line, text, start, limit,
+ * styleRuns+styleRunStart,
+ * styleRunLimit-styleRunStart, pErrorCode);
+ * }
+ * if(limit==length) { break; }
+ * start=limit;
+ * styleRunStart=styleRunLimit-1;
+ * if(start>=styleRuns[styleRunStart].limit) {
+ * ++styleRunStart;
+ * }
+ * }
+ *
+ * ubidi_close(line);
+ * }
+ * }
+ * }
+ *
+ * ubidi_close(para);
+ *}
+ *\endcode
+ * </pre>
+ */
+
+/*DOCXX_TAG*/
+/*@{*/
+
+/**
+ * UBiDiLevel is the type of the level values in this
+ * Bidi implementation.
+ * It holds an embedding level and indicates the visual direction
+ * by its bit&nbsp;0 (even/odd value).<p>
+ *
+ * It can also hold non-level values for the
+ * <code>paraLevel</code> and <code>embeddingLevels</code>
+ * arguments of <code>ubidi_setPara()</code>; there:
+ * <ul>
+ * <li>bit&nbsp;7 of an <code>embeddingLevels[]</code>
+ * value indicates whether the using application is
+ * specifying the level of a character to <i>override</i> whatever the
+ * Bidi implementation would resolve it to.</li>
+ * <li><code>paraLevel</code> can be set to the
+ * pseudo-level values <code>UBIDI_DEFAULT_LTR</code>
+ * and <code>UBIDI_DEFAULT_RTL</code>.</li>
+ * </ul>
+ *
+ * @see ubidi_setPara
+ *
+ * <p>The related constants are not real, valid level values.
+ * <code>UBIDI_DEFAULT_XXX</code> can be used to specify
+ * a default for the paragraph level for
+ * when the <code>ubidi_setPara()</code> function
+ * shall determine it but there is no
+ * strongly typed character in the input.<p>
+ *
+ * Note that the value for <code>UBIDI_DEFAULT_LTR</code> is even
+ * and the one for <code>UBIDI_DEFAULT_RTL</code> is odd,
+ * just like with normal LTR and RTL level values -
+ * these special values are designed that way. Also, the implementation
+ * assumes that UBIDI_MAX_EXPLICIT_LEVEL is odd.
+ *
+ * Note: The numeric values of the related constants will not change:
+ * They are tied to the use of 7-bit byte values (plus the override bit)
+ * and of the UBiDiLevel=uint8_t data type in this API.
+ *
+ * @see UBIDI_DEFAULT_LTR
+ * @see UBIDI_DEFAULT_RTL
+ * @see UBIDI_LEVEL_OVERRIDE
+ * @see UBIDI_MAX_EXPLICIT_LEVEL
+ * @stable ICU 2.0
+ */
+typedef uint8_t UBiDiLevel;
+
+/** Paragraph level setting.<p>
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 0 (left-to-right).<p>
+ *
+ * If this value is used in conjunction with reordering modes
+ * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
+ * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the righmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, the direction will be LTR otherwise.<p>
+ *
+ * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_LTR 0xfe
+
+/** Paragraph level setting.<p>
+ *
+ * Constant indicating that the base direction depends on the first strong
+ * directional character in the text according to the Unicode Bidirectional
+ * Algorithm. If no strong directional character is present,
+ * then set the paragraph level to 1 (right-to-left).<p>
+ *
+ * If this value is used in conjunction with reordering modes
+ * <code>UBIDI_REORDER_INVERSE_LIKE_DIRECT</code> or
+ * <code>UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the text to reorder
+ * is assumed to be visual LTR, and the text after reordering is required
+ * to be the corresponding logical string with appropriate contextual
+ * direction. The direction of the result string will be RTL if either
+ * the righmost or leftmost strong character of the source text is RTL
+ * or Arabic Letter, or if the text contains no strong character;
+ * the direction will be LTR otherwise.<p>
+ *
+ * If reordering option <code>UBIDI_OPTION_INSERT_MARKS</code> is set, an RLM may
+ * be added at the beginning of the result string to ensure round trip
+ * (that the result string, when reordered back to visual, will produce
+ * the original source text).
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL
+ * @stable ICU 2.0
+ */
+#define UBIDI_DEFAULT_RTL 0xff
+
+/**
+ * Maximum explicit embedding level.
+ * Same as the max_depth value in the
+ * <a href="http://www.unicode.org/reports/tr9/#BD2">Unicode Bidirectional Algorithm</a>.
+ * (The maximum resolved level can be up to <code>UBIDI_MAX_EXPLICIT_LEVEL+1</code>).
+ * @stable ICU 2.0
+ */
+#define UBIDI_MAX_EXPLICIT_LEVEL 125
+
+/** Bit flag for level input.
+ * Overrides directional properties.
+ * @stable ICU 2.0
+ */
+#define UBIDI_LEVEL_OVERRIDE 0x80
+
+/**
+ * Special value which can be returned by the mapping functions when a logical
+ * index has no corresponding visual index or vice-versa. This may happen
+ * for the logical-to-visual mapping of a Bidi control when option
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is specified. This can also happen
+ * for the visual-to-logical mapping of a Bidi mark (LRM or RLM) inserted
+ * by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getVisualMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getLogicalMap
+ * @stable ICU 3.6
+ */
+#define UBIDI_MAP_NOWHERE (-1)
+
+/**
+ * <code>UBiDiDirection</code> values indicate the text direction.
+ * @stable ICU 2.0
+ */
+enum UBiDiDirection {
+ /** Left-to-right text. This is a 0 value.
+ * <ul>
+ * <li>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains no right-to-left characters, or
+ * that the source string is empty and the paragraph level is even.
+ * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a left-to-right direction.
+ * </ul>
+ * @stable ICU 2.0
+ */
+ UBIDI_LTR,
+ /** Right-to-left text. This is a 1 value.
+ * <ul>
+ * <li>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains no left-to-right characters, or
+ * that the source string is empty and the paragraph level is odd.
+ * <li> As return value for <code>ubidi_getBaseDirection()</code>, it
+ * means that the first strong character of the source string has
+ * a right-to-left direction.
+ * </ul>
+ * @stable ICU 2.0
+ */
+ UBIDI_RTL,
+ /** Mixed-directional text.
+ * <p>As return value for <code>ubidi_getDirection()</code>, it means
+ * that the source string contains both left-to-right and
+ * right-to-left characters.
+ * @stable ICU 2.0
+ */
+ UBIDI_MIXED,
+ /** No strongly directional text.
+ * <p>As return value for <code>ubidi_getBaseDirection()</code>, it means
+ * that the source string is missing or empty, or contains neither left-to-right
+ * nor right-to-left characters.
+ * @stable ICU 4.6
+ */
+ UBIDI_NEUTRAL
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBiDiDirection UBiDiDirection;
+
+/**
+ * Forward declaration of the <code>UBiDi</code> structure for the declaration of
+ * the API functions. Its fields are implementation-specific.<p>
+ * This structure holds information about a paragraph (or multiple paragraphs)
+ * of text with Bidi-algorithm-related details, or about one line of
+ * such a paragraph.<p>
+ * Reordering can be done on a line, or on one or more paragraphs which are
+ * then interpreted each as one single line.
+ * @stable ICU 2.0
+ */
+struct UBiDi;
+
+/** @stable ICU 2.0 */
+typedef struct UBiDi UBiDi;
+
+/**
+ * Allocate a <code>UBiDi</code> structure.
+ * Such an object is initially empty. It is assigned
+ * the Bidi properties of a piece of text containing one or more paragraphs
+ * by <code>ubidi_setPara()</code>
+ * or the Bidi properties of a line within a paragraph by
+ * <code>ubidi_setLine()</code>.<p>
+ * This object can be reused for as long as it is not deallocated
+ * by calling <code>ubidi_close()</code>.<p>
+ * <code>ubidi_setPara()</code> and <code>ubidi_setLine()</code> will allocate
+ * additional memory for internal structures as necessary.
+ *
+ * @return An empty <code>UBiDi</code> object.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_open(void);
+
+/**
+ * Allocate a <code>UBiDi</code> structure with preallocated memory
+ * for internal structures.
+ * This function provides a <code>UBiDi</code> object like <code>ubidi_open()</code>
+ * with no arguments, but it also preallocates memory for internal structures
+ * according to the sizings supplied by the caller.<p>
+ * Subsequent functions will not allocate any more memory, and are thus
+ * guaranteed not to fail because of lack of memory.<p>
+ * The preallocation can be limited to some of the internal memory
+ * by setting some values to 0 here. That means that if, e.g.,
+ * <code>maxRunCount</code> cannot be reasonably predetermined and should not
+ * be set to <code>maxLength</code> (the only failproof value) to avoid
+ * wasting memory, then <code>maxRunCount</code> could be set to 0 here
+ * and the internal structures that are associated with it will be allocated
+ * on demand, just like with <code>ubidi_open()</code>.
+ *
+ * @param maxLength is the maximum text or line length that internal memory
+ * will be preallocated for. An attempt to associate this object with a
+ * longer text will fail, unless this value is 0, which leaves the allocation
+ * up to the implementation.
+ *
+ * @param maxRunCount is the maximum anticipated number of same-level runs
+ * that internal memory will be preallocated for. An attempt to access
+ * visual runs on an object that was not preallocated for as many runs
+ * as the text was actually resolved to will fail,
+ * unless this value is 0, which leaves the allocation up to the implementation.<br><br>
+ * The number of runs depends on the actual text and maybe anywhere between
+ * 1 and <code>maxLength</code>. It is typically small.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return An empty <code>UBiDi</code> object with preallocated memory.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDi * U_EXPORT2
+ubidi_openSized(int32_t maxLength, int32_t maxRunCount, UErrorCode *pErrorCode);
+
+/**
+ * <code>ubidi_close()</code> must be called to free the memory
+ * associated with a UBiDi object.<p>
+ *
+ * <strong>Important: </strong>
+ * A parent <code>UBiDi</code> object must not be destroyed or reused if
+ * it still has children.
+ * If a <code>UBiDi</code> object has become the <i>child</i>
+ * of another one (its <i>parent</i>) by calling
+ * <code>ubidi_setLine()</code>, then the child object must
+ * be destroyed (closed) or reused (by calling
+ * <code>ubidi_setPara()</code> or <code>ubidi_setLine()</code>)
+ * before the parent object.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_close(UBiDi *pBiDi);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBiDiPointer
+ * "Smart pointer" class, closes a UBiDi via ubidi_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiPointer, UBiDi, ubidi_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Modify the operation of the Bidi algorithm such that it
+ * approximates an "inverse Bidi" algorithm. This function
+ * must be called before <code>ubidi_setPara()</code>.
+ *
+ * <p>The normal operation of the Bidi algorithm as described
+ * in the Unicode Technical Report is to take text stored in logical
+ * (keyboard, typing) order and to determine the reordering of it for visual
+ * rendering.
+ * Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * to logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi" and that the current implementation provides only an
+ * approximation of "inverse Bidi".</p>
+ *
+ * <p>With <code>isInverse</code> set to <code>true</code>,
+ * this function changes the behavior of some of the subsequent functions
+ * in a way that they can be used for the inverse Bidi algorithm.
+ * Specifically, runs of text with numeric characters will be treated in a
+ * special way and may need to be surrounded with LRM characters when they are
+ * written in reordered sequence.</p>
+ *
+ * <p>Output runs should be retrieved using <code>ubidi_getVisualRun()</code>.
+ * Since the actual input for "inverse Bidi" is visually ordered text and
+ * <code>ubidi_getVisualRun()</code> gets the reordered runs, these are actually
+ * the runs of the logically ordered output.</p>
+ *
+ * <p>Calling this function with argument <code>isInverse</code> set to
+ * <code>true</code> is equivalent to calling
+ * <code>ubidi_setReorderingMode</code> with argument
+ * <code>reorderingMode</code>
+ * set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
+ * Calling this function with argument <code>isInverse</code> set to
+ * <code>false</code> is equivalent to calling
+ * <code>ubidi_setReorderingMode</code> with argument
+ * <code>reorderingMode</code>
+ * set to <code>#UBIDI_REORDER_DEFAULT</code>.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @param isInverse specifies "forward" or "inverse" Bidi operation.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setInverse(UBiDi *pBiDi, UBool isInverse);
+
+/**
+ * Is this Bidi object set to perform the inverse Bidi algorithm?
+ * <p>Note: calling this function after setting the reordering mode with
+ * <code>ubidi_setReorderingMode</code> will return <code>true</code> if the
+ * reordering mode was set to <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>,
+ * <code>false</code> for all other values.</p>
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return true if the Bidi object is set to perform the inverse Bidi algorithm
+ * by handling numbers as L.
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @stable ICU 2.0
+ */
+
+U_CAPI UBool U_EXPORT2
+ubidi_isInverse(UBiDi *pBiDi);
+
+/**
+ * Specify whether block separators must be allocated level zero,
+ * so that successive paragraphs will progress from left to right.
+ * This function must be called before <code>ubidi_setPara()</code>.
+ * Paragraph separators (B) may appear in the text. Setting them to level zero
+ * means that all paragraph separators (including one possibly appearing
+ * in the last text position) are kept in the reordered text after the text
+ * that they follow in the source text.
+ * When this feature is not enabled, a paragraph separator at the last
+ * position of the text before reordering will go to the first position
+ * of the reordered text when the paragraph level is odd.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ *
+ * @param orderParagraphsLTR specifies whether paragraph separators (B) must
+ * receive level 0, so that successive paragraphs progress from left to right.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubidi_orderParagraphsLTR(UBiDi *pBiDi, UBool orderParagraphsLTR);
+
+/**
+ * Is this Bidi object set to allocate level 0 to block separators so that
+ * successive paragraphs progress from left to right?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return true if the Bidi object is set to allocate level 0 to block
+ * separators.
+ *
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+ubidi_isOrderParagraphsLTR(UBiDi *pBiDi);
+
+/**
+ * <code>UBiDiReorderingMode</code> values indicate which variant of the Bidi
+ * algorithm to use.
+ *
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingMode {
+ /** Regular Logical to Visual Bidi algorithm according to Unicode.
+ * This is a 0 value.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_DEFAULT = 0,
+ /** Logical to Visual algorithm which handles numbers in a way which
+ * mimics the behavior of Windows XP.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_NUMBERS_SPECIAL,
+ /** Logical to Visual algorithm grouping numbers with adjacent R characters
+ * (reversible algorithm).
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_GROUP_NUMBERS_WITH_R,
+ /** Reorder runs only to transform a Logical LTR string to the Logical RTL
+ * string with the same display, or vice-versa.<br>
+ * If this mode is set together with option
+ * <code>#UBIDI_OPTION_INSERT_MARKS</code>, some Bidi controls in the source
+ * text may be removed and other controls may be added to produce the
+ * minimum combination which has the required display.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_RUNS_ONLY,
+ /** Visual to Logical algorithm which handles numbers like L
+ * (same algorithm as selected by <code>ubidi_setInverse(true)</code>.
+ * @see ubidi_setInverse
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_NUMBERS_AS_L,
+ /** Visual to Logical algorithm equivalent to the regular Logical to Visual
+ * algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_LIKE_DIRECT,
+ /** Inverse Bidi (Visual to Logical) algorithm for the
+ * <code>UBIDI_REORDER_NUMBERS_SPECIAL</code> Bidi algorithm.
+ * @stable ICU 3.6 */
+ UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Number of values for reordering mode.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBIDI_REORDER_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UBiDiReorderingMode;
+
+/**
+ * Modify the operation of the Bidi algorithm such that it implements some
+ * variant to the basic Bidi algorithm or approximates an "inverse Bidi"
+ * algorithm, depending on different values of the "reordering mode".
+ * This function must be called before <code>ubidi_setPara()</code>, and stays
+ * in effect until called again with a different argument.
+ *
+ * <p>The normal operation of the Bidi algorithm as described
+ * in the Unicode Standard Annex #9 is to take text stored in logical
+ * (keyboard, typing) order and to determine how to reorder it for visual
+ * rendering.</p>
+ *
+ * <p>With the reordering mode set to a value other than
+ * <code>#UBIDI_REORDER_DEFAULT</code>, this function changes the behavior of
+ * some of the subsequent functions in a way such that they implement an
+ * inverse Bidi algorithm or some other algorithm variants.</p>
+ *
+ * <p>Some legacy systems store text in visual order, and for operations
+ * with standard, Unicode-based algorithms, the text needs to be transformed
+ * into logical order. This is effectively the inverse algorithm of the
+ * described Bidi algorithm. Note that there is no standard algorithm for
+ * this "inverse Bidi", so a number of variants are implemented here.</p>
+ *
+ * <p>In other cases, it may be desirable to emulate some variant of the
+ * Logical to Visual algorithm (e.g. one used in MS Windows), or perform a
+ * Logical to Logical transformation.</p>
+ *
+ * <ul>
+ * <li>When the reordering mode is set to <code>#UBIDI_REORDER_DEFAULT</code>,
+ * the standard Bidi Logical to Visual algorithm is applied.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_NUMBERS_SPECIAL</code>,
+ * the algorithm used to perform Bidi transformations when calling
+ * <code>ubidi_setPara</code> should approximate the algorithm used in
+ * Microsoft Windows XP rather than strictly conform to the Unicode Bidi
+ * algorithm.
+ * <br>
+ * The differences between the basic algorithm and the algorithm addressed
+ * by this option are as follows:
+ * <ul>
+ * <li>Within text at an even embedding level, the sequence "123AB"
+ * (where AB represent R or AL letters) is transformed to "123BA" by the
+ * Unicode algorithm and to "BA123" by the Windows algorithm.</li>
+ * <li>Arabic-Indic numbers (AN) are handled by the Windows algorithm just
+ * like regular numbers (EN).</li>
+ * </ul></li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_GROUP_NUMBERS_WITH_R</code>,
+ * numbers located between LTR text and RTL text are associated with the RTL
+ * text. For instance, an LTR paragraph with content "abc 123 DEF" (where
+ * upper case letters represent RTL characters) will be transformed to
+ * "abc FED 123" (and not "abc 123 FED"), "DEF 123 abc" will be transformed
+ * to "123 FED abc" and "123 FED abc" will be transformed to "DEF 123 abc".
+ * This makes the algorithm reversible and makes it useful when round trip
+ * (from visual to logical and back to visual) must be achieved without
+ * adding LRM characters. However, this is a variation from the standard
+ * Unicode Bidi algorithm.<br>
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_RUNS_ONLY</code>,
+ * a "Logical to Logical" transformation must be performed:
+ * <ul>
+ * <li>If the default text level of the source text (argument <code>paraLevel</code>
+ * in <code>ubidi_setPara</code>) is even, the source text will be handled as
+ * LTR logical text and will be transformed to the RTL logical text which has
+ * the same LTR visual display.</li>
+ * <li>If the default level of the source text is odd, the source text
+ * will be handled as RTL logical text and will be transformed to the
+ * LTR logical text which has the same LTR visual display.</li>
+ * </ul>
+ * This mode may be needed when logical text which is basically Arabic or
+ * Hebrew, with possible included numbers or phrases in English, has to be
+ * displayed as if it had an even embedding level (this can happen if the
+ * displaying application treats all text as if it was basically LTR).
+ * <br>
+ * This mode may also be needed in the reverse case, when logical text which is
+ * basically English, with possible included phrases in Arabic or Hebrew, has to
+ * be displayed as if it had an odd embedding level.
+ * <br>
+ * Both cases could be handled by adding LRE or RLE at the head of the text,
+ * if the display subsystem supports these formatting controls. If it does not,
+ * the problem may be handled by transforming the source text in this mode
+ * before displaying it, so that it will be displayed properly.<br>
+ * The source text should not contain Bidi control characters other than LRM
+ * or RLM.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>, an "inverse Bidi" algorithm
+ * is applied.
+ * Runs of text with numeric characters will be treated like LTR letters and
+ * may need to be surrounded with LRM characters when they are written in
+ * reordered sequence (the option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> can
+ * be used with function <code>ubidi_writeReordered</code> to this end. This
+ * mode is equivalent to calling <code>ubidi_setInverse()</code> with
+ * argument <code>isInverse</code> set to <code>true</code>.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code>, the "direct" Logical to Visual
+ * Bidi algorithm is used as an approximation of an "inverse Bidi" algorithm.
+ * This mode is similar to mode <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>
+ * but is closer to the regular Bidi algorithm.
+ * <br>
+ * For example, an LTR paragraph with the content "FED 123 456 CBA" (where
+ * upper case represents RTL characters) will be transformed to
+ * "ABC 456 123 DEF", as opposed to "DEF 123 456 ABC"
+ * with mode <code>UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.<br>
+ * When used in conjunction with option
+ * <code>#UBIDI_OPTION_INSERT_MARKS</code>, this mode generally
+ * adds Bidi marks to the output significantly more sparingly than mode
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> with option
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to
+ * <code>ubidi_writeReordered</code>.</li>
+ *
+ * <li>When the reordering mode is set to
+ * <code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code>, the Logical to Visual
+ * Bidi algorithm used in Windows XP is used as an approximation of an "inverse Bidi" algorithm.
+ * <br>
+ * For example, an LTR paragraph with the content "abc FED123" (where
+ * upper case represents RTL characters) will be transformed to "abc 123DEF."</li>
+ * </ul>
+ *
+ * <p>In all the reordering modes specifying an "inverse Bidi" algorithm
+ * (i.e. those with a name starting with <code>UBIDI_REORDER_INVERSE</code>),
+ * output runs should be retrieved using
+ * <code>ubidi_getVisualRun()</code>, and the output text with
+ * <code>ubidi_writeReordered()</code>. The caller should keep in mind that in
+ * "inverse Bidi" modes the input is actually visually ordered text and
+ * reordered output returned by <code>ubidi_getVisualRun()</code> or
+ * <code>ubidi_writeReordered()</code> are actually runs or character string
+ * of logically ordered output.<br>
+ * For all the "inverse Bidi" modes, the source text should not contain
+ * Bidi control characters other than LRM or RLM.</p>
+ *
+ * <p>Note that option <code>#UBIDI_OUTPUT_REVERSE</code> of
+ * <code>ubidi_writeReordered</code> has no useful meaning and should not be
+ * used in conjunction with any value of the reordering mode specifying
+ * "inverse Bidi" or with value <code>UBIDI_REORDER_RUNS_ONLY</code>.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @param reorderingMode specifies the required variant of the Bidi algorithm.
+ *
+ * @see UBiDiReorderingMode
+ * @see ubidi_setInverse
+ * @see ubidi_setPara
+ * @see ubidi_writeReordered
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingMode(UBiDi *pBiDi, UBiDiReorderingMode reorderingMode);
+
+/**
+ * What is the requested reordering mode for a given Bidi object?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return the current reordering mode of the Bidi object
+ * @see ubidi_setReorderingMode
+ * @stable ICU 3.6
+ */
+U_CAPI UBiDiReorderingMode U_EXPORT2
+ubidi_getReorderingMode(UBiDi *pBiDi);
+
+/**
+ * <code>UBiDiReorderingOption</code> values indicate which options are
+ * specified to affect the Bidi algorithm.
+ *
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+typedef enum UBiDiReorderingOption {
+ /**
+ * option value for <code>ubidi_setReorderingOptions</code>:
+ * disable all the options which can be set with this function
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_DEFAULT = 0,
+
+ /**
+ * option bit for <code>ubidi_setReorderingOptions</code>:
+ * insert Bidi marks (LRM or RLM) when needed to ensure correct result of
+ * a reordering to a Logical order
+ *
+ * <p>This option must be set or reset before calling
+ * <code>ubidi_setPara</code>.</p>
+ *
+ * <p>This option is significant only with reordering modes which generate
+ * a result with Logical order, specifically:</p>
+ * <ul>
+ * <li><code>#UBIDI_REORDER_RUNS_ONLY</code></li>
+ * <li><code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code></li>
+ * <li><code>#UBIDI_REORDER_INVERSE_LIKE_DIRECT</code></li>
+ * <li><code>#UBIDI_REORDER_INVERSE_FOR_NUMBERS_SPECIAL</code></li>
+ * </ul>
+ *
+ * <p>If this option is set in conjunction with reordering mode
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> or with calling
+ * <code>ubidi_setInverse(true)</code>, it implies
+ * option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>
+ * in calls to function <code>ubidi_writeReordered()</code>.</p>
+ *
+ * <p>For other reordering modes, a minimum number of LRM or RLM characters
+ * will be added to the source text after reordering it so as to ensure
+ * round trip, i.e. when applying the inverse reordering mode on the
+ * resulting logical text with removal of Bidi marks
+ * (option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> set before calling
+ * <code>ubidi_setPara()</code> or option <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
+ * in <code>ubidi_writeReordered</code>), the result will be identical to the
+ * source text in the first transformation.
+ *
+ * <p>This option will be ignored if specified together with option
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>. It inhibits option
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to function
+ * <code>ubidi_writeReordered()</code> and it implies option
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls to function
+ * <code>ubidi_writeReordered()</code> if the reordering mode is
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code>.</p>
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_INSERT_MARKS = 1,
+
+ /**
+ * option bit for <code>ubidi_setReorderingOptions</code>:
+ * remove Bidi control characters
+ *
+ * <p>This option must be set or reset before calling
+ * <code>ubidi_setPara</code>.</p>
+ *
+ * <p>This option nullifies option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * It inhibits option <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code> in calls
+ * to function <code>ubidi_writeReordered()</code> and it implies option
+ * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code> in calls to that function.</p>
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_REMOVE_CONTROLS = 2,
+
+ /**
+ * option bit for <code>ubidi_setReorderingOptions</code>:
+ * process the output as part of a stream to be continued
+ *
+ * <p>This option must be set or reset before calling
+ * <code>ubidi_setPara</code>.</p>
+ *
+ * <p>This option specifies that the caller is interested in processing large
+ * text object in parts.
+ * The results of the successive calls are expected to be concatenated by the
+ * caller. Only the call for the last part will have this option bit off.</p>
+ *
+ * <p>When this option bit is on, <code>ubidi_setPara()</code> may process
+ * less than the full source text in order to truncate the text at a meaningful
+ * boundary. The caller should call <code>ubidi_getProcessedLength()</code>
+ * immediately after calling <code>ubidi_setPara()</code> in order to
+ * determine how much of the source text has been processed.
+ * Source text beyond that length should be resubmitted in following calls to
+ * <code>ubidi_setPara</code>. The processed length may be less than
+ * the length of the source text if a character preceding the last character of
+ * the source text constitutes a reasonable boundary (like a block separator)
+ * for text to be continued.<br>
+ * If the last character of the source text constitutes a reasonable
+ * boundary, the whole text will be processed at once.<br>
+ * If nowhere in the source text there exists
+ * such a reasonable boundary, the processed length will be zero.<br>
+ * The caller should check for such an occurrence and do one of the following:
+ * <ul><li>submit a larger amount of text with a better chance to include
+ * a reasonable boundary.</li>
+ * <li>resubmit the same text after turning off option
+ * <code>UBIDI_OPTION_STREAMING</code>.</li></ul>
+ * In all cases, this option should be turned off before processing the last
+ * part of the text.</p>
+ *
+ * <p>When the <code>UBIDI_OPTION_STREAMING</code> option is used,
+ * it is recommended to call <code>ubidi_orderParagraphsLTR()</code> with
+ * argument <code>orderParagraphsLTR</code> set to <code>true</code> before
+ * calling <code>ubidi_setPara</code> so that later paragraphs may be
+ * concatenated to previous paragraphs on the right.</p>
+ *
+ * @see ubidi_setReorderingMode
+ * @see ubidi_setReorderingOptions
+ * @see ubidi_getProcessedLength
+ * @see ubidi_orderParagraphsLTR
+ * @stable ICU 3.6
+ */
+ UBIDI_OPTION_STREAMING = 4
+} UBiDiReorderingOption;
+
+/**
+ * Specify which of the reordering options
+ * should be applied during Bidi transformations.
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @param reorderingOptions is a combination of zero or more of the following
+ * options:
+ * <code>#UBIDI_OPTION_DEFAULT</code>, <code>#UBIDI_OPTION_INSERT_MARKS</code>,
+ * <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>, <code>#UBIDI_OPTION_STREAMING</code>.
+ *
+ * @see ubidi_getReorderingOptions
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setReorderingOptions(UBiDi *pBiDi, uint32_t reorderingOptions);
+
+/**
+ * What are the reordering options applied to a given Bidi object?
+ *
+ * @param pBiDi is a <code>UBiDi</code> object.
+ * @return the current reordering options of the Bidi object
+ * @see ubidi_setReorderingOptions
+ * @stable ICU 3.6
+ */
+U_CAPI uint32_t U_EXPORT2
+ubidi_getReorderingOptions(UBiDi *pBiDi);
+
+/**
+ * Set the context before a call to ubidi_setPara().<p>
+ *
+ * ubidi_setPara() computes the left-right directionality for a given piece
+ * of text which is supplied as one of its arguments. Sometimes this piece
+ * of text (the "main text") should be considered in context, because text
+ * appearing before ("prologue") and/or after ("epilogue") the main text
+ * may affect the result of this computation.<p>
+ *
+ * This function specifies the prologue and/or the epilogue for the next
+ * call to ubidi_setPara(). The characters specified as prologue and
+ * epilogue should not be modified by the calling program until the call
+ * to ubidi_setPara() has returned. If successive calls to ubidi_setPara()
+ * all need specification of a context, ubidi_setContext() must be called
+ * before each call to ubidi_setPara(). In other words, a context is not
+ * "remembered" after the following successful call to ubidi_setPara().<p>
+ *
+ * If a call to ubidi_setPara() specifies UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL as paraLevel and is preceded by a call to
+ * ubidi_setContext() which specifies a prologue, the paragraph level will
+ * be computed taking in consideration the text in the prologue.<p>
+ *
+ * When ubidi_setPara() is called without a previous call to
+ * ubidi_setContext, the main text is handled as if preceded and followed
+ * by strong directional characters at the current paragraph level.
+ * Calling ubidi_setContext() with specification of a prologue will change
+ * this behavior by handling the main text as if preceded by the last
+ * strong character appearing in the prologue, if any.
+ * Calling ubidi_setContext() with specification of an epilogue will change
+ * the behavior of ubidi_setPara() by handling the main text as if followed
+ * by the first strong character or digit appearing in the epilogue, if any.<p>
+ *
+ * Note 1: if <code>ubidi_setContext</code> is called repeatedly without
+ * calling <code>ubidi_setPara</code>, the earlier calls have no effect,
+ * only the last call will be remembered for the next call to
+ * <code>ubidi_setPara</code>.<p>
+ *
+ * Note 2: calling <code>ubidi_setContext(pBiDi, NULL, 0, NULL, 0, &errorCode)</code>
+ * cancels any previous setting of non-empty prologue or epilogue.
+ * The next call to <code>ubidi_setPara()</code> will process no
+ * prologue or epilogue.<p>
+ *
+ * Note 3: users must be aware that even after setting the context
+ * before a call to ubidi_setPara() to perform e.g. a logical to visual
+ * transformation, the resulting string may not be identical to what it
+ * would have been if all the text, including prologue and epilogue, had
+ * been processed together.<br>
+ * Example (upper case letters represent RTL characters):<br>
+ * &nbsp;&nbsp;prologue = "<code>abc DE</code>"<br>
+ * &nbsp;&nbsp;epilogue = none<br>
+ * &nbsp;&nbsp;main text = "<code>FGH xyz</code>"<br>
+ * &nbsp;&nbsp;paraLevel = UBIDI_LTR<br>
+ * &nbsp;&nbsp;display without prologue = "<code>HGF xyz</code>"
+ * ("HGF" is adjacent to "xyz")<br>
+ * &nbsp;&nbsp;display with prologue = "<code>abc HGFED xyz</code>"
+ * ("HGF" is not adjacent to "xyz")<br>
+ *
+ * @param pBiDi is a paragraph <code>UBiDi</code> object.
+ *
+ * @param prologue is a pointer to the text which precedes the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no prologue to consider, then <code>proLength</code>
+ * must be zero and this pointer can be NULL.
+ *
+ * @param proLength is the length of the prologue; if <code>proLength==-1</code>
+ * then the prologue must be zero-terminated.
+ * Otherwise proLength must be >= 0. If <code>proLength==0</code>, it means
+ * that there is no prologue to consider.
+ *
+ * @param epilogue is a pointer to the text which follows the text that
+ * will be specified in a coming call to ubidi_setPara().
+ * If there is no epilogue to consider, then <code>epiLength</code>
+ * must be zero and this pointer can be NULL.
+ *
+ * @param epiLength is the length of the epilogue; if <code>epiLength==-1</code>
+ * then the epilogue must be zero-terminated.
+ * Otherwise epiLength must be >= 0. If <code>epiLength==0</code>, it means
+ * that there is no epilogue to consider.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @stable ICU 4.8
+ */
+U_CAPI void U_EXPORT2
+ubidi_setContext(UBiDi *pBiDi,
+ const UChar *prologue, int32_t proLength,
+ const UChar *epilogue, int32_t epiLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Perform the Unicode Bidi algorithm. It is defined in the
+ * <a href="http://www.unicode.org/unicode/reports/tr9/">Unicode Standard Annex #9</a>,
+ * version 13,
+ * also described in The Unicode Standard, Version 4.0 .<p>
+ *
+ * This function takes a piece of plain text containing one or more paragraphs,
+ * with or without externally specified embedding levels from <i>styled</i>
+ * text and computes the left-right-directionality of each character.<p>
+ *
+ * If the entire text is all of the same directionality, then
+ * the function may not perform all the steps described by the algorithm,
+ * i.e., some levels may not be the same as if all steps were performed.
+ * This is not relevant for unidirectional text.<br>
+ * For example, in pure LTR text with numbers the numbers would get
+ * a resolved level of 2 higher than the surrounding text according to
+ * the algorithm. This implementation may set all resolved levels to
+ * the same value in such a case.<p>
+ *
+ * The text can be composed of multiple paragraphs. Occurrence of a block
+ * separator in the text terminates a paragraph, and whatever comes next starts
+ * a new paragraph. The exception to this rule is when a Carriage Return (CR)
+ * is followed by a Line Feed (LF). Both CR and LF are block separators, but
+ * in that case, the pair of characters is considered as terminating the
+ * preceding paragraph, and a new paragraph will be started by a character
+ * coming after the LF.
+ *
+ * @param pBiDi A <code>UBiDi</code> object allocated with <code>ubidi_open()</code>
+ * which will be set to contain the reordering information,
+ * especially the resolved levels for all the characters in <code>text</code>.
+ *
+ * @param text is a pointer to the text that the Bidi algorithm will be performed on.
+ * This pointer is stored in the UBiDi object and can be retrieved
+ * with <code>ubidi_getText()</code>.<br>
+ * <strong>Note:</strong> the text must be (at least) <code>length</code> long.
+ *
+ * @param length is the length of the text; if <code>length==-1</code> then
+ * the text must be zero-terminated.
+ *
+ * @param paraLevel specifies the default level for the text;
+ * it is typically 0 (LTR) or 1 (RTL).
+ * If the function shall determine the paragraph level from the text,
+ * then <code>paraLevel</code> can be set to
+ * either <code>#UBIDI_DEFAULT_LTR</code>
+ * or <code>#UBIDI_DEFAULT_RTL</code>; if the text contains multiple
+ * paragraphs, the paragraph level shall be determined separately for
+ * each paragraph; if a paragraph does not include any strongly typed
+ * character, then the desired default is used (0 for LTR or 1 for RTL).
+ * Any other value between 0 and <code>#UBIDI_MAX_EXPLICIT_LEVEL</code>
+ * is also valid, with odd levels indicating RTL.
+ *
+ * @param embeddingLevels (in) may be used to preset the embedding and override levels,
+ * ignoring characters like LRE and PDF in the text.
+ * A level overrides the directional property of its corresponding
+ * (same index) character if the level has the
+ * <code>#UBIDI_LEVEL_OVERRIDE</code> bit set.<br><br>
+ * Aside from that bit, it must be
+ * <code>paraLevel<=embeddingLevels[]<=UBIDI_MAX_EXPLICIT_LEVEL</code>,
+ * except that level 0 is always allowed.
+ * Level 0 for a paragraph separator prevents reordering of paragraphs;
+ * this only works reliably if <code>#UBIDI_LEVEL_OVERRIDE</code>
+ * is also set for paragraph separators.
+ * Level 0 for other characters is treated as a wildcard
+ * and is lifted up to the resolved level of the surrounding paragraph.<br><br>
+ * <strong>Caution: </strong>A copy of this pointer, not of the levels,
+ * will be stored in the <code>UBiDi</code> object;
+ * the <code>embeddingLevels</code> array must not be
+ * deallocated before the <code>UBiDi</code> structure is destroyed or reused,
+ * and the <code>embeddingLevels</code>
+ * should not be modified to avoid unexpected results on subsequent Bidi operations.
+ * However, the <code>ubidi_setPara()</code> and
+ * <code>ubidi_setLine()</code> functions may modify some or all of the levels.<br><br>
+ * After the <code>UBiDi</code> object is reused or destroyed, the caller
+ * must take care of the deallocation of the <code>embeddingLevels</code> array.<br><br>
+ * <strong>Note:</strong> the <code>embeddingLevels</code> array must be
+ * at least <code>length</code> long.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setPara(UBiDi *pBiDi, const UChar *text, int32_t length,
+ UBiDiLevel paraLevel, UBiDiLevel *embeddingLevels,
+ UErrorCode *pErrorCode);
+
+/**
+ * <code>ubidi_setLine()</code> sets a <code>UBiDi</code> to
+ * contain the reordering information, especially the resolved levels,
+ * for all the characters in a line of text. This line of text is
+ * specified by referring to a <code>UBiDi</code> object representing
+ * this information for a piece of text containing one or more paragraphs,
+ * and by specifying a range of indexes in this text.<p>
+ * In the new line object, the indexes will range from 0 to <code>limit-start-1</code>.<p>
+ *
+ * This is used after calling <code>ubidi_setPara()</code>
+ * for a piece of text, and after line-breaking on that text.
+ * It is not necessary if each paragraph is treated as a single line.<p>
+ *
+ * After line-breaking, rules (L1) and (L2) for the treatment of
+ * trailing WS and for reordering are performed on
+ * a <code>UBiDi</code> object that represents a line.<p>
+ *
+ * <strong>Important: </strong><code>pLineBiDi</code> shares data with
+ * <code>pParaBiDi</code>.
+ * You must destroy or reuse <code>pLineBiDi</code> before <code>pParaBiDi</code>.
+ * In other words, you must destroy or reuse the <code>UBiDi</code> object for a line
+ * before the object for its parent paragraph.<p>
+ *
+ * The text pointer that was stored in <code>pParaBiDi</code> is also copied,
+ * and <code>start</code> is added to it so that it points to the beginning of the
+ * line for this object.
+ *
+ * @param pParaBiDi is the parent paragraph object. It must have been set
+ * by a successful call to ubidi_setPara.
+ *
+ * @param start is the line's first index into the text.
+ *
+ * @param limit is just behind the line's last index into the text
+ * (its last index +1).<br>
+ * It must be <code>0<=start<limit<=</code>containing paragraph limit.
+ * If the specified line crosses a paragraph boundary, the function
+ * will terminate with error code U_ILLEGAL_ARGUMENT_ERROR.
+ *
+ * @param pLineBiDi is the object that will now represent a line of the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_setLine(const UBiDi *pParaBiDi,
+ int32_t start, int32_t limit,
+ UBiDi *pLineBiDi,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get the directionality of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return a value of <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>
+ * or <code>UBIDI_MIXED</code>
+ * that indicates if the entire text
+ * represented by this object is unidirectional,
+ * and which direction, or if it is mixed-directional.
+ * Note - The value <code>UBIDI_NEUTRAL</code> is never returned from this method.
+ *
+ * @see UBiDiDirection
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getDirection(const UBiDi *pBiDi);
+
+/**
+ * Gets the base direction of the text provided according
+ * to the Unicode Bidirectional Algorithm. The base direction
+ * is derived from the first character in the string with bidirectional
+ * character type L, R, or AL. If the first such character has type L,
+ * <code>UBIDI_LTR</code> is returned. If the first such character has
+ * type R or AL, <code>UBIDI_RTL</code> is returned. If the string does
+ * not contain any character of these types, then
+ * <code>UBIDI_NEUTRAL</code> is returned.
+ *
+ * This is a lightweight function for use when only the base direction
+ * is needed and no further bidi processing of the text is needed.
+ *
+ * @param text is a pointer to the text whose base
+ * direction is needed.
+ * Note: the text must be (at least) @c length long.
+ *
+ * @param length is the length of the text;
+ * if <code>length==-1</code> then the text
+ * must be zero-terminated.
+ *
+ * @return <code>UBIDI_LTR</code>, <code>UBIDI_RTL</code>,
+ * <code>UBIDI_NEUTRAL</code>
+ *
+ * @see UBiDiDirection
+ * @stable ICU 4.6
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getBaseDirection(const UChar *text, int32_t length );
+
+/**
+ * Get the pointer to the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The pointer to the text that the UBiDi object was created for.
+ *
+ * @see ubidi_setPara
+ * @see ubidi_setLine
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar * U_EXPORT2
+ubidi_getText(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The length of the text that the UBiDi object was created for.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLength(const UBiDi *pBiDi);
+
+/**
+ * Get the paragraph level of the text.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The paragraph level. If there are multiple paragraphs, their
+ * level may vary if the required paraLevel is UBIDI_DEFAULT_LTR or
+ * UBIDI_DEFAULT_RTL. In that case, the level of the first paragraph
+ * is returned.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getParagraph
+ * @see ubidi_getParagraphByIndex
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getParaLevel(const UBiDi *pBiDi);
+
+/**
+ * Get the number of paragraphs.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @return The number of paragraphs.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countParagraphs(UBiDi *pBiDi);
+
+/**
+ * Get a paragraph, given a position within the text.
+ * This function returns information about a paragraph.<br>
+ * Note: if the paragraph index is known, it is more efficient to
+ * retrieve the paragraph information using ubidi_getParagraphByIndex().<p>
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex is the index of a character within the text, in the
+ * range <code>[0..ubidi_getProcessedLength(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * <code>charIndex</code>.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of the paragraph containing the specified position.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getParagraph(const UBiDi *pBiDi, int32_t charIndex, int32_t *pParaStart,
+ int32_t *pParaLimit, UBiDiLevel *pParaLevel,
+ UErrorCode *pErrorCode);
+
+/**
+ * Get a paragraph, given the index of this paragraph.
+ *
+ * This function returns information about a paragraph.<p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param paraIndex is the number of the paragraph, in the
+ * range <code>[0..ubidi_countParagraphs(pBiDi)-1]</code>.
+ *
+ * @param pParaStart will receive the index of the first character of the
+ * paragraph in the text.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLimit will receive the limit of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pParaLevel will receive the level of the paragraph.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubidi_getParagraphByIndex(const UBiDi *pBiDi, int32_t paraIndex,
+ int32_t *pParaStart, int32_t *pParaLimit,
+ UBiDiLevel *pParaLevel, UErrorCode *pErrorCode);
+
+/**
+ * Get the level for one character.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param charIndex the index of a character. It must be in the range
+ * [0..ubidi_getProcessedLength(pBiDi)].
+ *
+ * @return The level for the character at charIndex (0 if charIndex is not
+ * in the valid range).
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiLevel U_EXPORT2
+ubidi_getLevelAt(const UBiDi *pBiDi, int32_t charIndex);
+
+/**
+ * Get an array of levels for each character.<p>
+ *
+ * Note that this function may allocate memory under some
+ * circumstances, unlike <code>ubidi_getLevelAt()</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object, whose
+ * text length must be strictly positive.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The levels array for the text,
+ * or <code>NULL</code> if an error occurs.
+ *
+ * @see UBiDiLevel
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI const UBiDiLevel * U_EXPORT2
+ubidi_getLevels(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical run.
+ * This function returns information about a run and is used
+ * to retrieve runs in logical order.<p>
+ * This is especially useful for line-breaking on a paragraph.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalPosition is a logical position within the source text.
+ *
+ * @param pLogicalLimit will receive the limit of the corresponding run.
+ * The l-value that you point to here may be the
+ * same expression (variable) as the one for
+ * <code>logicalPosition</code>.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @param pLevel will receive the level of the corresponding run.
+ * This pointer can be <code>NULL</code> if this
+ * value is not necessary.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalRun(const UBiDi *pBiDi, int32_t logicalPosition,
+ int32_t *pLogicalLimit, UBiDiLevel *pLevel);
+
+/**
+ * Get the number of runs.
+ * This function may invoke the actual reordering on the
+ * <code>UBiDi</code> object, after <code>ubidi_setPara()</code>
+ * may have resolved only the levels of the text. Therefore,
+ * <code>ubidi_countRuns()</code> may have to allocate memory,
+ * and may fail doing so.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The number of runs.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_countRuns(UBiDi *pBiDi, UErrorCode *pErrorCode);
+
+/**
+ * Get one run's logical start, length, and directionality,
+ * which can be 0 for LTR or 1 for RTL.
+ * In an RTL run, the character at the logical start is
+ * visually on the right of the displayed run.
+ * The length is the number of characters in the run.<p>
+ * <code>ubidi_countRuns()</code> should be called
+ * before the runs are retrieved.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param runIndex is the number of the run in visual order, in the
+ * range <code>[0..ubidi_countRuns(pBiDi)-1]</code>.
+ *
+ * @param pLogicalStart is the first logical character index in the text.
+ * The pointer may be <code>NULL</code> if this index is not needed.
+ *
+ * @param pLength is the number of characters (at least one) in the run.
+ * The pointer may be <code>NULL</code> if this is not needed.
+ *
+ * @return the directionality of the run,
+ * <code>UBIDI_LTR==0</code> or <code>UBIDI_RTL==1</code>,
+ * never <code>UBIDI_MIXED</code>,
+ * never <code>UBIDI_NEUTRAL</code>.
+ *
+ * @see ubidi_countRuns
+ *
+ * Example:
+ * <pre>
+ * \code
+ * int32_t i, count=ubidi_countRuns(pBiDi),
+ * logicalStart, visualIndex=0, length;
+ * for(i=0; i<count; ++i) {
+ * if(UBIDI_LTR==ubidi_getVisualRun(pBiDi, i, &logicalStart, &length)) {
+ * do { // LTR
+ * show_char(text[logicalStart++], visualIndex++);
+ * } while(--length>0);
+ * } else {
+ * logicalStart+=length; // logicalLimit
+ * do { // RTL
+ * show_char(text[--logicalStart], visualIndex++);
+ * } while(--length>0);
+ * }
+ * }
+ *\endcode
+ * </pre>
+ *
+ * Note that in right-to-left runs, code like this places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * <p>
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option, can be considered in order
+ * to avoid these issues.
+ * @stable ICU 2.0
+ */
+U_CAPI UBiDiDirection U_EXPORT2
+ubidi_getVisualRun(UBiDi *pBiDi, int32_t runIndex,
+ int32_t *pLogicalStart, int32_t *pLength);
+
+/**
+ * Get the visual position from a logical text position.
+ * If such a mapping is used many times on the same
+ * <code>UBiDi</code> object, then calling
+ * <code>ubidi_getLogicalMap()</code> is more efficient.<p>
+ *
+ * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
+ * visual position because the corresponding text character is a Bidi control
+ * removed from output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param logicalIndex is the index of a character in the text.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The visual position of this character.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getVisualIndex(UBiDi *pBiDi, int32_t logicalIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get the logical text position from a visual position.
+ * If such a mapping is used many times on the same
+ * <code>UBiDi</code> object, then calling
+ * <code>ubidi_getVisualMap()</code> is more efficient.<p>
+ *
+ * The value returned may be <code>#UBIDI_MAP_NOWHERE</code> if there is no
+ * logical position because the corresponding text character is a Bidi mark
+ * inserted in the output by option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * <p>
+ * This is the inverse function to <code>ubidi_getVisualIndex()</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical position returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param visualIndex is the visual position of a character.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The index of this character in the text.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getLogicalIndex(UBiDi *pBiDi, int32_t visualIndex, UErrorCode *pErrorCode);
+
+/**
+ * Get a logical-to-visual index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * <p>
+ * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
+ * corresponding text characters are Bidi controls removed from the visual
+ * output by the option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the visual positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ * <p>
+ * Note that in right-to-left runs, this mapping places
+ * second surrogates before first ones (which is generally a bad idea)
+ * and combining characters before base characters.
+ * Use of <code>ubidi_writeReordered()</code>, optionally with the
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code> option can be considered instead
+ * of using the mapping, in order to avoid these issues.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getProcessedLength()</code>
+ * indexes which will reflect the reordering of the characters.
+ * If option <code>#UBIDI_OPTION_INSERT_MARKS</code> is set, the number
+ * of elements allocated in <code>indexMap</code> must be no less than
+ * <code>ubidi_getResultLength()</code>.
+ * The array does not need to be initialized.<br><br>
+ * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getVisualMap
+ * @see ubidi_getVisualIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getLogicalMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * Get a visual-to-logical index map (array) for the characters in the UBiDi
+ * (paragraph or line) object.
+ * <p>
+ * Some values in the map may be <code>#UBIDI_MAP_NOWHERE</code> if the
+ * corresponding text characters are Bidi marks inserted in the visual output
+ * by the option <code>#UBIDI_OPTION_INSERT_MARKS</code>.
+ * <p>
+ * When the visual output is altered by using options of
+ * <code>ubidi_writeReordered()</code> such as <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>UBIDI_KEEP_BASE_COMBINING</code>, <code>UBIDI_OUTPUT_REVERSE</code>,
+ * <code>UBIDI_REMOVE_BIDI_CONTROLS</code>, the logical positions returned may not
+ * be correct. It is advised to use, when possible, reordering options
+ * such as <code>UBIDI_OPTION_INSERT_MARKS</code> and <code>UBIDI_OPTION_REMOVE_CONTROLS</code>.
+ *
+ * @param pBiDi is the paragraph or line <code>UBiDi</code> object.
+ *
+ * @param indexMap is a pointer to an array of <code>ubidi_getResultLength()</code>
+ * indexes which will reflect the reordering of the characters.
+ * If option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> is set, the number
+ * of elements allocated in <code>indexMap</code> must be no less than
+ * <code>ubidi_getProcessedLength()</code>.
+ * The array does not need to be initialized.<br><br>
+ * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getLogicalMap
+ * @see ubidi_getLogicalIndex
+ * @see ubidi_getProcessedLength
+ * @see ubidi_getResultLength
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_getVisualMap(UBiDi *pBiDi, int32_t *indexMap, UErrorCode *pErrorCode);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getLogicalMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.<p>
+ * The index map will result in <code>indexMap[logicalIndex]==visualIndex</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderLogical(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * This is a convenience function that does not use a UBiDi object.
+ * It is intended to be used for when an application has determined the levels
+ * of objects (character sequences) and just needs to have them reordered (L2).
+ * This is equivalent to using <code>ubidi_getVisualMap()</code> on a
+ * <code>UBiDi</code> object.
+ *
+ * @param levels is an array with <code>length</code> levels that have been determined by
+ * the application.
+ *
+ * @param length is the number of levels in the array, or, semantically,
+ * the number of objects to be reordered.
+ * It must be <code>length>0</code>.
+ *
+ * @param indexMap is a pointer to an array of <code>length</code>
+ * indexes which will reflect the reordering of the characters.
+ * The array does not need to be initialized.<p>
+ * The index map will result in <code>indexMap[visualIndex]==logicalIndex</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_reorderVisual(const UBiDiLevel *levels, int32_t length, int32_t *indexMap);
+
+/**
+ * Invert an index map.
+ * The index mapping of the first map is inverted and written to
+ * the second one.
+ *
+ * @param srcMap is an array with <code>length</code> elements
+ * which defines the original mapping from a source array containing
+ * <code>length</code> elements to a destination array.
+ * Some elements of the source array may have no mapping in the
+ * destination array. In that case, their value will be
+ * the special value <code>UBIDI_MAP_NOWHERE</code>.
+ * All elements must be >=0 or equal to <code>UBIDI_MAP_NOWHERE</code>.
+ * Some elements may have a value >= <code>length</code>, if the
+ * destination array has more elements than the source array.
+ * There must be no duplicate indexes (two or more elements with the
+ * same value except <code>UBIDI_MAP_NOWHERE</code>).
+ *
+ * @param destMap is an array with a number of elements equal to 1 + the highest
+ * value in <code>srcMap</code>.
+ * <code>destMap</code> will be filled with the inverse mapping.
+ * If element with index i in <code>srcMap</code> has a value k different
+ * from <code>UBIDI_MAP_NOWHERE</code>, this means that element i of
+ * the source array maps to element k in the destination array.
+ * The inverse map will have value i in its k-th element.
+ * For all elements of the destination array which do not map to
+ * an element in the source array, the corresponding element in the
+ * inverse map will have a value equal to <code>UBIDI_MAP_NOWHERE</code>.
+ *
+ * @param length is the length of each array.
+ * @see UBIDI_MAP_NOWHERE
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubidi_invertMap(const int32_t *srcMap, int32_t *destMap, int32_t length);
+
+/** option flags for ubidi_writeReordered() */
+
+/**
+ * option bit for ubidi_writeReordered():
+ * keep combining characters after their base characters in RTL runs
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_KEEP_BASE_COMBINING 1
+
+/**
+ * option bit for ubidi_writeReordered():
+ * replace characters with the "mirrored" property in RTL runs
+ * by their mirror-image mappings
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_DO_MIRRORING 2
+
+/**
+ * option bit for ubidi_writeReordered():
+ * surround the run with LRMs if necessary;
+ * this is part of the approximate "inverse Bidi" algorithm
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see ubidi_setInverse
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_INSERT_LRM_FOR_NUMERIC 4
+
+/**
+ * option bit for ubidi_writeReordered():
+ * remove Bidi control characters
+ * (this does not affect #UBIDI_INSERT_LRM_FOR_NUMERIC)
+ *
+ * <p>This option does not imply corresponding adjustment of the index
+ * mappings.</p>
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_REMOVE_BIDI_CONTROLS 8
+
+/**
+ * option bit for ubidi_writeReordered():
+ * write the output in reverse order
+ *
+ * <p>This has the same effect as calling <code>ubidi_writeReordered()</code>
+ * first without this option, and then calling
+ * <code>ubidi_writeReverse()</code> without mirroring.
+ * Doing this in the same step is faster and avoids a temporary buffer.
+ * An example for using this option is output to a character terminal that
+ * is designed for RTL scripts and stores text in reverse order.</p>
+ *
+ * @see ubidi_writeReordered
+ * @stable ICU 2.0
+ */
+#define UBIDI_OUTPUT_REVERSE 16
+
+/**
+ * Get the length of the source text processed by the last call to
+ * <code>ubidi_setPara()</code>. This length may be different from the length
+ * of the source text if option <code>#UBIDI_OPTION_STREAMING</code>
+ * has been set.
+ * <br>
+ * Note that whenever the length of the text affects the execution or the
+ * result of a function, it is the processed length which must be considered,
+ * except for <code>ubidi_setPara</code> (which receives unprocessed source
+ * text) and <code>ubidi_getLength</code> (which returns the original length
+ * of the source text).<br>
+ * In particular, the processed length is the one to consider in the following
+ * cases:
+ * <ul>
+ * <li>maximum value of the <code>limit</code> argument of
+ * <code>ubidi_setLine</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>ubidi_getParagraph</code></li>
+ * <li>maximum value of the <code>charIndex</code> argument of
+ * <code>ubidi_getLevelAt</code></li>
+ * <li>number of elements in the array returned by <code>ubidi_getLevels</code></li>
+ * <li>maximum value of the <code>logicalStart</code> argument of
+ * <code>ubidi_getLogicalRun</code></li>
+ * <li>maximum value of the <code>logicalIndex</code> argument of
+ * <code>ubidi_getVisualIndex</code></li>
+ * <li>number of elements filled in the <code>*indexMap</code> argument of
+ * <code>ubidi_getLogicalMap</code></li>
+ * <li>length of text processed by <code>ubidi_writeReordered</code></li>
+ * </ul>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @return The length of the part of the source text processed by
+ * the last call to <code>ubidi_setPara</code>.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_STREAMING
+ * @stable ICU 3.6
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getProcessedLength(const UBiDi *pBiDi);
+
+/**
+ * Get the length of the reordered text resulting from the last call to
+ * <code>ubidi_setPara()</code>. This length may be different from the length
+ * of the source text if option <code>#UBIDI_OPTION_INSERT_MARKS</code>
+ * or option <code>#UBIDI_OPTION_REMOVE_CONTROLS</code> has been set.
+ * <br>
+ * This resulting length is the one to consider in the following cases:
+ * <ul>
+ * <li>maximum value of the <code>visualIndex</code> argument of
+ * <code>ubidi_getLogicalIndex</code></li>
+ * <li>number of elements of the <code>*indexMap</code> argument of
+ * <code>ubidi_getVisualMap</code></li>
+ * </ul>
+ * Note that this length stays identical to the source text length if
+ * Bidi marks are inserted or removed using option bits of
+ * <code>ubidi_writeReordered</code>, or if option
+ * <code>#UBIDI_REORDER_INVERSE_NUMBERS_AS_L</code> has been set.
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @return The length of the reordered text resulting from
+ * the last call to <code>ubidi_setPara</code>.
+ * @see ubidi_setPara
+ * @see UBIDI_OPTION_INSERT_MARKS
+ * @see UBIDI_OPTION_REMOVE_CONTROLS
+ * @stable ICU 3.6
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_getResultLength(const UBiDi *pBiDi);
+
+U_CDECL_BEGIN
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Value returned by <code>UBiDiClassCallback</code> callbacks when
+ * there is no need to override the standard Bidi class for a given code point.
+ *
+ * This constant is deprecated; use u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1 instead.
+ *
+ * @see UBiDiClassCallback
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+#define U_BIDI_CLASS_DEFAULT U_CHAR_DIRECTION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+
+/**
+ * Callback type declaration for overriding default Bidi class values with
+ * custom ones.
+ * <p>Usually, the function pointer will be propagated to a <code>UBiDi</code>
+ * object by calling the <code>ubidi_setClassCallback()</code> function;
+ * then the callback will be invoked by the UBA implementation any time the
+ * class of a character is to be determined.</p>
+ *
+ * @param context is a pointer to the callback private data.
+ *
+ * @param c is the code point to get a Bidi class for.
+ *
+ * @return The directional property / Bidi class for the given code point
+ * <code>c</code> if the default class has been overridden, or
+ * <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>
+ * if the standard Bidi class value for <code>c</code> is to be used.
+ * @see ubidi_setClassCallback
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+typedef UCharDirection U_CALLCONV
+UBiDiClassCallback(const void *context, UChar32 c);
+
+U_CDECL_END
+
+/**
+ * Retrieve the Bidi class for a given code point.
+ * <p>If a <code>#UBiDiClassCallback</code> callback is defined and returns a
+ * value other than <code>u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS)+1</code>,
+ * that value is used; otherwise the default class determination mechanism is invoked.</p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param c is the code point whose Bidi class must be retrieved.
+ *
+ * @return The Bidi class for character <code>c</code> based
+ * on the given <code>pBiDi</code> instance.
+ * @see UBiDiClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI UCharDirection U_EXPORT2
+ubidi_getCustomizedClass(UBiDi *pBiDi, UChar32 c);
+
+/**
+ * Set the callback function and callback data used by the UBA
+ * implementation for Bidi class determination.
+ * <p>This may be useful for assigning Bidi classes to PUA characters, or
+ * for special application needs. For instance, an application may want to
+ * handle all spaces like L or R characters (according to the base direction)
+ * when creating the visual ordering of logical lines which are part of a report
+ * organized in columns: there should not be interaction between adjacent
+ * cells.<p>
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param newFn is the new callback function pointer.
+ *
+ * @param newContext is the new callback context pointer. This can be NULL.
+ *
+ * @param oldFn fillin: Returns the old callback function pointer. This can be
+ * NULL.
+ *
+ * @param oldContext fillin: Returns the old callback's context. This can be
+ * NULL.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @see ubidi_getClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_setClassCallback(UBiDi *pBiDi, UBiDiClassCallback *newFn,
+ const void *newContext, UBiDiClassCallback **oldFn,
+ const void **oldContext, UErrorCode *pErrorCode);
+
+/**
+ * Get the current callback function used for Bidi class determination.
+ *
+ * @param pBiDi is the paragraph <code>UBiDi</code> object.
+ *
+ * @param fn fillin: Returns the callback function pointer.
+ *
+ * @param context fillin: Returns the callback's private context.
+ *
+ * @see ubidi_setClassCallback
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ubidi_getClassCallback(UBiDi *pBiDi, UBiDiClassCallback **fn, const void **context);
+
+/**
+ * Take a <code>UBiDi</code> object containing the reordering
+ * information for a piece of text (one or more paragraphs) set by
+ * <code>ubidi_setPara()</code> or for a line of text set by
+ * <code>ubidi_setLine()</code> and write a reordered string to the
+ * destination buffer.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters in RTL runs can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters; see the description of the <code>destSize</code>
+ * and <code>options</code> parameters and of the option bit flags.
+ *
+ * @param pBiDi A pointer to a <code>UBiDi</code> object that
+ * is set by <code>ubidi_setPara()</code> or
+ * <code>ubidi_setLine()</code> and contains the reordering
+ * information for the text that it was defined for,
+ * as well as a pointer to that text.<br><br>
+ * The text was aliased (only the pointer was stored
+ * without copying the contents) and must not have been modified
+ * since the <code>ubidi_setPara()</code> call.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * The source text and <code>dest[destSize]</code>
+ * must not overlap.
+ *
+ * @param destSize The size of the <code>dest</code> buffer,
+ * in number of UChars.
+ * If the <code>UBIDI_INSERT_LRM_FOR_NUMERIC</code>
+ * option is set, then the destination length could be
+ * as large as
+ * <code>ubidi_getLength(pBiDi)+2*ubidi_countRuns(pBiDi)</code>.
+ * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
+ * is set, then the destination length may be less than
+ * <code>ubidi_getLength(pBiDi)</code>.
+ * If none of these options is set, then the destination length
+ * will be exactly <code>ubidi_getProcessedLength(pBiDi)</code>.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * The options include mirroring the characters on a code
+ * point basis and inserting LRM characters, which is used
+ * especially for transforming visually stored text
+ * to logically stored text (although this is still an
+ * imperfect implementation of an "inverse Bidi" algorithm
+ * because it uses the "forward Bidi" algorithm at its core).
+ * The available options are:
+ * <code>#UBIDI_DO_MIRRORING</code>,
+ * <code>#UBIDI_INSERT_LRM_FOR_NUMERIC</code>,
+ * <code>#UBIDI_KEEP_BASE_COMBINING</code>,
+ * <code>#UBIDI_OUTPUT_REVERSE</code>,
+ * <code>#UBIDI_REMOVE_BIDI_CONTROLS</code>
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ *
+ * @see ubidi_getProcessedLength
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReordered(UBiDi *pBiDi,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Reverse a Right-To-Left run of Unicode text.
+ *
+ * This function preserves the integrity of characters with multiple
+ * code units and (optionally) combining characters.
+ * Characters can be replaced by mirror-image characters
+ * in the destination buffer. Note that "real" mirroring has
+ * to be done in a rendering engine by glyph selection
+ * and that for many "mirrored" characters there are no
+ * Unicode characters as mirror-image equivalents.
+ * There are also options to insert or remove Bidi control
+ * characters.
+ *
+ * This function is the implementation for reversing RTL runs as part
+ * of <code>ubidi_writeReordered()</code>. For detailed descriptions
+ * of the parameters, see there.
+ * Since no Bidi controls are inserted here, the output string length
+ * will never exceed <code>srcLength</code>.
+ *
+ * @see ubidi_writeReordered
+ *
+ * @param src A pointer to the RTL run text.
+ *
+ * @param srcLength The length of the RTL run.
+ *
+ * @param dest A pointer to where the reordered text is to be copied.
+ * <code>src[srcLength]</code> and <code>dest[destSize]</code>
+ * must not overlap.
+ *
+ * @param destSize The size of the <code>dest</code> buffer,
+ * in number of UChars.
+ * If the <code>UBIDI_REMOVE_BIDI_CONTROLS</code> option
+ * is set, then the destination length may be less than
+ * <code>srcLength</code>.
+ * If this option is not set, then the destination length
+ * will be exactly <code>srcLength</code>.
+ *
+ * @param options A bit set of options for the reordering that control
+ * how the reordered text is written.
+ * See the <code>options</code> parameter in <code>ubidi_writeReordered()</code>.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value.
+ *
+ * @return The length of the output string.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubidi_writeReverse(const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ uint16_t options,
+ UErrorCode *pErrorCode);
+
+/*#define BIDI_SAMPLE_CODE*/
+/*@}*/
+
+#endif
diff --git a/intl/icu/source/common/unicode/ubiditransform.h b/intl/icu/source/common/unicode/ubiditransform.h
new file mode 100644
index 0000000000..24433aa8ac
--- /dev/null
+++ b/intl/icu/source/common/unicode/ubiditransform.h
@@ -0,0 +1,326 @@
+/*
+******************************************************************************
+*
+* © 2016 and later: Unicode, Inc. and others.
+* License & terms of use: http://www.unicode.org/copyright.html
+*
+******************************************************************************
+* file name: ubiditransform.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2016jul24
+* created by: Lina Kemmel
+*
+*/
+
+#ifndef UBIDITRANSFORM_H
+#define UBIDITRANSFORM_H
+
+#include "unicode/utypes.h"
+#include "unicode/ubidi.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Bidi Transformations
+ */
+
+/**
+ * `UBiDiOrder` indicates the order of text.
+ *
+ * This bidi transformation engine supports all possible combinations (4 in
+ * total) of input and output text order:
+ *
+ * - <logical input, visual output>: unless the output direction is RTL, this
+ * corresponds to a normal operation of the Bidi algorithm as described in the
+ * Unicode Technical Report and implemented by `UBiDi` when the
+ * reordering mode is set to `UBIDI_REORDER_DEFAULT`. Visual RTL
+ * mode is not supported by `UBiDi` and is accomplished through
+ * reversing a visual LTR string,
+ *
+ * - <visual input, logical output>: unless the input direction is RTL, this
+ * corresponds to an "inverse bidi algorithm" in `UBiDi` with the
+ * reordering mode set to `UBIDI_REORDER_INVERSE_LIKE_DIRECT`.
+ * Visual RTL mode is not not supported by `UBiDi` and is
+ * accomplished through reversing a visual LTR string,
+ *
+ * - <logical input, logical output>: if the input and output base directions
+ * mismatch, this corresponds to the `UBiDi` implementation with the
+ * reordering mode set to `UBIDI_REORDER_RUNS_ONLY`; and if the
+ * input and output base directions are identical, the transformation engine
+ * will only handle character mirroring and Arabic shaping operations without
+ * reordering,
+ *
+ * - <visual input, visual output>: this reordering mode is not supported by
+ * the `UBiDi` engine; it implies character mirroring, Arabic
+ * shaping, and - if the input/output base directions mismatch - string
+ * reverse operations.
+ * @see ubidi_setInverse
+ * @see ubidi_setReorderingMode
+ * @see UBIDI_REORDER_DEFAULT
+ * @see UBIDI_REORDER_INVERSE_LIKE_DIRECT
+ * @see UBIDI_REORDER_RUNS_ONLY
+ * @stable ICU 58
+ */
+typedef enum {
+ /** 0: Constant indicating a logical order.
+ * This is the default for input text.
+ * @stable ICU 58
+ */
+ UBIDI_LOGICAL = 0,
+ /** 1: Constant indicating a visual order.
+ * This is a default for output text.
+ * @stable ICU 58
+ */
+ UBIDI_VISUAL
+} UBiDiOrder;
+
+/**
+ * <code>UBiDiMirroring</code> indicates whether or not characters with the
+ * "mirrored" property in RTL runs should be replaced with their mirror-image
+ * counterparts.
+ * @see UBIDI_DO_MIRRORING
+ * @see ubidi_setReorderingOptions
+ * @see ubidi_writeReordered
+ * @see ubidi_writeReverse
+ * @stable ICU 58
+ */
+typedef enum {
+ /** 0: Constant indicating that character mirroring should not be
+ * performed.
+ * This is the default.
+ * @stable ICU 58
+ */
+ UBIDI_MIRRORING_OFF = 0,
+ /** 1: Constant indicating that character mirroring should be performed.
+ * This corresponds to calling <code>ubidi_writeReordered</code> or
+ * <code>ubidi_writeReverse</code> with the
+ * <code>UBIDI_DO_MIRRORING</code> option bit set.
+ * @stable ICU 58
+ */
+ UBIDI_MIRRORING_ON
+} UBiDiMirroring;
+
+/**
+ * Forward declaration of the <code>UBiDiTransform</code> structure that stores
+ * information used by the layout transformation engine.
+ * @stable ICU 58
+ */
+typedef struct UBiDiTransform UBiDiTransform;
+
+/**
+ * Performs transformation of text from the bidi layout defined by the input
+ * ordering scheme to the bidi layout defined by the output ordering scheme,
+ * and applies character mirroring and Arabic shaping operations.<p>
+ * In terms of <code>UBiDi</code>, such a transformation implies:
+ * <ul>
+ * <li>calling <code>ubidi_setReorderingMode</code> as needed (when the
+ * reordering mode is other than normal),</li>
+ * <li>calling <code>ubidi_setInverse</code> as needed (when text should be
+ * transformed from a visual to a logical form),</li>
+ * <li>resolving embedding levels of each character in the input text by
+ * calling <code>ubidi_setPara</code>,</li>
+ * <li>reordering the characters based on the computed embedding levels, also
+ * performing character mirroring as needed, and streaming the result to the
+ * output, by calling <code>ubidi_writeReordered</code>,</li>
+ * <li>performing Arabic digit and letter shaping on the output text by calling
+ * <code>u_shapeArabic</code>.</li>
+ * </ul>
+ * An "ordering scheme" encompasses the base direction and the order of text,
+ * and these characteristics must be defined by the caller for both input and
+ * output explicitly .<p>
+ * There are 36 possible combinations of <input, output> ordering schemes,
+ * which are partially supported by <code>UBiDi</code> already. Examples of the
+ * currently supported combinations:
+ * <ul>
+ * <li><Logical LTR, Visual LTR>: this is equivalent to calling
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
+ * <li><Logical RTL, Visual LTR>: this is equivalent to calling
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>,</li>
+ * <li><Logical Default ("Auto") LTR, Visual LTR>: this is equivalent to
+ * calling <code>ubidi_setPara</code> with
+ * <code>paraLevel == UBIDI_DEFAULT_LTR</code>,</li>
+ * <li><Logical Default ("Auto") RTL, Visual LTR>: this is equivalent to
+ * calling <code>ubidi_setPara</code> with
+ * <code>paraLevel == UBIDI_DEFAULT_RTL</code>,</li>
+ * <li><Visual LTR, Logical LTR>: this is equivalent to
+ * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_LTR</code>,</li>
+ * <li><Visual LTR, Logical RTL>: this is equivalent to
+ * calling <code>ubidi_setInverse(UBiDi*, true)</code> and then
+ * <code>ubidi_setPara</code> with <code>paraLevel == UBIDI_RTL</code>.</li>
+ * </ul>
+ * All combinations that involve the Visual RTL scheme are unsupported by
+ * <code>UBiDi</code>, for instance:
+ * <ul>
+ * <li><Logical LTR, Visual RTL>,</li>
+ * <li><Visual RTL, Logical RTL>.</li>
+ * </ul>
+ * <p>Example of usage of the transformation engine:<br>
+ * <pre>
+ * \code
+ * UChar text1[] = {'a', 'b', 'c', 0x0625, '1', 0};
+ * UChar text2[] = {'a', 'b', 'c', 0x0625, '1', 0};
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * // Run a transformation.
+ * ubiditransform_transform(pBidiTransform,
+ * text1, -1, text2, -1,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_RTL, UBIDI_LOGICAL,
+ * UBIDI_MIRRORING_OFF,
+ * U_SHAPE_DIGITS_AN2EN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
+ * &errorCode);
+ * // Do something with text2.
+ * text2[4] = '2';
+ * // Run a reverse transformation.
+ * ubiditransform_transform(pBidiTransform,
+ * text2, -1, text1, -1,
+ * UBIDI_RTL, UBIDI_LOGICAL,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_MIRRORING_OFF,
+ * U_SHAPE_DIGITS_EN2AN | U_SHAPE_DIGIT_TYPE_AN_EXTENDED,
+ * &errorCode);
+ *\endcode
+ * </pre>
+ * </p>
+ *
+ * @param pBiDiTransform A pointer to a <code>UBiDiTransform</code> object
+ * allocated with <code>ubiditransform_open()</code> or
+ * <code>NULL</code>.<p>
+ * This object serves for one-time setup to amortize initialization
+ * overheads. Use of this object is not thread-safe. All other threads
+ * should allocate a new <code>UBiDiTransform</code> object by calling
+ * <code>ubiditransform_open()</code> before using it. Alternatively,
+ * a caller can set this parameter to <code>NULL</code>, in which case
+ * the object will be allocated by the engine on the fly.</p>
+ * @param src A pointer to the text that the Bidi layout transformations will
+ * be performed on.
+ * <p><strong>Note:</strong> the text must be (at least)
+ * <code>srcLength</code> long.</p>
+ * @param srcLength The length of the text, in number of UChars. If
+ * <code>length == -1</code> then the text must be zero-terminated.
+ * @param dest A pointer to where the processed text is to be copied.
+ * @param destSize The size of the <code>dest</code> buffer, in number of
+ * UChars. If the <code>U_SHAPE_LETTERS_UNSHAPE</code> option is set,
+ * then the destination length could be as large as
+ * <code>srcLength * 2</code>. Otherwise, the destination length will
+ * not exceed <code>srcLength</code>. If the caller reserves the last
+ * position for zero-termination, it should be excluded from
+ * <code>destSize</code>.
+ * <p><code>destSize == -1</code> is allowed and makes sense when
+ * <code>dest</code> was holds some meaningful value, e.g. that of
+ * <code>src</code>. In this case <code>dest</code> must be
+ * zero-terminated.</p>
+ * @param inParaLevel A base embedding level of the input as defined in
+ * <code>ubidi_setPara</code> documentation for the
+ * <code>paraLevel</code> parameter.
+ * @param inOrder An order of the input, which can be one of the
+ * <code>UBiDiOrder</code> values.
+ * @param outParaLevel A base embedding level of the output as defined in
+ * <code>ubidi_setPara</code> documentation for the
+ * <code>paraLevel</code> parameter.
+ * @param outOrder An order of the output, which can be one of the
+ * <code>UBiDiOrder</code> values.
+ * @param doMirroring Indicates whether or not to perform character mirroring,
+ * and can accept one of the <code>UBiDiMirroring</code> values.
+ * @param shapingOptions Arabic digit and letter shaping options defined in the
+ * ushape.h documentation.
+ * <p><strong>Note:</strong> Direction indicator options are computed by
+ * the transformation engine based on the effective ordering schemes, so
+ * user-defined direction indicators will be ignored.</p>
+ * @param pErrorCode A pointer to an error code value.
+ *
+ * @return The destination length, i.e. the number of UChars written to
+ * <code>dest</code>. If the transformation fails, the return value
+ * will be 0 (and the error code will be written to
+ * <code>pErrorCode</code>).
+ *
+ * @see UBiDiLevel
+ * @see UBiDiOrder
+ * @see UBiDiMirroring
+ * @see ubidi_setPara
+ * @see u_shapeArabic
+ * @stable ICU 58
+ */
+U_CAPI uint32_t U_EXPORT2
+ubiditransform_transform(UBiDiTransform *pBiDiTransform,
+ const UChar *src, int32_t srcLength,
+ UChar *dest, int32_t destSize,
+ UBiDiLevel inParaLevel, UBiDiOrder inOrder,
+ UBiDiLevel outParaLevel, UBiDiOrder outOrder,
+ UBiDiMirroring doMirroring, uint32_t shapingOptions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Allocates a <code>UBiDiTransform</code> object. This object can be reused,
+ * e.g. with different ordering schemes, mirroring or shaping options.<p>
+ * <strong>Note:</strong>The object can only be reused in the same thread.
+ * All other threads should allocate a new <code>UBiDiTransform</code> object
+ * before using it.<p>
+ * Example of usage:<p>
+ * <pre>
+ * \code
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * // Open a new UBiDiTransform.
+ * UBiDiTransform* transform = ubiditransform_open(&errorCode);
+ * // Run a transformation.
+ * ubiditransform_transform(transform,
+ * text1, -1, text2, -1,
+ * UBIDI_RTL, UBIDI_LOGICAL,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_MIRRORING_ON,
+ * U_SHAPE_DIGITS_EN2AN,
+ * &errorCode);
+ * // Do something with the output text and invoke another transformation using
+ * // that text as input.
+ * ubiditransform_transform(transform,
+ * text2, -1, text3, -1,
+ * UBIDI_LTR, UBIDI_VISUAL,
+ * UBIDI_RTL, UBIDI_VISUAL,
+ * UBIDI_MIRRORING_ON,
+ * 0, &errorCode);
+ *\endcode
+ * </pre>
+ * <p>
+ * The <code>UBiDiTransform</code> object must be deallocated by calling
+ * <code>ubiditransform_close()</code>.
+ *
+ * @return An empty <code>UBiDiTransform</code> object.
+ * @stable ICU 58
+ */
+U_CAPI UBiDiTransform* U_EXPORT2
+ubiditransform_open(UErrorCode *pErrorCode);
+
+/**
+ * Deallocates the given <code>UBiDiTransform</code> object.
+ * @stable ICU 58
+ */
+U_CAPI void U_EXPORT2
+ubiditransform_close(UBiDiTransform *pBidiTransform);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBiDiTransformPointer
+ * "Smart pointer" class, closes a UBiDiTransform via ubiditransform_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 58
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBiDiTransformPointer, UBiDiTransform, ubiditransform_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/intl/icu/source/common/unicode/ubrk.h b/intl/icu/source/common/unicode/ubrk.h
new file mode 100644
index 0000000000..2b3dc7aa57
--- /dev/null
+++ b/intl/icu/source/common/unicode/ubrk.h
@@ -0,0 +1,647 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1996-2015, International Business Machines Corporation and others.
+* All Rights Reserved.
+******************************************************************************
+*/
+
+#ifndef UBRK_H
+#define UBRK_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+#include "unicode/utext.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * A text-break iterator.
+ * For usage in C programs.
+ */
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+ /**
+ * Opaque type representing an ICU Break iterator object.
+ * @stable ICU 2.0
+ */
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/parseerr.h"
+
+/**
+ * \file
+ * \brief C API: BreakIterator
+ *
+ * <h2> BreakIterator C API </h2>
+ *
+ * The BreakIterator C API defines methods for finding the location
+ * of boundaries in text. Pointer to a UBreakIterator maintain a
+ * current position and scan over text returning the index of characters
+ * where boundaries occur.
+ * <p>
+ * Line boundary analysis determines where a text string can be broken
+ * when line-wrapping. The mechanism correctly handles punctuation and
+ * hyphenated words.
+ * <p>
+ * Note: The locale keyword "lb" can be used to modify line break
+ * behavior according to the CSS level 3 line-break options, see
+ * <http://dev.w3.org/csswg/css-text/#line-breaking>. For example:
+ * "ja@lb=strict", "zh@lb=loose".
+ * <p>
+ * Sentence boundary analysis allows selection with correct
+ * interpretation of periods within numbers and abbreviations, and
+ * trailing punctuation marks such as quotation marks and parentheses.
+ * <p>
+ * Note: The locale keyword "ss" can be used to enable use of
+ * segmentation suppression data (preventing breaks in English after
+ * abbreviations such as "Mr." or "Est.", for example), as follows:
+ * "en@ss=standard".
+ * <p>
+ * Word boundary analysis is used by search and replace functions, as
+ * well as within text editing applications that allow the user to
+ * select words with a double click. Word selection provides correct
+ * interpretation of punctuation marks within and following
+ * words. Characters that are not part of a word, such as symbols or
+ * punctuation marks, have word-breaks on both sides.
+ * <p>
+ * Character boundary analysis identifies the boundaries of
+ * "Extended Grapheme Clusters", which are groupings of codepoints
+ * that should be treated as character-like units for many text operations.
+ * Please see Unicode Standard Annex #29, Unicode Text Segmentation,
+ * http://www.unicode.org/reports/tr29/ for additional information
+ * on grapheme clusters and guidelines on their use.
+ * <p>
+ * Title boundary analysis locates all positions,
+ * typically starts of words, that should be set to Title Case
+ * when title casing the text.
+ * <p>
+ * The text boundary positions are found according to the rules
+ * described in Unicode Standard Annex #29, Text Boundaries, and
+ * Unicode Standard Annex #14, Line Breaking Properties. These
+ * are available at http://www.unicode.org/reports/tr14/ and
+ * http://www.unicode.org/reports/tr29/.
+ * <p>
+ * In addition to the plain C API defined in this header file, an
+ * object oriented C++ API with equivalent functionality is defined in the
+ * file brkiter.h.
+ * <p>
+ * Code snippets illustrating the use of the Break Iterator APIs
+ * are available in the ICU User Guide,
+ * https://unicode-org.github.io/icu/userguide/boundaryanalysis/
+ * and in the sample program icu/source/samples/break/break.cpp
+ */
+
+/** The possible types of text boundaries. @stable ICU 2.0 */
+typedef enum UBreakIteratorType {
+ /** Character breaks @stable ICU 2.0 */
+ UBRK_CHARACTER = 0,
+ /** Word breaks @stable ICU 2.0 */
+ UBRK_WORD = 1,
+ /** Line breaks @stable ICU 2.0 */
+ UBRK_LINE = 2,
+ /** Sentence breaks @stable ICU 2.0 */
+ UBRK_SENTENCE = 3,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Title Case breaks
+ * The iterator created using this type locates title boundaries as described for
+ * Unicode 3.2 only. For Unicode 4.0 and above title boundary iteration,
+ * please use Word Boundary iterator.
+ *
+ * @deprecated ICU 2.8 Use the word break iterator for titlecasing for Unicode 4 and later.
+ */
+ UBRK_TITLE = 4,
+ /**
+ * One more than the highest normal UBreakIteratorType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBRK_COUNT = 5
+#endif // U_HIDE_DEPRECATED_API
+} UBreakIteratorType;
+
+/** Value indicating all text boundaries have been returned.
+ * @stable ICU 2.0
+ */
+#define UBRK_DONE ((int32_t) -1)
+
+
+/**
+ * Enum constants for the word break tags returned by
+ * getRuleStatus(). A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.2
+*/
+typedef enum UWordBreak {
+ /** Tag value for "words" that do not fit into any of other categories.
+ * Includes spaces and most punctuation. */
+ UBRK_WORD_NONE = 0,
+ /** Upper bound for tags for uncategorized words. */
+ UBRK_WORD_NONE_LIMIT = 100,
+ /** Tag value for words that appear to be numbers, lower limit. */
+ UBRK_WORD_NUMBER = 100,
+ /** Tag value for words that appear to be numbers, upper limit. */
+ UBRK_WORD_NUMBER_LIMIT = 200,
+ /** Tag value for words that contain letters, excluding
+ * hiragana, katakana or ideographic characters, lower limit. */
+ UBRK_WORD_LETTER = 200,
+ /** Tag value for words containing letters, upper limit */
+ UBRK_WORD_LETTER_LIMIT = 300,
+ /** Tag value for words containing kana characters, lower limit */
+ UBRK_WORD_KANA = 300,
+ /** Tag value for words containing kana characters, upper limit */
+ UBRK_WORD_KANA_LIMIT = 400,
+ /** Tag value for words containing ideographic characters, lower limit */
+ UBRK_WORD_IDEO = 400,
+ /** Tag value for words containing ideographic characters, upper limit */
+ UBRK_WORD_IDEO_LIMIT = 500
+} UWordBreak;
+
+/**
+ * Enum constants for the line break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * word, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.8
+*/
+typedef enum ULineBreakTag {
+ /** Tag value for soft line breaks, positions at which a line break
+ * is acceptable but not required */
+ UBRK_LINE_SOFT = 0,
+ /** Upper bound for soft line breaks. */
+ UBRK_LINE_SOFT_LIMIT = 100,
+ /** Tag value for a hard, or mandatory line break */
+ UBRK_LINE_HARD = 100,
+ /** Upper bound for hard line breaks. */
+ UBRK_LINE_HARD_LIMIT = 200
+} ULineBreakTag;
+
+
+
+/**
+ * Enum constants for the sentence break tags returned by getRuleStatus().
+ * A range of values is defined for each category of
+ * sentence, to allow for further subdivisions of a category in future releases.
+ * Applications should check for tag values falling within the range, rather
+ * than for single individual values.
+ *
+ * The numeric values of all of these constants are stable (will not change).
+ *
+ * @stable ICU 2.8
+*/
+typedef enum USentenceBreakTag {
+ /** Tag value for for sentences ending with a sentence terminator
+ * ('.', '?', '!', etc.) character, possibly followed by a
+ * hard separator (CR, LF, PS, etc.)
+ */
+ UBRK_SENTENCE_TERM = 0,
+ /** Upper bound for tags for sentences ended by sentence terminators. */
+ UBRK_SENTENCE_TERM_LIMIT = 100,
+ /** Tag value for for sentences that do not contain an ending
+ * sentence terminator ('.', '?', '!', etc.) character, but
+ * are ended only by a hard separator (CR, LF, PS, etc.) or end of input.
+ */
+ UBRK_SENTENCE_SEP = 100,
+ /** Upper bound for tags for sentences ended by a separator. */
+ UBRK_SENTENCE_SEP_LIMIT = 200
+ /** Tag value for a hard, or mandatory line break */
+} USentenceBreakTag;
+
+
+/**
+ * Open a new UBreakIterator for locating text boundaries for a specified locale.
+ * A UBreakIterator may be used for detecting character, line, word,
+ * and sentence breaks in text.
+ * @param type The type of UBreakIterator to open: one of UBRK_CHARACTER, UBRK_WORD,
+ * UBRK_LINE, UBRK_SENTENCE
+ * @param locale The locale specifying the text-breaking conventions. Note that
+ * locale keys such as "lb" and "ss" may be used to modify text break behavior,
+ * see general discussion of BreakIterator C API.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified locale.
+ * @see ubrk_openRules
+ * @stable ICU 2.0
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_open(UBreakIteratorType type,
+ const char *locale,
+ const UChar *text,
+ int32_t textLength,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using specified breaking rules.
+ * The rule syntax is ... (TBD)
+ * @param rules A set of rules specifying the text breaking conventions.
+ * @param rulesLength The number of characters in rules, or -1 if null-terminated.
+ * @param text The text to be iterated over. May be null, in which case ubrk_setText() is
+ * used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param parseErr Receives position and context information for any syntax errors
+ * detected while parsing the rules.
+ * @param status A UErrorCode to receive any errors.
+ * @return A UBreakIterator for the specified rules.
+ * @see ubrk_open
+ * @stable ICU 2.2
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openRules(const UChar *rules,
+ int32_t rulesLength,
+ const UChar *text,
+ int32_t textLength,
+ UParseError *parseErr,
+ UErrorCode *status);
+
+/**
+ * Open a new UBreakIterator for locating text boundaries using precompiled binary rules.
+ * Opening a UBreakIterator this way is substantially faster than using ubrk_openRules.
+ * Binary rules may be obtained using ubrk_getBinaryRules. The compiled rules are not
+ * compatible across different major versions of ICU, nor across platforms of different
+ * endianness or different base character set family (ASCII vs EBCDIC).
+ * @param binaryRules A set of compiled binary rules specifying the text breaking
+ * conventions. Ownership of the storage containing the compiled
+ * rules remains with the caller of this function. The compiled
+ * rules must not be modified or deleted during the life of the
+ * break iterator.
+ * @param rulesLength The length of binaryRules in bytes; must be >= 0.
+ * @param text The text to be iterated over. May be null, in which case
+ * ubrk_setText() is used to specify the text to be iterated.
+ * @param textLength The number of characters in text, or -1 if null-terminated.
+ * @param status Pointer to UErrorCode to receive any errors.
+ * @return UBreakIterator for the specified rules.
+ * @see ubrk_getBinaryRules
+ * @stable ICU 59
+ */
+U_CAPI UBreakIterator* U_EXPORT2
+ubrk_openBinaryRules(const uint8_t *binaryRules, int32_t rulesLength,
+ const UChar * text, int32_t textLength,
+ UErrorCode * status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * Thread safe cloning operation
+ * @param bi iterator to be cloned
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_BRK_SAFECLONE_BUFFERSIZE.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
+ * If *pBufferSize == 0, a sufficient size for use in cloning will
+ * be returned ('pre-flighting')
+ * If *pBufferSize is not enough for a stack-based safe clone,
+ * new memory will be allocated.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_ERROR, is used
+ * if pBufferSize != NULL and any allocations were necessary
+ * @return pointer to the new clone
+ * @deprecated ICU 69 Use ubrk_clone() instead.
+ */
+U_DEPRECATED UBreakIterator * U_EXPORT2
+ubrk_safeClone(
+ const UBreakIterator *bi,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Thread safe cloning operation.
+ * @param bi iterator to be cloned
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @return pointer to the new clone
+ * @stable ICU 69
+ */
+U_CAPI UBreakIterator * U_EXPORT2
+ubrk_clone(const UBreakIterator *bi,
+ UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * A recommended size (in bytes) for the memory buffer to be passed to ubrk_saveClone().
+ * @deprecated ICU 52. Do not rely on ubrk_safeClone() cloning into any provided buffer.
+ */
+#define U_BRK_SAFECLONE_BUFFERSIZE 1
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+* Close a UBreakIterator.
+* Once closed, a UBreakIterator may no longer be used.
+* @param bi The break iterator to close.
+ * @stable ICU 2.0
+*/
+U_CAPI void U_EXPORT2
+ubrk_close(UBreakIterator *bi);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUBreakIteratorPointer
+ * "Smart pointer" class, closes a UBreakIterator via ubrk_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUBreakIteratorPointer, UBreakIterator, ubrk_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Sets an existing iterator to point to a new piece of text.
+ * The break iterator retains a pointer to the supplied text.
+ * The caller must not modify or delete the text while the BreakIterator
+ * retains the reference.
+ *
+ * @param bi The iterator to use
+ * @param text The text to be set
+ * @param textLength The length of the text
+ * @param status The error code
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ubrk_setText(UBreakIterator* bi,
+ const UChar* text,
+ int32_t textLength,
+ UErrorCode* status);
+
+
+/**
+ * Sets an existing iterator to point to a new piece of text.
+ *
+ * All index positions returned by break iterator functions are
+ * native indices from the UText. For example, when breaking UTF-8
+ * encoded text, the break positions returned by \ref ubrk_next, \ref ubrk_previous, etc.
+ * will be UTF-8 string indices, not UTF-16 positions.
+ *
+ * @param bi The iterator to use
+ * @param text The text to be set.
+ * This function makes a shallow clone of the supplied UText. This means
+ * that the caller is free to immediately close or otherwise reuse the
+ * UText that was passed as a parameter, but that the underlying text itself
+ * must not be altered while being referenced by the break iterator.
+ * @param status The error code
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ubrk_setUText(UBreakIterator* bi,
+ UText* text,
+ UErrorCode* status);
+
+
+
+/**
+ * Determine the most recently-returned text boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index most recently returned by \ref ubrk_next, \ref ubrk_previous,
+ * \ref ubrk_first, or \ref ubrk_last.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_current(const UBreakIterator *bi);
+
+/**
+ * Advance the iterator to the boundary following the current boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the next text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_previous
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_next(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the boundary preceding the current boundary.
+ *
+ * @param bi The break iterator to use.
+ * @return The character index of the preceding text boundary, or UBRK_DONE
+ * if all text boundaries have been returned.
+ * @see ubrk_next
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_previous(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to zero, the start of the text being scanned.
+ * @param bi The break iterator to use.
+ * @return The new iterator position (zero).
+ * @see ubrk_last
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_first(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the index immediately <EM>beyond</EM> the last character in the text being scanned.
+ * This is not the same as the last character.
+ * @param bi The break iterator to use.
+ * @return The character offset immediately <EM>beyond</EM> the last character in the
+ * text being scanned.
+ * @see ubrk_first
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_last(UBreakIterator *bi);
+
+/**
+ * Set the iterator position to the first boundary preceding the specified offset.
+ * The new position is always smaller than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary preceding offset, or UBRK_DONE.
+ * @see ubrk_following
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_preceding(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+ * Advance the iterator to the first boundary following the specified offset.
+ * The value returned is always greater than offset, or UBRK_DONE.
+ * @param bi The break iterator to use.
+ * @param offset The offset to begin scanning.
+ * @return The text boundary following offset, or UBRK_DONE.
+ * @see ubrk_preceding
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_following(UBreakIterator *bi,
+ int32_t offset);
+
+/**
+* Get a locale for which text breaking information is available.
+* A UBreakIterator in a locale returned by this function will perform the correct
+* text breaking for the locale.
+* @param index The index of the desired locale.
+* @return A locale for which number text breaking information is available, or 0 if none.
+* @see ubrk_countAvailable
+* @stable ICU 2.0
+*/
+U_CAPI const char* U_EXPORT2
+ubrk_getAvailable(int32_t index);
+
+/**
+* Determine how many locales have text breaking information available.
+* This function is most useful as determining the loop ending condition for
+* calls to \ref ubrk_getAvailable.
+* @return The number of locales for which text breaking information is available.
+* @see ubrk_getAvailable
+* @stable ICU 2.0
+*/
+U_CAPI int32_t U_EXPORT2
+ubrk_countAvailable(void);
+
+
+/**
+* Returns true if the specified position is a boundary position. As a side
+* effect, leaves the iterator pointing to the first boundary position at
+* or after "offset".
+* @param bi The break iterator to use.
+* @param offset the offset to check.
+* @return True if "offset" is a boundary position.
+* @stable ICU 2.0
+*/
+U_CAPI UBool U_EXPORT2
+ubrk_isBoundary(UBreakIterator *bi, int32_t offset);
+
+/**
+ * Return the status from the break rule that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. For rules that do not specify a
+ * status, a default value of 0 is returned.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatus(UBreakIterator *bi);
+
+/**
+ * Get the statuses from the break rules that determined the most recently
+ * returned break position. The values appear in the rule source
+ * within brackets, {123}, for example. The default status value for rules
+ * that do not explicitly provide one is zero.
+ * <p>
+ * For word break iterators, the possible values are defined in enum UWordBreak.
+ * @param bi The break iterator to use
+ * @param fillInVec an array to be filled in with the status values.
+ * @param capacity the length of the supplied vector. A length of zero causes
+ * the function to return the number of status values, in the
+ * normal way, without attempting to store any values.
+ * @param status receives error codes.
+ * @return The number of rule status values from rules that determined
+ * the most recent boundary returned by the break iterator.
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getRuleStatusVec(UBreakIterator *bi, int32_t *fillInVec, int32_t capacity, UErrorCode *status);
+
+/**
+ * Return the locale of the break iterator. You can choose between the valid and
+ * the actual locale.
+ * @param bi break iterator
+ * @param type locale type (valid or actual)
+ * @param status error code
+ * @return locale string
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+ubrk_getLocaleByType(const UBreakIterator *bi, ULocDataLocaleType type, UErrorCode* status);
+
+/**
+ * Set the subject text string upon which the break iterator is operating
+ * without changing any other aspect of the state.
+ * The new and previous text strings must have the same content.
+ *
+ * This function is intended for use in environments where ICU is operating on
+ * strings that may move around in memory. It provides a mechanism for notifying
+ * ICU that the string has been relocated, and providing a new UText to access the
+ * string in its new position.
+ *
+ * Note that the break iterator never copies the underlying text
+ * of a string being processed, but always operates directly on the original text
+ * provided by the user. Refreshing simply drops the references to the old text
+ * and replaces them with references to the new.
+ *
+ * Caution: this function is normally used only by very specialized
+ * system-level code. One example use case is with garbage collection
+ * that moves the text in memory.
+ *
+ * @param bi The break iterator.
+ * @param text The new (moved) text string.
+ * @param status Receives errors detected by this function.
+ *
+ * @stable ICU 49
+ */
+U_CAPI void U_EXPORT2
+ubrk_refreshUText(UBreakIterator *bi,
+ UText *text,
+ UErrorCode *status);
+
+
+/**
+ * Get a compiled binary version of the rules specifying the behavior of a UBreakIterator.
+ * The binary rules may be used with ubrk_openBinaryRules to open a new UBreakIterator
+ * more quickly than using ubrk_openRules. The compiled rules are not compatible across
+ * different major versions of ICU, nor across platforms of different endianness or
+ * different base character set family (ASCII vs EBCDIC). Supports preflighting (with
+ * binaryRules=NULL and rulesCapacity=0) to get the rules length without copying them to
+ * the binaryRules buffer. However, whether preflighting or not, if the actual length
+ * is greater than INT32_MAX, then the function returns 0 and sets *status to
+ * U_INDEX_OUTOFBOUNDS_ERROR.
+
+ * @param bi The break iterator to use.
+ * @param binaryRules Buffer to receive the compiled binary rules; set to NULL for
+ * preflighting.
+ * @param rulesCapacity Capacity (in bytes) of the binaryRules buffer; set to 0 for
+ * preflighting. Must be >= 0.
+ * @param status Pointer to UErrorCode to receive any errors, such as
+ * U_BUFFER_OVERFLOW_ERROR, U_INDEX_OUTOFBOUNDS_ERROR, or
+ * U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual byte length of the binary rules, if <= INT32_MAX;
+ * otherwise 0. If not preflighting and this is larger than
+ * rulesCapacity, *status will be set to an error.
+ * @see ubrk_openBinaryRules
+ * @stable ICU 59
+ */
+U_CAPI int32_t U_EXPORT2
+ubrk_getBinaryRules(UBreakIterator *bi,
+ uint8_t * binaryRules, int32_t rulesCapacity,
+ UErrorCode * status);
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */
+
+#endif
diff --git a/intl/icu/source/common/unicode/ucasemap.h b/intl/icu/source/common/unicode/ucasemap.h
new file mode 100644
index 0000000000..d1c1b483ab
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucasemap.h
@@ -0,0 +1,388 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2005-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: ucasemap.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2005may06
+* created by: Markus W. Scherer
+*
+* Case mapping service object and functions using it.
+*/
+
+#ifndef __UCASEMAP_H__
+#define __UCASEMAP_H__
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/ustring.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Unicode case mapping functions using a UCaseMap service object.
+ *
+ * The service object takes care of memory allocations, data loading, and setup
+ * for the attributes, as usual.
+ *
+ * Currently, the functionality provided here does not overlap with uchar.h
+ * and ustring.h, except for ucasemap_toTitle().
+ *
+ * ucasemap_utf8XYZ() functions operate directly on UTF-8 strings.
+ */
+
+/**
+ * UCaseMap is an opaque service object for newer ICU case mapping functions.
+ * Older functions did not use a service object.
+ * @stable ICU 3.4
+ */
+struct UCaseMap;
+typedef struct UCaseMap UCaseMap; /**< C typedef for struct UCaseMap. @stable ICU 3.4 */
+
+/**
+ * Open a UCaseMap service object for a locale and a set of options.
+ * The locale ID and options are preprocessed so that functions using the
+ * service object need not process them in each call.
+ *
+ * @param locale ICU locale ID, used for language-dependent
+ * upper-/lower-/title-casing according to the Unicode standard.
+ * Usual semantics: ""=root, NULL=default locale, etc.
+ * @param options Options bit set, used for case folding and string comparisons.
+ * Same flags as for u_foldCase(), u_strFoldCase(),
+ * u_strCaseCompare(), etc.
+ * Use 0 or U_FOLD_CASE_DEFAULT for default behavior.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return Pointer to a UCaseMap service object, if successful.
+ *
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.4
+ */
+U_CAPI UCaseMap * U_EXPORT2
+ucasemap_open(const char *locale, uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Close a UCaseMap service object.
+ * @param csm Object to be closed.
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucasemap_close(UCaseMap *csm);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCaseMapPointer
+ * "Smart pointer" class, closes a UCaseMap via ucasemap_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCaseMapPointer, UCaseMap, ucasemap_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Get the locale ID that is used for language-dependent case mappings.
+ * @param csm UCaseMap service object.
+ * @return locale ID
+ * @stable ICU 3.4
+ */
+U_CAPI const char * U_EXPORT2
+ucasemap_getLocale(const UCaseMap *csm);
+
+/**
+ * Get the options bit set that is used for case folding and string comparisons.
+ * @param csm UCaseMap service object.
+ * @return options bit set
+ * @stable ICU 3.4
+ */
+U_CAPI uint32_t U_EXPORT2
+ucasemap_getOptions(const UCaseMap *csm);
+
+/**
+ * Set the locale ID that is used for language-dependent case mappings.
+ *
+ * @param csm UCaseMap service object.
+ * @param locale Locale ID, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setLocale(UCaseMap *csm, const char *locale, UErrorCode *pErrorCode);
+
+/**
+ * Set the options bit set that is used for case folding and string comparisons.
+ *
+ * @param csm UCaseMap service object.
+ * @param options Options bit set, see ucasemap_open().
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_open
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setOptions(UCaseMap *csm, uint32_t options, UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Get the break iterator that is used for titlecasing.
+ * Do not modify the returned break iterator.
+ * @param csm UCaseMap service object.
+ * @return titlecasing break iterator
+ * @stable ICU 3.8
+ */
+U_CAPI const UBreakIterator * U_EXPORT2
+ucasemap_getBreakIterator(const UCaseMap *csm);
+
+/**
+ * Set the break iterator that is used for titlecasing.
+ * The UCaseMap service object releases a previously set break iterator
+ * and "adopts" this new one, taking ownership of it.
+ * It will be released in a subsequent call to ucasemap_setBreakIterator()
+ * or ucasemap_close().
+ *
+ * Break iterator operations are not thread-safe. Therefore, titlecasing
+ * functions use non-const UCaseMap objects. It is not possible to titlecase
+ * strings concurrently using the same UCaseMap.
+ *
+ * @param csm UCaseMap service object.
+ * @param iterToAdopt Break iterator to be adopted for titlecasing.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see ucasemap_toTitle
+ * @see ucasemap_utf8ToTitle
+ * @stable ICU 3.8
+ */
+U_CAPI void U_EXPORT2
+ucasemap_setBreakIterator(UCaseMap *csm, UBreakIterator *iterToAdopt, UErrorCode *pErrorCode);
+
+/**
+ * Titlecase a UTF-16 string. This function is almost a duplicate of u_strToTitle(),
+ * except that it takes ucasemap_setOptions() into account and has performance
+ * advantages from being able to use a UCaseMap object for multiple case mapping
+ * operations, saving setup time.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object. This pointer is non-const!
+ * See the note above for details.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_toTitle(UCaseMap *csm,
+ UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif // UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Lowercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToLower
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToLower(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Uppercase the characters in a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToUpper
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToUpper(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a UTF-8 string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with ucasemap_setOptions().)
+ *
+ * Note: This function takes a non-const UCaseMap pointer because it will
+ * open a default break iterator if no break iterator was set yet,
+ * and effectively call ucasemap_setBreakIterator();
+ * also because the break iterator is stateful and will be modified during
+ * the iteration.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setUText(), first(), next() and close() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object. This pointer is non-const!
+ * See the note above for details.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strToTitle
+ * @see U_TITLECASE_NO_LOWERCASE
+ * @see U_TITLECASE_NO_BREAK_ADJUSTMENT
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8ToTitle(UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a UTF-8 string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer must not overlap.
+ *
+ * @param csm UCaseMap service object.
+ * @param dest A buffer for the result string. The result will be NUL-terminated if
+ * the buffer is large enough.
+ * The contents is undefined in case of failure.
+ * @param destCapacity The size of the buffer (number of bytes). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string.
+ * @param srcLength The length of the original string. If -1, then src must be NUL-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ *
+ * @see u_strFoldCase
+ * @see ucasemap_setOptions
+ * @see U_FOLD_CASE_DEFAULT
+ * @see U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucasemap_utf8FoldCase(const UCaseMap *csm,
+ char *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/intl/icu/source/common/unicode/ucat.h b/intl/icu/source/common/unicode/ucat.h
new file mode 100644
index 0000000000..93850348ff
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucat.h
@@ -0,0 +1,160 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2003-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* Author: Alan Liu
+* Created: March 19 2003
+* Since: ICU 2.6
+**********************************************************************
+*/
+#ifndef UCAT_H
+#define UCAT_H
+
+#include "unicode/utypes.h"
+#include "unicode/ures.h"
+
+/**
+ * \file
+ * \brief C API: Message Catalog Wrappers
+ *
+ * This C API provides look-alike functions that deliberately resemble
+ * the POSIX catopen, catclose, and catgets functions. The underlying
+ * implementation is in terms of ICU resource bundles, rather than
+ * POSIX message catalogs.
+ *
+ * The ICU resource bundles obey standard ICU inheritance policies.
+ * To facilitate this, sets and messages are flattened into one tier.
+ * This is done by creating resource bundle keys of the form
+ * &lt;set_num&gt;%&lt;msg_num&gt; where set_num is the set number and msg_num is
+ * the message number, formatted as decimal strings.
+ *
+ * Example: Consider a message catalog containing two sets:
+ *
+ * Set 1: Message 4 = "Good morning."
+ * Message 5 = "Good afternoon."
+ * Message 7 = "Good evening."
+ * Message 8 = "Good night."
+ * Set 4: Message 14 = "Please "
+ * Message 19 = "Thank you."
+ * Message 20 = "Sincerely,"
+ *
+ * The ICU resource bundle source file would, assuming it is named
+ * "greet.txt", would look like this:
+ *
+ * greet
+ * {
+ * 1%4 { "Good morning." }
+ * 1%5 { "Good afternoon." }
+ * 1%7 { "Good evening." }
+ * 1%8 { "Good night." }
+ *
+ * 4%14 { "Please " }
+ * 4%19 { "Thank you." }
+ * 4%20 { "Sincerely," }
+ * }
+ *
+ * The catgets function is commonly used in combination with functions
+ * like printf and strftime. ICU components like message format can
+ * be used instead, although they use a different format syntax.
+ * There is an ICU package, icuio, that provides some of
+ * the POSIX-style formatting API.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * An ICU message catalog descriptor, analogous to nl_catd.
+ *
+ * @stable ICU 2.6
+ */
+typedef UResourceBundle* u_nl_catd;
+
+/**
+ * Open and return an ICU message catalog descriptor. The descriptor
+ * may be passed to u_catgets() to retrieve localized strings.
+ *
+ * @param name string containing the full path pointing to the
+ * directory where the resources reside followed by the package name
+ * e.g. "/usr/resource/my_app/resources/guimessages" on a Unix system.
+ * If NULL, ICU default data files will be used.
+ *
+ * Unlike POSIX, environment variables are not interpolated within the
+ * name.
+ *
+ * @param locale the locale for which we want to open the resource. If
+ * NULL, the default ICU locale will be used (see uloc_getDefault). If
+ * strlen(locale) == 0, the root locale will be used.
+ *
+ * @param ec input/output error code. Upon output,
+ * U_USING_FALLBACK_WARNING indicates that a fallback locale was
+ * used. For example, 'de_CH' was requested, but nothing was found
+ * there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that the
+ * default locale data or root locale data was used; neither the
+ * requested locale nor any of its fallback locales were found.
+ *
+ * @return a message catalog descriptor that may be passed to
+ * u_catgets(). If the ec parameter indicates success, then the caller
+ * is responsible for calling u_catclose() to close the message
+ * catalog. If the ec parameter indicates failure, then NULL will be
+ * returned.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI u_nl_catd U_EXPORT2
+u_catopen(const char* name, const char* locale, UErrorCode* ec);
+
+/**
+ * Close an ICU message catalog, given its descriptor.
+ *
+ * @param catd a message catalog descriptor to be closed. May be NULL,
+ * in which case no action is taken.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+u_catclose(u_nl_catd catd);
+
+/**
+ * Retrieve a localized string from an ICU message catalog.
+ *
+ * @param catd a message catalog descriptor returned by u_catopen.
+ *
+ * @param set_num the message catalog set number. Sets need not be
+ * numbered consecutively.
+ *
+ * @param msg_num the message catalog message number within the
+ * set. Messages need not be numbered consecutively.
+ *
+ * @param s the default string. This is returned if the string
+ * specified by the set_num and msg_num is not found. It must be
+ * zero-terminated.
+ *
+ * @param len fill-in parameter to receive the length of the result.
+ * May be NULL, in which case it is ignored.
+ *
+ * @param ec input/output error code. May be U_USING_FALLBACK_WARNING
+ * or U_USING_DEFAULT_WARNING. U_MISSING_RESOURCE_ERROR indicates that
+ * the set_num/msg_num tuple does not specify a valid message string
+ * in this catalog.
+ *
+ * @return a pointer to a zero-terminated UChar array which lives in
+ * an internal buffer area, typically a memory mapped/DLL file. The
+ * caller must NOT delete this pointer. If the call is unsuccessful
+ * for any reason, then s is returned. This includes the situation in
+ * which ec indicates a failing error code upon entry to this
+ * function.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI const UChar* U_EXPORT2
+u_catgets(u_nl_catd catd, int32_t set_num, int32_t msg_num,
+ const UChar* s,
+ int32_t* len, UErrorCode* ec);
+
+U_CDECL_END
+
+#endif /*UCAT_H*/
+/*eof*/
diff --git a/intl/icu/source/common/unicode/uchar.h b/intl/icu/source/common/unicode/uchar.h
new file mode 100644
index 0000000000..4f82a9fb58
--- /dev/null
+++ b/intl/icu/source/common/unicode/uchar.h
@@ -0,0 +1,4169 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File UCHAR.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/02/97 aliu Creation.
+* 03/29/99 helena Updated for C APIs.
+* 4/15/99 Madhu Updated for C Implementation and Javadoc
+* 5/20/99 Madhu Added the function u_getVersion()
+* 8/19/1999 srl Upgraded scripts to Unicode 3.0
+* 8/27/1999 schererm UCharDirection constants: U_...
+* 11/11/1999 weiv added u_isalnum(), cleaned comments
+* 01/11/2000 helena Renamed u_getVersion to u_getUnicodeVersion().
+******************************************************************************
+*/
+
+#ifndef UCHAR_H
+#define UCHAR_H
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/ucpmap.h"
+
+#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
+
+#define USET_DEFINED
+
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * It is forward-declared here to avoid including unicode/uset.h file if related
+ * APIs are not used.
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+
+#endif
+
+
+U_CDECL_BEGIN
+
+/*==========================================================================*/
+/* Unicode version number */
+/*==========================================================================*/
+/**
+ * Unicode version number, default for the current ICU version.
+ * The actual Unicode Character Database (UCD) data is stored in uprops.dat
+ * and may be generated from UCD files from a different Unicode version.
+ * Call u_getUnicodeVersion to get the actual Unicode version of the data.
+ *
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.0
+ */
+#define U_UNICODE_VERSION "15.0"
+
+/**
+ * \file
+ * \brief C API: Unicode Properties
+ *
+ * This C API provides low-level access to the Unicode Character Database.
+ * In addition to raw property values, some convenience functions calculate
+ * derived properties, for example for Java-style programming.
+ *
+ * Unicode assigns each code point (not just assigned character) values for
+ * many properties.
+ * Most of them are simple boolean flags, or constants from a small enumerated list.
+ * For some properties, values are strings or other relatively more complex types.
+ *
+ * For more information see
+ * "About the Unicode Character Database" (http://www.unicode.org/ucd/)
+ * and the ICU User Guide chapter on Properties (https://unicode-org.github.io/icu/userguide/strings/properties).
+ *
+ * Many properties are accessible via generic functions that take a UProperty selector.
+ * - u_hasBinaryProperty() returns a binary value (true/false) per property and code point.
+ * - u_getIntPropertyValue() returns an integer value per property and code point.
+ * For each supported enumerated or catalog property, there is
+ * an enum type for all of the property's values, and
+ * u_getIntPropertyValue() returns the numeric values of those constants.
+ * - u_getBinaryPropertySet() returns a set for each ICU-supported binary property with
+ * all code points for which the property is true.
+ * - u_getIntPropertyMap() returns a map for each
+ * ICU-supported enumerated/catalog/int-valued property which
+ * maps all Unicode code points to their values for that property.
+ *
+ * Many functions are designed to match java.lang.Character functions.
+ * See the individual function documentation,
+ * and see the JDK 1.4 java.lang.Character documentation
+ * at http://java.sun.com/j2se/1.4/docs/api/java/lang/Character.html
+ *
+ * There are also functions that provide easy migration from C/POSIX functions
+ * like isblank(). Their use is generally discouraged because the C/POSIX
+ * standards do not define their semantics beyond the ASCII range, which means
+ * that different implementations exhibit very different behavior.
+ * Instead, Unicode properties should be used directly.
+ *
+ * There are also only a few, broad C/POSIX character classes, and they tend
+ * to be used for conflicting purposes. For example, the "isalpha()" class
+ * is sometimes used to determine word boundaries, while a more sophisticated
+ * approach would at least distinguish initial letters from continuation
+ * characters (the latter including combining marks).
+ * (In ICU, BreakIterator is the most sophisticated API for word boundaries.)
+ * Another example: There is no "istitle()" class for titlecase characters.
+ *
+ * ICU 3.4 and later provides API access for all twelve C/POSIX character classes.
+ * ICU implements them according to the Standard Recommendations in
+ * Annex C: Compatibility Properties of UTS #18 Unicode Regular Expressions
+ * (http://www.unicode.org/reports/tr18/#Compatibility_Properties).
+ *
+ * API access for C/POSIX character classes is as follows:
+ * - alpha: u_isUAlphabetic(c) or u_hasBinaryProperty(c, UCHAR_ALPHABETIC)
+ * - lower: u_isULowercase(c) or u_hasBinaryProperty(c, UCHAR_LOWERCASE)
+ * - upper: u_isUUppercase(c) or u_hasBinaryProperty(c, UCHAR_UPPERCASE)
+ * - punct: u_ispunct(c)
+ * - digit: u_isdigit(c) or u_charType(c)==U_DECIMAL_DIGIT_NUMBER
+ * - xdigit: u_isxdigit(c) or u_hasBinaryProperty(c, UCHAR_POSIX_XDIGIT)
+ * - alnum: u_hasBinaryProperty(c, UCHAR_POSIX_ALNUM)
+ * - space: u_isUWhiteSpace(c) or u_hasBinaryProperty(c, UCHAR_WHITE_SPACE)
+ * - blank: u_isblank(c) or u_hasBinaryProperty(c, UCHAR_POSIX_BLANK)
+ * - cntrl: u_charType(c)==U_CONTROL_CHAR
+ * - graph: u_hasBinaryProperty(c, UCHAR_POSIX_GRAPH)
+ * - print: u_hasBinaryProperty(c, UCHAR_POSIX_PRINT)
+ *
+ * Note: Some of the u_isxyz() functions in uchar.h predate, and do not match,
+ * the Standard Recommendations in UTS #18. Instead, they match Java
+ * functions according to their API documentation.
+ *
+ * \htmlonly
+ * The C/POSIX character classes are also available in UnicodeSet patterns,
+ * using patterns like [:graph:] or \p{graph}.
+ * \endhtmlonly
+ *
+ * Note: There are several ICU whitespace functions.
+ * Comparison:
+ * - u_isUWhiteSpace=UCHAR_WHITE_SPACE: Unicode White_Space property;
+ * most of general categories "Z" (separators) + most whitespace ISO controls
+ * (including no-break spaces, but excluding IS1..IS4)
+ * - u_isWhitespace: Java isWhitespace; Z + whitespace ISO controls but excluding no-break spaces
+ * - u_isJavaSpaceChar: Java isSpaceChar; just Z (including no-break spaces)
+ * - u_isspace: Z + whitespace ISO controls (including no-break spaces)
+ * - u_isblank: "horizontal spaces" = TAB + Zs
+ */
+
+/**
+ * Constants.
+ */
+
+/** The lowest Unicode code point value. Code points are non-negative. @stable ICU 2.0 */
+#define UCHAR_MIN_VALUE 0
+
+/**
+ * The highest Unicode code point value (scalar value) according to
+ * The Unicode Standard. This is a 21-bit value (20.1 bits, rounded up).
+ * For a single character, UChar32 is a simple type that can hold any code point value.
+ *
+ * @see UChar32
+ * @stable ICU 2.0
+ */
+#define UCHAR_MAX_VALUE 0x10ffff
+
+/**
+ * Get a single-bit bit set (a flag) from a bit number 0..31.
+ * @stable ICU 2.1
+ */
+#define U_MASK(x) ((uint32_t)1<<(x))
+
+/**
+ * Selection constants for Unicode properties.
+ * These constants are used in functions like u_hasBinaryProperty to select
+ * one of the Unicode properties.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ *
+ * For details about the properties see
+ * UAX #44: Unicode Character Database (http://www.unicode.org/reports/tr44/).
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below, e.g., 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ * Check u_getUnicodeVersion to be sure.
+ *
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+typedef enum UProperty {
+ /*
+ * Note: UProperty constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UCHAR_<Unicode property name>=<integer>,
+ */
+
+ /* Note: Place UCHAR_ALPHABETIC before UCHAR_BINARY_START so that
+ debuggers display UCHAR_ALPHABETIC as the symbolic name for 0,
+ rather than UCHAR_BINARY_START. Likewise for other *_START
+ identifiers. */
+
+ /** Binary property Alphabetic. Same as u_isUAlphabetic, different from u_isalpha.
+ Lu+Ll+Lt+Lm+Lo+Nl+Other_Alphabetic @stable ICU 2.1 */
+ UCHAR_ALPHABETIC=0,
+ /** First constant for binary Unicode properties. @stable ICU 2.1 */
+ UCHAR_BINARY_START=UCHAR_ALPHABETIC,
+ /** Binary property ASCII_Hex_Digit. 0-9 A-F a-f @stable ICU 2.1 */
+ UCHAR_ASCII_HEX_DIGIT=1,
+ /** Binary property Bidi_Control.
+ Format controls which have specific functions
+ in the Bidi Algorithm. @stable ICU 2.1 */
+ UCHAR_BIDI_CONTROL=2,
+ /** Binary property Bidi_Mirrored.
+ Characters that may change display in RTL text.
+ Same as u_isMirrored.
+ See Bidi Algorithm, UTR 9. @stable ICU 2.1 */
+ UCHAR_BIDI_MIRRORED=3,
+ /** Binary property Dash. Variations of dashes. @stable ICU 2.1 */
+ UCHAR_DASH=4,
+ /** Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
+ Ignorable in most processing.
+ <2060..206F, FFF0..FFFB, E0000..E0FFF>+Other_Default_Ignorable_Code_Point+(Cf+Cc+Cs-White_Space) @stable ICU 2.1 */
+ UCHAR_DEFAULT_IGNORABLE_CODE_POINT=5,
+ /** Binary property Deprecated (new in Unicode 3.2).
+ The usage of deprecated characters is strongly discouraged. @stable ICU 2.1 */
+ UCHAR_DEPRECATED=6,
+ /** Binary property Diacritic. Characters that linguistically modify
+ the meaning of another character to which they apply. @stable ICU 2.1 */
+ UCHAR_DIACRITIC=7,
+ /** Binary property Extender.
+ Extend the value or shape of a preceding alphabetic character,
+ e.g., length and iteration marks. @stable ICU 2.1 */
+ UCHAR_EXTENDER=8,
+ /** Binary property Full_Composition_Exclusion.
+ CompositionExclusions.txt+Singleton Decompositions+
+ Non-Starter Decompositions. @stable ICU 2.1 */
+ UCHAR_FULL_COMPOSITION_EXCLUSION=9,
+ /** Binary property Grapheme_Base (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ [0..10FFFF]-Cc-Cf-Cs-Co-Cn-Zl-Zp-Grapheme_Link-Grapheme_Extend-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_BASE=10,
+ /** Binary property Grapheme_Extend (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries.
+ Me+Mn+Mc+Other_Grapheme_Extend-Grapheme_Link-CGJ @stable ICU 2.1 */
+ UCHAR_GRAPHEME_EXTEND=11,
+ /** Binary property Grapheme_Link (new in Unicode 3.2).
+ For programmatic determination of grapheme cluster boundaries. @stable ICU 2.1 */
+ UCHAR_GRAPHEME_LINK=12,
+ /** Binary property Hex_Digit.
+ Characters commonly used for hexadecimal numbers. @stable ICU 2.1 */
+ UCHAR_HEX_DIGIT=13,
+ /** Binary property Hyphen. Dashes used to mark connections
+ between pieces of words, plus the Katakana middle dot. @stable ICU 2.1 */
+ UCHAR_HYPHEN=14,
+ /** Binary property ID_Continue.
+ Characters that can continue an identifier.
+ DerivedCoreProperties.txt also says "NOTE: Cf characters should be filtered out."
+ ID_Start+Mn+Mc+Nd+Pc @stable ICU 2.1 */
+ UCHAR_ID_CONTINUE=15,
+ /** Binary property ID_Start.
+ Characters that can start an identifier.
+ Lu+Ll+Lt+Lm+Lo+Nl @stable ICU 2.1 */
+ UCHAR_ID_START=16,
+ /** Binary property Ideographic.
+ CJKV ideographs. @stable ICU 2.1 */
+ UCHAR_IDEOGRAPHIC=17,
+ /** Binary property IDS_Binary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_BINARY_OPERATOR=18,
+ /** Binary property IDS_Trinary_Operator (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_IDS_TRINARY_OPERATOR=19,
+ /** Binary property Join_Control.
+ Format controls for cursive joining and ligation. @stable ICU 2.1 */
+ UCHAR_JOIN_CONTROL=20,
+ /** Binary property Logical_Order_Exception (new in Unicode 3.2).
+ Characters that do not use logical order and
+ require special handling in most processing. @stable ICU 2.1 */
+ UCHAR_LOGICAL_ORDER_EXCEPTION=21,
+ /** Binary property Lowercase. Same as u_isULowercase, different from u_islower.
+ Ll+Other_Lowercase @stable ICU 2.1 */
+ UCHAR_LOWERCASE=22,
+ /** Binary property Math. Sm+Other_Math @stable ICU 2.1 */
+ UCHAR_MATH=23,
+ /** Binary property Noncharacter_Code_Point.
+ Code points that are explicitly defined as illegal
+ for the encoding of characters. @stable ICU 2.1 */
+ UCHAR_NONCHARACTER_CODE_POINT=24,
+ /** Binary property Quotation_Mark. @stable ICU 2.1 */
+ UCHAR_QUOTATION_MARK=25,
+ /** Binary property Radical (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_RADICAL=26,
+ /** Binary property Soft_Dotted (new in Unicode 3.2).
+ Characters with a "soft dot", like i or j.
+ An accent placed on these characters causes
+ the dot to disappear. @stable ICU 2.1 */
+ UCHAR_SOFT_DOTTED=27,
+ /** Binary property Terminal_Punctuation.
+ Punctuation characters that generally mark
+ the end of textual units. @stable ICU 2.1 */
+ UCHAR_TERMINAL_PUNCTUATION=28,
+ /** Binary property Unified_Ideograph (new in Unicode 3.2).
+ For programmatic determination of
+ Ideographic Description Sequences. @stable ICU 2.1 */
+ UCHAR_UNIFIED_IDEOGRAPH=29,
+ /** Binary property Uppercase. Same as u_isUUppercase, different from u_isupper.
+ Lu+Other_Uppercase @stable ICU 2.1 */
+ UCHAR_UPPERCASE=30,
+ /** Binary property White_Space.
+ Same as u_isUWhiteSpace, different from u_isspace and u_isWhitespace.
+ Space characters+TAB+CR+LF-ZWSP-ZWNBSP @stable ICU 2.1 */
+ UCHAR_WHITE_SPACE=31,
+ /** Binary property XID_Continue.
+ ID_Continue modified to allow closure under
+ normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_CONTINUE=32,
+ /** Binary property XID_Start. ID_Start modified to allow
+ closure under normalization forms NFKC and NFKD. @stable ICU 2.1 */
+ UCHAR_XID_START=33,
+ /** Binary property Case_Sensitive. Either the source of a case
+ mapping or _in_ the target of a case mapping. Not the same as
+ the general category Cased_Letter. @stable ICU 2.6 */
+ UCHAR_CASE_SENSITIVE=34,
+ /** Binary property STerm (new in Unicode 4.0.1).
+ Sentence Terminal. Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ @stable ICU 3.0 */
+ UCHAR_S_TERM=35,
+ /** Binary property Variation_Selector (new in Unicode 4.0.1).
+ Indicates all those characters that qualify as Variation Selectors.
+ For details on the behavior of these characters,
+ see StandardizedVariants.html and 15.6 Variation Selectors.
+ @stable ICU 3.0 */
+ UCHAR_VARIATION_SELECTOR=36,
+ /** Binary property NFD_Inert.
+ ICU-specific property for characters that are inert under NFD,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFD_INERT=37,
+ /** Binary property NFKD_Inert.
+ ICU-specific property for characters that are inert under NFKD,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFKD_INERT=38,
+ /** Binary property NFC_Inert.
+ ICU-specific property for characters that are inert under NFC,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFC_INERT=39,
+ /** Binary property NFKC_Inert.
+ ICU-specific property for characters that are inert under NFKC,
+ i.e., they do not interact with adjacent characters.
+ See the documentation for the Normalizer2 class and the
+ Normalizer2::isInert() method.
+ @stable ICU 3.0 */
+ UCHAR_NFKC_INERT=40,
+ /** Binary Property Segment_Starter.
+ ICU-specific property for characters that are starters in terms of
+ Unicode normalization and combining character sequences.
+ They have ccc=0 and do not occur in non-initial position of the
+ canonical decomposition of any character
+ (like a-umlaut in NFD and a Jamo T in an NFD(Hangul LVT)).
+ ICU uses this property for segmenting a string for generating a set of
+ canonically equivalent strings, e.g. for canonical closure while
+ processing collation tailoring rules.
+ @stable ICU 3.0 */
+ UCHAR_SEGMENT_STARTER=41,
+ /** Binary property Pattern_Syntax (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_SYNTAX=42,
+ /** Binary property Pattern_White_Space (new in Unicode 4.1).
+ See UAX #31 Identifier and Pattern Syntax
+ (http://www.unicode.org/reports/tr31/)
+ @stable ICU 3.4 */
+ UCHAR_PATTERN_WHITE_SPACE=43,
+ /** Binary property alnum (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_ALNUM=44,
+ /** Binary property blank (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_BLANK=45,
+ /** Binary property graph (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_GRAPH=46,
+ /** Binary property print (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_PRINT=47,
+ /** Binary property xdigit (a C/POSIX character class).
+ Implemented according to the UTS #18 Annex C Standard Recommendation.
+ See the uchar.h file documentation.
+ @stable ICU 3.4 */
+ UCHAR_POSIX_XDIGIT=48,
+ /** Binary property Cased. For Lowercase, Uppercase and Titlecase characters. @stable ICU 4.4 */
+ UCHAR_CASED=49,
+ /** Binary property Case_Ignorable. Used in context-sensitive case mappings. @stable ICU 4.4 */
+ UCHAR_CASE_IGNORABLE=50,
+ /** Binary property Changes_When_Lowercased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_LOWERCASED=51,
+ /** Binary property Changes_When_Uppercased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_UPPERCASED=52,
+ /** Binary property Changes_When_Titlecased. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_TITLECASED=53,
+ /** Binary property Changes_When_Casefolded. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_CASEFOLDED=54,
+ /** Binary property Changes_When_Casemapped. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_CASEMAPPED=55,
+ /** Binary property Changes_When_NFKC_Casefolded. @stable ICU 4.4 */
+ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED=56,
+ /**
+ * Binary property Emoji.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI=57,
+ /**
+ * Binary property Emoji_Presentation.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI_PRESENTATION=58,
+ /**
+ * Binary property Emoji_Modifier.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI_MODIFIER=59,
+ /**
+ * Binary property Emoji_Modifier_Base.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 57
+ */
+ UCHAR_EMOJI_MODIFIER_BASE=60,
+ /**
+ * Binary property Emoji_Component.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 60
+ */
+ UCHAR_EMOJI_COMPONENT=61,
+ /**
+ * Binary property Regional_Indicator.
+ * @stable ICU 60
+ */
+ UCHAR_REGIONAL_INDICATOR=62,
+ /**
+ * Binary property Prepended_Concatenation_Mark.
+ * @stable ICU 60
+ */
+ UCHAR_PREPENDED_CONCATENATION_MARK=63,
+ /**
+ * Binary property Extended_Pictographic.
+ * See http://www.unicode.org/reports/tr51/#Emoji_Properties
+ *
+ * @stable ICU 62
+ */
+ UCHAR_EXTENDED_PICTOGRAPHIC=64,
+ /**
+ * Binary property of strings Basic_Emoji.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_BASIC_EMOJI=65,
+ /**
+ * Binary property of strings Emoji_Keycap_Sequence.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_EMOJI_KEYCAP_SEQUENCE=66,
+ /**
+ * Binary property of strings RGI_Emoji_Modifier_Sequence.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE=67,
+ /**
+ * Binary property of strings RGI_Emoji_Flag_Sequence.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_RGI_EMOJI_FLAG_SEQUENCE=68,
+ /**
+ * Binary property of strings RGI_Emoji_Tag_Sequence.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_RGI_EMOJI_TAG_SEQUENCE=69,
+ /**
+ * Binary property of strings RGI_Emoji_ZWJ_Sequence.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_RGI_EMOJI_ZWJ_SEQUENCE=70,
+ /**
+ * Binary property of strings RGI_Emoji.
+ * See https://www.unicode.org/reports/tr51/#Emoji_Sets
+ *
+ * @stable ICU 70
+ */
+ UCHAR_RGI_EMOJI=71,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for binary Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_BINARY_LIMIT=72,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Enumerated property Bidi_Class.
+ Same as u_charDirection, returns UCharDirection values. @stable ICU 2.2 */
+ UCHAR_BIDI_CLASS=0x1000,
+ /** First constant for enumerated/integer Unicode properties. @stable ICU 2.2 */
+ UCHAR_INT_START=UCHAR_BIDI_CLASS,
+ /** Enumerated property Block.
+ Same as ublock_getCode, returns UBlockCode values. @stable ICU 2.2 */
+ UCHAR_BLOCK=0x1001,
+ /** Enumerated property Canonical_Combining_Class.
+ Same as u_getCombiningClass, returns 8-bit numeric values. @stable ICU 2.2 */
+ UCHAR_CANONICAL_COMBINING_CLASS=0x1002,
+ /** Enumerated property Decomposition_Type.
+ Returns UDecompositionType values. @stable ICU 2.2 */
+ UCHAR_DECOMPOSITION_TYPE=0x1003,
+ /** Enumerated property East_Asian_Width.
+ See http://www.unicode.org/reports/tr11/
+ Returns UEastAsianWidth values. @stable ICU 2.2 */
+ UCHAR_EAST_ASIAN_WIDTH=0x1004,
+ /** Enumerated property General_Category.
+ Same as u_charType, returns UCharCategory values. @stable ICU 2.2 */
+ UCHAR_GENERAL_CATEGORY=0x1005,
+ /** Enumerated property Joining_Group.
+ Returns UJoiningGroup values. @stable ICU 2.2 */
+ UCHAR_JOINING_GROUP=0x1006,
+ /** Enumerated property Joining_Type.
+ Returns UJoiningType values. @stable ICU 2.2 */
+ UCHAR_JOINING_TYPE=0x1007,
+ /** Enumerated property Line_Break.
+ Returns ULineBreak values. @stable ICU 2.2 */
+ UCHAR_LINE_BREAK=0x1008,
+ /** Enumerated property Numeric_Type.
+ Returns UNumericType values. @stable ICU 2.2 */
+ UCHAR_NUMERIC_TYPE=0x1009,
+ /** Enumerated property Script.
+ Same as uscript_getScript, returns UScriptCode values. @stable ICU 2.2 */
+ UCHAR_SCRIPT=0x100A,
+ /** Enumerated property Hangul_Syllable_Type, new in Unicode 4.
+ Returns UHangulSyllableType values. @stable ICU 2.6 */
+ UCHAR_HANGUL_SYLLABLE_TYPE=0x100B,
+ /** Enumerated property NFD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFD_QUICK_CHECK=0x100C,
+ /** Enumerated property NFKD_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKD_QUICK_CHECK=0x100D,
+ /** Enumerated property NFC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFC_QUICK_CHECK=0x100E,
+ /** Enumerated property NFKC_Quick_Check.
+ Returns UNormalizationCheckResult values. @stable ICU 3.0 */
+ UCHAR_NFKC_QUICK_CHECK=0x100F,
+ /** Enumerated property Lead_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the first code point
+ of the decomposition, or lccc(c)=ccc(NFD(c)[0]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_LEAD_CANONICAL_COMBINING_CLASS=0x1010,
+ /** Enumerated property Trail_Canonical_Combining_Class.
+ ICU-specific property for the ccc of the last code point
+ of the decomposition, or tccc(c)=ccc(NFD(c)[last]).
+ Useful for checking for canonically ordered text;
+ see UNORM_FCD and http://www.unicode.org/notes/tn5/#FCD .
+ Returns 8-bit numeric values like UCHAR_CANONICAL_COMBINING_CLASS. @stable ICU 3.0 */
+ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS=0x1011,
+ /** Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UGraphemeClusterBreak values. @stable ICU 3.4 */
+ UCHAR_GRAPHEME_CLUSTER_BREAK=0x1012,
+ /** Enumerated property Sentence_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns USentenceBreak values. @stable ICU 3.4 */
+ UCHAR_SENTENCE_BREAK=0x1013,
+ /** Enumerated property Word_Break (new in Unicode 4.1).
+ Used in UAX #29: Text Boundaries
+ (http://www.unicode.org/reports/tr29/)
+ Returns UWordBreakValues values. @stable ICU 3.4 */
+ UCHAR_WORD_BREAK=0x1014,
+ /** Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
+ Used in UAX #9: Unicode Bidirectional Algorithm
+ (http://www.unicode.org/reports/tr9/)
+ Returns UBidiPairedBracketType values. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET_TYPE=0x1015,
+ /**
+ * Enumerated property Indic_Positional_Category.
+ * New in Unicode 6.0 as provisional property Indic_Matra_Category;
+ * renamed and changed to informative in Unicode 8.0.
+ * See http://www.unicode.org/reports/tr44/#IndicPositionalCategory.txt
+ * @stable ICU 63
+ */
+ UCHAR_INDIC_POSITIONAL_CATEGORY=0x1016,
+ /**
+ * Enumerated property Indic_Syllabic_Category.
+ * New in Unicode 6.0 as provisional; informative since Unicode 8.0.
+ * See http://www.unicode.org/reports/tr44/#IndicSyllabicCategory.txt
+ * @stable ICU 63
+ */
+ UCHAR_INDIC_SYLLABIC_CATEGORY=0x1017,
+ /**
+ * Enumerated property Vertical_Orientation.
+ * Used for UAX #50 Unicode Vertical Text Layout (https://www.unicode.org/reports/tr50/).
+ * New as a UCD property in Unicode 10.0.
+ * @stable ICU 63
+ */
+ UCHAR_VERTICAL_ORIENTATION=0x1018,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for enumerated/integer Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_INT_LIMIT=0x1019,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Bitmask property General_Category_Mask.
+ This is the General_Category property returned as a bit mask.
+ When used in u_getIntPropertyValue(c), same as U_MASK(u_charType(c)),
+ returns bit masks for UCharCategory values where exactly one bit is set.
+ When used with u_getPropertyValueName() and u_getPropertyValueEnum(),
+ a multi-bit mask is used for sets of categories like "Letters".
+ Mask values should be cast to uint32_t.
+ @stable ICU 2.4 */
+ UCHAR_GENERAL_CATEGORY_MASK=0x2000,
+ /** First constant for bit-mask Unicode properties. @stable ICU 2.4 */
+ UCHAR_MASK_START=UCHAR_GENERAL_CATEGORY_MASK,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for bit-mask Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_MASK_LIMIT=0x2001,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Double property Numeric_Value.
+ Corresponds to u_getNumericValue. @stable ICU 2.4 */
+ UCHAR_NUMERIC_VALUE=0x3000,
+ /** First constant for double Unicode properties. @stable ICU 2.4 */
+ UCHAR_DOUBLE_START=UCHAR_NUMERIC_VALUE,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for double Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_DOUBLE_LIMIT=0x3001,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** String property Age.
+ Corresponds to u_charAge. @stable ICU 2.4 */
+ UCHAR_AGE=0x4000,
+ /** First constant for string Unicode properties. @stable ICU 2.4 */
+ UCHAR_STRING_START=UCHAR_AGE,
+ /** String property Bidi_Mirroring_Glyph.
+ Corresponds to u_charMirror. @stable ICU 2.4 */
+ UCHAR_BIDI_MIRRORING_GLYPH=0x4001,
+ /** String property Case_Folding.
+ Corresponds to u_strFoldCase in ustring.h. @stable ICU 2.4 */
+ UCHAR_CASE_FOLDING=0x4002,
+#ifndef U_HIDE_DEPRECATED_API
+ /** Deprecated string property ISO_Comment.
+ Corresponds to u_getISOComment. @deprecated ICU 49 */
+ UCHAR_ISO_COMMENT=0x4003,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** String property Lowercase_Mapping.
+ Corresponds to u_strToLower in ustring.h. @stable ICU 2.4 */
+ UCHAR_LOWERCASE_MAPPING=0x4004,
+ /** String property Name.
+ Corresponds to u_charName. @stable ICU 2.4 */
+ UCHAR_NAME=0x4005,
+ /** String property Simple_Case_Folding.
+ Corresponds to u_foldCase. @stable ICU 2.4 */
+ UCHAR_SIMPLE_CASE_FOLDING=0x4006,
+ /** String property Simple_Lowercase_Mapping.
+ Corresponds to u_tolower. @stable ICU 2.4 */
+ UCHAR_SIMPLE_LOWERCASE_MAPPING=0x4007,
+ /** String property Simple_Titlecase_Mapping.
+ Corresponds to u_totitle. @stable ICU 2.4 */
+ UCHAR_SIMPLE_TITLECASE_MAPPING=0x4008,
+ /** String property Simple_Uppercase_Mapping.
+ Corresponds to u_toupper. @stable ICU 2.4 */
+ UCHAR_SIMPLE_UPPERCASE_MAPPING=0x4009,
+ /** String property Titlecase_Mapping.
+ Corresponds to u_strToTitle in ustring.h. @stable ICU 2.4 */
+ UCHAR_TITLECASE_MAPPING=0x400A,
+#ifndef U_HIDE_DEPRECATED_API
+ /** String property Unicode_1_Name.
+ This property is of little practical value.
+ Beginning with ICU 49, ICU APIs return an empty string for this property.
+ Corresponds to u_charName(U_UNICODE_10_CHAR_NAME). @deprecated ICU 49 */
+ UCHAR_UNICODE_1_NAME=0x400B,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** String property Uppercase_Mapping.
+ Corresponds to u_strToUpper in ustring.h. @stable ICU 2.4 */
+ UCHAR_UPPERCASE_MAPPING=0x400C,
+ /** String property Bidi_Paired_Bracket (new in Unicode 6.3).
+ Corresponds to u_getBidiPairedBracket. @stable ICU 52 */
+ UCHAR_BIDI_PAIRED_BRACKET=0x400D,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for string Unicode properties.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_STRING_LIMIT=0x400E,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Miscellaneous property Script_Extensions (new in Unicode 6.0).
+ Some characters are commonly used in multiple scripts.
+ For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ Corresponds to uscript_hasScript and uscript_getScriptExtensions in uscript.h.
+ @stable ICU 4.6 */
+ UCHAR_SCRIPT_EXTENSIONS=0x7000,
+ /** First constant for Unicode properties with unusual value types. @stable ICU 4.6 */
+ UCHAR_OTHER_PROPERTY_START=UCHAR_SCRIPT_EXTENSIONS,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last constant for Unicode properties with unusual value types.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCHAR_OTHER_PROPERTY_LIMIT=0x7001,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** Represents a nonexistent or invalid property or property value. @stable ICU 2.4 */
+ UCHAR_INVALID_CODE = -1
+} UProperty;
+
+/**
+ * Data for enumerated Unicode general category types.
+ * See http://www.unicode.org/Public/UNIDATA/UnicodeData.html .
+ * @stable ICU 2.0
+ */
+typedef enum UCharCategory
+{
+ /*
+ * Note: UCharCategory constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 2-letter General_Category value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
+
+ /** Non-category for unassigned and non-character code points. @stable ICU 2.0 */
+ U_UNASSIGNED = 0,
+ /** Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNED!) @stable ICU 2.0 */
+ U_GENERAL_OTHER_TYPES = 0,
+ /** Lu @stable ICU 2.0 */
+ U_UPPERCASE_LETTER = 1,
+ /** Ll @stable ICU 2.0 */
+ U_LOWERCASE_LETTER = 2,
+ /** Lt @stable ICU 2.0 */
+ U_TITLECASE_LETTER = 3,
+ /** Lm @stable ICU 2.0 */
+ U_MODIFIER_LETTER = 4,
+ /** Lo @stable ICU 2.0 */
+ U_OTHER_LETTER = 5,
+ /** Mn @stable ICU 2.0 */
+ U_NON_SPACING_MARK = 6,
+ /** Me @stable ICU 2.0 */
+ U_ENCLOSING_MARK = 7,
+ /** Mc @stable ICU 2.0 */
+ U_COMBINING_SPACING_MARK = 8,
+ /** Nd @stable ICU 2.0 */
+ U_DECIMAL_DIGIT_NUMBER = 9,
+ /** Nl @stable ICU 2.0 */
+ U_LETTER_NUMBER = 10,
+ /** No @stable ICU 2.0 */
+ U_OTHER_NUMBER = 11,
+ /** Zs @stable ICU 2.0 */
+ U_SPACE_SEPARATOR = 12,
+ /** Zl @stable ICU 2.0 */
+ U_LINE_SEPARATOR = 13,
+ /** Zp @stable ICU 2.0 */
+ U_PARAGRAPH_SEPARATOR = 14,
+ /** Cc @stable ICU 2.0 */
+ U_CONTROL_CHAR = 15,
+ /** Cf @stable ICU 2.0 */
+ U_FORMAT_CHAR = 16,
+ /** Co @stable ICU 2.0 */
+ U_PRIVATE_USE_CHAR = 17,
+ /** Cs @stable ICU 2.0 */
+ U_SURROGATE = 18,
+ /** Pd @stable ICU 2.0 */
+ U_DASH_PUNCTUATION = 19,
+ /** Ps @stable ICU 2.0 */
+ U_START_PUNCTUATION = 20,
+ /** Pe @stable ICU 2.0 */
+ U_END_PUNCTUATION = 21,
+ /** Pc @stable ICU 2.0 */
+ U_CONNECTOR_PUNCTUATION = 22,
+ /** Po @stable ICU 2.0 */
+ U_OTHER_PUNCTUATION = 23,
+ /** Sm @stable ICU 2.0 */
+ U_MATH_SYMBOL = 24,
+ /** Sc @stable ICU 2.0 */
+ U_CURRENCY_SYMBOL = 25,
+ /** Sk @stable ICU 2.0 */
+ U_MODIFIER_SYMBOL = 26,
+ /** So @stable ICU 2.0 */
+ U_OTHER_SYMBOL = 27,
+ /** Pi @stable ICU 2.0 */
+ U_INITIAL_PUNCTUATION = 28,
+ /** Pf @stable ICU 2.0 */
+ U_FINAL_PUNCTUATION = 29,
+ /**
+ * One higher than the last enum UCharCategory constant.
+ * This numeric value is stable (will not change), see
+ * http://www.unicode.org/policies/stability_policy.html#Property_Value
+ *
+ * @stable ICU 2.0
+ */
+ U_CHAR_CATEGORY_COUNT
+} UCharCategory;
+
+/**
+ * U_GC_XX_MASK constants are bit flags corresponding to Unicode
+ * general category values.
+ * For each category, the nth bit is set if the numeric value of the
+ * corresponding UCharCategory constant is n.
+ *
+ * There are also some U_GC_Y_MASK constants for groups of general categories
+ * like L for all letter categories.
+ *
+ * @see u_charType
+ * @see U_GET_GC_MASK
+ * @see UCharCategory
+ * @stable ICU 2.1
+ */
+#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_CS_MASK U_MASK(U_SURROGATE)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
+
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
+/** Mask constant for a UCharCategory. @stable ICU 2.1 */
+#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
+
+
+/** Mask constant for multiple UCharCategory bits (L Letters). @stable ICU 2.1 */
+#define U_GC_L_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (LC Cased Letters). @stable ICU 2.1 */
+#define U_GC_LC_MASK \
+ (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
+
+/** Mask constant for multiple UCharCategory bits (M Marks). @stable ICU 2.1 */
+#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
+
+/** Mask constant for multiple UCharCategory bits (N Numbers). @stable ICU 2.1 */
+#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
+
+/** Mask constant for multiple UCharCategory bits (Z Separators). @stable ICU 2.1 */
+#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
+
+/** Mask constant for multiple UCharCategory bits (C Others). @stable ICU 2.1 */
+#define U_GC_C_MASK \
+ (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
+
+/** Mask constant for multiple UCharCategory bits (P Punctuation). @stable ICU 2.1 */
+#define U_GC_P_MASK \
+ (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
+ U_GC_PI_MASK|U_GC_PF_MASK)
+
+/** Mask constant for multiple UCharCategory bits (S Symbols). @stable ICU 2.1 */
+#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
+
+/**
+ * This specifies the language directional property of a character set.
+ * @stable ICU 2.0
+ */
+typedef enum UCharDirection {
+ /*
+ * Note: UCharDirection constants and their API comments are parsed by preparseucd.py.
+ * It matches pairs of lines like
+ * / ** <Unicode 1..3-letter Bidi_Class value> comment... * /
+ * U_<[A-Z_]+> = <integer>,
+ */
+
+ /** L @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT = 0,
+ /** R @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT = 1,
+ /** EN @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER = 2,
+ /** ES @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_SEPARATOR = 3,
+ /** ET @stable ICU 2.0 */
+ U_EUROPEAN_NUMBER_TERMINATOR = 4,
+ /** AN @stable ICU 2.0 */
+ U_ARABIC_NUMBER = 5,
+ /** CS @stable ICU 2.0 */
+ U_COMMON_NUMBER_SEPARATOR = 6,
+ /** B @stable ICU 2.0 */
+ U_BLOCK_SEPARATOR = 7,
+ /** S @stable ICU 2.0 */
+ U_SEGMENT_SEPARATOR = 8,
+ /** WS @stable ICU 2.0 */
+ U_WHITE_SPACE_NEUTRAL = 9,
+ /** ON @stable ICU 2.0 */
+ U_OTHER_NEUTRAL = 10,
+ /** LRE @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_EMBEDDING = 11,
+ /** LRO @stable ICU 2.0 */
+ U_LEFT_TO_RIGHT_OVERRIDE = 12,
+ /** AL @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_ARABIC = 13,
+ /** RLE @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_EMBEDDING = 14,
+ /** RLO @stable ICU 2.0 */
+ U_RIGHT_TO_LEFT_OVERRIDE = 15,
+ /** PDF @stable ICU 2.0 */
+ U_POP_DIRECTIONAL_FORMAT = 16,
+ /** NSM @stable ICU 2.0 */
+ U_DIR_NON_SPACING_MARK = 17,
+ /** BN @stable ICU 2.0 */
+ U_BOUNDARY_NEUTRAL = 18,
+ /** FSI @stable ICU 52 */
+ U_FIRST_STRONG_ISOLATE = 19,
+ /** LRI @stable ICU 52 */
+ U_LEFT_TO_RIGHT_ISOLATE = 20,
+ /** RLI @stable ICU 52 */
+ U_RIGHT_TO_LEFT_ISOLATE = 21,
+ /** PDI @stable ICU 52 */
+ U_POP_DIRECTIONAL_ISOLATE = 22,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest UCharDirection value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_CLASS).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_CHAR_DIRECTION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UCharDirection;
+
+/**
+ * Bidi Paired Bracket Type constants.
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @stable ICU 52
+ */
+typedef enum UBidiPairedBracketType {
+ /*
+ * Note: UBidiPairedBracketType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_BPT_<Unicode Bidi_Paired_Bracket_Type value name>
+ */
+
+ /** Not a paired bracket. @stable ICU 52 */
+ U_BPT_NONE,
+ /** Open paired bracket. @stable ICU 52 */
+ U_BPT_OPEN,
+ /** Close paired bracket. @stable ICU 52 */
+ U_BPT_CLOSE,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UBidiPairedBracketType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BIDI_PAIRED_BRACKET_TYPE).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_BPT_COUNT /* 3 */
+#endif // U_HIDE_DEPRECATED_API
+} UBidiPairedBracketType;
+
+/**
+ * Constants for Unicode blocks, see the Unicode Data file Blocks.txt
+ * @stable ICU 2.0
+ */
+enum UBlockCode {
+ /*
+ * Note: UBlockCode constants are parsed by preparseucd.py.
+ * It matches lines like
+ * UBLOCK_<Unicode Block value name> = <integer>,
+ */
+
+ /** New No_Block value in Unicode 4. @stable ICU 2.6 */
+ UBLOCK_NO_BLOCK = 0, /*[none]*/ /* Special range indicating No_Block */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BASIC_LATIN = 1, /*[0000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_1_SUPPLEMENT=2, /*[0080]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_A =3, /*[0100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_B =4, /*[0180]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IPA_EXTENSIONS =5, /*[0250]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPACING_MODIFIER_LETTERS =6, /*[02B0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS =7, /*[0300]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Greek and Coptic".
+ * @stable ICU 2.0
+ */
+ UBLOCK_GREEK =8, /*[0370]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CYRILLIC =9, /*[0400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARMENIAN =10, /*[0530]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HEBREW =11, /*[0590]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC =12, /*[0600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SYRIAC =13, /*[0700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAANA =14, /*[0780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DEVANAGARI =15, /*[0900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BENGALI =16, /*[0980]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GURMUKHI =17, /*[0A00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GUJARATI =18, /*[0A80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ORIYA =19, /*[0B00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TAMIL =20, /*[0B80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TELUGU =21, /*[0C00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANNADA =22, /*[0C80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MALAYALAM =23, /*[0D00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SINHALA =24, /*[0D80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_THAI =25, /*[0E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LAO =26, /*[0E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_TIBETAN =27, /*[0F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MYANMAR =28, /*[1000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEORGIAN =29, /*[10A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_JAMO =30, /*[1100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ETHIOPIC =31, /*[1200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CHEROKEE =32, /*[13A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS =33, /*[1400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OGHAM =34, /*[1680]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_RUNIC =35, /*[16A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KHMER =36, /*[1780]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MONGOLIAN =37, /*[1800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LATIN_EXTENDED_ADDITIONAL =38, /*[1E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GREEK_EXTENDED =39, /*[1F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GENERAL_PUNCTUATION =40, /*[2000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS =41, /*[2070]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CURRENCY_SYMBOLS =42, /*[20A0]*/
+
+ /**
+ * Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
+ * @stable ICU 2.0
+ */
+ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS =43, /*[20D0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LETTERLIKE_SYMBOLS =44, /*[2100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_NUMBER_FORMS =45, /*[2150]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARROWS =46, /*[2190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_OPERATORS =47, /*[2200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_TECHNICAL =48, /*[2300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CONTROL_PICTURES =49, /*[2400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OPTICAL_CHARACTER_RECOGNITION =50, /*[2440]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_ALPHANUMERICS =51, /*[2460]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOX_DRAWING =52, /*[2500]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BLOCK_ELEMENTS =53, /*[2580]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_GEOMETRIC_SHAPES =54, /*[25A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS =55, /*[2600]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_DINGBATS =56, /*[2700]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BRAILLE_PATTERNS =57, /*[2800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_RADICALS_SUPPLEMENT =58, /*[2E80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANGXI_RADICALS =59, /*[2F00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS =60, /*[2FF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION =61, /*[3000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIRAGANA =62, /*[3040]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KATAKANA =63, /*[30A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO =64, /*[3100]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_COMPATIBILITY_JAMO =65, /*[3130]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_KANBUN =66, /*[3190]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_BOPOMOFO_EXTENDED =67, /*[31A0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS =68, /*[3200]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY =69, /*[3300]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A =70, /*[3400]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS =71, /*[4E00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_SYLLABLES =72, /*[A000]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_YI_RADICALS =73, /*[A490]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HANGUL_SYLLABLES =74, /*[AC00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_SURROGATES =75, /*[D800]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HIGH_PRIVATE_USE_SURROGATES =76, /*[DB80]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_LOW_SURROGATES =77, /*[DC00]*/
+
+ /**
+ * Same as UBLOCK_PRIVATE_USE.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE_AREA =78, /*[E000]*/
+ /**
+ * Same as UBLOCK_PRIVATE_USE_AREA.
+ * Until Unicode 3.1.1, the corresponding block name was "Private Use",
+ * and multiple code point ranges had this block.
+ * Unicode 3.2 renames the block for the BMP PUA to "Private Use Area" and
+ * adds separate blocks for the supplementary PUAs.
+ *
+ * @stable ICU 2.0
+ */
+ UBLOCK_PRIVATE_USE = UBLOCK_PRIVATE_USE_AREA,
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS =79, /*[F900]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ALPHABETIC_PRESENTATION_FORMS =80, /*[FB00]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_A =81, /*[FB50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_COMBINING_HALF_MARKS =82, /*[FE20]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_FORMS =83, /*[FE30]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SMALL_FORM_VARIANTS =84, /*[FE50]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_ARABIC_PRESENTATION_FORMS_B =85, /*[FE70]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_SPECIALS =86, /*[FFF0]*/
+
+ /** @stable ICU 2.0 */
+ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS =87, /*[FF00]*/
+
+ /* New blocks in Unicode 3.1 */
+
+ /** @stable ICU 2.0 */
+ UBLOCK_OLD_ITALIC = 88, /*[10300]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_GOTHIC = 89, /*[10330]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_DESERET = 90, /*[10400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS = 91, /*[1D000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MUSICAL_SYMBOLS = 92, /*[1D100]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS = 93, /*[1D400]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B = 94, /*[20000]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT = 95, /*[2F800]*/
+ /** @stable ICU 2.0 */
+ UBLOCK_TAGS = 96, /*[E0000]*/
+
+ /* New blocks in Unicode 3.2 */
+
+ /** @stable ICU 3.0 */
+ UBLOCK_CYRILLIC_SUPPLEMENT = 97, /*[0500]*/
+ /**
+ * Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
+ * @stable ICU 2.2
+ */
+ UBLOCK_CYRILLIC_SUPPLEMENTARY = UBLOCK_CYRILLIC_SUPPLEMENT,
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGALOG = 98, /*[1700]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_HANUNOO = 99, /*[1720]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_BUHID = 100, /*[1740]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_TAGBANWA = 101, /*[1760]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A = 102, /*[27C0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_A = 103, /*[27F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_B = 104, /*[2900]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B = 105, /*[2980]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS = 106, /*[2A00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS = 107, /*[31F0]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_VARIATION_SELECTORS = 108, /*[FE00]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A = 109, /*[F0000]*/
+ /** @stable ICU 2.2 */
+ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B = 110, /*[100000]*/
+
+ /* New blocks in Unicode 4 */
+
+ /** @stable ICU 2.6 */
+ UBLOCK_LIMBU = 111, /*[1900]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_LE = 112, /*[1950]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_KHMER_SYMBOLS = 113, /*[19E0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_PHONETIC_EXTENSIONS = 114, /*[1D00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS = 115, /*[2B00]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_YIJING_HEXAGRAM_SYMBOLS = 116, /*[4DC0]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_SYLLABARY = 117, /*[10000]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_LINEAR_B_IDEOGRAMS = 118, /*[10080]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_AEGEAN_NUMBERS = 119, /*[10100]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_UGARITIC = 120, /*[10380]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_SHAVIAN = 121, /*[10450]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_OSMANYA = 122, /*[10480]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_CYPRIOT_SYLLABARY = 123, /*[10800]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_TAI_XUAN_JING_SYMBOLS = 124, /*[1D300]*/
+ /** @stable ICU 2.6 */
+ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT = 125, /*[E0100]*/
+
+ /* New blocks in Unicode 4.1 */
+
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION = 126, /*[1D200]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ANCIENT_GREEK_NUMBERS = 127, /*[10140]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ARABIC_SUPPLEMENT = 128, /*[0750]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_BUGINESE = 129, /*[1A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_CJK_STROKES = 130, /*[31C0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT = 131, /*[1DC0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_COPTIC = 132, /*[2C80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_EXTENDED = 133, /*[2D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_ETHIOPIC_SUPPLEMENT = 134, /*[1380]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GEORGIAN_SUPPLEMENT = 135, /*[2D00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_GLAGOLITIC = 136, /*[2C00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_KHAROSHTHI = 137, /*[10A00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_MODIFIER_TONE_LETTERS = 138, /*[A700]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_NEW_TAI_LUE = 139, /*[1980]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_OLD_PERSIAN = 140, /*[103A0]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT = 141, /*[1D80]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SUPPLEMENTAL_PUNCTUATION = 142, /*[2E00]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_SYLOTI_NAGRI = 143, /*[A800]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_TIFINAGH = 144, /*[2D30]*/
+ /** @stable ICU 3.4 */
+ UBLOCK_VERTICAL_FORMS = 145, /*[FE10]*/
+
+ /* New blocks in Unicode 5.0 */
+
+ /** @stable ICU 3.6 */
+ UBLOCK_NKO = 146, /*[07C0]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_BALINESE = 147, /*[1B00]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_C = 148, /*[2C60]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_LATIN_EXTENDED_D = 149, /*[A720]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHAGS_PA = 150, /*[A840]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_PHOENICIAN = 151, /*[10900]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM = 152, /*[12000]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION = 153, /*[12400]*/
+ /** @stable ICU 3.6 */
+ UBLOCK_COUNTING_ROD_NUMERALS = 154, /*[1D360]*/
+
+ /* New blocks in Unicode 5.1 */
+
+ /** @stable ICU 4.0 */
+ UBLOCK_SUNDANESE = 155, /*[1B80]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LEPCHA = 156, /*[1C00]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_OL_CHIKI = 157, /*[1C50]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CYRILLIC_EXTENDED_A = 158, /*[2DE0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_VAI = 159, /*[A500]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CYRILLIC_EXTENDED_B = 160, /*[A640]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_SAURASHTRA = 161, /*[A880]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_KAYAH_LI = 162, /*[A900]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_REJANG = 163, /*[A930]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CHAM = 164, /*[AA00]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_ANCIENT_SYMBOLS = 165, /*[10190]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_PHAISTOS_DISC = 166, /*[101D0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LYCIAN = 167, /*[10280]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_CARIAN = 168, /*[102A0]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_LYDIAN = 169, /*[10920]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_MAHJONG_TILES = 170, /*[1F000]*/
+ /** @stable ICU 4.0 */
+ UBLOCK_DOMINO_TILES = 171, /*[1F030]*/
+
+ /* New blocks in Unicode 5.2 */
+
+ /** @stable ICU 4.4 */
+ UBLOCK_SAMARITAN = 172, /*[0800]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED = 173, /*[18B0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_TAI_THAM = 174, /*[1A20]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_VEDIC_EXTENSIONS = 175, /*[1CD0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_LISU = 176, /*[A4D0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_BAMUM = 177, /*[A6A0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_COMMON_INDIC_NUMBER_FORMS = 178, /*[A830]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_DEVANAGARI_EXTENDED = 179, /*[A8E0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_HANGUL_JAMO_EXTENDED_A = 180, /*[A960]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_JAVANESE = 181, /*[A980]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_MYANMAR_EXTENDED_A = 182, /*[AA60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_TAI_VIET = 183, /*[AA80]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_MEETEI_MAYEK = 184, /*[ABC0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_HANGUL_JAMO_EXTENDED_B = 185, /*[D7B0]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_IMPERIAL_ARAMAIC = 186, /*[10840]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_OLD_SOUTH_ARABIAN = 187, /*[10A60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_AVESTAN = 188, /*[10B00]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_INSCRIPTIONAL_PARTHIAN = 189, /*[10B40]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_INSCRIPTIONAL_PAHLAVI = 190, /*[10B60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_OLD_TURKIC = 191, /*[10C00]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_RUMI_NUMERAL_SYMBOLS = 192, /*[10E60]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_KAITHI = 193, /*[11080]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_EGYPTIAN_HIEROGLYPHS = 194, /*[13000]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT = 195, /*[1F100]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT = 196, /*[1F200]*/
+ /** @stable ICU 4.4 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C = 197, /*[2A700]*/
+
+ /* New blocks in Unicode 6.0 */
+
+ /** @stable ICU 4.6 */
+ UBLOCK_MANDAIC = 198, /*[0840]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BATAK = 199, /*[1BC0]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_ETHIOPIC_EXTENDED_A = 200, /*[AB00]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BRAHMI = 201, /*[11000]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_BAMUM_SUPPLEMENT = 202, /*[16800]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_KANA_SUPPLEMENT = 203, /*[1B000]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_PLAYING_CARDS = 204, /*[1F0A0]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS = 205, /*[1F300]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_EMOTICONS = 206, /*[1F600]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_TRANSPORT_AND_MAP_SYMBOLS = 207, /*[1F680]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_ALCHEMICAL_SYMBOLS = 208, /*[1F700]*/
+ /** @stable ICU 4.6 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D = 209, /*[2B740]*/
+
+ /* New blocks in Unicode 6.1 */
+
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_EXTENDED_A = 210, /*[08A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS = 211, /*[1EE00]*/
+ /** @stable ICU 49 */
+ UBLOCK_CHAKMA = 212, /*[11100]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEETEI_MAYEK_EXTENSIONS = 213, /*[AAE0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_CURSIVE = 214, /*[109A0]*/
+ /** @stable ICU 49 */
+ UBLOCK_MEROITIC_HIEROGLYPHS = 215, /*[10980]*/
+ /** @stable ICU 49 */
+ UBLOCK_MIAO = 216, /*[16F00]*/
+ /** @stable ICU 49 */
+ UBLOCK_SHARADA = 217, /*[11180]*/
+ /** @stable ICU 49 */
+ UBLOCK_SORA_SOMPENG = 218, /*[110D0]*/
+ /** @stable ICU 49 */
+ UBLOCK_SUNDANESE_SUPPLEMENT = 219, /*[1CC0]*/
+ /** @stable ICU 49 */
+ UBLOCK_TAKRI = 220, /*[11680]*/
+
+ /* New blocks in Unicode 7.0 */
+
+ /** @stable ICU 54 */
+ UBLOCK_BASSA_VAH = 221, /*[16AD0]*/
+ /** @stable ICU 54 */
+ UBLOCK_CAUCASIAN_ALBANIAN = 222, /*[10530]*/
+ /** @stable ICU 54 */
+ UBLOCK_COPTIC_EPACT_NUMBERS = 223, /*[102E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED = 224, /*[1AB0]*/
+ /** @stable ICU 54 */
+ UBLOCK_DUPLOYAN = 225, /*[1BC00]*/
+ /** @stable ICU 54 */
+ UBLOCK_ELBASAN = 226, /*[10500]*/
+ /** @stable ICU 54 */
+ UBLOCK_GEOMETRIC_SHAPES_EXTENDED = 227, /*[1F780]*/
+ /** @stable ICU 54 */
+ UBLOCK_GRANTHA = 228, /*[11300]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHOJKI = 229, /*[11200]*/
+ /** @stable ICU 54 */
+ UBLOCK_KHUDAWADI = 230, /*[112B0]*/
+ /** @stable ICU 54 */
+ UBLOCK_LATIN_EXTENDED_E = 231, /*[AB30]*/
+ /** @stable ICU 54 */
+ UBLOCK_LINEAR_A = 232, /*[10600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MAHAJANI = 233, /*[11150]*/
+ /** @stable ICU 54 */
+ UBLOCK_MANICHAEAN = 234, /*[10AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_MENDE_KIKAKUI = 235, /*[1E800]*/
+ /** @stable ICU 54 */
+ UBLOCK_MODI = 236, /*[11600]*/
+ /** @stable ICU 54 */
+ UBLOCK_MRO = 237, /*[16A40]*/
+ /** @stable ICU 54 */
+ UBLOCK_MYANMAR_EXTENDED_B = 238, /*[A9E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_NABATAEAN = 239, /*[10880]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_NORTH_ARABIAN = 240, /*[10A80]*/
+ /** @stable ICU 54 */
+ UBLOCK_OLD_PERMIC = 241, /*[10350]*/
+ /** @stable ICU 54 */
+ UBLOCK_ORNAMENTAL_DINGBATS = 242, /*[1F650]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAHAWH_HMONG = 243, /*[16B00]*/
+ /** @stable ICU 54 */
+ UBLOCK_PALMYRENE = 244, /*[10860]*/
+ /** @stable ICU 54 */
+ UBLOCK_PAU_CIN_HAU = 245, /*[11AC0]*/
+ /** @stable ICU 54 */
+ UBLOCK_PSALTER_PAHLAVI = 246, /*[10B80]*/
+ /** @stable ICU 54 */
+ UBLOCK_SHORTHAND_FORMAT_CONTROLS = 247, /*[1BCA0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SIDDHAM = 248, /*[11580]*/
+ /** @stable ICU 54 */
+ UBLOCK_SINHALA_ARCHAIC_NUMBERS = 249, /*[111E0]*/
+ /** @stable ICU 54 */
+ UBLOCK_SUPPLEMENTAL_ARROWS_C = 250, /*[1F800]*/
+ /** @stable ICU 54 */
+ UBLOCK_TIRHUTA = 251, /*[11480]*/
+ /** @stable ICU 54 */
+ UBLOCK_WARANG_CITI = 252, /*[118A0]*/
+
+ /* New blocks in Unicode 8.0 */
+
+ /** @stable ICU 56 */
+ UBLOCK_AHOM = 253, /*[11700]*/
+ /** @stable ICU 56 */
+ UBLOCK_ANATOLIAN_HIEROGLYPHS = 254, /*[14400]*/
+ /** @stable ICU 56 */
+ UBLOCK_CHEROKEE_SUPPLEMENT = 255, /*[AB70]*/
+ /** @stable ICU 56 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E = 256, /*[2B820]*/
+ /** @stable ICU 56 */
+ UBLOCK_EARLY_DYNASTIC_CUNEIFORM = 257, /*[12480]*/
+ /** @stable ICU 56 */
+ UBLOCK_HATRAN = 258, /*[108E0]*/
+ /** @stable ICU 56 */
+ UBLOCK_MULTANI = 259, /*[11280]*/
+ /** @stable ICU 56 */
+ UBLOCK_OLD_HUNGARIAN = 260, /*[10C80]*/
+ /** @stable ICU 56 */
+ UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS = 261, /*[1F900]*/
+ /** @stable ICU 56 */
+ UBLOCK_SUTTON_SIGNWRITING = 262, /*[1D800]*/
+
+ /* New blocks in Unicode 9.0 */
+
+ /** @stable ICU 58 */
+ UBLOCK_ADLAM = 263, /*[1E900]*/
+ /** @stable ICU 58 */
+ UBLOCK_BHAIKSUKI = 264, /*[11C00]*/
+ /** @stable ICU 58 */
+ UBLOCK_CYRILLIC_EXTENDED_C = 265, /*[1C80]*/
+ /** @stable ICU 58 */
+ UBLOCK_GLAGOLITIC_SUPPLEMENT = 266, /*[1E000]*/
+ /** @stable ICU 58 */
+ UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION = 267, /*[16FE0]*/
+ /** @stable ICU 58 */
+ UBLOCK_MARCHEN = 268, /*[11C70]*/
+ /** @stable ICU 58 */
+ UBLOCK_MONGOLIAN_SUPPLEMENT = 269, /*[11660]*/
+ /** @stable ICU 58 */
+ UBLOCK_NEWA = 270, /*[11400]*/
+ /** @stable ICU 58 */
+ UBLOCK_OSAGE = 271, /*[104B0]*/
+ /** @stable ICU 58 */
+ UBLOCK_TANGUT = 272, /*[17000]*/
+ /** @stable ICU 58 */
+ UBLOCK_TANGUT_COMPONENTS = 273, /*[18800]*/
+
+ // New blocks in Unicode 10.0
+
+ /** @stable ICU 60 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F = 274, /*[2CEB0]*/
+ /** @stable ICU 60 */
+ UBLOCK_KANA_EXTENDED_A = 275, /*[1B100]*/
+ /** @stable ICU 60 */
+ UBLOCK_MASARAM_GONDI = 276, /*[11D00]*/
+ /** @stable ICU 60 */
+ UBLOCK_NUSHU = 277, /*[1B170]*/
+ /** @stable ICU 60 */
+ UBLOCK_SOYOMBO = 278, /*[11A50]*/
+ /** @stable ICU 60 */
+ UBLOCK_SYRIAC_SUPPLEMENT = 279, /*[0860]*/
+ /** @stable ICU 60 */
+ UBLOCK_ZANABAZAR_SQUARE = 280, /*[11A00]*/
+
+ // New blocks in Unicode 11.0
+
+ /** @stable ICU 62 */
+ UBLOCK_CHESS_SYMBOLS = 281, /*[1FA00]*/
+ /** @stable ICU 62 */
+ UBLOCK_DOGRA = 282, /*[11800]*/
+ /** @stable ICU 62 */
+ UBLOCK_GEORGIAN_EXTENDED = 283, /*[1C90]*/
+ /** @stable ICU 62 */
+ UBLOCK_GUNJALA_GONDI = 284, /*[11D60]*/
+ /** @stable ICU 62 */
+ UBLOCK_HANIFI_ROHINGYA = 285, /*[10D00]*/
+ /** @stable ICU 62 */
+ UBLOCK_INDIC_SIYAQ_NUMBERS = 286, /*[1EC70]*/
+ /** @stable ICU 62 */
+ UBLOCK_MAKASAR = 287, /*[11EE0]*/
+ /** @stable ICU 62 */
+ UBLOCK_MAYAN_NUMERALS = 288, /*[1D2E0]*/
+ /** @stable ICU 62 */
+ UBLOCK_MEDEFAIDRIN = 289, /*[16E40]*/
+ /** @stable ICU 62 */
+ UBLOCK_OLD_SOGDIAN = 290, /*[10F00]*/
+ /** @stable ICU 62 */
+ UBLOCK_SOGDIAN = 291, /*[10F30]*/
+
+ // New blocks in Unicode 12.0
+
+ /** @stable ICU 64 */
+ UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS = 292, /*[13430]*/
+ /** @stable ICU 64 */
+ UBLOCK_ELYMAIC = 293, /*[10FE0]*/
+ /** @stable ICU 64 */
+ UBLOCK_NANDINAGARI = 294, /*[119A0]*/
+ /** @stable ICU 64 */
+ UBLOCK_NYIAKENG_PUACHUE_HMONG = 295, /*[1E100]*/
+ /** @stable ICU 64 */
+ UBLOCK_OTTOMAN_SIYAQ_NUMBERS = 296, /*[1ED00]*/
+ /** @stable ICU 64 */
+ UBLOCK_SMALL_KANA_EXTENSION = 297, /*[1B130]*/
+ /** @stable ICU 64 */
+ UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A = 298, /*[1FA70]*/
+ /** @stable ICU 64 */
+ UBLOCK_TAMIL_SUPPLEMENT = 299, /*[11FC0]*/
+ /** @stable ICU 64 */
+ UBLOCK_WANCHO = 300, /*[1E2C0]*/
+
+ // New blocks in Unicode 13.0
+
+ /** @stable ICU 66 */
+ UBLOCK_CHORASMIAN = 301, /*[10FB0]*/
+ /** @stable ICU 66 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G = 302, /*[30000]*/
+ /** @stable ICU 66 */
+ UBLOCK_DIVES_AKURU = 303, /*[11900]*/
+ /** @stable ICU 66 */
+ UBLOCK_KHITAN_SMALL_SCRIPT = 304, /*[18B00]*/
+ /** @stable ICU 66 */
+ UBLOCK_LISU_SUPPLEMENT = 305, /*[11FB0]*/
+ /** @stable ICU 66 */
+ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING = 306, /*[1FB00]*/
+ /** @stable ICU 66 */
+ UBLOCK_TANGUT_SUPPLEMENT = 307, /*[18D00]*/
+ /** @stable ICU 66 */
+ UBLOCK_YEZIDI = 308, /*[10E80]*/
+
+ // New blocks in Unicode 14.0
+
+ /** @stable ICU 70 */
+ UBLOCK_ARABIC_EXTENDED_B = 309, /*[0870]*/
+ /** @stable ICU 70 */
+ UBLOCK_CYPRO_MINOAN = 310, /*[12F90]*/
+ /** @stable ICU 70 */
+ UBLOCK_ETHIOPIC_EXTENDED_B = 311, /*[1E7E0]*/
+ /** @stable ICU 70 */
+ UBLOCK_KANA_EXTENDED_B = 312, /*[1AFF0]*/
+ /** @stable ICU 70 */
+ UBLOCK_LATIN_EXTENDED_F = 313, /*[10780]*/
+ /** @stable ICU 70 */
+ UBLOCK_LATIN_EXTENDED_G = 314, /*[1DF00]*/
+ /** @stable ICU 70 */
+ UBLOCK_OLD_UYGHUR = 315, /*[10F70]*/
+ /** @stable ICU 70 */
+ UBLOCK_TANGSA = 316, /*[16A70]*/
+ /** @stable ICU 70 */
+ UBLOCK_TOTO = 317, /*[1E290]*/
+ /** @stable ICU 70 */
+ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A = 318, /*[11AB0]*/
+ /** @stable ICU 70 */
+ UBLOCK_VITHKUQI = 319, /*[10570]*/
+ /** @stable ICU 70 */
+ UBLOCK_ZNAMENNY_MUSICAL_NOTATION = 320, /*[1CF00]*/
+
+ // New blocks in Unicode 15.0
+
+ /** @stable ICU 72 */
+ UBLOCK_ARABIC_EXTENDED_C = 321, /*[10EC0]*/
+ /** @stable ICU 72 */
+ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H = 322, /*[31350]*/
+ /** @stable ICU 72 */
+ UBLOCK_CYRILLIC_EXTENDED_D = 323, /*[1E030]*/
+ /** @stable ICU 72 */
+ UBLOCK_DEVANAGARI_EXTENDED_A = 324, /*[11B00]*/
+ /** @stable ICU 72 */
+ UBLOCK_KAKTOVIK_NUMERALS = 325, /*[1D2C0]*/
+ /** @stable ICU 72 */
+ UBLOCK_KAWI = 326, /*[11F00]*/
+ /** @stable ICU 72 */
+ UBLOCK_NAG_MUNDARI = 327, /*[1E4D0]*/
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UBlockCode value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_BLOCK).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UBLOCK_COUNT = 328,
+#endif // U_HIDE_DEPRECATED_API
+
+ /** @stable ICU 2.0 */
+ UBLOCK_INVALID_CODE=-1
+};
+
+/** @stable ICU 2.0 */
+typedef enum UBlockCode UBlockCode;
+
+/**
+ * East Asian Width constants.
+ *
+ * @see UCHAR_EAST_ASIAN_WIDTH
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+typedef enum UEastAsianWidth {
+ /*
+ * Note: UEastAsianWidth constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_EA_<Unicode East_Asian_Width value name>
+ */
+
+ U_EA_NEUTRAL, /*[N]*/
+ U_EA_AMBIGUOUS, /*[A]*/
+ U_EA_HALFWIDTH, /*[H]*/
+ U_EA_FULLWIDTH, /*[F]*/
+ U_EA_NARROW, /*[Na]*/
+ U_EA_WIDE, /*[W]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UEastAsianWidth value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_EAST_ASIAN_WIDTH).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_EA_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UEastAsianWidth;
+
+/**
+ * Selector constants for u_charName().
+ * u_charName() returns the "modern" name of a
+ * Unicode character; or the name that was defined in
+ * Unicode version 1.0, before the Unicode standard merged
+ * with ISO-10646; or an "extended" name that gives each
+ * Unicode code point a unique name.
+ *
+ * @see u_charName
+ * @stable ICU 2.0
+ */
+typedef enum UCharNameChoice {
+ /** Unicode character name (Name property). @stable ICU 2.0 */
+ U_UNICODE_CHAR_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * The Unicode_1_Name property value which is of little practical value.
+ * Beginning with ICU 49, ICU APIs return an empty string for this name choice.
+ * @deprecated ICU 49
+ */
+ U_UNICODE_10_CHAR_NAME,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** Standard or synthetic character name. @stable ICU 2.0 */
+ U_EXTENDED_CHAR_NAME = U_UNICODE_CHAR_NAME+2,
+ /** Corrected name from NameAliases.txt. @stable ICU 4.4 */
+ U_CHAR_NAME_ALIAS,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UCharNameChoice value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_CHAR_NAME_CHOICE_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UCharNameChoice;
+
+/**
+ * Selector constants for u_getPropertyName() and
+ * u_getPropertyValueName(). These selectors are used to choose which
+ * name is returned for a given property or value. All properties and
+ * values have a long name. Most have a short name, but some do not.
+ * Unicode allows for additional names, beyond the long and short
+ * name, which would be indicated by U_LONG_PROPERTY_NAME + i, where
+ * i=1, 2,...
+ *
+ * @see u_getPropertyName()
+ * @see u_getPropertyValueName()
+ * @stable ICU 2.4
+ */
+typedef enum UPropertyNameChoice {
+ U_SHORT_PROPERTY_NAME,
+ U_LONG_PROPERTY_NAME,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UPropertyNameChoice value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_PROPERTY_NAME_CHOICE_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UPropertyNameChoice;
+
+/**
+ * Decomposition Type constants.
+ *
+ * @see UCHAR_DECOMPOSITION_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UDecompositionType {
+ /*
+ * Note: UDecompositionType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_DT_<Unicode Decomposition_Type value name>
+ */
+
+ U_DT_NONE, /*[none]*/
+ U_DT_CANONICAL, /*[can]*/
+ U_DT_COMPAT, /*[com]*/
+ U_DT_CIRCLE, /*[enc]*/
+ U_DT_FINAL, /*[fin]*/
+ U_DT_FONT, /*[font]*/
+ U_DT_FRACTION, /*[fra]*/
+ U_DT_INITIAL, /*[init]*/
+ U_DT_ISOLATED, /*[iso]*/
+ U_DT_MEDIAL, /*[med]*/
+ U_DT_NARROW, /*[nar]*/
+ U_DT_NOBREAK, /*[nb]*/
+ U_DT_SMALL, /*[sml]*/
+ U_DT_SQUARE, /*[sqr]*/
+ U_DT_SUB, /*[sub]*/
+ U_DT_SUPER, /*[sup]*/
+ U_DT_VERTICAL, /*[vert]*/
+ U_DT_WIDE, /*[wide]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UDecompositionType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_DECOMPOSITION_TYPE).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_DT_COUNT /* 18 */
+#endif // U_HIDE_DEPRECATED_API
+} UDecompositionType;
+
+/**
+ * Joining Type constants.
+ *
+ * @see UCHAR_JOINING_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningType {
+ /*
+ * Note: UJoiningType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JT_<Unicode Joining_Type value name>
+ */
+
+ U_JT_NON_JOINING, /*[U]*/
+ U_JT_JOIN_CAUSING, /*[C]*/
+ U_JT_DUAL_JOINING, /*[D]*/
+ U_JT_LEFT_JOINING, /*[L]*/
+ U_JT_RIGHT_JOINING, /*[R]*/
+ U_JT_TRANSPARENT, /*[T]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UJoiningType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_TYPE).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_JT_COUNT /* 6 */
+#endif // U_HIDE_DEPRECATED_API
+} UJoiningType;
+
+/**
+ * Joining Group constants.
+ *
+ * @see UCHAR_JOINING_GROUP
+ * @stable ICU 2.2
+ */
+typedef enum UJoiningGroup {
+ /*
+ * Note: UJoiningGroup constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_JG_<Unicode Joining_Group value name>
+ */
+
+ U_JG_NO_JOINING_GROUP,
+ U_JG_AIN,
+ U_JG_ALAPH,
+ U_JG_ALEF,
+ U_JG_BEH,
+ U_JG_BETH,
+ U_JG_DAL,
+ U_JG_DALATH_RISH,
+ U_JG_E,
+ U_JG_FEH,
+ U_JG_FINAL_SEMKATH,
+ U_JG_GAF,
+ U_JG_GAMAL,
+ U_JG_HAH,
+ U_JG_TEH_MARBUTA_GOAL, /**< @stable ICU 4.6 */
+ U_JG_HAMZA_ON_HEH_GOAL=U_JG_TEH_MARBUTA_GOAL,
+ U_JG_HE,
+ U_JG_HEH,
+ U_JG_HEH_GOAL,
+ U_JG_HETH,
+ U_JG_KAF,
+ U_JG_KAPH,
+ U_JG_KNOTTED_HEH,
+ U_JG_LAM,
+ U_JG_LAMADH,
+ U_JG_MEEM,
+ U_JG_MIM,
+ U_JG_NOON,
+ U_JG_NUN,
+ U_JG_PE,
+ U_JG_QAF,
+ U_JG_QAPH,
+ U_JG_REH,
+ U_JG_REVERSED_PE,
+ U_JG_SAD,
+ U_JG_SADHE,
+ U_JG_SEEN,
+ U_JG_SEMKATH,
+ U_JG_SHIN,
+ U_JG_SWASH_KAF,
+ U_JG_SYRIAC_WAW,
+ U_JG_TAH,
+ U_JG_TAW,
+ U_JG_TEH_MARBUTA,
+ U_JG_TETH,
+ U_JG_WAW,
+ U_JG_YEH,
+ U_JG_YEH_BARREE,
+ U_JG_YEH_WITH_TAIL,
+ U_JG_YUDH,
+ U_JG_YUDH_HE,
+ U_JG_ZAIN,
+ U_JG_FE, /**< @stable ICU 2.6 */
+ U_JG_KHAPH, /**< @stable ICU 2.6 */
+ U_JG_ZHAIN, /**< @stable ICU 2.6 */
+ U_JG_BURUSHASKI_YEH_BARREE, /**< @stable ICU 4.0 */
+ U_JG_FARSI_YEH, /**< @stable ICU 4.4 */
+ U_JG_NYA, /**< @stable ICU 4.4 */
+ U_JG_ROHINGYA_YEH, /**< @stable ICU 49 */
+ U_JG_MANICHAEAN_ALEPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_AYIN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_BETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DALETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_DHAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_FIVE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_GIMEL, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_HUNDRED, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_KAPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_LAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_MEM, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_NUN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ONE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_PE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_QOPH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_RESH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SADHE, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_SAMEKH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TEN, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TETH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_THAMEDH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_TWENTY, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_WAW, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_YODH, /**< @stable ICU 54 */
+ U_JG_MANICHAEAN_ZAYIN, /**< @stable ICU 54 */
+ U_JG_STRAIGHT_WAW, /**< @stable ICU 54 */
+ U_JG_AFRICAN_FEH, /**< @stable ICU 58 */
+ U_JG_AFRICAN_NOON, /**< @stable ICU 58 */
+ U_JG_AFRICAN_QAF, /**< @stable ICU 58 */
+
+ U_JG_MALAYALAM_BHA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_JA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_LLA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_LLLA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NGA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NNA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NNNA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_NYA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_RA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_SSA, /**< @stable ICU 60 */
+ U_JG_MALAYALAM_TTA, /**< @stable ICU 60 */
+
+ U_JG_HANIFI_ROHINGYA_KINNA_YA, /**< @stable ICU 62 */
+ U_JG_HANIFI_ROHINGYA_PA, /**< @stable ICU 62 */
+
+ U_JG_THIN_YEH, /**< @stable ICU 70 */
+ U_JG_VERTICAL_TAIL, /**< @stable ICU 70 */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UJoiningGroup value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_JOINING_GROUP).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_JG_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UJoiningGroup;
+
+/**
+ * Grapheme Cluster Break constants.
+ *
+ * @see UCHAR_GRAPHEME_CLUSTER_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UGraphemeClusterBreak {
+ /*
+ * Note: UGraphemeClusterBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_GCB_<Unicode Grapheme_Cluster_Break value name>
+ */
+
+ U_GCB_OTHER = 0, /*[XX]*/
+ U_GCB_CONTROL = 1, /*[CN]*/
+ U_GCB_CR = 2, /*[CR]*/
+ U_GCB_EXTEND = 3, /*[EX]*/
+ U_GCB_L = 4, /*[L]*/
+ U_GCB_LF = 5, /*[LF]*/
+ U_GCB_LV = 6, /*[LV]*/
+ U_GCB_LVT = 7, /*[LVT]*/
+ U_GCB_T = 8, /*[T]*/
+ U_GCB_V = 9, /*[V]*/
+ /** @stable ICU 4.0 */
+ U_GCB_SPACING_MARK = 10, /*[SM]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ /** @stable ICU 4.0 */
+ U_GCB_PREPEND = 11, /*[PP]*/
+ /** @stable ICU 50 */
+ U_GCB_REGIONAL_INDICATOR = 12, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ /** @stable ICU 58 */
+ U_GCB_E_BASE = 13, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
+ /** @stable ICU 58 */
+ U_GCB_E_BASE_GAZ = 14, /*[EBG]*/
+ /** @stable ICU 58 */
+ U_GCB_E_MODIFIER = 15, /*[EM]*/
+ /** @stable ICU 58 */
+ U_GCB_GLUE_AFTER_ZWJ = 16, /*[GAZ]*/
+ /** @stable ICU 58 */
+ U_GCB_ZWJ = 17, /*[ZWJ]*/
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UGraphemeClusterBreak value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_GRAPHEME_CLUSTER_BREAK).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_GCB_COUNT = 18
+#endif // U_HIDE_DEPRECATED_API
+} UGraphemeClusterBreak;
+
+/**
+ * Word Break constants.
+ * (UWordBreak is a pre-existing enum type in ubrk.h for word break status tags.)
+ *
+ * @see UCHAR_WORD_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum UWordBreakValues {
+ /*
+ * Note: UWordBreakValues constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_WB_<Unicode Word_Break value name>
+ */
+
+ U_WB_OTHER = 0, /*[XX]*/
+ U_WB_ALETTER = 1, /*[LE]*/
+ U_WB_FORMAT = 2, /*[FO]*/
+ U_WB_KATAKANA = 3, /*[KA]*/
+ U_WB_MIDLETTER = 4, /*[ML]*/
+ U_WB_MIDNUM = 5, /*[MN]*/
+ U_WB_NUMERIC = 6, /*[NU]*/
+ U_WB_EXTENDNUMLET = 7, /*[EX]*/
+ /** @stable ICU 4.0 */
+ U_WB_CR = 8, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ /** @stable ICU 4.0 */
+ U_WB_EXTEND = 9, /*[Extend]*/
+ /** @stable ICU 4.0 */
+ U_WB_LF = 10, /*[LF]*/
+ /** @stable ICU 4.0 */
+ U_WB_MIDNUMLET =11, /*[MB]*/
+ /** @stable ICU 4.0 */
+ U_WB_NEWLINE =12, /*[NL]*/
+ /** @stable ICU 50 */
+ U_WB_REGIONAL_INDICATOR = 13, /*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ /** @stable ICU 52 */
+ U_WB_HEBREW_LETTER = 14, /*[HL]*/ /* from here on: new in Unicode 6.3/ICU 52 */
+ /** @stable ICU 52 */
+ U_WB_SINGLE_QUOTE = 15, /*[SQ]*/
+ /** @stable ICU 52 */
+ U_WB_DOUBLE_QUOTE = 16, /*[DQ]*/
+ /** @stable ICU 58 */
+ U_WB_E_BASE = 17, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
+ /** @stable ICU 58 */
+ U_WB_E_BASE_GAZ = 18, /*[EBG]*/
+ /** @stable ICU 58 */
+ U_WB_E_MODIFIER = 19, /*[EM]*/
+ /** @stable ICU 58 */
+ U_WB_GLUE_AFTER_ZWJ = 20, /*[GAZ]*/
+ /** @stable ICU 58 */
+ U_WB_ZWJ = 21, /*[ZWJ]*/
+ /** @stable ICU 62 */
+ U_WB_WSEGSPACE = 22, /*[WSEGSPACE]*/
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UWordBreakValues value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_WORD_BREAK).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_WB_COUNT = 23
+#endif // U_HIDE_DEPRECATED_API
+} UWordBreakValues;
+
+/**
+ * Sentence Break constants.
+ *
+ * @see UCHAR_SENTENCE_BREAK
+ * @stable ICU 3.4
+ */
+typedef enum USentenceBreak {
+ /*
+ * Note: USentenceBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_SB_<Unicode Sentence_Break value name>
+ */
+
+ U_SB_OTHER = 0, /*[XX]*/
+ U_SB_ATERM = 1, /*[AT]*/
+ U_SB_CLOSE = 2, /*[CL]*/
+ U_SB_FORMAT = 3, /*[FO]*/
+ U_SB_LOWER = 4, /*[LO]*/
+ U_SB_NUMERIC = 5, /*[NU]*/
+ U_SB_OLETTER = 6, /*[LE]*/
+ U_SB_SEP = 7, /*[SE]*/
+ U_SB_SP = 8, /*[SP]*/
+ U_SB_STERM = 9, /*[ST]*/
+ U_SB_UPPER = 10, /*[UP]*/
+ U_SB_CR = 11, /*[CR]*/ /* from here on: new in Unicode 5.1/ICU 4.0 */
+ U_SB_EXTEND = 12, /*[EX]*/
+ U_SB_LF = 13, /*[LF]*/
+ U_SB_SCONTINUE = 14, /*[SC]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal USentenceBreak value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SENTENCE_BREAK).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_SB_COUNT = 15
+#endif // U_HIDE_DEPRECATED_API
+} USentenceBreak;
+
+/**
+ * Line Break constants.
+ *
+ * @see UCHAR_LINE_BREAK
+ * @stable ICU 2.2
+ */
+typedef enum ULineBreak {
+ /*
+ * Note: ULineBreak constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_LB_<Unicode Line_Break value name>
+ */
+
+ U_LB_UNKNOWN = 0, /*[XX]*/
+ U_LB_AMBIGUOUS = 1, /*[AI]*/
+ U_LB_ALPHABETIC = 2, /*[AL]*/
+ U_LB_BREAK_BOTH = 3, /*[B2]*/
+ U_LB_BREAK_AFTER = 4, /*[BA]*/
+ U_LB_BREAK_BEFORE = 5, /*[BB]*/
+ U_LB_MANDATORY_BREAK = 6, /*[BK]*/
+ U_LB_CONTINGENT_BREAK = 7, /*[CB]*/
+ U_LB_CLOSE_PUNCTUATION = 8, /*[CL]*/
+ U_LB_COMBINING_MARK = 9, /*[CM]*/
+ U_LB_CARRIAGE_RETURN = 10, /*[CR]*/
+ U_LB_EXCLAMATION = 11, /*[EX]*/
+ U_LB_GLUE = 12, /*[GL]*/
+ U_LB_HYPHEN = 13, /*[HY]*/
+ U_LB_IDEOGRAPHIC = 14, /*[ID]*/
+ /** Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0 @stable ICU 3.0 */
+ U_LB_INSEPARABLE = 15, /*[IN]*/
+ U_LB_INSEPERABLE = U_LB_INSEPARABLE,
+ U_LB_INFIX_NUMERIC = 16, /*[IS]*/
+ U_LB_LINE_FEED = 17, /*[LF]*/
+ U_LB_NONSTARTER = 18, /*[NS]*/
+ U_LB_NUMERIC = 19, /*[NU]*/
+ U_LB_OPEN_PUNCTUATION = 20, /*[OP]*/
+ U_LB_POSTFIX_NUMERIC = 21, /*[PO]*/
+ U_LB_PREFIX_NUMERIC = 22, /*[PR]*/
+ U_LB_QUOTATION = 23, /*[QU]*/
+ U_LB_COMPLEX_CONTEXT = 24, /*[SA]*/
+ U_LB_SURROGATE = 25, /*[SG]*/
+ U_LB_SPACE = 26, /*[SP]*/
+ U_LB_BREAK_SYMBOLS = 27, /*[SY]*/
+ U_LB_ZWSPACE = 28, /*[ZW]*/
+ /** @stable ICU 2.6 */
+ U_LB_NEXT_LINE = 29, /*[NL]*/ /* from here on: new in Unicode 4/ICU 2.6 */
+ /** @stable ICU 2.6 */
+ U_LB_WORD_JOINER = 30, /*[WJ]*/
+ /** @stable ICU 3.4 */
+ U_LB_H2 = 31, /*[H2]*/ /* from here on: new in Unicode 4.1/ICU 3.4 */
+ /** @stable ICU 3.4 */
+ U_LB_H3 = 32, /*[H3]*/
+ /** @stable ICU 3.4 */
+ U_LB_JL = 33, /*[JL]*/
+ /** @stable ICU 3.4 */
+ U_LB_JT = 34, /*[JT]*/
+ /** @stable ICU 3.4 */
+ U_LB_JV = 35, /*[JV]*/
+ /** @stable ICU 4.4 */
+ U_LB_CLOSE_PARENTHESIS = 36, /*[CP]*/ /* new in Unicode 5.2/ICU 4.4 */
+ /** @stable ICU 49 */
+ U_LB_CONDITIONAL_JAPANESE_STARTER = 37,/*[CJ]*/ /* new in Unicode 6.1/ICU 49 */
+ /** @stable ICU 49 */
+ U_LB_HEBREW_LETTER = 38, /*[HL]*/ /* new in Unicode 6.1/ICU 49 */
+ /** @stable ICU 50 */
+ U_LB_REGIONAL_INDICATOR = 39,/*[RI]*/ /* new in Unicode 6.2/ICU 50 */
+ /** @stable ICU 58 */
+ U_LB_E_BASE = 40, /*[EB]*/ /* from here on: new in Unicode 9.0/ICU 58 */
+ /** @stable ICU 58 */
+ U_LB_E_MODIFIER = 41, /*[EM]*/
+ /** @stable ICU 58 */
+ U_LB_ZWJ = 42, /*[ZWJ]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal ULineBreak value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_LINE_BREAK).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_LB_COUNT = 43
+#endif // U_HIDE_DEPRECATED_API
+} ULineBreak;
+
+/**
+ * Numeric Type constants.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @stable ICU 2.2
+ */
+typedef enum UNumericType {
+ /*
+ * Note: UNumericType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_NT_<Unicode Numeric_Type value name>
+ */
+
+ U_NT_NONE, /*[None]*/
+ U_NT_DECIMAL, /*[de]*/
+ U_NT_DIGIT, /*[di]*/
+ U_NT_NUMERIC, /*[nu]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UNumericType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_NUMERIC_TYPE).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_NT_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UNumericType;
+
+/**
+ * Hangul Syllable Type constants.
+ *
+ * @see UCHAR_HANGUL_SYLLABLE_TYPE
+ * @stable ICU 2.6
+ */
+typedef enum UHangulSyllableType {
+ /*
+ * Note: UHangulSyllableType constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_HST_<Unicode Hangul_Syllable_Type value name>
+ */
+
+ U_HST_NOT_APPLICABLE, /*[NA]*/
+ U_HST_LEADING_JAMO, /*[L]*/
+ U_HST_VOWEL_JAMO, /*[V]*/
+ U_HST_TRAILING_JAMO, /*[T]*/
+ U_HST_LV_SYLLABLE, /*[LV]*/
+ U_HST_LVT_SYLLABLE, /*[LVT]*/
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UHangulSyllableType value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_HANGUL_SYLLABLE_TYPE).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_HST_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UHangulSyllableType;
+
+/**
+ * Indic Positional Category constants.
+ *
+ * @see UCHAR_INDIC_POSITIONAL_CATEGORY
+ * @stable ICU 63
+ */
+typedef enum UIndicPositionalCategory {
+ /*
+ * Note: UIndicPositionalCategory constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INPC_<Unicode Indic_Positional_Category value name>
+ */
+
+ /** @stable ICU 63 */
+ U_INPC_NA,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM_AND_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_BOTTOM_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_LEFT_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_OVERSTRUCK,
+ /** @stable ICU 63 */
+ U_INPC_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_BOTTOM,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_BOTTOM_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_LEFT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_LEFT_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_TOP_AND_RIGHT,
+ /** @stable ICU 63 */
+ U_INPC_VISUAL_ORDER_LEFT,
+ /** @stable ICU 66 */
+ U_INPC_TOP_AND_BOTTOM_AND_LEFT,
+} UIndicPositionalCategory;
+
+/**
+ * Indic Syllabic Category constants.
+ *
+ * @see UCHAR_INDIC_SYLLABIC_CATEGORY
+ * @stable ICU 63
+ */
+typedef enum UIndicSyllabicCategory {
+ /*
+ * Note: UIndicSyllabicCategory constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_INSC_<Unicode Indic_Syllabic_Category value name>
+ */
+
+ /** @stable ICU 63 */
+ U_INSC_OTHER,
+ /** @stable ICU 63 */
+ U_INSC_AVAGRAHA,
+ /** @stable ICU 63 */
+ U_INSC_BINDU,
+ /** @stable ICU 63 */
+ U_INSC_BRAHMI_JOINING_NUMBER,
+ /** @stable ICU 63 */
+ U_INSC_CANTILLATION_MARK,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_DEAD,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_FINAL,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_HEAD_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_INITIAL_POSTFIXED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_KILLER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_MEDIAL,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PLACEHOLDER,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PRECEDING_REPHA,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_PREFIXED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_SUBJOINED,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_SUCCEEDING_REPHA,
+ /** @stable ICU 63 */
+ U_INSC_CONSONANT_WITH_STACKER,
+ /** @stable ICU 63 */
+ U_INSC_GEMINATION_MARK,
+ /** @stable ICU 63 */
+ U_INSC_INVISIBLE_STACKER,
+ /** @stable ICU 63 */
+ U_INSC_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_MODIFYING_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_NON_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_NUKTA,
+ /** @stable ICU 63 */
+ U_INSC_NUMBER,
+ /** @stable ICU 63 */
+ U_INSC_NUMBER_JOINER,
+ /** @stable ICU 63 */
+ U_INSC_PURE_KILLER,
+ /** @stable ICU 63 */
+ U_INSC_REGISTER_SHIFTER,
+ /** @stable ICU 63 */
+ U_INSC_SYLLABLE_MODIFIER,
+ /** @stable ICU 63 */
+ U_INSC_TONE_LETTER,
+ /** @stable ICU 63 */
+ U_INSC_TONE_MARK,
+ /** @stable ICU 63 */
+ U_INSC_VIRAMA,
+ /** @stable ICU 63 */
+ U_INSC_VISARGA,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL_DEPENDENT,
+ /** @stable ICU 63 */
+ U_INSC_VOWEL_INDEPENDENT,
+} UIndicSyllabicCategory;
+
+/**
+ * Vertical Orientation constants.
+ *
+ * @see UCHAR_VERTICAL_ORIENTATION
+ * @stable ICU 63
+ */
+typedef enum UVerticalOrientation {
+ /*
+ * Note: UVerticalOrientation constants are parsed by preparseucd.py.
+ * It matches lines like
+ * U_VO_<Unicode Vertical_Orientation value name>
+ */
+
+ /** @stable ICU 63 */
+ U_VO_ROTATED,
+ /** @stable ICU 63 */
+ U_VO_TRANSFORMED_ROTATED,
+ /** @stable ICU 63 */
+ U_VO_TRANSFORMED_UPRIGHT,
+ /** @stable ICU 63 */
+ U_VO_UPRIGHT,
+} UVerticalOrientation;
+
+/**
+ * Check a binary Unicode property for a code point.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ucd/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Important: If ICU is built with UCD files from Unicode versions below 3.2,
+ * then properties marked with "new in Unicode 3.2" are not or not fully available.
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START&lt;=which&lt;UCHAR_BINARY_LIMIT.
+ * @return true or false according to the binary Unicode property value for c.
+ * Also false if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all.
+ *
+ * @see UProperty
+ * @see u_getBinaryPropertySet
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_hasBinaryProperty(UChar32 c, UProperty which);
+
+/**
+ * Returns true if the property is true for the string.
+ * Same as u_hasBinaryProperty(single code point, which)
+ * if the string contains exactly one code point.
+ *
+ * Most properties apply only to single code points.
+ * <a href="https://www.unicode.org/reports/tr51/#Emoji_Sets">UTS #51 Unicode Emoji</a>
+ * defines several properties of strings.
+ *
+ * @param s String to test.
+ * @param length Length of the string, or negative if NUL-terminated.
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START&lt;=which&lt;UCHAR_BINARY_LIMIT.
+ * @return true or false according to the binary Unicode property value for the string.
+ * Also false if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getBinaryPropertySet
+ * @see u_getIntPropertyValue
+ * @see u_getUnicodeVersion
+ * @stable ICU 70
+ */
+U_CAPI UBool U_EXPORT2
+u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which);
+
+/**
+ * Returns a frozen USet for a binary property.
+ * The library retains ownership over the returned object.
+ * Sets an error code if the property number is not one for a binary property.
+ *
+ * The returned set contains all code points for which the property is true.
+ *
+ * @param property UCHAR_BINARY_START..UCHAR_BINARY_LIMIT-1
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the property as a set
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see Unicode::fromUSet
+ * @stable ICU 63
+ */
+U_CAPI const USet * U_EXPORT2
+u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode);
+
+/**
+ * Check if a code point has the Alphabetic Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_ALPHABETIC).
+ * This is different from u_isalpha!
+ * @param c Code point to test
+ * @return true if the code point has the Alphabetic Unicode property, false otherwise
+ *
+ * @see UCHAR_ALPHABETIC
+ * @see u_isalpha
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isUAlphabetic(UChar32 c);
+
+/**
+ * Check if a code point has the Lowercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_LOWERCASE).
+ * This is different from u_islower!
+ * @param c Code point to test
+ * @return true if the code point has the Lowercase Unicode property, false otherwise
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_islower
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isULowercase(UChar32 c);
+
+/**
+ * Check if a code point has the Uppercase Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_UPPERCASE).
+ * This is different from u_isupper!
+ * @param c Code point to test
+ * @return true if the code point has the Uppercase Unicode property, false otherwise
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_isupper
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isUUppercase(UChar32 c);
+
+/**
+ * Check if a code point has the White_Space Unicode property.
+ * Same as u_hasBinaryProperty(c, UCHAR_WHITE_SPACE).
+ * This is different from both u_isspace and u_isWhitespace!
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c Code point to test
+ * @return true if the code point has the White_Space Unicode property, false otherwise.
+ *
+ * @see UCHAR_WHITE_SPACE
+ * @see u_isWhitespace
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_hasBinaryProperty
+ * @stable ICU 2.1
+ */
+U_CAPI UBool U_EXPORT2
+u_isUWhiteSpace(UChar32 c);
+
+/**
+ * Get the property value for an enumerated or integer Unicode property for a code point.
+ * Also returns binary and mask property values.
+ *
+ * Unicode, especially in version 3.2, defines many more properties than the
+ * original set in UnicodeData.txt.
+ *
+ * The properties APIs are intended to reflect Unicode properties as defined
+ * in the Unicode Character Database (UCD) and Unicode Technical Reports (UTR).
+ * For details about the properties see http://www.unicode.org/ .
+ * For names of Unicode properties see the UCD file PropertyAliases.txt.
+ *
+ * Sample usage:
+ * UEastAsianWidth ea=(UEastAsianWidth)u_getIntPropertyValue(c, UCHAR_EAST_ASIAN_WIDTH);
+ * UBool b=(UBool)u_getIntPropertyValue(c, UCHAR_IDEOGRAPHIC);
+ *
+ * @param c Code point to test.
+ * @param which UProperty selector constant, identifies which property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * @return Numeric value that is directly the property value or,
+ * for enumerated properties, corresponds to the numeric value of the enumerated
+ * constant of the respective property value enumeration type
+ * (cast to enum type if necessary).
+ * Returns 0 or 1 (for false/true) for binary Unicode properties.
+ * Returns a bit-mask for mask properties.
+ * Returns 0 if 'which' is out of bounds or if the Unicode version
+ * does not have data for the property at all, or not for this code point.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getIntPropertyMinValue
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyMap
+ * @see u_getUnicodeVersion
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyValue(UChar32 c, UProperty which);
+
+/**
+ * Get the minimum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMaxValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Minimum value returned by u_getIntPropertyValue for a Unicode property.
+ * 0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyMinValue(UProperty which);
+
+/**
+ * Get the maximum value for an enumerated/integer/binary Unicode property.
+ * Can be used together with u_getIntPropertyMinValue
+ * to allocate arrays of UnicodeSet or similar.
+ *
+ * Examples for min/max values (for Unicode 3.2):
+ *
+ * - UCHAR_BIDI_CLASS: 0/18 (U_LEFT_TO_RIGHT/U_BOUNDARY_NEUTRAL)
+ * - UCHAR_SCRIPT: 0/45 (USCRIPT_COMMON/USCRIPT_TAGBANWA)
+ * - UCHAR_IDEOGRAPHIC: 0/1 (false/true)
+ *
+ * For undefined UProperty constant values, min/max values will be 0/-1.
+ *
+ * @param which UProperty selector constant, identifies which binary property to check.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT.
+ * @return Maximum value returned by u_getIntPropertyValue for a Unicode property.
+ * <=0 if the property selector is out of range.
+ *
+ * @see UProperty
+ * @see u_hasBinaryProperty
+ * @see u_getUnicodeVersion
+ * @see u_getIntPropertyMaxValue
+ * @see u_getIntPropertyValue
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getIntPropertyMaxValue(UProperty which);
+
+/**
+ * Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
+ * The library retains ownership over the returned object.
+ * Sets an error code if the property number is not one for an "int property".
+ *
+ * The returned object maps all Unicode code points to their values for that property.
+ * For documentation of the integer values see u_getIntPropertyValue().
+ *
+ * @param property UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the property as a map
+ * @see UProperty
+ * @see u_getIntPropertyValue
+ * @stable ICU 63
+ */
+U_CAPI const UCPMap * U_EXPORT2
+u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode);
+
+/**
+ * Get the numeric value for a Unicode code point as defined in the
+ * Unicode Character Database.
+ *
+ * A "double" return type is necessary because
+ * some numeric values are fractions, negative, or too large for int32_t.
+ *
+ * For characters without any numeric values in the Unicode Character Database,
+ * this function will return U_NO_NUMERIC_VALUE.
+ * Note: This is different from the Unicode Standard which specifies NaN as the default value.
+ * (NaN is not available on all platforms.)
+ *
+ * Similar to java.lang.Character.getNumericValue(), but u_getNumericValue()
+ * also supports negative values, large values, and fractions,
+ * while Java's getNumericValue() returns values 10..35 for ASCII letters.
+ *
+ * @param c Code point to get the numeric value for.
+ * @return Numeric value of c, or U_NO_NUMERIC_VALUE if none is defined.
+ *
+ * @see U_NO_NUMERIC_VALUE
+ * @stable ICU 2.2
+ */
+U_CAPI double U_EXPORT2
+u_getNumericValue(UChar32 c);
+
+/**
+ * Special value that is returned by u_getNumericValue when
+ * no numeric value is defined for a code point.
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.2
+ */
+#define U_NO_NUMERIC_VALUE ((double)-123456789.)
+
+/**
+ * Determines whether the specified code point has the general category "Ll"
+ * (lowercase letter).
+ *
+ * Same as java.lang.Character.isLowerCase().
+ *
+ * This misses some characters that are also lowercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_LOWERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an Ll lowercase letter
+ *
+ * @see UCHAR_LOWERCASE
+ * @see u_isupper
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_islower(UChar32 c);
+
+/**
+ * Determines whether the specified code point has the general category "Lu"
+ * (uppercase letter).
+ *
+ * Same as java.lang.Character.isUpperCase().
+ *
+ * This misses some characters that are also uppercase but
+ * have a different general category value.
+ * In order to include those, use UCHAR_UPPERCASE.
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an Lu uppercase letter
+ *
+ * @see UCHAR_UPPERCASE
+ * @see u_islower
+ * @see u_istitle
+ * @see u_tolower
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isupper(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a titlecase letter.
+ * True for general category "Lt" (titlecase letter).
+ *
+ * Same as java.lang.Character.isTitleCase().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an Lt titlecase letter
+ *
+ * @see u_isupper
+ * @see u_islower
+ * @see u_totitle
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_istitle(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a digit character according to Java.
+ * True for characters with general category "Nd" (decimal digit numbers).
+ * Beginning with Unicode 4, this is the same as
+ * testing for the Numeric_Type of Decimal.
+ *
+ * Same as java.lang.Character.isDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a digit character according to Character.isDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a letter character.
+ * True for general categories "L" (letters).
+ *
+ * Same as java.lang.Character.isLetter().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a letter character
+ *
+ * @see u_isdigit
+ * @see u_isalnum
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isalpha(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an alphanumeric character
+ * (letter or digit) according to Java.
+ * True for characters with general categories
+ * "L" (letters) and "Nd" (decimal digit numbers).
+ *
+ * Same as java.lang.Character.isLetterOrDigit().
+ *
+ * In addition to being equivalent to a Java function, this also serves
+ * as a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an alphanumeric character according to Character.isLetterOrDigit()
+ *
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isalnum(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a hexadecimal digit.
+ * This is equivalent to u_digit(c, 16)>=0.
+ * True for characters with general category "Nd" (decimal digit numbers)
+ * as well as Latin letters a-f and A-F in both ASCII and Fullwidth ASCII.
+ * (That is, for letters with code points
+ * 0041..0046, 0061..0066, FF21..FF26, FF41..FF46.)
+ *
+ * In order to narrow the definition of hexadecimal digits to only ASCII
+ * characters, use (c<=0x7f && u_isxdigit(c)).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a hexadecimal digit
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isxdigit(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a punctuation character.
+ * True for characters with general categories "P" (punctuation).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a punctuation character
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_ispunct(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "graphic" character
+ * (printable, excluding spaces).
+ * true for all characters except those with general categories
+ * "Cc" (control codes), "Cf" (format controls), "Cs" (surrogates),
+ * "Cn" (unassigned), and "Z" (separators).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a "graphic" character
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isgraph(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a "blank" or "horizontal space",
+ * a character that visibly separates words on a line.
+ * The following are equivalent definitions:
+ *
+ * true for Unicode White_Space characters except for "vertical space controls"
+ * where "vertical space controls" are the following characters:
+ * U+000A (LF) U+000B (VT) U+000C (FF) U+000D (CR) U+0085 (NEL) U+2028 (LS) U+2029 (PS)
+ *
+ * same as
+ *
+ * true for U+0009 (TAB) and characters with general category "Zs" (space separators).
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a "blank"
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isblank(UChar32 c);
+
+/**
+ * Determines whether the specified code point is "defined",
+ * which usually means that it is assigned a character.
+ * True for general categories other than "Cn" (other, not assigned),
+ * i.e., true for all code points mentioned in UnicodeData.txt.
+ *
+ * Note that non-character code points (e.g., U+FDD0) are not "defined"
+ * (they are Cn), but surrogate code points are "defined" (Cs).
+ *
+ * Same as java.lang.Character.isDefined().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is assigned a character
+ *
+ * @see u_isdigit
+ * @see u_isalpha
+ * @see u_isalnum
+ * @see u_isupper
+ * @see u_islower
+ * @see u_istitle
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isdefined(UChar32 c);
+
+/**
+ * Determines if the specified character is a space character or not.
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the character to be tested
+ * @return true if the character is a space character; false otherwise.
+ *
+ * @see u_isJavaSpaceChar
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isspace(UChar32 c);
+
+/**
+ * Determine if the specified code point is a space character according to Java.
+ * True for characters with general categories "Z" (separators),
+ * which does not include control codes (e.g., TAB or Line Feed).
+ *
+ * Same as java.lang.Character.isSpaceChar().
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a space character according to Character.isSpaceChar()
+ *
+ * @see u_isspace
+ * @see u_isWhitespace
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaSpaceChar(UChar32 c);
+
+/**
+ * Determines if the specified code point is a whitespace character according to Java/ICU.
+ * A character is considered to be a Java whitespace character if and only
+ * if it satisfies one of the following criteria:
+ *
+ * - It is a Unicode Separator character (categories "Z" = "Zs" or "Zl" or "Zp"), but is not
+ * also a non-breaking space (U+00A0 NBSP or U+2007 Figure Space or U+202F Narrow NBSP).
+ * - It is U+0009 HORIZONTAL TABULATION.
+ * - It is U+000A LINE FEED.
+ * - It is U+000B VERTICAL TABULATION.
+ * - It is U+000C FORM FEED.
+ * - It is U+000D CARRIAGE RETURN.
+ * - It is U+001C FILE SEPARATOR.
+ * - It is U+001D GROUP SEPARATOR.
+ * - It is U+001E RECORD SEPARATOR.
+ * - It is U+001F UNIT SEPARATOR.
+ *
+ * This API tries to sync with the semantics of Java's
+ * java.lang.Character.isWhitespace(), but it may not return
+ * the exact same results because of the Unicode version
+ * difference.
+ *
+ * Note: Unicode 4.0.1 changed U+200B ZERO WIDTH SPACE from a Space Separator (Zs)
+ * to a Format Control (Cf). Since then, isWhitespace(0x200b) returns false.
+ * See http://www.unicode.org/versions/Unicode4.0.1/
+ *
+ * Note: There are several ICU whitespace functions; please see the uchar.h
+ * file documentation for a detailed comparison.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a whitespace character according to Java/ICU
+ *
+ * @see u_isspace
+ * @see u_isJavaSpaceChar
+ * @see u_isUWhiteSpace
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isWhitespace(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a control character
+ * (as defined by this function).
+ * A control character is one of the following:
+ * - ISO 8-bit control character (U+0000..U+001f and U+007f..U+009f)
+ * - U_CONTROL_CHAR (Cc)
+ * - U_FORMAT_CHAR (Cf)
+ * - U_LINE_SEPARATOR (Zl)
+ * - U_PARAGRAPH_SEPARATOR (Zp)
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a control character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isprint
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_iscntrl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is an ISO control code.
+ * True for U+0000..U+001f and U+007f..U+009f (general category "Cc").
+ *
+ * Same as java.lang.Character.isISOControl().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is an ISO control code
+ *
+ * @see u_iscntrl
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+u_isISOControl(UChar32 c);
+
+/**
+ * Determines whether the specified code point is a printable character.
+ * True for general categories <em>other</em> than "C" (controls).
+ *
+ * This is a C/POSIX migration function.
+ * See the comments about C/POSIX character classification functions in the
+ * documentation at the top of this header file.
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a printable character
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_iscntrl
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isprint(UChar32 c);
+
+/**
+ * Non-standard: Determines whether the specified code point is a base character.
+ * True for general categories "L" (letters), "N" (numbers),
+ * "Mc" (spacing combining marks), and "Me" (enclosing marks).
+ *
+ * Note that this is different from the Unicode Standard definition in
+ * chapter 3.6, conformance clause D51 “Base character”,
+ * which defines base characters as the code points with general categories
+ * Letter (L), Number (N), Punctuation (P), Symbol (S), or Space Separator (Zs).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is a base character according to this function
+ *
+ * @see u_isalpha
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isbase(UChar32 c);
+
+/**
+ * Returns the bidirectional category value for the code point,
+ * which is used in the Unicode bidirectional algorithm
+ * (UAX #9 http://www.unicode.org/reports/tr9/).
+ * Note that some <em>unassigned</em> code points have bidi values
+ * of R or AL because they are in blocks that are reserved
+ * for Right-To-Left scripts.
+ *
+ * Same as java.lang.Character.getDirectionality()
+ *
+ * @param c the code point to be tested
+ * @return the bidirectional category (UCharDirection) value
+ *
+ * @see UCharDirection
+ * @stable ICU 2.0
+ */
+U_CAPI UCharDirection U_EXPORT2
+u_charDirection(UChar32 c);
+
+/**
+ * Determines whether the code point has the Bidi_Mirrored property.
+ * This property is set for characters that are commonly used in
+ * Right-To-Left contexts and need to be displayed with a "mirrored"
+ * glyph.
+ *
+ * Same as java.lang.Character.isMirrored().
+ * Same as UCHAR_BIDI_MIRRORED
+ *
+ * @param c the code point to be tested
+ * @return true if the character has the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isMirrored(UChar32 c);
+
+/**
+ * Maps the specified character to a "mirror-image" character.
+ * For characters with the Bidi_Mirrored property, implementations
+ * sometimes need a "poor man's" mapping to another Unicode
+ * character (code point) such that the default glyph may serve
+ * as the mirror-image of the default glyph of the specified
+ * character. This is useful for text conversion to and from
+ * codepages with visual order, and for displays without glyph
+ * selection capabilities.
+ *
+ * @param c the code point to be mapped
+ * @return another Unicode code point that may serve as a mirror-image
+ * substitute, or c itself if there is no such mapping or c
+ * does not have the Bidi_Mirrored property
+ *
+ * @see UCHAR_BIDI_MIRRORED
+ * @see u_isMirrored
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_charMirror(UChar32 c);
+
+/**
+ * Maps the specified character to its paired bracket character.
+ * For Bidi_Paired_Bracket_Type!=None, this is the same as u_charMirror().
+ * Otherwise c itself is returned.
+ * See http://www.unicode.org/reports/tr9/
+ *
+ * @param c the code point to be mapped
+ * @return the paired bracket code point,
+ * or c itself if there is no such mapping
+ * (Bidi_Paired_Bracket_Type=None)
+ *
+ * @see UCHAR_BIDI_PAIRED_BRACKET
+ * @see UCHAR_BIDI_PAIRED_BRACKET_TYPE
+ * @see u_charMirror
+ * @stable ICU 52
+ */
+U_CAPI UChar32 U_EXPORT2
+u_getBidiPairedBracket(UChar32 c);
+
+/**
+ * Returns the general category value for the code point.
+ *
+ * Same as java.lang.Character.getType().
+ *
+ * @param c the code point to be tested
+ * @return the general category (UCharCategory) value
+ *
+ * @see UCharCategory
+ * @stable ICU 2.0
+ */
+U_CAPI int8_t U_EXPORT2
+u_charType(UChar32 c);
+
+/**
+ * Get a single-bit bit set for the general category of a character.
+ * This bit set can be compared bitwise with U_GC_SM_MASK, U_GC_L_MASK, etc.
+ * Same as U_MASK(u_charType(c)).
+ *
+ * @param c the code point to be tested
+ * @return a single-bit mask corresponding to the general category (UCharCategory) value
+ *
+ * @see u_charType
+ * @see UCharCategory
+ * @see U_GC_CN_MASK
+ * @stable ICU 2.1
+ */
+#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
+
+/**
+ * Callback from u_enumCharTypes(), is called for each contiguous range
+ * of code points c (where start<=c<limit)
+ * with the same Unicode general category ("character type").
+ *
+ * The callback function can stop the enumeration by returning false.
+ *
+ * @param context an opaque pointer, as passed into utrie_enum()
+ * @param start the first code point in a contiguous range with value
+ * @param limit one past the last code point in a contiguous range with value
+ * @param type the general category for all code points in [start..limit[
+ * @return false to stop the enumeration
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see u_enumCharTypes
+ */
+typedef UBool U_CALLCONV
+UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type);
+
+/**
+ * Enumerate efficiently all code points with their Unicode general categories.
+ *
+ * This is useful for building data structures (e.g., UnicodeSet's),
+ * for enumerating all assigned code points (type!=U_UNASSIGNED), etc.
+ *
+ * For each contiguous range of code points with a given general category ("character type"),
+ * the UCharEnumTypeRange function is called.
+ * Adjacent ranges have different types.
+ * The Unicode Standard guarantees that the numeric value of the type is 0..31.
+ *
+ * @param enumRange a pointer to a function that is called for each contiguous range
+ * of code points with the same general category
+ * @param context an opaque pointer that is passed on to the callback function
+ *
+ * @stable ICU 2.1
+ * @see UCharCategory
+ * @see UCharEnumTypeRange
+ */
+U_CAPI void U_EXPORT2
+u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context);
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns the combining class of the code point as specified in UnicodeData.txt.
+ *
+ * @param c the code point of the character
+ * @return the combining class of the character
+ * @stable ICU 2.0
+ */
+U_CAPI uint8_t U_EXPORT2
+u_getCombiningClass(UChar32 c);
+
+#endif
+
+/**
+ * Returns the decimal digit value of a decimal digit character.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ *
+ * Unlike ICU releases before 2.6, no digit values are returned for any
+ * Han characters because Han number characters are often used with a special
+ * Chinese-style number format (with characters for powers of 10 in between)
+ * instead of in decimal-positional notation.
+ * Unicode 4 explicitly assigns Han number characters the Numeric_Type
+ * Numeric instead of Decimal.
+ * See Jitterbug 1483 for more details.
+ *
+ * Use u_getIntPropertyValue(c, UCHAR_NUMERIC_TYPE) and u_getNumericValue()
+ * for complete numeric Unicode properties.
+ *
+ * @param c the code point for which to get the decimal digit value
+ * @return the decimal digit value of c,
+ * or -1 if c is not a decimal digit character
+ *
+ * @see u_getNumericValue
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_charDigitValue(UChar32 c);
+
+/**
+ * Returns the Unicode allocation block that contains the character.
+ *
+ * @param c the code point to be tested
+ * @return the block value (UBlockCode) for c
+ *
+ * @see UBlockCode
+ * @stable ICU 2.0
+ */
+U_CAPI UBlockCode U_EXPORT2
+ublock_getCode(UChar32 c);
+
+/**
+ * Retrieve the name of a Unicode character.
+ * Depending on <code>nameChoice</code>, the character name written
+ * into the buffer is the "modern" name or the name that was defined
+ * in Unicode version 1.0.
+ * The name contains only "invariant" characters
+ * like A-Z, 0-9, space, and '-'.
+ * Unicode 1.0 names are only retrieved if they are different from the modern
+ * names and if the data file contains the data for them. gennames may or may
+ * not be called with a command line option to include 1.0 names in unames.dat.
+ *
+ * @param code The character (code point) for which to get the name.
+ * It must be <code>0<=code<=0x10ffff</code>.
+ * @param nameChoice Selector for which name to get.
+ * @param buffer Destination address for copying the name.
+ * The name will always be zero-terminated.
+ * If there is no name, then the buffer will be set to the empty string.
+ * @param bufferLength <code>==sizeof(buffer)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_charName()</code>
+ * returns.
+ * @return The length of the name, or 0 if there is no name for this character.
+ * If the bufferLength is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @see UCharNameChoice
+ * @see u_charFromName
+ * @see u_enumCharNames
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_charName(UChar32 code, UCharNameChoice nameChoice,
+ char *buffer, int32_t bufferLength,
+ UErrorCode *pErrorCode);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Returns an empty string.
+ * Used to return the ISO 10646 comment for a character.
+ * The Unicode ISO_Comment property is deprecated and has no values.
+ *
+ * @param c The character (code point) for which to get the ISO comment.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the comment.
+ * The comment will be zero-terminated if possible.
+ * If there is no comment, then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable;
+ * check for <code>U_SUCCESS()</code> after <code>u_getISOComment()</code>
+ * returns.
+ * @return 0
+ *
+ * @deprecated ICU 49
+ */
+U_DEPRECATED int32_t U_EXPORT2
+u_getISOComment(UChar32 c,
+ char *dest, int32_t destCapacity,
+ UErrorCode *pErrorCode);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Find a Unicode character by its name and return its code point value.
+ * The name is matched exactly and completely.
+ * If the name does not correspond to a code point, <i>pErrorCode</i>
+ * is set to <code>U_INVALID_CHAR_FOUND</code>.
+ * A Unicode 1.0 name is matched only if it differs from the modern name.
+ * Unicode names are all uppercase. Extended names are lowercase followed
+ * by an uppercase hexadecimal number, and within angle brackets.
+ *
+ * @param nameChoice Selector for which name to match.
+ * @param name The name to match.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ * @return The Unicode value of the code point with the given name,
+ * or an undefined value if there is no such code point.
+ *
+ * @see UCharNameChoice
+ * @see u_charName
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+U_CAPI UChar32 U_EXPORT2
+u_charFromName(UCharNameChoice nameChoice,
+ const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Type of a callback function for u_enumCharNames() that gets called
+ * for each Unicode character with the code point value and
+ * the character name.
+ * If such a function returns false, then the enumeration is stopped.
+ *
+ * @param context The context pointer that was passed to u_enumCharNames().
+ * @param code The Unicode code point for the character with this name.
+ * @param nameChoice Selector for which kind of names is enumerated.
+ * @param name The character's name, zero-terminated.
+ * @param length The length of the name.
+ * @return true if the enumeration should continue, false to stop it.
+ *
+ * @see UCharNameChoice
+ * @see u_enumCharNames
+ * @stable ICU 1.7
+ */
+typedef UBool U_CALLCONV UEnumCharNamesFn(void *context,
+ UChar32 code,
+ UCharNameChoice nameChoice,
+ const char *name,
+ int32_t length);
+
+/**
+ * Enumerate all assigned Unicode characters between the start and limit
+ * code points (start inclusive, limit exclusive) and call a function
+ * for each, passing the code point value and the character name.
+ * For Unicode 1.0 names, only those are enumerated that differ from the
+ * modern names.
+ *
+ * @param start The first code point in the enumeration range.
+ * @param limit One more than the last code point in the enumeration range
+ * (the first one after the range).
+ * @param fn The function that is to be called for each character name.
+ * @param context An arbitrary pointer that is passed to the function.
+ * @param nameChoice Selector for which kind of names to enumerate.
+ * @param pErrorCode Pointer to a UErrorCode variable
+ *
+ * @see UCharNameChoice
+ * @see UEnumCharNamesFn
+ * @see u_charName
+ * @see u_charFromName
+ * @stable ICU 1.7
+ */
+U_CAPI void U_EXPORT2
+u_enumCharNames(UChar32 start, UChar32 limit,
+ UEnumCharNamesFn *fn,
+ void *context,
+ UCharNameChoice nameChoice,
+ UErrorCode *pErrorCode);
+
+/**
+ * Return the Unicode name for a given property, as given in the
+ * Unicode database file PropertyAliases.txt.
+ *
+ * In addition, this function maps the property
+ * UCHAR_GENERAL_CATEGORY_MASK to the synthetic names "gcm" /
+ * "General_Category_Mask". These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param property UProperty selector other than UCHAR_INVALID_CODE.
+ * If out of range, NULL is returned.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All properties have a long name. Most
+ * have a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+ *
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+u_getPropertyName(UProperty property,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the UProperty enum for a given property name, as specified
+ * in the Unicode database file PropertyAliases.txt. Short, long, and
+ * any other variants are recognized.
+ *
+ * In addition, this function maps the synthetic names "gcm" /
+ * "General_Category_Mask" to the property
+ * UCHAR_GENERAL_CATEGORY_MASK. These names are not in
+ * PropertyAliases.txt.
+ *
+ * @param alias the property name to be matched. The name is compared
+ * using "loose matching" as described in PropertyAliases.txt.
+ *
+ * @return a UProperty enum, or UCHAR_INVALID_CODE if the given name
+ * does not match any property.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_CAPI UProperty U_EXPORT2
+u_getPropertyEnum(const char* alias);
+
+/**
+ * Return the Unicode name for a given property value, as given in the
+ * Unicode database file PropertyValueAliases.txt.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt can only be
+ * retrieved using UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * If out of range, NULL is returned.
+ *
+ * @param value selector for a value for the given property. If out
+ * of range, NULL is returned. In general, valid values range
+ * from 0 up to some maximum. There are a few exceptions:
+ * (1.) UCHAR_BLOCK values begin at the non-zero value
+ * UBLOCK_BASIC_LATIN. (2.) UCHAR_CANONICAL_COMBINING_CLASS
+ * values are not contiguous and range from 0..240. (3.)
+ * UCHAR_GENERAL_CATEGORY_MASK values are not values of
+ * UCharCategory, but rather mask values produced by
+ * U_GET_GC_MASK(). This allows grouped categories such as
+ * [:L:] to be represented. Mask values range
+ * non-contiguously from 1..U_GC_P_MASK.
+ *
+ * @param nameChoice selector for which name to get. If out of range,
+ * NULL is returned. All values have a long name. Most have
+ * a short name, but some do not. Unicode allows for
+ * additional names; if present these will be returned by
+ * U_LONG_PROPERTY_NAME + i, where i=1, 2,...
+
+ * @return a pointer to the name, or NULL if either the
+ * property or the nameChoice is out of range. If a given
+ * nameChoice returns NULL, then all larger values of
+ * nameChoice will return NULL, with one exception: if NULL is
+ * returned for U_SHORT_PROPERTY_NAME, then
+ * U_LONG_PROPERTY_NAME (and higher) may still return a
+ * non-NULL value. The returned pointer is valid until
+ * u_cleanup() is called.
+ *
+ * @see UProperty
+ * @see UPropertyNameChoice
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+u_getPropertyValueName(UProperty property,
+ int32_t value,
+ UPropertyNameChoice nameChoice);
+
+/**
+ * Return the property value integer for a given value name, as
+ * specified in the Unicode database file PropertyValueAliases.txt.
+ * Short, long, and any other variants are recognized.
+ *
+ * Note: Some of the names in PropertyValueAliases.txt will only be
+ * recognized with UCHAR_GENERAL_CATEGORY_MASK, not
+ * UCHAR_GENERAL_CATEGORY. These include: "C" / "Other", "L" /
+ * "Letter", "LC" / "Cased_Letter", "M" / "Mark", "N" / "Number", "P"
+ * / "Punctuation", "S" / "Symbol", and "Z" / "Separator".
+ *
+ * @param property UProperty selector constant.
+ * Must be UCHAR_BINARY_START<=which<UCHAR_BINARY_LIMIT
+ * or UCHAR_INT_START<=which<UCHAR_INT_LIMIT
+ * or UCHAR_MASK_START<=which<UCHAR_MASK_LIMIT.
+ * If out of range, UCHAR_INVALID_CODE is returned.
+ *
+ * @param alias the value name to be matched. The name is compared
+ * using "loose matching" as described in
+ * PropertyValueAliases.txt.
+ *
+ * @return a value integer or UCHAR_INVALID_CODE if the given name
+ * does not match any value of the given property, or if the
+ * property is invalid. Note: UCHAR_GENERAL_CATEGORY_MASK values
+ * are not values of UCharCategory, but rather mask values
+ * produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @see UProperty
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+u_getPropertyValueEnum(UProperty property,
+ const char* alias);
+
+/**
+ * Determines if the specified character is permissible as the first character in an identifier
+ * according to UAX #31 Unicode Identifier and Pattern Syntax.
+ *
+ * Same as Unicode ID_Start (UCHAR_ID_START).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may start an identifier
+ *
+ * @see UCHAR_ID_START
+ * @see u_isalpha
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as a non-initial character of an identifier
+ * according to UAX #31 Unicode Identifier and Pattern Syntax.
+ *
+ * Same as Unicode ID_Continue (UCHAR_ID_CONTINUE).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may occur as a non-initial character of an identifier
+ *
+ * @see UCHAR_ID_CONTINUE
+ * @see u_isIDStart
+ * @see u_isIDIgnorable
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDPart(UChar32 c);
+
+/**
+ * Determines if the specified character should be regarded
+ * as an ignorable character in an identifier,
+ * according to Java.
+ * True for characters with general category "Cf" (format controls) as well as
+ * non-whitespace ISO controls
+ * (U+0000..U+0008, U+000E..U+001B, U+007F..U+009F).
+ *
+ * Same as java.lang.Character.isIdentifierIgnorable().
+ *
+ * Note that Unicode just recommends to ignore Cf (format controls).
+ *
+ * @param c the code point to be tested
+ * @return true if the code point is ignorable in identifiers according to Java
+ *
+ * @see UCHAR_DEFAULT_IGNORABLE_CODE_POINT
+ * @see u_isIDStart
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isIDIgnorable(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible as the
+ * first character in a Java identifier.
+ * In addition to u_isIDStart(c), true for characters with
+ * general categories "Sc" (currency symbols) and "Pc" (connecting punctuation).
+ *
+ * Same as java.lang.Character.isJavaIdentifierStart().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may start a Java identifier
+ *
+ * @see u_isJavaIDPart
+ * @see u_isalpha
+ * @see u_isIDStart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaIDStart(UChar32 c);
+
+/**
+ * Determines if the specified character is permissible
+ * in a Java identifier.
+ * In addition to u_isIDPart(c), true for characters with
+ * general category "Sc" (currency symbols).
+ *
+ * Same as java.lang.Character.isJavaIdentifierPart().
+ *
+ * @param c the code point to be tested
+ * @return true if the code point may occur in a Java identifier
+ *
+ * @see u_isIDIgnorable
+ * @see u_isJavaIDStart
+ * @see u_isalpha
+ * @see u_isdigit
+ * @see u_isIDPart
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+u_isJavaIDPart(UChar32 c);
+
+/**
+ * The given character is mapped to its lowercase equivalent according to
+ * UnicodeData.txt; if the character has no lowercase equivalent, the character
+ * itself is returned.
+ *
+ * Same as java.lang.Character.toLowerCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Lowercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_tolower(UChar32 c);
+
+/**
+ * The given character is mapped to its uppercase equivalent according to UnicodeData.txt;
+ * if the character has no uppercase equivalent, the character itself is
+ * returned.
+ *
+ * Same as java.lang.Character.toUpperCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Uppercase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_toupper(UChar32 c);
+
+/**
+ * The given character is mapped to its titlecase equivalent
+ * according to UnicodeData.txt;
+ * if none is defined, the character itself is returned.
+ *
+ * Same as java.lang.Character.toTitleCase().
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
+ *
+ * @param c the code point to be mapped
+ * @return the Simple_Titlecase_Mapping of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_totitle(UChar32 c);
+
+/**
+ * The given character is mapped to its case folding equivalent according to
+ * UnicodeData.txt and CaseFolding.txt;
+ * if the character has no case folding equivalent, the character
+ * itself is returned.
+ *
+ * This function only returns the simple, single-code point case mapping.
+ * Full case mappings should be used whenever possible because they produce
+ * better results by working on whole strings.
+ * They take into account the string context and the language and can map
+ * to a result string with a different length as appropriate.
+ * Full case mappings are applied by the string case mapping functions,
+ * see ustring.h and the UnicodeString class.
+ * See also the User Guide chapter on C/POSIX migration:
+ * https://unicode-org.github.io/icu/userguide/icu/posix#case-mappings
+ *
+ * @param c the code point to be mapped
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return the Simple_Case_Folding of the code point, if any;
+ * otherwise the code point itself.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_foldCase(UChar32 c, uint32_t options);
+
+/**
+ * Returns the decimal digit value of the code point in the
+ * specified radix.
+ *
+ * If the radix is not in the range <code>2<=radix<=36</code> or if the
+ * value of <code>c</code> is not a valid digit in the specified
+ * radix, <code>-1</code> is returned. A character is a valid digit
+ * if at least one of the following is true:
+ * <ul>
+ * <li>The character has a decimal digit value.
+ * Such characters have the general category "Nd" (decimal digit numbers)
+ * and a Numeric_Type of Decimal.
+ * In this case the value is the character's decimal digit value.</li>
+ * <li>The character is one of the uppercase Latin letters
+ * <code>'A'</code> through <code>'Z'</code>.
+ * In this case the value is <code>c-'A'+10</code>.</li>
+ * <li>The character is one of the lowercase Latin letters
+ * <code>'a'</code> through <code>'z'</code>.
+ * In this case the value is <code>ch-'a'+10</code>.</li>
+ * <li>Latin letters from both the ASCII range (0061..007A, 0041..005A)
+ * as well as from the Fullwidth ASCII range (FF41..FF5A, FF21..FF3A)
+ * are recognized.</li>
+ * </ul>
+ *
+ * Same as java.lang.Character.digit().
+ *
+ * @param ch the code point to be tested.
+ * @param radix the radix.
+ * @return the numeric value represented by the character in the
+ * specified radix,
+ * or -1 if there is no value or if the value exceeds the radix.
+ *
+ * @see UCHAR_NUMERIC_TYPE
+ * @see u_forDigit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_digit(UChar32 ch, int8_t radix);
+
+/**
+ * Determines the character representation for a specific digit in
+ * the specified radix. If the value of <code>radix</code> is not a
+ * valid radix, or the value of <code>digit</code> is not a valid
+ * digit in the specified radix, the null character
+ * (<code>U+0000</code>) is returned.
+ * <p>
+ * The <code>radix</code> argument is valid if it is greater than or
+ * equal to 2 and less than or equal to 36.
+ * The <code>digit</code> argument is valid if
+ * <code>0 <= digit < radix</code>.
+ * <p>
+ * If the digit is less than 10, then
+ * <code>'0' + digit</code> is returned. Otherwise, the value
+ * <code>'a' + digit - 10</code> is returned.
+ *
+ * Same as java.lang.Character.forDigit().
+ *
+ * @param digit the number to convert to a character.
+ * @param radix the radix.
+ * @return the <code>char</code> representation of the specified digit
+ * in the specified radix.
+ *
+ * @see u_digit
+ * @see u_charDigitValue
+ * @see u_isdigit
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_forDigit(int32_t digit, int8_t radix);
+
+/**
+ * Get the "age" of the code point.
+ * The "age" is the Unicode version when the code point was first
+ * designated (as a non-character or for Private Use)
+ * or assigned a character.
+ * This can be useful to avoid emitting code points to receiving
+ * processes that do not accept newer characters.
+ * The data is from the UCD file DerivedAge.txt.
+ *
+ * @param c The code point.
+ * @param versionArray The Unicode version number array, to be filled in.
+ *
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+u_charAge(UChar32 c, UVersionInfo versionArray);
+
+/**
+ * Gets the Unicode version information.
+ * The version array is filled in with the version information
+ * for the Unicode standard that is currently used by ICU.
+ * For example, Unicode version 3.1.1 is represented as an array with
+ * the values { 3, 1, 1, 0 }.
+ *
+ * @param versionArray an output array that will be filled in with
+ * the Unicode version number
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_getUnicodeVersion(UVersionInfo versionArray);
+
+#if !UCONFIG_NO_NORMALIZATION
+/**
+ * Get the FC_NFKC_Closure property string for a character.
+ * See Unicode Standard Annex #15 for details, search for "FC_NFKC_Closure"
+ * or for "FNC": http://www.unicode.org/reports/tr15/
+ *
+ * @param c The character (code point) for which to get the FC_NFKC_Closure string.
+ * It must be <code>0<=c<=0x10ffff</code>.
+ * @param dest Destination address for copying the string.
+ * The string will be zero-terminated if possible.
+ * If there is no FC_NFKC_Closure string,
+ * then the buffer will be set to the empty string.
+ * @param destCapacity <code>==sizeof(dest)</code>
+ * @param pErrorCode Pointer to a UErrorCode variable.
+ * @return The length of the string, or 0 if there is no FC_NFKC_Closure string for this character.
+ * If the destCapacity is less than or equal to the length, then the buffer
+ * contains the truncated name and the returned length indicates the full
+ * length of the name.
+ * The length does not include the zero-termination.
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode);
+
+#endif
+
+
+U_CDECL_END
+
+#endif /*_UCHAR*/
+/*eof*/
diff --git a/intl/icu/source/common/unicode/ucharstrie.h b/intl/icu/source/common/unicode/ucharstrie.h
new file mode 100644
index 0000000000..fa1b55616c
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucharstrie.h
@@ -0,0 +1,623 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov14
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCHARSTRIE_H__
+#define __UCHARSTRIE_H__
+
+/**
+ * \file
+ * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
+ * to integer values.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/unistr.h"
+#include "unicode/uobject.h"
+#include "unicode/ustringtrie.h"
+
+U_NAMESPACE_BEGIN
+
+class Appendable;
+class UCharsTrieBuilder;
+class UVector32;
+
+/**
+ * Light-weight, non-const reader class for a UCharsTrie.
+ * Traverses a char16_t-serialized data structure with minimal state,
+ * for mapping strings (16-bit-unit sequences) to non-negative integer values.
+ *
+ * This class owns the serialized trie data only if it was constructed by
+ * the builder's build() method.
+ * The public constructor and the copy constructor only alias the data (only copy the pointer).
+ * There is no assignment operator.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UCharsTrie : public UMemory {
+public:
+ /**
+ * Constructs a UCharsTrie reader instance.
+ *
+ * The trieUChars must contain a copy of a char16_t sequence from the UCharsTrieBuilder,
+ * starting with the first char16_t of that sequence.
+ * The UCharsTrie object will not read more char16_ts than
+ * the UCharsTrieBuilder generated in the corresponding build() call.
+ *
+ * The array is not copied/cloned and must not be modified while
+ * the UCharsTrie object is in use.
+ *
+ * @param trieUChars The char16_t array that contains the serialized trie.
+ * @stable ICU 4.8
+ */
+ UCharsTrie(ConstChar16Ptr trieUChars)
+ : ownedArray_(nullptr), uchars_(trieUChars),
+ pos_(uchars_), remainingMatchLength_(-1) {}
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~UCharsTrie();
+
+ /**
+ * Copy constructor, copies the other trie reader object and its state,
+ * but not the char16_t array which will be shared. (Shallow copy.)
+ * @param other Another UCharsTrie object.
+ * @stable ICU 4.8
+ */
+ UCharsTrie(const UCharsTrie &other)
+ : ownedArray_(nullptr), uchars_(other.uchars_),
+ pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
+
+ /**
+ * Resets this trie to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ UCharsTrie &reset() {
+ pos_=uchars_;
+ remainingMatchLength_=-1;
+ return *this;
+ }
+
+ /**
+ * Returns the state of this trie as a 64-bit integer.
+ * The state value is never 0.
+ *
+ * @return opaque state value
+ * @see resetToState64
+ * @stable ICU 65
+ */
+ uint64_t getState64() const {
+ return (static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
+ (uint64_t)(pos_ - uchars_);
+ }
+
+ /**
+ * Resets this trie to the saved state.
+ * Unlike resetToState(State), the 64-bit state value
+ * must be from getState64() from the same trie object or
+ * from one initialized the exact same way.
+ * Because of no validation, this method is faster.
+ *
+ * @param state The opaque trie state value from getState64().
+ * @return *this
+ * @see getState64
+ * @see resetToState
+ * @see reset
+ * @stable ICU 65
+ */
+ UCharsTrie &resetToState64(uint64_t state) {
+ remainingMatchLength_ = static_cast<int32_t>(state >> kState64RemainingShift) - 2;
+ pos_ = uchars_ + (state & kState64PosMask);
+ return *this;
+ }
+
+ /**
+ * UCharsTrie state object, for saving a trie's current state
+ * and resetting the trie back to this state later.
+ * @stable ICU 4.8
+ */
+ class State : public UMemory {
+ public:
+ /**
+ * Constructs an empty State.
+ * @stable ICU 4.8
+ */
+ State() { uchars=nullptr; }
+ private:
+ friend class UCharsTrie;
+
+ const char16_t *uchars;
+ const char16_t *pos;
+ int32_t remainingMatchLength;
+ };
+
+ /**
+ * Saves the state of this trie.
+ * @param state The State object to hold the trie's state.
+ * @return *this
+ * @see resetToState
+ * @stable ICU 4.8
+ */
+ const UCharsTrie &saveState(State &state) const {
+ state.uchars=uchars_;
+ state.pos=pos_;
+ state.remainingMatchLength=remainingMatchLength_;
+ return *this;
+ }
+
+ /**
+ * Resets this trie to the saved state.
+ * If the state object contains no state, or the state of a different trie,
+ * then this trie remains unchanged.
+ * @param state The State object which holds a saved trie state.
+ * @return *this
+ * @see saveState
+ * @see reset
+ * @stable ICU 4.8
+ */
+ UCharsTrie &resetToState(const State &state) {
+ if(uchars_==state.uchars && uchars_!=nullptr) {
+ pos_=state.pos;
+ remainingMatchLength_=state.remainingMatchLength;
+ }
+ return *this;
+ }
+
+ /**
+ * Determines whether the string so far matches, whether it has a value,
+ * and whether another input char16_t can continue a matching string.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult current() const;
+
+ /**
+ * Traverses the trie from the initial state for this input char16_t.
+ * Equivalent to reset().next(uchar).
+ * @param uchar Input char value. Values below 0 and above 0xffff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ inline UStringTrieResult first(int32_t uchar) {
+ remainingMatchLength_=-1;
+ return nextImpl(uchars_, uchar);
+ }
+
+ /**
+ * Traverses the trie from the initial state for the
+ * one or two UTF-16 code units for this input code point.
+ * Equivalent to reset().nextForCodePoint(cp).
+ * @param cp A Unicode code point 0..0x10ffff.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult firstForCodePoint(UChar32 cp);
+
+ /**
+ * Traverses the trie from the current state for this input char16_t.
+ * @param uchar Input char value. Values below 0 and above 0xffff will never match.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(int32_t uchar);
+
+ /**
+ * Traverses the trie from the current state for the
+ * one or two UTF-16 code units for this input code point.
+ * @param cp A Unicode code point 0..0x10ffff.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult nextForCodePoint(UChar32 cp);
+
+ /**
+ * Traverses the trie from the current state for this string.
+ * Equivalent to
+ * \code
+ * Result result=current();
+ * for(each c in s)
+ * if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
+ * result=next(c);
+ * return result;
+ * \endcode
+ * @param s A string. Can be nullptr if length is 0.
+ * @param length The length of the string. Can be -1 if NUL-terminated.
+ * @return The match/value Result.
+ * @stable ICU 4.8
+ */
+ UStringTrieResult next(ConstChar16Ptr s, int32_t length);
+
+ /**
+ * Returns a matching string's value if called immediately after
+ * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
+ * getValue() can be called multiple times.
+ *
+ * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
+ * @return The value for the string so far.
+ * @stable ICU 4.8
+ */
+ inline int32_t getValue() const {
+ const char16_t *pos=pos_;
+ int32_t leadUnit=*pos++;
+ // U_ASSERT(leadUnit>=kMinValueLead);
+ return leadUnit&kValueIsFinal ?
+ readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
+ }
+
+ /**
+ * Determines whether all strings reachable from the current state
+ * map to the same value.
+ * @param uniqueValue Receives the unique value, if this function returns true.
+ * (output-only)
+ * @return true if all strings reachable from the current state
+ * map to the same value.
+ * @stable ICU 4.8
+ */
+ inline UBool hasUniqueValue(int32_t &uniqueValue) const {
+ const char16_t *pos=pos_;
+ // Skip the rest of a pending linear-match node.
+ return pos!=nullptr && findUniqueValue(pos+remainingMatchLength_+1, false, uniqueValue);
+ }
+
+ /**
+ * Finds each char16_t which continues the string from the current state.
+ * That is, each char16_t c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
+ * @param out Each next char16_t is appended to this object.
+ * @return the number of char16_ts which continue the string from here
+ * @stable ICU 4.8
+ */
+ int32_t getNextUChars(Appendable &out) const;
+
+ /**
+ * Iterator for all of the (string, value) pairs in a UCharsTrie.
+ * @stable ICU 4.8
+ */
+ class U_COMMON_API Iterator : public UMemory {
+ public:
+ /**
+ * Iterates from the root of a char16_t-serialized UCharsTrie.
+ * @param trieUChars The trie char16_ts.
+ * @param maxStringLength If 0, the iterator returns full strings.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Iterates from the current state of the specified UCharsTrie.
+ * @param trie The trie whose state will be copied for iteration.
+ * @param maxStringLength If 0, the iterator returns full strings.
+ * Otherwise, the iterator returns strings with this maximum length.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @stable ICU 4.8
+ */
+ Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ ~Iterator();
+
+ /**
+ * Resets this iterator to its initial state.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ Iterator &reset();
+
+ /**
+ * @return true if there are more elements.
+ * @stable ICU 4.8
+ */
+ UBool hasNext() const;
+
+ /**
+ * Finds the next (string, value) pair if there is one.
+ *
+ * If the string is truncated to the maximum length and does not
+ * have a real value, then the value is set to -1.
+ * In this case, this "not a real value" is indistinguishable from
+ * a real value of -1.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if there is another element.
+ * @stable ICU 4.8
+ */
+ UBool next(UErrorCode &errorCode);
+
+ /**
+ * @return The string for the last successful next().
+ * @stable ICU 4.8
+ */
+ const UnicodeString &getString() const { return str_; }
+ /**
+ * @return The value for the last successful next().
+ * @stable ICU 4.8
+ */
+ int32_t getValue() const { return value_; }
+
+ private:
+ UBool truncateAndStop() {
+ pos_=nullptr;
+ value_=-1; // no real value for str
+ return true;
+ }
+
+ const char16_t *branchNext(const char16_t *pos, int32_t length, UErrorCode &errorCode);
+
+ const char16_t *uchars_;
+ const char16_t *pos_;
+ const char16_t *initialPos_;
+ int32_t remainingMatchLength_;
+ int32_t initialRemainingMatchLength_;
+ UBool skipValue_; // Skip intermediate value which was already delivered.
+
+ UnicodeString str_;
+ int32_t maxLength_;
+ int32_t value_;
+
+ // The stack stores pairs of integers for backtracking to another
+ // outbound edge of a branch node.
+ // The first integer is an offset from uchars_.
+ // The second integer has the str_.length() from before the node in bits 15..0,
+ // and the remaining branch length in bits 31..16.
+ // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
+ // but the code looks more confusing that way.)
+ UVector32 *stack_;
+ };
+
+private:
+ friend class UCharsTrieBuilder;
+
+ /**
+ * Constructs a UCharsTrie reader instance.
+ * Unlike the public constructor which just aliases an array,
+ * this constructor adopts the builder's array.
+ * This constructor is only called by the builder.
+ */
+ UCharsTrie(char16_t *adoptUChars, const char16_t *trieUChars)
+ : ownedArray_(adoptUChars), uchars_(trieUChars),
+ pos_(uchars_), remainingMatchLength_(-1) {}
+
+ // No assignment operator.
+ UCharsTrie &operator=(const UCharsTrie &other) = delete;
+
+ inline void stop() {
+ pos_=nullptr;
+ }
+
+ // Reads a compact 32-bit integer.
+ // pos is already after the leadUnit, and the lead unit has bit 15 reset.
+ static inline int32_t readValue(const char16_t *pos, int32_t leadUnit) {
+ int32_t value;
+ if(leadUnit<kMinTwoUnitValueLead) {
+ value=leadUnit;
+ } else if(leadUnit<kThreeUnitValueLead) {
+ value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
+ } else {
+ value=(pos[0]<<16)|pos[1];
+ }
+ return value;
+ }
+ static inline const char16_t *skipValue(const char16_t *pos, int32_t leadUnit) {
+ if(leadUnit>=kMinTwoUnitValueLead) {
+ if(leadUnit<kThreeUnitValueLead) {
+ ++pos;
+ } else {
+ pos+=2;
+ }
+ }
+ return pos;
+ }
+ static inline const char16_t *skipValue(const char16_t *pos) {
+ int32_t leadUnit=*pos++;
+ return skipValue(pos, leadUnit&0x7fff);
+ }
+
+ static inline int32_t readNodeValue(const char16_t *pos, int32_t leadUnit) {
+ // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
+ int32_t value;
+ if(leadUnit<kMinTwoUnitNodeValueLead) {
+ value=(leadUnit>>6)-1;
+ } else if(leadUnit<kThreeUnitNodeValueLead) {
+ value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
+ } else {
+ value=(pos[0]<<16)|pos[1];
+ }
+ return value;
+ }
+ static inline const char16_t *skipNodeValue(const char16_t *pos, int32_t leadUnit) {
+ // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
+ if(leadUnit>=kMinTwoUnitNodeValueLead) {
+ if(leadUnit<kThreeUnitNodeValueLead) {
+ ++pos;
+ } else {
+ pos+=2;
+ }
+ }
+ return pos;
+ }
+
+ static inline const char16_t *jumpByDelta(const char16_t *pos) {
+ int32_t delta=*pos++;
+ if(delta>=kMinTwoUnitDeltaLead) {
+ if(delta==kThreeUnitDeltaLead) {
+ delta=(pos[0]<<16)|pos[1];
+ pos+=2;
+ } else {
+ delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
+ }
+ }
+ return pos+delta;
+ }
+
+ static const char16_t *skipDelta(const char16_t *pos) {
+ int32_t delta=*pos++;
+ if(delta>=kMinTwoUnitDeltaLead) {
+ if(delta==kThreeUnitDeltaLead) {
+ pos+=2;
+ } else {
+ ++pos;
+ }
+ }
+ return pos;
+ }
+
+ static inline UStringTrieResult valueResult(int32_t node) {
+ return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
+ }
+
+ // Handles a branch node for both next(uchar) and next(string).
+ UStringTrieResult branchNext(const char16_t *pos, int32_t length, int32_t uchar);
+
+ // Requires remainingLength_<0.
+ UStringTrieResult nextImpl(const char16_t *pos, int32_t uchar);
+
+ // Helper functions for hasUniqueValue().
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // from a branch.
+ static const char16_t *findUniqueValueFromBranch(const char16_t *pos, int32_t length,
+ UBool haveUniqueValue, int32_t &uniqueValue);
+ // Recursively finds a unique value (or whether there is not a unique one)
+ // starting from a position on a node lead unit.
+ static UBool findUniqueValue(const char16_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
+
+ // Helper functions for getNextUChars().
+ // getNextUChars() when pos is on a branch node.
+ static void getNextBranchUChars(const char16_t *pos, int32_t length, Appendable &out);
+
+ // UCharsTrie data structure
+ //
+ // The trie consists of a series of char16_t-serialized nodes for incremental
+ // Unicode string/char16_t sequence matching. (char16_t=16-bit unsigned integer)
+ // The root node is at the beginning of the trie data.
+ //
+ // Types of nodes are distinguished by their node lead unit ranges.
+ // After each node, except a final-value node, another node follows to
+ // encode match values or continue matching further units.
+ //
+ // Node types:
+ // - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
+ // The value is for the string/char16_t sequence so far.
+ // - Match node, optionally with an intermediate value in a different compact format.
+ // The value, if present, is for the string/char16_t sequence so far.
+ //
+ // Aside from the value, which uses the node lead unit's high bits:
+ //
+ // - Linear-match node: Matches a number of units.
+ // - Branch node: Branches to other nodes according to the current input unit.
+ // The node unit is the length of the branch (number of units to select from)
+ // minus 1. It is followed by a sub-node:
+ // - If the length is at most kMaxBranchLinearSubNodeLength, then
+ // there are length-1 (key, value) pairs and then one more comparison unit.
+ // If one of the key units matches, then the value is either a final value for
+ // the string so far, or a "jump" delta to the next node.
+ // If the last unit matches, then matching continues with the next node.
+ // (Values have the same encoding as final-value nodes.)
+ // - If the length is greater than kMaxBranchLinearSubNodeLength, then
+ // there is one unit and one "jump" delta.
+ // If the input unit is less than the sub-node unit, then "jump" by delta to
+ // the next sub-node which will have a length of length/2.
+ // (The delta has its own compact encoding.)
+ // Otherwise, skip the "jump" delta to the next sub-node
+ // which will have a length of length-length/2.
+
+ // Match-node lead unit values, after masking off intermediate-value bits:
+
+ // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
+ // the length is one more than the next unit.
+
+ // For a branch sub-node with at most this many entries, we drop down
+ // to a linear search.
+ static const int32_t kMaxBranchLinearSubNodeLength=5;
+
+ // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
+ static const int32_t kMinLinearMatch=0x30;
+ static const int32_t kMaxLinearMatchLength=0x10;
+
+ // Match-node lead unit bits 14..6 for the optional intermediate value.
+ // If these bits are 0, then there is no intermediate value.
+ // Otherwise, see the *NodeValue* constants below.
+ static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x0040
+ static const int32_t kNodeTypeMask=kMinValueLead-1; // 0x003f
+
+ // A final-value node has bit 15 set.
+ static const int32_t kValueIsFinal=0x8000;
+
+ // Compact value: After testing and masking off bit 15, use the following thresholds.
+ static const int32_t kMaxOneUnitValue=0x3fff;
+
+ static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1; // 0x4000
+ static const int32_t kThreeUnitValueLead=0x7fff;
+
+ static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1; // 0x3ffeffff
+
+ // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
+ static const int32_t kMaxOneUnitNodeValue=0xff;
+ static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6); // 0x4040
+ static const int32_t kThreeUnitNodeValueLead=0x7fc0;
+
+ static const int32_t kMaxTwoUnitNodeValue=
+ ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1; // 0xfdffff
+
+ // Compact delta integers.
+ static const int32_t kMaxOneUnitDelta=0xfbff;
+ static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1; // 0xfc00
+ static const int32_t kThreeUnitDeltaLead=0xffff;
+
+ static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1; // 0x03feffff
+
+ // For getState64():
+ // The remainingMatchLength_ is -1..14=(kMaxLinearMatchLength=0x10)-2
+ // so we need at least 5 bits for that.
+ // We add 2 to store it as a positive value 1..16=kMaxLinearMatchLength.
+ static constexpr int32_t kState64RemainingShift = 59;
+ static constexpr uint64_t kState64PosMask = (UINT64_C(1) << kState64RemainingShift) - 1;
+
+ char16_t *ownedArray_;
+
+ // Fixed value referencing the UCharsTrie words.
+ const char16_t *uchars_;
+
+ // Iterator variables.
+
+ // Pointer to next trie unit to read. nullptr if no more matches.
+ const char16_t *pos_;
+ // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
+ int32_t remainingMatchLength_;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __UCHARSTRIE_H__
diff --git a/intl/icu/source/common/unicode/ucharstriebuilder.h b/intl/icu/source/common/unicode/ucharstriebuilder.h
new file mode 100644
index 0000000000..5c8aa33ffb
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucharstriebuilder.h
@@ -0,0 +1,193 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucharstriebuilder.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov14
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCHARSTRIEBUILDER_H__
+#define __UCHARSTRIEBUILDER_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/stringtriebuilder.h"
+#include "unicode/ucharstrie.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: Builder for icu::UCharsTrie
+ */
+
+U_NAMESPACE_BEGIN
+
+class UCharsTrieElement;
+
+/**
+ * Builder class for UCharsTrie.
+ *
+ * This class is not intended for public subclassing.
+ * @stable ICU 4.8
+ */
+class U_COMMON_API UCharsTrieBuilder : public StringTrieBuilder {
+public:
+ /**
+ * Constructs an empty builder.
+ * @param errorCode Standard ICU error code.
+ * @stable ICU 4.8
+ */
+ UCharsTrieBuilder(UErrorCode &errorCode);
+
+ /**
+ * Destructor.
+ * @stable ICU 4.8
+ */
+ virtual ~UCharsTrieBuilder();
+
+ /**
+ * Adds a (string, value) pair.
+ * The string must be unique.
+ * The string contents will be copied; the builder does not keep
+ * a reference to the input UnicodeString or its buffer.
+ * @param s The input string.
+ * @param value The value associated with this string.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return *this
+ * @stable ICU 4.8
+ */
+ UCharsTrieBuilder &add(const UnicodeString &s, int32_t value, UErrorCode &errorCode);
+
+ /**
+ * Builds a UCharsTrie for the add()ed data.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A UCharsTrie cannot be empty. At least one (string, value) pair
+ * must have been add()ed.
+ *
+ * This method passes ownership of the builder's internal result array to the new trie object.
+ * Another call to any build() variant will re-serialize the trie.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return A new UCharsTrie for the add()ed data.
+ * @stable ICU 4.8
+ */
+ UCharsTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ /**
+ * Builds a UCharsTrie for the add()ed data and char16_t-serializes it.
+ * Once built, no further data can be add()ed until clear() is called.
+ *
+ * A UCharsTrie cannot be empty. At least one (string, value) pair
+ * must have been add()ed.
+ *
+ * Multiple calls to buildUnicodeString() set the UnicodeStrings to the
+ * builder's same char16_t array, without rebuilding.
+ * If buildUnicodeString() is called after build(), the trie will be
+ * re-serialized into a new array (because build() passes on ownership).
+ * If build() is called after buildUnicodeString(), the trie object returned
+ * by build() will become the owner of the underlying data for the
+ * previously returned UnicodeString.
+ * After clear() has been called, a new array will be used as well.
+ * @param buildOption Build option, see UStringTrieBuildOption.
+ * @param result A UnicodeString which will be set to the char16_t-serialized
+ * UCharsTrie for the add()ed data.
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return result
+ * @stable ICU 4.8
+ */
+ UnicodeString &buildUnicodeString(UStringTrieBuildOption buildOption, UnicodeString &result,
+ UErrorCode &errorCode);
+
+ /**
+ * Removes all (string, value) pairs.
+ * New data can then be add()ed and a new trie can be built.
+ * @return *this
+ * @stable ICU 4.8
+ */
+ UCharsTrieBuilder &clear() {
+ strings.remove();
+ elementsLength=0;
+ ucharsLength=0;
+ return *this;
+ }
+
+private:
+ UCharsTrieBuilder(const UCharsTrieBuilder &other) = delete; // no copy constructor
+ UCharsTrieBuilder &operator=(const UCharsTrieBuilder &other) = delete; // no assignment operator
+
+ void buildUChars(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
+
+ virtual int32_t getElementStringLength(int32_t i) const override;
+ virtual char16_t getElementUnit(int32_t i, int32_t unitIndex) const override;
+ virtual int32_t getElementValue(int32_t i) const override;
+
+ virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const override;
+
+ virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const override;
+ virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const override;
+ virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, char16_t unit) const override;
+
+ virtual UBool matchNodesCanHaveValues() const override { return true; }
+
+ virtual int32_t getMaxBranchLinearSubNodeLength() const override { return UCharsTrie::kMaxBranchLinearSubNodeLength; }
+ virtual int32_t getMinLinearMatch() const override { return UCharsTrie::kMinLinearMatch; }
+ virtual int32_t getMaxLinearMatchLength() const override { return UCharsTrie::kMaxLinearMatchLength; }
+
+ class UCTLinearMatchNode : public LinearMatchNode {
+ public:
+ UCTLinearMatchNode(const char16_t *units, int32_t len, Node *nextNode);
+ virtual bool operator==(const Node &other) const override;
+ virtual void write(StringTrieBuilder &builder) override;
+ private:
+ const char16_t *s;
+ };
+
+ virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
+ Node *nextNode) const override;
+
+ UBool ensureCapacity(int32_t length);
+ virtual int32_t write(int32_t unit) override;
+ int32_t write(const char16_t *s, int32_t length);
+ virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) override;
+ virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) override;
+ virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) override;
+ virtual int32_t writeDeltaTo(int32_t jumpTarget) override;
+
+ UnicodeString strings;
+ UCharsTrieElement *elements;
+ int32_t elementsCapacity;
+ int32_t elementsLength;
+
+ // char16_t serialization of the trie.
+ // Grows from the back: ucharsLength measures from the end of the buffer!
+ char16_t *uchars;
+ int32_t ucharsCapacity;
+ int32_t ucharsLength;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif // __UCHARSTRIEBUILDER_H__
diff --git a/intl/icu/source/common/unicode/uchriter.h b/intl/icu/source/common/unicode/uchriter.h
new file mode 100644
index 0000000000..9fae5e7de0
--- /dev/null
+++ b/intl/icu/source/common/unicode/uchriter.h
@@ -0,0 +1,393 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1998-2005, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+
+#ifndef UCHRITER_H
+#define UCHRITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/chariter.h"
+
+/**
+ * \file
+ * \brief C++ API: char16_t Character Iterator
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * A concrete subclass of CharacterIterator that iterates over the
+ * characters (code units or code points) in a char16_t array.
+ * It's possible not only to create an
+ * iterator that iterates over an entire char16_t array, but also to
+ * create one that iterates over only a subrange of a char16_t array
+ * (iterators over different subranges of the same char16_t array don't
+ * compare equal).
+ * @see CharacterIterator
+ * @see ForwardCharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UCharCharacterIterator : public CharacterIterator {
+public:
+ /**
+ * Create an iterator over the char16_t array referred to by "textPtr".
+ * The iteration range is 0 to <code>length-1</code>.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * @param textPtr The char16_t array to be iterated over
+ * @param length The length of the char16_t array
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length);
+
+ /**
+ * Create an iterator over the char16_t array referred to by "textPtr".
+ * The iteration range is 0 to <code>length-1</code>.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * The starting
+ * position is specified by "position". If "position" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textPtr The char16_t array to be iterated over
+ * @param length The length of the char16_t array
+ * @param position The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
+ int32_t position);
+
+ /**
+ * Create an iterator over the char16_t array referred to by "textPtr".
+ * The iteration range is 0 to <code>end-1</code>.
+ * text is only aliased, not adopted (the
+ * destructor will not delete it).
+ * The starting
+ * position is specified by "position". If begin and end do not
+ * form a valid iteration range or "position" is outside the valid
+ * iteration range, the behavior of this object is undefined.
+ * @param textPtr The char16_t array to be iterated over
+ * @param length The length of the char16_t array
+ * @param textBegin The begin position of the iteration range
+ * @param textEnd The end position of the iteration range
+ * @param position The starting position of the iteration
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(ConstChar16Ptr textPtr, int32_t length,
+ int32_t textBegin,
+ int32_t textEnd,
+ int32_t position);
+
+ /**
+ * Copy constructor. The new iterator iterates over the same range
+ * of the same string as "that", and its initial position is the
+ * same as "that"'s current position.
+ * @param that The UCharCharacterIterator to be copied
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator(const UCharCharacterIterator& that);
+
+ /**
+ * Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UCharCharacterIterator();
+
+ /**
+ * Assignment operator. *this is altered to iterate over the sane
+ * range of the same string as "that", and refers to the same
+ * character within that string as "that" does.
+ * @param that The object to be copied
+ * @return the newly created object
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator&
+ operator=(const UCharCharacterIterator& that);
+
+ /**
+ * Returns true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @param that The ForwardCharacterIterator used to be compared for equality
+ * @return true if the iterators iterate over the same range of the
+ * same string and are pointing at the same character.
+ * @stable ICU 2.0
+ */
+ virtual bool operator==(const ForwardCharacterIterator& that) const override;
+
+ /**
+ * Generates a hash code for this iterator.
+ * @return the hash code.
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const override;
+
+ /**
+ * Returns a new UCharCharacterIterator referring to the same
+ * character in the same range of the same string as this one. The
+ * caller must delete the new iterator.
+ * @return the CharacterIterator newly created
+ * @stable ICU 2.0
+ */
+ virtual UCharCharacterIterator* clone() const override;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with next().
+ * @return the first code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t first(void) override;
+
+ /**
+ * Sets the iterator to refer to the first code unit in its
+ * iteration range, returns that code unit, and moves the position
+ * to the second code unit. This is an alternative to setToStart()
+ * for forward iteration with nextPostInc().
+ * @return the first code unit in its iteration range
+ * @stable ICU 2.0
+ */
+ virtual char16_t firstPostInc(void) override;
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, and returns that code unit,
+ * This can be used to begin an iteration with next32().
+ * Note that an iteration with next32PostInc(), beginning with,
+ * e.g., setToStart() or firstPostInc(), is more efficient.
+ * @return the first code point in its iteration range
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32(void) override;
+
+ /**
+ * Sets the iterator to refer to the first code point in its
+ * iteration range, returns that code point, and moves the position
+ * to the second code point. This is an alternative to setToStart()
+ * for forward iteration with next32PostInc().
+ * @return the first code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 first32PostInc(void) override;
+
+ /**
+ * Sets the iterator to refer to the last code unit in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous().
+ * @return the last code unit in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t last(void) override;
+
+ /**
+ * Sets the iterator to refer to the last code point in its
+ * iteration range, and returns that code unit.
+ * This can be used to begin an iteration with previous32().
+ * @return the last code point in its iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 last32(void) override;
+
+ /**
+ * Sets the iterator to refer to the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code unit.
+ * @param position the position within the text-storage object
+ * @return the code unit
+ * @stable ICU 2.0
+ */
+ virtual char16_t setIndex(int32_t position) override;
+
+ /**
+ * Sets the iterator to refer to the beginning of the code point
+ * that contains the "position"-th code unit
+ * in the text-storage object the iterator refers to, and
+ * returns that code point.
+ * The current position is adjusted to the beginning of the code point
+ * (its first code unit).
+ * @param position the position within the text-storage object
+ * @return the code unit
+ * @stable ICU 2.0
+ */
+ virtual UChar32 setIndex32(int32_t position) override;
+
+ /**
+ * Returns the code unit the iterator currently refers to.
+ * @return the code unit the iterator currently refers to.
+ * @stable ICU 2.0
+ */
+ virtual char16_t current(void) const override;
+
+ /**
+ * Returns the code point the iterator currently refers to.
+ * @return the code point the iterator currently refers to.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 current32(void) const override;
+
+ /**
+ * Advances to the next code unit in the iteration range (toward
+ * endIndex()), and returns that code unit. If there are no more
+ * code units to return, returns DONE.
+ * @return the next code unit in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t next(void) override;
+
+ /**
+ * Gets the current code unit for returning and advances to the next code unit
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code units to return, returns DONE.
+ * @return the current code unit.
+ * @stable ICU 2.0
+ */
+ virtual char16_t nextPostInc(void) override;
+
+ /**
+ * Advances to the next code point in the iteration range (toward
+ * endIndex()), and returns that code point. If there are no more
+ * code points to return, returns DONE.
+ * Note that iteration with "pre-increment" semantics is less
+ * efficient than iteration with "post-increment" semantics
+ * that is provided by next32PostInc().
+ * @return the next code point in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32(void) override;
+
+ /**
+ * Gets the current code point for returning and advances to the next code point
+ * in the iteration range
+ * (toward endIndex()). If there are
+ * no more code points to return, returns DONE.
+ * @return the current point.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 next32PostInc(void) override;
+
+ /**
+ * Returns false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * This is used with nextPostInc() or next32PostInc() in forward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * at or after the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasNext() override;
+
+ /**
+ * Advances to the previous code unit in the iteration range (toward
+ * startIndex()), and returns that code unit. If there are no more
+ * code units to return, returns DONE.
+ * @return the previous code unit in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual char16_t previous(void) override;
+
+ /**
+ * Advances to the previous code point in the iteration range (toward
+ * startIndex()), and returns that code point. If there are no more
+ * code points to return, returns DONE.
+ * @return the previous code point in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UChar32 previous32(void) override;
+
+ /**
+ * Returns false if there are no more code units or code points
+ * before the current position in the iteration range.
+ * This is used with previous() or previous32() in backward
+ * iteration.
+ * @return false if there are no more code units or code points
+ * before the current position in the iteration range.
+ * @stable ICU 2.0
+ */
+ virtual UBool hasPrevious() override;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+ virtual int32_t move(int32_t delta, EOrigin origin) override;
+
+ /**
+ * Moves the current position relative to the start or end of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code points forward
+ * or backward by specifying a positive or negative delta.
+ * @param delta the position relative to origin. A positive delta means forward;
+ * a negative delta means backward.
+ * @param origin Origin enumeration {kStart, kCurrent, kEnd}
+ * @return the new position
+ * @stable ICU 2.0
+ */
+#ifdef move32
+ // One of the system headers right now is sometimes defining a conflicting macro we don't use
+#undef move32
+#endif
+ virtual int32_t move32(int32_t delta, EOrigin origin) override;
+
+ /**
+ * Sets the iterator to iterate over a new range of text
+ * @stable ICU 2.0
+ */
+ void setText(ConstChar16Ptr newText, int32_t newTextLength);
+
+ /**
+ * Copies the char16_t array under iteration into the UnicodeString
+ * referred to by "result". Even if this iterator iterates across
+ * only a part of this string, the whole string is copied.
+ * @param result Receives a copy of the text under iteration.
+ * @stable ICU 2.0
+ */
+ virtual void getText(UnicodeString& result) override;
+
+ /**
+ * Return a class ID for this class (not really public)
+ * @return a class ID for this class
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Return a class ID for this object (not really public)
+ * @return a class ID for this object.
+ * @stable ICU 2.0
+ */
+ virtual UClassID getDynamicClassID(void) const override;
+
+protected:
+ /**
+ * Protected constructor
+ * @stable ICU 2.0
+ */
+ UCharCharacterIterator();
+ /**
+ * Protected member text
+ * @stable ICU 2.0
+ */
+ const char16_t* text;
+
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/uclean.h b/intl/icu/source/common/unicode/uclean.h
new file mode 100644
index 0000000000..f5b0aa088a
--- /dev/null
+++ b/intl/icu/source/common/unicode/uclean.h
@@ -0,0 +1,262 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 2001-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* file name: uclean.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2001July05
+* created by: George Rhoten
+*/
+
+#ifndef __UCLEAN_H__
+#define __UCLEAN_H__
+
+#include "unicode/utypes.h"
+/**
+ * \file
+ * \brief C API: Initialize and clean up ICU
+ */
+
+/**
+ * Initialize ICU.
+ *
+ * Use of this function is optional. It is OK to simply use ICU
+ * services and functions without first having initialized
+ * ICU by calling u_init().
+ *
+ * u_init() will attempt to load some part of ICU's data, and is
+ * useful as a test for configuration or installation problems that
+ * leave the ICU data inaccessible. A successful invocation of u_init()
+ * does not, however, guarantee that all ICU data is accessible.
+ *
+ * Multiple calls to u_init() cause no harm, aside from the small amount
+ * of time required.
+ *
+ * In old versions of ICU, u_init() was required in multi-threaded applications
+ * to ensure the thread safety of ICU. u_init() is no longer needed for this purpose.
+ *
+ * @param status An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * An Error will be returned if some required part of ICU data can not
+ * be loaded or initialized.
+ * The function returns immediately if the input error code indicates a
+ * failure, as usual.
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+u_init(UErrorCode *status);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Clean up the system resources, such as allocated memory or open files,
+ * used in all ICU libraries. This will free/delete all memory owned by the
+ * ICU libraries, and return them to their original load state. All open ICU
+ * items (collators, resource bundles, converters, etc.) must be closed before
+ * calling this function, otherwise ICU may not free its allocated memory
+ * (e.g. close your converters and resource bundles before calling this
+ * function). Generally, this function should be called once just before
+ * an application exits. For applications that dynamically load and unload
+ * the ICU libraries (relatively uncommon), u_cleanup() should be called
+ * just before the library unload.
+ * <p>
+ * u_cleanup() also clears any ICU heap functions, mutex functions or
+ * trace functions that may have been set for the process.
+ * This has the effect of restoring ICU to its initial condition, before
+ * any of these override functions were installed. Refer to
+ * u_setMemoryFunctions(), u_setMutexFunctions and
+ * utrace_setFunctions(). If ICU is to be reinitialized after
+ * calling u_cleanup(), these runtime override functions will need to
+ * be set up again if they are still required.
+ * <p>
+ * u_cleanup() is not thread safe. All other threads should stop using ICU
+ * before calling this function.
+ * <p>
+ * Any open ICU items will be left in an undefined state by u_cleanup(),
+ * and any subsequent attempt to use such an item will give unpredictable
+ * results.
+ * <p>
+ * After calling u_cleanup(), an application may continue to use ICU by
+ * calling u_init(). An application must invoke u_init() first from one single
+ * thread before allowing other threads call u_init(). All threads existing
+ * at the time of the first thread's call to u_init() must also call
+ * u_init() themselves before continuing with other ICU operations.
+ * <p>
+ * The use of u_cleanup() just before an application terminates is optional,
+ * but it should be called only once for performance reasons. The primary
+ * benefit is to eliminate reports of memory or resource leaks originating
+ * in ICU code from the results generated by heap analysis tools.
+ * <p>
+ * <strong>Use this function with great care!</strong>
+ * </p>
+ *
+ * @stable ICU 2.0
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_cleanup(void);
+
+U_CDECL_BEGIN
+/**
+ * Pointer type for a user supplied memory allocation function.
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param size The number of bytes to be allocated
+ * @return Pointer to the newly allocated memory, or NULL if the allocation failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemAllocFn(const void *context, size_t size);
+/**
+ * Pointer type for a user supplied memory re-allocation function.
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be resized.
+ * @param size The new size for the block.
+ * @return Pointer to the newly allocated memory, or NULL if the allocation failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void *U_CALLCONV UMemReallocFn(const void *context, void *mem, size_t size);
+/**
+ * Pointer type for a user supplied memory free function. Behavior should be
+ * similar the standard C library free().
+ * @param context user supplied value, obtained from u_setMemoryFunctions().
+ * @param mem Pointer to the memory block to be freed.
+ * @return Pointer to the resized memory block, or NULL if the resizing failed.
+ * @stable ICU 2.8
+ * @system
+ */
+typedef void U_CALLCONV UMemFreeFn (const void *context, void *mem);
+
+/**
+ * Set the functions that ICU will use for memory allocation.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use the standard C library malloc() and free() functions.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the memory functions each time they
+ * are called.
+ * @param a Pointer to a user-supplied malloc function.
+ * @param r Pointer to a user-supplied realloc function.
+ * @param f Pointer to a user-supplied free function.
+ * @param status Receives error values.
+ * @stable ICU 2.8
+ * @system
+ */
+U_CAPI void U_EXPORT2
+u_setMemoryFunctions(const void *context, UMemAllocFn * U_CALLCONV_FPTR a, UMemReallocFn * U_CALLCONV_FPTR r, UMemFreeFn * U_CALLCONV_FPTR f,
+ UErrorCode *status);
+
+U_CDECL_END
+
+#ifndef U_HIDE_DEPRECATED_API
+/*********************************************************************************
+ *
+ * Deprecated Functions
+ *
+ * The following functions for user supplied mutexes are no longer supported.
+ * Any attempt to use them will return a U_UNSUPPORTED_ERROR.
+ *
+ **********************************************************************************/
+
+/**
+ * An opaque pointer type that represents an ICU mutex.
+ * For user-implemented mutexes, the value will typically point to a
+ * struct or object that implements the mutex.
+ * @deprecated ICU 52. This type is no longer supported.
+ * @system
+ */
+typedef void *UMTX;
+
+U_CDECL_BEGIN
+/**
+ * Function Pointer type for a user supplied mutex initialization function.
+ * The user-supplied function will be called by ICU whenever ICU needs to create a
+ * new mutex. The function implementation should create a mutex, and store a pointer
+ * to something that uniquely identifies the mutex into the UMTX that is supplied
+ * as a parameter.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
+ * @param mutex Receives a pointer that identifies the new mutex.
+ * The mutex init function must set the UMTX to a non-null value.
+ * Subsequent calls by ICU to lock, unlock, or destroy a mutex will
+ * identify the mutex by the UMTX value.
+ * @param status Error status. Report errors back to ICU by setting this variable
+ * with an error code.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef void U_CALLCONV UMtxInitFn (const void *context, UMTX *mutex, UErrorCode* status);
+
+
+/**
+ * Function Pointer type for a user supplied mutex functions.
+ * One of the user-supplied functions with this signature will be called by ICU
+ * whenever ICU needs to lock, unlock, or destroy a mutex.
+ * @param context user supplied value, obtained from u_setMutexFunctions().
+ * @param mutex specify the mutex on which to operate.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef void U_CALLCONV UMtxFn (const void *context, UMTX *mutex);
+U_CDECL_END
+
+/**
+ * Set the functions that ICU will use for mutex operations
+ * Use of this function is optional; by default (without this function), ICU will
+ * directly access system functions for mutex operations
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the user-supplied mutex functions each time they
+ * are called.
+ * @param init Pointer to a mutex initialization function. Must be non-null.
+ * @param destroy Pointer to the mutex destroy function. Must be non-null.
+ * @param lock pointer to the mutex lock function. Must be non-null.
+ * @param unlock Pointer to the mutex unlock function. Must be non-null.
+ * @param status Receives error values.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+U_DEPRECATED void U_EXPORT2
+u_setMutexFunctions(const void *context, UMtxInitFn *init, UMtxFn *destroy, UMtxFn *lock, UMtxFn *unlock,
+ UErrorCode *status);
+
+
+/**
+ * Pointer type for a user supplied atomic increment or decrement function.
+ * @param context user supplied value, obtained from u_setAtomicIncDecFunctions().
+ * @param p Pointer to a 32 bit int to be incremented or decremented
+ * @return The value of the variable after the inc or dec operation.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+typedef int32_t U_CALLCONV UMtxAtomicFn(const void *context, int32_t *p);
+
+/**
+ * Set the functions that ICU will use for atomic increment and decrement of int32_t values.
+ * Use of this function is optional; by default (without this function), ICU will
+ * use its own internal implementation of atomic increment/decrement.
+ * This function can only be used when ICU is in an initial, unused state, before
+ * u_init() has been called.
+ * @param context This pointer value will be saved, and then (later) passed as
+ * a parameter to the increment and decrement functions each time they
+ * are called. This function can only be called
+ * @param inc Pointer to a function to do an atomic increment operation. Must be non-null.
+ * @param dec Pointer to a function to do an atomic decrement operation. Must be non-null.
+ * @param status Receives error values.
+ * @deprecated ICU 52. This function is no longer supported.
+ * @system
+ */
+U_DEPRECATED void U_EXPORT2
+u_setAtomicIncDecFunctions(const void *context, UMtxAtomicFn *inc, UMtxAtomicFn *dec,
+ UErrorCode *status);
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* U_HIDE_SYSTEM_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/ucnv.h b/intl/icu/source/common/unicode/ucnv.h
new file mode 100644
index 0000000000..20c173b662
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucnv.h
@@ -0,0 +1,2056 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv.h:
+ * External APIs for the ICU's codeset conversion library
+ * Bertrand A. Damiba
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 04/04/99 helena Fixed internal header inclusion.
+ * 05/11/00 helena Added setFallback and usesFallback APIs.
+ * 06/29/2000 helena Major rewrite of the callback APIs.
+ * 12/07/2000 srl Update of documentation
+ */
+
+/**
+ * \file
+ * \brief C API: Character conversion
+ *
+ * <h2>Character Conversion C API</h2>
+ *
+ * <p>This API is used to convert codepage or character encoded data to and
+ * from UTF-16. You can open a converter with {@link ucnv_open() }. With that
+ * converter, you can get its properties, set options, convert your data and
+ * close the converter.</p>
+ *
+ * <p>Since many software programs recognize different converter names for
+ * different types of converters, there are other functions in this API to
+ * iterate over the converter aliases. The functions {@link ucnv_getAvailableName() },
+ * {@link ucnv_getAlias() } and {@link ucnv_getStandardName() } are some of the
+ * more frequently used alias functions to get this information.</p>
+ *
+ * <p>When a converter encounters an illegal, irregular, invalid or unmappable character
+ * its default behavior is to use a substitution character to replace the
+ * bad byte sequence. This behavior can be changed by using {@link ucnv_setFromUCallBack() }
+ * or {@link ucnv_setToUCallBack() } on the converter. The header ucnv_err.h defines
+ * many other callback actions that can be used instead of a character substitution.</p>
+ *
+ * <p>More information about this API can be found in our
+ * <a href="https://unicode-org.github.io/icu/userguide/conversion/">User Guide</a>.</p>
+ */
+
+#ifndef UCNV_H
+#define UCNV_H
+
+#include "unicode/ucnv_err.h"
+#include "unicode/uenum.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
+
+#define USET_DEFINED
+
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * It is forward-declared here to avoid including unicode/uset.h file if related
+ * conversion APIs are not used.
+ *
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+U_CDECL_BEGIN
+
+/** Maximum length of a converter name including the terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_CONVERTER_NAME_LENGTH 60
+/** Maximum length of a converter name including path and terminating NULL @stable ICU 2.0 */
+#define UCNV_MAX_FULL_FILE_NAME_LENGTH (600+UCNV_MAX_CONVERTER_NAME_LENGTH)
+
+/** Shift in for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SI 0x0F
+/** Shift out for EBDCDIC_STATEFUL and iso2022 states @stable ICU 2.0 */
+#define UCNV_SO 0x0E
+
+/**
+ * Enum for specifying basic types of converters
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** @stable ICU 2.0 */
+ UCNV_UNSUPPORTED_CONVERTER = -1,
+ /** @stable ICU 2.0 */
+ UCNV_SBCS = 0,
+ /** @stable ICU 2.0 */
+ UCNV_DBCS = 1,
+ /** @stable ICU 2.0 */
+ UCNV_MBCS = 2,
+ /** @stable ICU 2.0 */
+ UCNV_LATIN_1 = 3,
+ /** @stable ICU 2.0 */
+ UCNV_UTF8 = 4,
+ /** @stable ICU 2.0 */
+ UCNV_UTF16_BigEndian = 5,
+ /** @stable ICU 2.0 */
+ UCNV_UTF16_LittleEndian = 6,
+ /** @stable ICU 2.0 */
+ UCNV_UTF32_BigEndian = 7,
+ /** @stable ICU 2.0 */
+ UCNV_UTF32_LittleEndian = 8,
+ /** @stable ICU 2.0 */
+ UCNV_EBCDIC_STATEFUL = 9,
+ /** @stable ICU 2.0 */
+ UCNV_ISO_2022 = 10,
+
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_1 = 11,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_2,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_3,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_4,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_5,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_6,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_8,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_11,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_16,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_17,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_18,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
+ UCNV_LMBCS_LAST = UCNV_LMBCS_19,
+ /** @stable ICU 2.0 */
+ UCNV_HZ,
+ /** @stable ICU 2.0 */
+ UCNV_SCSU,
+ /** @stable ICU 2.0 */
+ UCNV_ISCII,
+ /** @stable ICU 2.0 */
+ UCNV_US_ASCII,
+ /** @stable ICU 2.0 */
+ UCNV_UTF7,
+ /** @stable ICU 2.2 */
+ UCNV_BOCU1,
+ /** @stable ICU 2.2 */
+ UCNV_UTF16,
+ /** @stable ICU 2.2 */
+ UCNV_UTF32,
+ /** @stable ICU 2.2 */
+ UCNV_CESU8,
+ /** @stable ICU 2.4 */
+ UCNV_IMAP_MAILBOX,
+ /** @stable ICU 4.8 */
+ UCNV_COMPOUND_TEXT,
+
+ /* Number of converter types for which we have conversion routines. */
+ UCNV_NUMBER_OF_SUPPORTED_CONVERTER_TYPES
+} UConverterType;
+
+/**
+ * Enum for specifying which platform a converter ID refers to.
+ * The use of platform/CCSID is not recommended. See ucnv_openCCSID().
+ *
+ * @see ucnv_getPlatform
+ * @see ucnv_openCCSID
+ * @see ucnv_getCCSID
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNKNOWN = -1,
+ UCNV_IBM = 0
+} UConverterPlatform;
+
+/**
+ * Function pointer for error callback in the codepage to unicode direction.
+ * Called when an error has occurred in conversion to unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode ICU error code in/out parameter.
+ * For converter callback functions, set to a conversion error
+ * before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setToUCallBack
+ * @see UConverterToUnicodeArgs
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterToUCallback) (
+ const void* context,
+ UConverterToUnicodeArgs *args,
+ const char *codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode *pErrorCode);
+
+/**
+ * Function pointer for error callback in the unicode to codepage direction.
+ * Called when an error has occurred in conversion from unicode, or on open/close of the callback (see reason).
+ * @param context Pointer to the callback's private data
+ * @param args Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param pErrorCode ICU error code in/out parameter.
+ * For converter callback functions, set to a conversion error
+ * before the call, and the callback may reset it to U_ZERO_ERROR.
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+typedef void (U_EXPORT2 *UConverterFromUCallback) (
+ const void* context,
+ UConverterFromUnicodeArgs *args,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+/**
+ * Character that separates converter names from options and options from each other.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_CHAR ','
+
+/**
+ * String version of UCNV_OPTION_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_OPTION_SEP_STRING ","
+
+/**
+ * Character that separates a converter option from its value.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_CHAR '='
+
+/**
+ * String version of UCNV_VALUE_SEP_CHAR.
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_VALUE_SEP_STRING "="
+
+/**
+ * Converter option for specifying a locale.
+ * For example, ucnv_open("SCSU,locale=ja", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.0
+ */
+#define UCNV_LOCALE_OPTION_STRING ",locale="
+
+/**
+ * Converter option for specifying a version selector (0..9) for some converters.
+ * For example,
+ * \code
+ * ucnv_open("UTF-7,version=1", &errorCode);
+ * \endcode
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_VERSION_OPTION_STRING ",version="
+
+/**
+ * Converter option for EBCDIC SBCS or mixed-SBCS/DBCS (stateful) codepages.
+ * Swaps Unicode mappings for EBCDIC LF and NL codes, as used on
+ * S/390 (z/OS) Unix System Services (Open Edition).
+ * For example, ucnv_open("ibm-1047,swaplfnl", &errorCode);
+ * See convrtrs.txt.
+ *
+ * @see ucnv_open
+ * @stable ICU 2.4
+ */
+#define UCNV_SWAP_LFNL_OPTION_STRING ",swaplfnl"
+
+/**
+ * Do a fuzzy compare of two converter/alias names.
+ * The comparison is case-insensitive, ignores leading zeroes if they are not
+ * followed by further digits, and ignores all but letters and digits.
+ * Thus the strings "UTF-8", "utf_8", "u*T@f08" and "Utf 8" are exactly equivalent.
+ * See section 1.4, Charset Alias Matching in Unicode Technical Standard #22
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * @param name1 a converter name or alias, zero-terminated
+ * @param name2 a converter name or alias, zero-terminated
+ * @return 0 if the names match, or a negative value if the name1
+ * lexically precedes name2, or a positive value if the name1
+ * lexically follows name2.
+ * @stable ICU 2.0
+ */
+U_CAPI int U_EXPORT2
+ucnv_compareNames(const char *name1, const char *name2);
+
+
+/**
+ * Creates a UConverter object with the name of a coded character set specified as a C string.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <code>NULL</code> is passed for the converter name, it will create one with the
+ * getDefaultName return value.
+ *
+ * <p>A converter name for ICU 1.5 and above may contain options
+ * like a locale specification to control the specific behavior of
+ * the newly instantiated converter.
+ * The meaning of the options depends on the particular converter.
+ * If an option is not defined for or recognized by a given converter, then it is ignored.</p>
+ *
+ * <p>Options are appended to the converter name string, with a
+ * <code>UCNV_OPTION_SEP_CHAR</code> between the name and the first option and
+ * also between adjacent options.</p>
+ *
+ * <p>If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.</p>
+ *
+ * <p>The conversion behavior and names can vary between platforms. ICU may
+ * convert some characters differently from other platforms. Details on this topic
+ * are in the <a href="https://unicode-org.github.io/icu/userguide/conversion/">User
+ * Guide</a>. Aliases starting with a "cp" prefix have no specific meaning
+ * other than its an alias starting with the letters "cp". Please do not
+ * associate any meaning to these aliases.</p>
+ *
+ * \snippet samples/ucnv/convsamp.cpp ucnv_open
+ *
+ * @param converterName Name of the coded character set table.
+ * This may have options appended to the string.
+ * IANA alias character set names, IBM CCSIDs starting with "ibm-",
+ * Windows codepage numbers starting with "windows-" are frequently
+ * used for this parameter. See ucnv_getAvailableName and
+ * ucnv_getAlias for a complete list that is available.
+ * If this parameter is NULL, the default converter will be used.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @see ucnv_getAvailableName
+ * @see ucnv_getAlias
+ * @see ucnv_getDefaultName
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_open(const char *converterName, UErrorCode *err);
+
+
+/**
+ * Creates a Unicode converter with the names specified as unicode string.
+ * The name should be limited to the ASCII-7 alphanumerics range.
+ * The actual name will be resolved with the alias file
+ * using a case-insensitive string comparison that ignores
+ * leading zeroes and all non-alphanumeric characters.
+ * E.g., the names "UTF8", "utf-8", "u*T@f08" and "Utf 8" are all equivalent.
+ * (See also ucnv_compareNames().)
+ * If <TT>NULL</TT> is passed for the converter name, it will create
+ * one with the ucnv_getDefaultName() return value.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * <p>See ucnv_open for the complete details</p>
+ * @param name Name of the UConverter table in a zero terminated
+ * Unicode string
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR,
+ * U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an
+ * error occurred
+ * @see ucnv_open
+ * @see ucnv_openCCSID
+ * @see ucnv_close
+ * @see ucnv_compareNames
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openU(const UChar *name,
+ UErrorCode *err);
+
+/**
+ * Creates a UConverter object from a CCSID number and platform pair.
+ * Note that the usefulness of this function is limited to platforms with numeric
+ * encoding IDs. Only IBM and Microsoft platforms use numeric (16-bit) identifiers for
+ * encodings.
+ *
+ * In addition, IBM CCSIDs and Unicode conversion tables are not 1:1 related.
+ * For many IBM CCSIDs there are multiple (up to six) Unicode conversion tables, and
+ * for some Unicode conversion tables there are multiple CCSIDs.
+ * Some "alternate" Unicode conversion tables are provided by the
+ * IBM CDRA conversion table registry.
+ * The most prominent example of a systematic modification of conversion tables that is
+ * not provided in the form of conversion table files in the repository is
+ * that S/390 Unix System Services swaps the codes for Line Feed and New Line in all
+ * EBCDIC codepages, which requires such a swap in the Unicode conversion tables as well.
+ *
+ * Only IBM default conversion tables are accessible with ucnv_openCCSID().
+ * ucnv_getCCSID() will return the same CCSID for all conversion tables that are associated
+ * with that CCSID.
+ *
+ * Currently, the only "platform" supported in the ICU converter API is UCNV_IBM.
+ *
+ * In summary, the use of CCSIDs and the associated API functions is not recommended.
+ *
+ * In order to open a converter with the default IBM CDRA Unicode conversion table,
+ * you can use this function or use the prefix "ibm-":
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu", ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter with the IBM S/390 Unix System Services variant
+ * of a Unicode/EBCDIC conversion table,
+ * you can use the prefix "ibm-" together with the option string UCNV_SWAP_LFNL_OPTION_STRING:
+ * \code
+ * char name[20];
+ * sprintf(name, "ibm-%hu" UCNV_SWAP_LFNL_OPTION_STRING, ccsid);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * In order to open a converter from a Microsoft codepage number, use the prefix "cp":
+ * \code
+ * char name[20];
+ * sprintf(name, "cp%hu", codepageID);
+ * cnv=ucnv_open(name, &errorCode);
+ * \endcode
+ *
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ *
+ * @param codepage codepage number to create
+ * @param platform the platform in which the codepage number exists
+ * @param err error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error
+ * occurred.
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_close
+ * @see ucnv_getCCSID
+ * @see ucnv_getPlatform
+ * @see UConverterPlatform
+ * @stable ICU 2.0
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openCCSID(int32_t codepage,
+ UConverterPlatform platform,
+ UErrorCode * err);
+
+/**
+ * <p>Creates a UConverter object specified from a packageName and a converterName.</p>
+ *
+ * <p>The packageName and converterName must point to an ICU udata object, as defined by
+ * <code> udata_open( packageName, "cnv", converterName, err) </code> or equivalent.
+ * Typically, packageName will refer to a (.dat) file, or to a package registered with
+ * udata_setAppData(). Using a full file or directory pathname for packageName is deprecated.</p>
+ *
+ * <p>The name will NOT be looked up in the alias mechanism, nor will the converter be
+ * stored in the converter cache or the alias table. The only way to open further converters
+ * is call this function multiple times, or use the ucnv_clone() function to clone a
+ * 'primary' converter.</p>
+ *
+ * <p>A future version of ICU may add alias table lookups and/or caching
+ * to this function.</p>
+ *
+ * <p>Example Use:
+ * <code>cnv = ucnv_openPackage("myapp", "myconverter", &err);</code>
+ * </p>
+ *
+ * @param packageName name of the package (equivalent to 'path' in udata_open() call)
+ * @param converterName name of the data item to be used, without suffix.
+ * @param err outgoing error status <TT>U_MEMORY_ALLOCATION_ERROR, U_FILE_ACCESS_ERROR</TT>
+ * @return the created Unicode converter object, or <TT>NULL</TT> if an error occurred
+ * @see udata_open
+ * @see ucnv_open
+ * @see ucnv_clone
+ * @see ucnv_close
+ * @stable ICU 2.2
+ */
+U_CAPI UConverter* U_EXPORT2
+ucnv_openPackage(const char *packageName, const char *converterName, UErrorCode *err);
+
+/**
+ * Thread safe converter cloning operation.
+ *
+ * You must ucnv_close() the clone.
+ *
+ * @param cnv converter to be cloned
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * @return pointer to the new clone
+ * @stable ICU 71
+ */
+U_CAPI UConverter* U_EXPORT2 ucnv_clone(const UConverter *cnv, UErrorCode *status);
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * Thread safe converter cloning operation.
+ * For most efficient operation, pass in a stackBuffer (and a *pBufferSize)
+ * with at least U_CNV_SAFECLONE_BUFFERSIZE bytes of space.
+ * If the buffer size is sufficient, then the clone will use the stack buffer;
+ * otherwise, it will be allocated, and *pBufferSize will indicate
+ * the actual size. (This should not occur with U_CNV_SAFECLONE_BUFFERSIZE.)
+ *
+ * You must ucnv_close() the clone in any case.
+ *
+ * If *pBufferSize==0, (regardless of whether stackBuffer==NULL or not)
+ * then *pBufferSize will be changed to a sufficient size
+ * for cloning this converter,
+ * without actually cloning the converter ("pure pre-flighting").
+ *
+ * If *pBufferSize is greater than zero but not large enough for a stack-based
+ * clone, then the converter is cloned using newly allocated memory
+ * and *pBufferSize is changed to the necessary size.
+ *
+ * If the converter clone fits into the stack buffer but the stack buffer is not
+ * sufficiently aligned for the clone, then the clone will use an
+ * adjusted pointer and use an accordingly smaller buffer size.
+ *
+ * @param cnv converter to be cloned
+ * @param stackBuffer <em>Deprecated functionality as of ICU 52, use NULL.</em><br>
+ * user allocated space for the new clone. If NULL new memory will be allocated.
+ * If buffer is not large enough, new memory will be allocated.
+ * Clients can use the U_CNV_SAFECLONE_BUFFERSIZE. This will probably be enough to avoid memory allocations.
+ * @param pBufferSize <em>Deprecated functionality as of ICU 52, use NULL or 1.</em><br>
+ * pointer to size of allocated space.
+ * @param status to indicate whether the operation went on smoothly or there were errors
+ * An informational status value, U_SAFECLONE_ALLOCATED_WARNING,
+ * is used if pBufferSize != NULL and any allocations were necessary
+ * However, it is better to check if *pBufferSize grew for checking for
+ * allocations because warning codes can be overridden by subsequent
+ * function calls.
+ * @return pointer to the new clone
+ * @deprecated ICU 71 Use ucnv_clone() instead.
+ */
+U_DEPRECATED UConverter * U_EXPORT2
+ucnv_safeClone(const UConverter *cnv,
+ void *stackBuffer,
+ int32_t *pBufferSize,
+ UErrorCode *status);
+
+/**
+ * \def U_CNV_SAFECLONE_BUFFERSIZE
+ * Definition of a buffer size that is designed to be large enough for
+ * converters to be cloned with ucnv_safeClone().
+ * @deprecated ICU 52. Do not rely on ucnv_safeClone() cloning into any provided buffer.
+ */
+#define U_CNV_SAFECLONE_BUFFERSIZE 1024
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Deletes the unicode converter and releases resources associated
+ * with just this instance.
+ * Does not free up shared converter tables.
+ *
+ * @param converter the converter object to be deleted
+ * @see ucnv_open
+ * @see ucnv_openU
+ * @see ucnv_openCCSID
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_close(UConverter * converter);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUConverterPointer
+ * "Smart pointer" class, closes a UConverter via ucnv_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterPointer, UConverter, ucnv_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Fills in the output parameter, subChars, with the substitution characters
+ * as multiple bytes.
+ * If ucnv_setSubstString() set a Unicode string because the converter is
+ * stateful, then subChars will be an empty string.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution characters
+ * @param len on input the capacity of subChars, on output the number
+ * of bytes copied to it
+ * @param err the outgoing error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @see ucnv_setSubstString
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getSubstChars(const UConverter *converter,
+ char *subChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Sets the substitution chars when converting from unicode to a codepage. The
+ * substitution is specified as a string of 1-4 bytes, and may contain
+ * <TT>NULL</TT> bytes.
+ * The subChars must represent a single character. The caller needs to know the
+ * byte sequence of a valid character in the converter's charset.
+ * For some converters, for example some ISO 2022 variants, only single-byte
+ * substitution characters may be supported.
+ * The newer ucnv_setSubstString() function relaxes these limitations.
+ *
+ * @param converter the Unicode converter
+ * @param subChars the substitution character byte sequence we want set
+ * @param len the number of bytes in subChars
+ * @param err the error status code. <TT>U_INDEX_OUTOFBOUNDS_ERROR </TT> if
+ * len is bigger than the maximum number of bytes allowed in subchars
+ * @see ucnv_setSubstString
+ * @see ucnv_getSubstChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setSubstChars(UConverter *converter,
+ const char *subChars,
+ int8_t len,
+ UErrorCode *err);
+
+/**
+ * Set a substitution string for converting from Unicode to a charset.
+ * The caller need not know the charset byte sequence for each charset.
+ *
+ * Unlike ucnv_setSubstChars() which is designed to set a charset byte sequence
+ * for a single character, this function takes a Unicode string with
+ * zero, one or more characters, and immediately verifies that the string can be
+ * converted to the charset.
+ * If not, or if the result is too long (more than 32 bytes as of ICU 3.6),
+ * then the function returns with an error accordingly.
+ *
+ * Also unlike ucnv_setSubstChars(), this function works for stateful charsets
+ * by converting on the fly at the point of substitution rather than setting
+ * a fixed byte sequence.
+ *
+ * @param cnv The UConverter object.
+ * @param s The Unicode string.
+ * @param length The number of UChars in s, or -1 for a NUL-terminated string.
+ * @param err Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ *
+ * @see ucnv_setSubstChars
+ * @see ucnv_getSubstChars
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_setSubstString(UConverter *cnv,
+ const UChar *s,
+ int32_t length,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errBytes, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errBytes the codepage bytes which were in error
+ * @param len on input the capacity of errBytes, on output the number of
+ * bytes which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getInvalidChars(const UConverter *converter,
+ char *errBytes,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Fills in the output parameter, errChars, with the error characters from the
+ * last failing conversion.
+ *
+ * @param converter the Unicode converter
+ * @param errUChars the UChars which were in error
+ * @param len on input the capacity of errUChars, on output the number of
+ * UChars which were copied to it
+ * @param err the error status code.
+ * If the substitution character array is too small, an
+ * <TT>U_INDEX_OUTOFBOUNDS_ERROR</TT> will be returned.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getInvalidUChars(const UConverter *converter,
+ UChar *errUChars,
+ int8_t *len,
+ UErrorCode *err);
+
+/**
+ * Resets the state of a converter to the default state. This is used
+ * in the case of an error, to restart a conversion from a known default state.
+ * It will also empty the internal output buffers.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_reset(UConverter *converter);
+
+/**
+ * Resets the to-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion to
+ * Unicode to a known default state. It will also empty the internal
+ * output buffers used for the conversion to Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_resetToUnicode(UConverter *converter);
+
+/**
+ * Resets the from-Unicode part of a converter state to the default state.
+ * This is used in the case of an error to restart a conversion from
+ * Unicode to a known default state. It will also empty the internal output
+ * buffers used for the conversion from Unicode codepoints.
+ * @param converter the Unicode converter
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_resetFromUnicode(UConverter *converter);
+
+/**
+ * Returns the maximum number of bytes that are output per UChar in conversion
+ * from Unicode using this converter.
+ * The returned number can be used with UCNV_GET_MAX_BYTES_FOR_STRING
+ * to calculate the size of a target buffer for conversion from Unicode.
+ *
+ * Note: Before ICU 2.8, this function did not return reliable numbers for
+ * some stateful converters (EBCDIC_STATEFUL, ISO-2022) and LMBCS.
+ *
+ * This number may not be the same as the maximum number of bytes per
+ * "conversion unit". In other words, it may not be the intuitively expected
+ * number of bytes per character that would be published for a charset,
+ * and may not fulfill any other purpose than the allocation of an output
+ * buffer of guaranteed sufficient size for a given input length and converter.
+ *
+ * Examples for special cases that are taken into account:
+ * - Supplementary code points may convert to more bytes than BMP code points.
+ * This function returns bytes per UChar (UTF-16 code unit), not per
+ * Unicode code point, for efficient buffer allocation.
+ * - State-shifting output (SI/SO, escapes, etc.) from stateful converters.
+ * - When m input UChars are converted to n output bytes, then the maximum m/n
+ * is taken into account.
+ *
+ * The number returned here does not take into account
+ * (see UCNV_GET_MAX_BYTES_FOR_STRING):
+ * - callbacks which output more than one charset character sequence per call,
+ * like escape callbacks
+ * - initial and final non-character bytes that are output by some converters
+ * (automatic BOMs, initial escape sequence, final SI, etc.)
+ *
+ * Examples for returned values:
+ * - SBCS charsets: 1
+ * - Shift-JIS: 2
+ * - UTF-16: 2 (2 per BMP, 4 per surrogate _pair_, BOM not counted)
+ * - UTF-8: 3 (3 per BMP, 4 per surrogate _pair_)
+ * - EBCDIC_STATEFUL (EBCDIC mixed SBCS/DBCS): 3 (SO + DBCS)
+ * - ISO-2022: 3 (always outputs UTF-8)
+ * - ISO-2022-JP: 6 (4-byte escape sequences + DBCS)
+ * - ISO-2022-CN: 8 (4-byte designator sequences + 2-byte SS2/SS3 + DBCS)
+ *
+ * @param converter The Unicode converter.
+ * @return The maximum number of bytes per UChar (16 bit code unit)
+ * that are output by ucnv_fromUnicode(),
+ * to be used together with UCNV_GET_MAX_BYTES_FOR_STRING
+ * for buffer allocation.
+ *
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @see ucnv_getMinCharSize
+ * @stable ICU 2.0
+ */
+U_CAPI int8_t U_EXPORT2
+ucnv_getMaxCharSize(const UConverter *converter);
+
+/**
+ * Calculates the size of a buffer for conversion from Unicode to a charset.
+ * The calculated size is guaranteed to be sufficient for this conversion.
+ *
+ * It takes into account initial and final non-character bytes that are output
+ * by some converters.
+ * It does not take into account callbacks which output more than one charset
+ * character sequence per call, like escape callbacks.
+ * The default (substitution) callback only outputs one charset character sequence.
+ *
+ * @param length Number of UChars to be converted.
+ * @param maxCharSize Return value from ucnv_getMaxCharSize() for the converter
+ * that will be used.
+ * @return Size of a buffer that will be large enough to hold the output bytes of
+ * converting length UChars with the converter that returned the maxCharSize.
+ *
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.8
+ */
+#define UCNV_GET_MAX_BYTES_FOR_STRING(length, maxCharSize) \
+ (((int32_t)(length)+10)*(int32_t)(maxCharSize))
+
+/**
+ * Returns the minimum byte length (per codepoint) for characters in this codepage.
+ * This is usually either 1 or 2.
+ * @param converter the Unicode converter
+ * @return the minimum number of bytes per codepoint allowed by this particular converter
+ * @see ucnv_getMaxCharSize
+ * @stable ICU 2.0
+ */
+U_CAPI int8_t U_EXPORT2
+ucnv_getMinCharSize(const UConverter *converter);
+
+/**
+ * Returns the display name of the converter passed in based on the Locale
+ * passed in. If the locale contains no display name, the internal ASCII
+ * name will be filled in.
+ *
+ * @param converter the Unicode converter.
+ * @param displayLocale is the specific Locale we want to localized for
+ * @param displayName user provided buffer to be filled in
+ * @param displayNameCapacity size of displayName Buffer
+ * @param err error status code
+ * @return displayNameLength number of UChar needed in displayName
+ * @see ucnv_getName
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_getDisplayName(const UConverter *converter,
+ const char *displayLocale,
+ UChar *displayName,
+ int32_t displayNameCapacity,
+ UErrorCode *err);
+
+/**
+ * Gets the internal, canonical name of the converter (zero-terminated).
+ * The lifetime of the returned string will be that of the converter
+ * passed to this function.
+ * @param converter the Unicode converter
+ * @param err UErrorCode status
+ * @return the internal name of the converter
+ * @see ucnv_getDisplayName
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getName(const UConverter *converter, UErrorCode *err);
+
+/**
+ * Gets a codepage number associated with the converter. This is not guaranteed
+ * to be the one used to create the converter. Some converters do not represent
+ * platform registered codepages and return zero for the codepage number.
+ * The error code fill-in parameter indicates if the codepage number
+ * is available.
+ * Does not check if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ *
+ * Important: The use of CCSIDs is not recommended because it is limited
+ * to only two platforms in principle and only one (UCNV_IBM) in the current
+ * ICU converter API.
+ * Also, CCSIDs are insufficient to identify IBM Unicode conversion tables precisely.
+ * For more details see ucnv_openCCSID().
+ *
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return If any error occurs, -1 will be returned otherwise, the codepage number
+ * will be returned
+ * @see ucnv_openCCSID
+ * @see ucnv_getPlatform
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_getCCSID(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets a codepage platform associated with the converter. Currently,
+ * only <TT>UCNV_IBM</TT> will be returned.
+ * Does not test if the converter is <TT>NULL</TT> or if converter's data
+ * table is <TT>NULL</TT>.
+ * @param converter the Unicode converter
+ * @param err the error status code.
+ * @return The codepage platform
+ * @stable ICU 2.0
+ */
+U_CAPI UConverterPlatform U_EXPORT2
+ucnv_getPlatform(const UConverter *converter,
+ UErrorCode *err);
+
+/**
+ * Gets the type of the converter
+ * e.g. SBCS, MBCS, DBCS, UTF8, UTF16_BE, UTF16_LE, ISO_2022,
+ * EBCDIC_STATEFUL, LATIN_1
+ * @param converter a valid, opened converter
+ * @return the type of the converter
+ * @stable ICU 2.0
+ */
+U_CAPI UConverterType U_EXPORT2
+ucnv_getType(const UConverter * converter);
+
+/**
+ * Gets the "starter" (lead) bytes for converters of type MBCS.
+ * Will fill in an <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if converter passed in
+ * is not MBCS. Fills in an array of type UBool, with the value of the byte
+ * as offset to the array. For example, if (starters[0x20] == true) at return,
+ * it means that the byte 0x20 is a starter byte in this converter.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter a valid, opened converter of type MBCS
+ * @param starters an array of size 256 to be filled in
+ * @param err error status, <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> if the
+ * converter is not a type which can return starters.
+ * @see ucnv_getType
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getStarters(const UConverter* converter,
+ UBool starters[256],
+ UErrorCode* err);
+
+
+/**
+ * Selectors for Unicode sets that can be returned by ucnv_getUnicodeSet().
+ * @see ucnv_getUnicodeSet
+ * @stable ICU 2.6
+ */
+typedef enum UConverterUnicodeSet {
+ /** Select the set of roundtrippable Unicode code points. @stable ICU 2.6 */
+ UCNV_ROUNDTRIP_SET,
+ /** Select the set of Unicode code points with roundtrip or fallback mappings. @stable ICU 4.0 */
+ UCNV_ROUNDTRIP_AND_FALLBACK_SET,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Number of UConverterUnicodeSet selectors.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCNV_SET_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UConverterUnicodeSet;
+
+
+/**
+ * Returns the set of Unicode code points that can be converted by an ICU converter.
+ *
+ * Returns one of several kinds of set:
+ *
+ * 1. UCNV_ROUNDTRIP_SET
+ *
+ * The set of all Unicode code points that can be roundtrip-converted
+ * (converted without any data loss) with the converter (ucnv_fromUnicode()).
+ * This set will not include code points that have fallback mappings
+ * or are only the result of reverse fallback mappings.
+ * This set will also not include PUA code points with fallbacks, although
+ * ucnv_fromUnicode() will always uses those mappings despite ucnv_setFallback().
+ * See UTR #22 "Character Mapping Markup Language"
+ * at http://www.unicode.org/reports/tr22/
+ *
+ * This is useful for example for
+ * - checking that a string or document can be roundtrip-converted with a converter,
+ * without/before actually performing the conversion
+ * - testing if a converter can be used for text for typical text for a certain locale,
+ * by comparing its roundtrip set with the set of ExemplarCharacters from
+ * ICU's locale data or other sources
+ *
+ * 2. UCNV_ROUNDTRIP_AND_FALLBACK_SET
+ *
+ * The set of all Unicode code points that can be converted with the converter (ucnv_fromUnicode())
+ * when fallbacks are turned on (see ucnv_setFallback()).
+ * This set includes all code points with roundtrips and fallbacks (but not reverse fallbacks).
+ *
+ * In the future, there may be more UConverterUnicodeSet choices to select
+ * sets with different properties.
+ *
+ * @param cnv The converter for which a set is requested.
+ * @param setFillIn A valid USet *. It will be cleared by this function before
+ * the converter's specific set is filled into the USet.
+ * @param whichSet A UConverterUnicodeSet selector;
+ * currently UCNV_ROUNDTRIP_SET is the only supported value.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ *
+ * @see UConverterUnicodeSet
+ * @see uset_open
+ * @see uset_close
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_getUnicodeSet(const UConverter *cnv,
+ USet *setFillIn,
+ UConverterUnicodeSet whichSet,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the current callback function used by the converter when an illegal
+ * or invalid codepage sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getToUCallBack (const UConverter * converter,
+ UConverterToUCallback *action,
+ const void **context);
+
+/**
+ * Gets the current callback function used by the converter when illegal
+ * or invalid Unicode sequence is found.
+ * Context pointers are always owned by the caller.
+ *
+ * @param converter the unicode converter
+ * @param action fillin: returns the callback function pointer
+ * @param context fillin: returns the callback's private void* context
+ * @see ucnv_setFromUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getFromUCallBack (const UConverter * converter,
+ UConverterFromUCallback *action,
+ const void **context);
+
+/**
+ * Changes the callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new toUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getToUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setToUCallBack (UConverter * converter,
+ UConverterToUCallback newAction,
+ const void* newContext,
+ UConverterToUCallback *oldAction,
+ const void** oldContext,
+ UErrorCode * err);
+
+/**
+ * Changes the current callback function used by the converter when
+ * an illegal or invalid sequence is found.
+ * Context pointers are always owned by the caller.
+ * Predefined actions and contexts can be found in the ucnv_err.h header.
+ *
+ * @param converter the unicode converter
+ * @param newAction the new callback function
+ * @param newContext the new fromUnicode callback context pointer. This can be NULL.
+ * @param oldAction fillin: returns the old callback function pointer. This can be NULL.
+ * @param oldContext fillin: returns the old callback's private void* context. This can be NULL.
+ * @param err The error code status
+ * @see ucnv_getFromUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setFromUCallBack (UConverter * converter,
+ UConverterFromUCallback newAction,
+ const void *newContext,
+ UConverterFromUCallback *oldAction,
+ const void **oldContext,
+ UErrorCode * err);
+
+/**
+ * Converts an array of unicode characters to an array of codepage
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last UChar consumed.
+ *
+ * Target similarly starts out pointer at the first available byte in the output
+ * buffer, and ends up pointing after the last byte written to the output.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==true), the input is completely consumed
+ * when *source==sourceLimit and no error code is set.
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * codepage characters to. Output : points to after the last codepage character copied
+ * to <TT>target</TT>.
+ * @param targetLimit the pointer just after last of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source Unicode character buffer.
+ * @param sourceLimit the pointer just after the last of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>true</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>true</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setToUCallBack
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_fromUnicode (UConverter * converter,
+ char **target,
+ const char *targetLimit,
+ const UChar ** source,
+ const UChar * sourceLimit,
+ int32_t* offsets,
+ UBool flush,
+ UErrorCode * err);
+
+/**
+ * Converts a buffer of codepage bytes into an array of unicode UChars
+ * characters. This function is optimized for converting a continuous
+ * stream of data in buffer-sized chunks, where the entire source and
+ * target does not fit in available buffers.
+ *
+ * The source pointer is an in/out parameter. It starts out pointing where the
+ * conversion is to begin, and ends up pointing after the last byte of source consumed.
+ *
+ * Target similarly starts out pointer at the first available UChar in the output
+ * buffer, and ends up pointing after the last UChar written to the output.
+ * It does NOT necessarily keep UChar sequences together.
+ *
+ * The converter always attempts to consume the entire source buffer, unless
+ * (1.) the target buffer is full, or (2.) a failing error is returned from the
+ * current callback function. When a successful error status has been
+ * returned, it means that all of the source buffer has been
+ * consumed. At that point, the caller should reset the source and
+ * sourceLimit pointers to point to the next chunk.
+ *
+ * At the end of the stream (flush==true), the input is completely consumed
+ * when *source==sourceLimit and no error code is set
+ * The converter object is then automatically reset by this function.
+ * (This means that a converter need not be reset explicitly between data
+ * streams if it finishes the previous stream without errors.)
+ *
+ * This is a <I>stateful</I> conversion. Additionally, even when all source data has
+ * been consumed, some data may be in the converters' internal state.
+ * Call this function repeatedly, updating the target pointers with
+ * the next empty chunk of target in case of a
+ * <TT>U_BUFFER_OVERFLOW_ERROR</TT>, and updating the source pointers
+ * with the next chunk of source when a successful error status is
+ * returned, until there are no more chunks of source data.
+ * @param converter the Unicode converter
+ * @param target I/O parameter. Input : Points to the beginning of the buffer to copy
+ * UChars into. Output : points to after the last UChar copied.
+ * @param targetLimit the pointer just after the end of the <TT>target</TT> buffer
+ * @param source I/O parameter, pointer to pointer to the source codepage buffer.
+ * @param sourceLimit the pointer to the byte after the end of the source buffer
+ * @param offsets if NULL is passed, nothing will happen to it, otherwise it needs to have the same number
+ * of allocated cells as <TT>target</TT>. Will fill in offsets from target to source pointer
+ * e.g: <TT>offsets[3]</TT> is equal to 6, it means that the <TT>target[3]</TT> was a result of transcoding <TT>source[6]</TT>
+ * For output data carried across calls, and other data without a specific source character
+ * (such as from escape sequences or callbacks) -1 will be placed for offsets.
+ * @param flush set to <TT>true</TT> if the current source buffer is the last available
+ * chunk of the source, <TT>false</TT> otherwise. Note that if a failing status is returned,
+ * this function may have to be called multiple times with flush set to <TT>true</TT> until
+ * the source buffer is consumed.
+ * @param err the error status. <TT>U_ILLEGAL_ARGUMENT_ERROR</TT> will be set if the
+ * converter is <TT>NULL</TT>.
+ * <code>U_BUFFER_OVERFLOW_ERROR</code> will be set if the target is full and there is
+ * still data to be written to the target.
+ * @see ucnv_fromUChars
+ * @see ucnv_convert
+ * @see ucnv_getMinCharSize
+ * @see ucnv_setFromUCallBack
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_toUnicode(UConverter *converter,
+ UChar **target,
+ const UChar *targetLimit,
+ const char **source,
+ const char *sourceLimit,
+ int32_t *offsets,
+ UBool flush,
+ UErrorCode *err);
+
+/**
+ * Convert the Unicode string into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_fromUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * UCNV_GET_MAX_BYTES_FOR_STRING(srcLength, ucnv_getMaxCharSize(cnv)).
+ *
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called)
+ * @param src the input Unicode string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_fromUnicode
+ * @see ucnv_convert
+ * @see UCNV_GET_MAX_BYTES_FOR_STRING
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUChars(UConverter *cnv,
+ char *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert the codepage string into a Unicode string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function is a more convenient but less powerful version of ucnv_toUnicode().
+ * It is only useful for whole strings, not for streaming conversion.
+ *
+ * The maximum output buffer capacity required (barring output from callbacks) will be
+ * 2*srcLength (each char may be converted into a surrogate pair).
+ *
+ * @param cnv the converter object to be used (ucnv_resetToUnicode() will be called)
+ * @param src the input codepage string
+ * @param srcLength the input string length, or -1 if NUL-terminated
+ * @param dest destination string buffer, can be NULL if destCapacity==0
+ * @param destCapacity the number of UChars available at dest
+ * @param pErrorCode normal ICU error code;
+ * common error codes that may be set by this function include
+ * U_BUFFER_OVERFLOW_ERROR, U_STRING_NOT_TERMINATED_WARNING,
+ * U_ILLEGAL_ARGUMENT_ERROR, and conversion errors
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @see ucnv_toUnicode
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUChars(UConverter *cnv,
+ UChar *dest, int32_t destCapacity,
+ const char *src, int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a codepage buffer into Unicode one character at a time.
+ * The input is completely consumed when the U_INDEX_OUTOFBOUNDS_ERROR is set.
+ *
+ * Advantage compared to ucnv_toUnicode() or ucnv_toUChars():
+ * - Faster for small amounts of data, for most converters, e.g.,
+ * US-ASCII, ISO-8859-1, UTF-8/16/32, and most "normal" charsets.
+ * (For complex converters, e.g., SCSU, UTF-7 and ISO 2022 variants,
+ * it uses ucnv_toUnicode() internally.)
+ * - Convenient.
+ *
+ * Limitations compared to ucnv_toUnicode():
+ * - Always assumes flush=true.
+ * This makes ucnv_getNextUChar() unsuitable for "streaming" conversion,
+ * that is, for where the input is supplied in multiple buffers,
+ * because ucnv_getNextUChar() will assume the end of the input at the end
+ * of the first buffer.
+ * - Does not provide offset output.
+ *
+ * It is possible to "mix" ucnv_getNextUChar() and ucnv_toUnicode() because
+ * ucnv_getNextUChar() uses the current state of the converter
+ * (unlike ucnv_toUChars() which always resets first).
+ * However, if ucnv_getNextUChar() is called after ucnv_toUnicode()
+ * stopped in the middle of a character sequence (with flush=false),
+ * then ucnv_getNextUChar() will always use the slower ucnv_toUnicode()
+ * internally until the next character boundary.
+ * (This is new in ICU 2.6. In earlier releases, ucnv_getNextUChar() had to
+ * start at a character boundary.)
+ *
+ * Instead of using ucnv_getNextUChar(), it is recommended
+ * to convert using ucnv_toUnicode() or ucnv_toUChars()
+ * and then iterate over the text using U16_NEXT() or a UCharIterator (uiter.h)
+ * or a C++ CharacterIterator or similar.
+ * This allows streaming conversion and offset output, for example.
+ *
+ * <p>Handling of surrogate pairs and supplementary-plane code points:<br>
+ * There are two different kinds of codepages that provide mappings for surrogate characters:
+ * <ul>
+ * <li>Codepages like UTF-8, UTF-32, and GB 18030 provide direct representations for Unicode
+ * code points U+10000-U+10ffff as well as for single surrogates U+d800-U+dfff.
+ * Each valid sequence will result in exactly one returned code point.
+ * If a sequence results in a single surrogate, then that will be returned
+ * by itself, even if a neighboring sequence encodes the matching surrogate.</li>
+ * <li>Codepages like SCSU and LMBCS (and UTF-16) provide direct representations only for BMP code points
+ * including surrogates. Code points in supplementary planes are represented with
+ * two sequences, each encoding a surrogate.
+ * For these codepages, matching pairs of surrogates will be combined into single
+ * code points for returning from this function.
+ * (Note that SCSU is actually a mix of these codepage types.)</li>
+ * </ul></p>
+ *
+ * @param converter an open UConverter
+ * @param source the address of a pointer to the codepage buffer, will be
+ * updated to point after the bytes consumed in the conversion call.
+ * @param sourceLimit points to the end of the input buffer
+ * @param err fills in error status (see ucnv_toUnicode)
+ * <code>U_INDEX_OUTOFBOUNDS_ERROR</code> will be set if the input
+ * is empty or does not convert to any output (e.g.: pure state-change
+ * codes SI/SO, escape sequences for ISO 2022,
+ * or if the callback did not output anything, ...).
+ * This function will not set a <code>U_BUFFER_OVERFLOW_ERROR</code> because
+ * the "buffer" is the return code. However, there might be subsequent output
+ * stored in the converter object
+ * that will be returned in following calls to this function.
+ * @return a UChar32 resulting from the partial conversion of source
+ * @see ucnv_toUnicode
+ * @see ucnv_toUChars
+ * @see ucnv_convert
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+ucnv_getNextUChar(UConverter * converter,
+ const char **source,
+ const char * sourceLimit,
+ UErrorCode * err);
+
+/**
+ * Convert from one external charset to another using two existing UConverters.
+ * Internally, two conversions - ucnv_toUnicode() and ucnv_fromUnicode() -
+ * are used, "pivoting" through 16-bit Unicode.
+ *
+ * Important: For streaming conversion (multiple function calls for successive
+ * parts of a text stream), the caller must provide a pivot buffer explicitly,
+ * and must preserve the pivot buffer and associated pointers from one
+ * call to another. (The buffer may be moved if its contents and the relative
+ * pointer positions are preserved.)
+ *
+ * There is a similar function, ucnv_convert(),
+ * which has the following limitations:
+ * - it takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - it does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ *
+ * By contrast, ucnv_convertEx()
+ * - takes UConverter parameters instead of charset names
+ * - fully exposes the pivot buffer for streaming conversion and complete error handling
+ *
+ * ucnv_convertEx() also provides further convenience:
+ * - an option to reset the converters at the beginning
+ * (if reset==true, see parameters;
+ * also sets *pivotTarget=*pivotSource=pivotStart)
+ * - allow NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLimit==NULL, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - the pivot buffer can be provided internally;
+ * possible only for whole-string conversion, not streaming conversion;
+ * in this case, the caller will not be able to get details about where an
+ * error occurred
+ * (if pivotStart==NULL, see below)
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * Limitation compared to the direct use of
+ * ucnv_fromUnicode() and ucnv_toUnicode():
+ * ucnv_convertEx() does not provide offset information.
+ *
+ * Limitation compared to ucnv_fromUChars() and ucnv_toUChars():
+ * ucnv_convertEx() does not support preflighting directly.
+ *
+ * Sample code for converting a single string from
+ * one external charset to UTF-8, ignoring the location of errors:
+ *
+ * \code
+ * int32_t
+ * myToUTF8(UConverter *cnv,
+ * const char *s, int32_t length,
+ * char *u8, int32_t capacity,
+ * UErrorCode *pErrorCode) {
+ * UConverter *utf8Cnv;
+ * char *target;
+ *
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * utf8Cnv=myGetCachedUTF8Converter(pErrorCode);
+ * if(U_FAILURE(*pErrorCode)) {
+ * return 0;
+ * }
+ *
+ * if(length<0) {
+ * length=strlen(s);
+ * }
+ * target=u8;
+ * ucnv_convertEx(utf8Cnv, cnv,
+ * &target, u8+capacity,
+ * &s, s+length,
+ * NULL, NULL, NULL, NULL,
+ * true, true,
+ * pErrorCode);
+ *
+ * myReleaseCachedUTF8Converter(utf8Cnv);
+ *
+ * // return the output string length, but without preflighting
+ * return (int32_t)(target-u8);
+ * }
+ * \endcode
+ *
+ * @param targetCnv Output converter, used to convert from the UTF-16 pivot
+ * to the target using ucnv_fromUnicode().
+ * @param sourceCnv Input converter, used to convert from the source to
+ * the UTF-16 pivot using ucnv_toUnicode().
+ * @param target I/O parameter, same as for ucnv_fromUChars().
+ * Input: *target points to the beginning of the target buffer.
+ * Output: *target points to the first unit after the last char written.
+ * @param targetLimit Pointer to the first unit after the target buffer.
+ * @param source I/O parameter, same as for ucnv_toUChars().
+ * Input: *source points to the beginning of the source buffer.
+ * Output: *source points to the first unit after the last char read.
+ * @param sourceLimit Pointer to the first unit after the source buffer.
+ * @param pivotStart Pointer to the UTF-16 pivot buffer. If pivotStart==NULL,
+ * then an internal buffer is used and the other pivot
+ * arguments are ignored and can be NULL as well.
+ * @param pivotSource I/O parameter, same as source in ucnv_fromUChars() for
+ * conversion from the pivot buffer to the target buffer.
+ * @param pivotTarget I/O parameter, same as target in ucnv_toUChars() for
+ * conversion from the source buffer to the pivot buffer.
+ * It must be pivotStart<=*pivotSource<=*pivotTarget<=pivotLimit
+ * and pivotStart<pivotLimit (unless pivotStart==NULL).
+ * @param pivotLimit Pointer to the first unit after the pivot buffer.
+ * @param reset If true, then ucnv_resetToUnicode(sourceCnv) and
+ * ucnv_resetFromUnicode(targetCnv) are called, and the
+ * pivot pointers are reset (*pivotTarget=*pivotSource=pivotStart).
+ * @param flush If true, indicates the end of the input.
+ * Passed directly to ucnv_toUnicode(), and carried over to
+ * ucnv_fromUnicode() when the source is empty as well.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * U_BUFFER_OVERFLOW_ERROR always refers to the target buffer
+ * because overflows into the pivot buffer are handled internally.
+ * Other conversion errors are from the source-to-pivot
+ * conversion if *pivotSource==pivotStart, otherwise from
+ * the pivot-to-target conversion.
+ *
+ * @see ucnv_convert
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+ucnv_convertEx(UConverter *targetCnv, UConverter *sourceCnv,
+ char **target, const char *targetLimit,
+ const char **source, const char *sourceLimit,
+ UChar *pivotStart, UChar **pivotSource,
+ UChar **pivotTarget, const UChar *pivotLimit,
+ UBool reset, UBool flush,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, two converters are opened according to the name arguments,
+ * then the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(), then the converters are closed again.
+ *
+ * This is a convenience function, not an efficient way to convert a lot of text:
+ * ucnv_convert()
+ * - takes charset names, not converter objects, so that
+ * - two converters are opened for each call
+ * - only single-string conversion is possible, not streaming operation
+ * - does not provide enough information to find out,
+ * in case of failure, whether the toUnicode or
+ * the fromUnicode conversion failed
+ * - allows NUL-terminated input
+ * (only a single NUL byte, will not work for charsets with multi-byte NULs)
+ * (if sourceLength==-1, see parameters)
+ * - terminate with a NUL on output
+ * (only a single NUL byte, not useful for charsets with multi-byte NULs),
+ * or set U_STRING_NOT_TERMINATED_WARNING if the output exactly fills
+ * the target buffer
+ * - a pivot buffer is provided internally
+ *
+ * The function returns when one of the following is true:
+ * - the entire source text has been converted successfully to the target buffer
+ * and either the target buffer is terminated with a single NUL byte
+ * or the error code is set to U_STRING_NOT_TERMINATED_WARNING
+ * - a target buffer overflow occurred (U_BUFFER_OVERFLOW_ERROR)
+ * and the full output string length is returned ("preflighting")
+ * - a conversion error occurred
+ * (other U_FAILURE(), see description of pErrorCode)
+ *
+ * @param toConverterName The name of the converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param fromConverterName The name of the converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes, or -1 for NUL-terminated input.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_convertEx
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_toAlgorithmic
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @see ucnv_getNextUChar
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_convert(const char *toConverterName,
+ const char *fromConverterName,
+ char *target,
+ int32_t targetCapacity,
+ const char *source,
+ int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_toAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses the cnv converter parameter.
+ * The pivot-to-target conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param algorithmicType UConverterType constant identifying the desired target
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param cnv The converter that is used to convert
+ * from the source to the UTF-16 pivot buffer.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toAlgorithmic(UConverterType algorithmicType,
+ UConverter *cnv,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert from one external charset to another.
+ * Internally, the text is converted to and from the 16-bit Unicode "pivot"
+ * using ucnv_convertEx(). ucnv_fromAlgorithmic() works exactly like ucnv_convert()
+ * except that the two converters need not be looked up and opened completely.
+ *
+ * The source-to-pivot conversion uses a purely algorithmic converter
+ * according to the specified type, e.g., UCNV_UTF8 for a UTF-8 converter.
+ * The pivot-to-target conversion uses the cnv converter parameter.
+ *
+ * Internally, the algorithmic converter is opened and closed for each
+ * function call, which is more efficient than using the public ucnv_open()
+ * but somewhat less efficient than only resetting an existing converter
+ * and using ucnv_convertEx().
+ *
+ * This function is more convenient than ucnv_convertEx() for single-string
+ * conversions, especially when "preflighting" is desired (returning the length
+ * of the complete output even if it does not fit into the target buffer;
+ * see the User Guide Strings chapter). See ucnv_convert() for details.
+ *
+ * @param cnv The converter that is used to convert
+ * from the UTF-16 pivot buffer to the target.
+ * @param algorithmicType UConverterType constant identifying the desired source
+ * charset as a purely algorithmic converter.
+ * Those are converters for Unicode charsets like
+ * UTF-8, BOCU-1, SCSU, UTF-7, IMAP-mailbox-name, etc.,
+ * as well as US-ASCII and ISO-8859-1.
+ * @param target Pointer to the output buffer.
+ * @param targetCapacity Capacity of the target, in bytes.
+ * @param source Pointer to the input buffer.
+ * @param sourceLength Length of the input text, in bytes
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of the complete output text in bytes, even if it exceeds the targetCapacity
+ * and a U_BUFFER_OVERFLOW_ERROR is set.
+ *
+ * @see ucnv_fromAlgorithmic
+ * @see ucnv_convert
+ * @see ucnv_convertEx
+ * @see ucnv_fromUnicode
+ * @see ucnv_toUnicode
+ * @see ucnv_fromUChars
+ * @see ucnv_toUChars
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromAlgorithmic(UConverter *cnv,
+ UConverterType algorithmicType,
+ char *target, int32_t targetCapacity,
+ const char *source, int32_t sourceLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Frees up memory occupied by unused, cached converter shared data.
+ *
+ * @return the number of cached converters successfully deleted
+ * @see ucnv_close
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_flushCache(void);
+
+/**
+ * Returns the number of available converters, as per the alias file.
+ *
+ * @return the number of available converters
+ * @see ucnv_getAvailableName
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_countAvailable(void);
+
+/**
+ * Gets the canonical converter name of the specified converter from a list of
+ * all available converters contained in the alias file. All converters
+ * in this list can be opened.
+ *
+ * @param n the index to a converter available on the system (in the range <TT>[0..ucnv_countAvailable()]</TT>)
+ * @return a pointer a string (library owned), or <TT>NULL</TT> if the index is out of bounds.
+ * @see ucnv_countAvailable
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_getAvailableName(int32_t n);
+
+/**
+ * Returns a UEnumeration to enumerate all of the canonical converter
+ * names, as per the alias file, regardless of the ability to open each
+ * converter.
+ *
+ * @return A UEnumeration object for getting all the recognized canonical
+ * converter names.
+ * @see ucnv_getAvailableName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.4
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnv_openAllNames(UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of aliases for a given converter or alias name.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * This method only enumerates the listed entries in the alias file.
+ * @param alias alias name
+ * @param pErrorCode error status
+ * @return number of names on alias list for given alias
+ * @stable ICU 2.0
+ */
+U_CAPI uint16_t U_EXPORT2
+ucnv_countAliases(const char *alias, UErrorCode *pErrorCode);
+
+/**
+ * Gives the name of the alias at given index of alias list.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param n index in alias list
+ * @param pErrorCode result of operation
+ * @return returns the name of the alias at given index
+ * @see ucnv_countAliases
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getAlias(const char *alias, uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Fill-up the list of alias names for the given alias.
+ * This method only enumerates the listed entries in the alias file.
+ * If the alias is ambiguous, then the preferred converter is used
+ * and the status is set to U_AMBIGUOUS_ALIAS_WARNING.
+ * @param alias alias name
+ * @param aliases fill-in list, aliases is a pointer to an array of
+ * <code>ucnv_countAliases()</code> string-pointers
+ * (<code>const char *</code>) that will be filled in.
+ * The strings themselves are owned by the library.
+ * @param pErrorCode result of operation
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_getAliases(const char *alias, const char **aliases, UErrorCode *pErrorCode);
+
+/**
+ * Return a new UEnumeration object for enumerating all the
+ * alias names for a given converter that are recognized by a standard.
+ * This method only enumerates the listed entries in the alias file.
+ * The convrtrs.txt file can be modified to change the results of
+ * this function.
+ * The first result in this list is the same result given by
+ * <code>ucnv_getStandardName</code>, which is the default alias for
+ * the specified standard name. The returned object must be closed with
+ * <code>uenum_close</code> when you are done with the object.
+ *
+ * @param convName original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode The error code
+ * @return A UEnumeration object for getting all aliases that are recognized
+ * by a standard. If any of the parameters are invalid, NULL
+ * is returned.
+ * @see ucnv_getStandardName
+ * @see uenum_close
+ * @see uenum_next
+ * @stable ICU 2.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnv_openStandardNames(const char *convName,
+ const char *standard,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gives the number of standards associated to converter names.
+ * @return number of standards
+ * @stable ICU 2.0
+ */
+U_CAPI uint16_t U_EXPORT2
+ucnv_countStandards(void);
+
+/**
+ * Gives the name of the standard at given index of standard list.
+ * @param n index in standard list
+ * @param pErrorCode result of operation
+ * @return returns the name of the standard at given index. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getStandard(uint16_t n, UErrorCode *pErrorCode);
+
+/**
+ * Returns a standard name for a given converter name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("conv", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"alias2"</b>
+ *
+ * @param name original converter name
+ * @param standard name of the standard governing the names; MIME and IANA
+ * are such standards
+ * @param pErrorCode result of operation
+ * @return returns the standard converter name;
+ * if a standard converter name cannot be determined,
+ * then <code>NULL</code> is returned. Owned by the library.
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getStandardName(const char *name, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * This function will return the internal canonical converter name of the
+ * tagged alias. This is the opposite of ucnv_openStandardNames, which
+ * returns the tagged alias given the canonical name.
+ * <p>
+ * Example alias table:<br>
+ * conv alias1 { STANDARD1 } alias2 { STANDARD1* }
+ * <p>
+ * Result of ucnv_getStandardName("alias1", "STANDARD1") from example
+ * alias table:<br>
+ * <b>"conv"</b>
+ *
+ * @return returns the canonical converter name;
+ * if a standard or alias name cannot be determined,
+ * then <code>NULL</code> is returned. The returned string is
+ * owned by the library.
+ * @see ucnv_getStandardName
+ * @stable ICU 2.4
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getCanonicalName(const char *alias, const char *standard, UErrorCode *pErrorCode);
+
+/**
+ * Returns the current default converter name. If you want to open
+ * a default converter, you do not need to use this function.
+ * It is faster if you pass a NULL argument to ucnv_open the
+ * default converter.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * always returns "UTF-8".
+ *
+ * @return returns the current default converter name.
+ * Storage owned by the library
+ * @see ucnv_setDefaultName
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ucnv_getDefaultName(void);
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * This function is not thread safe. DO NOT call this function when ANY ICU
+ * function is being used from more than one thread! This function sets the
+ * current default converter name. If this function needs to be called, it
+ * should be called during application initialization. Most of the time, the
+ * results from ucnv_getDefaultName() or ucnv_open with a NULL string argument
+ * is sufficient for your application.
+ *
+ * If U_CHARSET_IS_UTF8 is defined to 1 in utypes.h then this function
+ * does nothing.
+ *
+ * @param name the converter name to be the default (must be known by ICU).
+ * @see ucnv_getDefaultName
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_setDefaultName(const char *name);
+#endif /* U_HIDE_SYSTEM_API */
+
+/**
+ * Fixes the backslash character mismapping. For example, in SJIS, the backslash
+ * character in the ASCII portion is also used to represent the yen currency sign.
+ * When mapping from Unicode character 0x005C, it's unclear whether to map the
+ * character back to yen or backslash in SJIS. This function will take the input
+ * buffer and replace all the yen sign characters with backslash. This is necessary
+ * when the user tries to open a file with the input buffer on Windows.
+ * This function will test the converter to see whether such mapping is
+ * required. You can sometimes avoid using this function by using the correct version
+ * of Shift-JIS.
+ *
+ * @param cnv The converter representing the target codepage.
+ * @param source the input buffer to be fixed
+ * @param sourceLen the length of the input buffer
+ * @see ucnv_isAmbiguous
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_fixFileSeparator(const UConverter *cnv, UChar *source, int32_t sourceLen);
+
+/**
+ * Determines if the converter contains ambiguous mappings of the same
+ * character or not.
+ * @param cnv the converter to be tested
+ * @return true if the converter contains ambiguous mapping of the same
+ * character, false otherwise.
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_isAmbiguous(const UConverter *cnv);
+
+/**
+ * Sets the converter to use fallback mappings or not.
+ * Regardless of this flag, the converter will always use
+ * fallbacks from Unicode Private Use code points, as well as
+ * reverse fallbacks (to Unicode).
+ * For details see ".ucm File Format"
+ * in the Conversion Data chapter of the ICU User Guide:
+ * https://unicode-org.github.io/icu/userguide/conversion/data.html#ucm-file-format
+ *
+ * @param cnv The converter to set the fallback mapping usage on.
+ * @param usesFallback true if the user wants the converter to take advantage of the fallback
+ * mapping, false otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_usesFallback
+ */
+U_CAPI void U_EXPORT2
+ucnv_setFallback(UConverter *cnv, UBool usesFallback);
+
+/**
+ * Determines if the converter uses fallback mappings or not.
+ * This flag has restrictions, see ucnv_setFallback().
+ *
+ * @param cnv The converter to be tested
+ * @return true if the converter uses fallback, false otherwise.
+ * @stable ICU 2.0
+ * @see ucnv_setFallback
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_usesFallback(const UConverter *cnv);
+
+/**
+ * Detects Unicode signature byte sequences at the start of the byte stream
+ * and returns the charset name of the indicated Unicode charset.
+ * NULL is returned when no Unicode signature is recognized.
+ * The number of bytes in the signature is output as well.
+ *
+ * The caller can ucnv_open() a converter using the charset name.
+ * The first code unit (UChar) from the start of the stream will be U+FEFF
+ * (the Unicode BOM/signature character) and can usually be ignored.
+ *
+ * For most Unicode charsets it is also possible to ignore the indicated
+ * number of initial stream bytes and start converting after them.
+ * However, there are stateful Unicode charsets (UTF-7 and BOCU-1) for which
+ * this will not work. Therefore, it is best to ignore the first output UChar
+ * instead of the input signature bytes.
+ * <p>
+ * Usage:
+ * \snippet samples/ucnv/convsamp.cpp ucnv_detectUnicodeSignature
+ *
+ * @param source The source string in which the signature should be detected.
+ * @param sourceLength Length of the input string, or -1 if terminated with a NUL byte.
+ * @param signatureLength A pointer to int32_t to receive the number of bytes that make up the signature
+ * of the detected UTF. 0 if not detected.
+ * Can be a NULL pointer.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The name of the encoding detected. NULL if encoding is not detected.
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+ucnv_detectUnicodeSignature(const char* source,
+ int32_t sourceLength,
+ int32_t *signatureLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the number of UChars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv The converter in which the input is held
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of UChars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_fromUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns the number of chars held in the converter's internal state
+ * because more input is needed for completing the conversion. This function is
+ * useful for mapping semantics of ICU's converter interface to those of iconv,
+ * and this information is not needed for normal conversion.
+ * @param cnv The converter in which the input is held as internal state
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return The number of chars in the state. -1 if an error is encountered.
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+ucnv_toUCountPending(const UConverter* cnv, UErrorCode* status);
+
+/**
+ * Returns whether or not the charset of the converter has a fixed number of bytes
+ * per charset character.
+ * An example of this are converters that are of the type UCNV_SBCS or UCNV_DBCS.
+ * Another example is UTF-32 which is always 4 bytes per character.
+ * A Unicode code point may be represented by more than one UTF-8 or UTF-16 code unit
+ * but a UTF-32 converter encodes each code point with 4 bytes.
+ * Note: This method is not intended to be used to determine whether the charset has a
+ * fixed ratio of bytes to Unicode codes <i>units</i> for any particular Unicode encoding form.
+ * false is returned with the UErrorCode if error occurs or cnv is NULL.
+ * @param cnv The converter to be tested
+ * @param status ICU error code in/out parameter
+ * @return true if the converter is fixed-width
+ * @stable ICU 4.8
+ */
+U_CAPI UBool U_EXPORT2
+ucnv_isFixedWidth(UConverter *cnv, UErrorCode *status);
+
+#endif
+
+#endif
+/*_UCNV*/
diff --git a/intl/icu/source/common/unicode/ucnv_cb.h b/intl/icu/source/common/unicode/ucnv_cb.h
new file mode 100644
index 0000000000..b4ef99208b
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucnv_cb.h
@@ -0,0 +1,164 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2000-2004, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ * ucnv_cb.h:
+ * External APIs for the ICU's codeset conversion library
+ * Helena Shih
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ */
+
+/**
+ * \file
+ * \brief C API: UConverter functions to aid the writers of callbacks
+ *
+ * <h2> Callback API for UConverter </h2>
+ *
+ * These functions are provided here for the convenience of the callback
+ * writer. If you are just looking for callback functions to use, please
+ * see ucnv_err.h. DO NOT call these functions directly when you are
+ * working with converters, unless your code has been called as a callback
+ * via ucnv_setFromUCallback or ucnv_setToUCallback !!
+ *
+ * A note about error codes and overflow. Unlike other ICU functions,
+ * these functions do not expect the error status to be U_ZERO_ERROR.
+ * Callbacks must be much more careful about their error codes.
+ * The error codes used here are in/out parameters, which should be passed
+ * back in the callback's error parameter.
+ *
+ * For example, if you call ucnv_cbfromUWriteBytes to write data out
+ * to the output codepage, it may return U_BUFFER_OVERFLOW_ERROR if
+ * the data did not fit in the target. But this isn't a failing error,
+ * in fact, ucnv_cbfromUWriteBytes may be called AGAIN with the error
+ * status still U_BUFFER_OVERFLOW_ERROR to attempt to write further bytes,
+ * which will also go into the internal overflow buffers.
+ *
+ * Concerning offsets, the 'offset' parameters here are relative to the start
+ * of SOURCE. For example, Suppose the string "ABCD" was being converted
+ * from Unicode into a codepage which doesn't have a mapping for 'B'.
+ * 'A' will be written out correctly, but
+ * The FromU Callback will be called on an unassigned character for 'B'.
+ * At this point, this is the state of the world:
+ * Target: A [..] [points after A]
+ * Source: A B [C] D [points to C - B has been consumed]
+ * 0 1 2 3
+ * codePoint = "B" [the unassigned codepoint]
+ *
+ * Now, suppose a callback wants to write the substitution character '?' to
+ * the target. It calls ucnv_cbFromUWriteBytes() to write the ?.
+ * It should pass ZERO as the offset, because the offset as far as the
+ * callback is concerned is relative to the SOURCE pointer [which points
+ * before 'C'.] If the callback goes into the args and consumes 'C' also,
+ * it would call FromUWriteBytes with an offset of 1 (and advance the source
+ * pointer).
+ *
+ */
+
+#ifndef UCNV_CB_H
+#define UCNV_CB_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/ucnv.h"
+#include "unicode/ucnv_err.h"
+
+/**
+ * ONLY used by FromU callback functions.
+ * Writes out the specified byte output bytes to the target byte buffer or to converter internal buffers.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source source bytes to write
+ * @param length length of bytes to write
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG>
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteSub
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteBytes (UConverterFromUnicodeArgs *args,
+ const char* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by FromU callback functions.
+ * This function will write out the correct substitution character sequence
+ * to the target.
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from the current source pointer to be used
+ * @param err error status. If <TT>U_BUFFER_OVERFLOW</TT> is returned, then U_BUFFER_OVERFLOW <STRONG>must</STRONG>
+ * be returned to the user, because it means that not all data could be written into the target buffer, and some is
+ * in the converter error buffer.
+ * @see ucnv_cbFromUWriteBytes
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ucnv_cbFromUWriteSub (UConverterFromUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by fromU callback functions.
+ * This function will write out the error character(s) to the target UChar buffer.
+ *
+ * @param args callback fromUnicode arguments
+ * @param source pointer to pointer to first UChar to write [on exit: 1 after last UChar processed]
+ * @param sourceLimit pointer after last UChar to write
+ * @param offsetIndex the relative offset index from callback which will be set
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 ucnv_cbFromUWriteUChars(UConverterFromUnicodeArgs *args,
+ const UChar** source,
+ const UChar* sourceLimit,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the specified characters to the target
+ * UChar buffer.
+ *
+ * @param args callback toUnicode arguments
+ * @param source source string to write
+ * @param length the length of source string
+ * @param offsetIndex the relative offset index which will be written.
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteSub
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 ucnv_cbToUWriteUChars (UConverterToUnicodeArgs *args,
+ const UChar* source,
+ int32_t length,
+ int32_t offsetIndex,
+ UErrorCode * err);
+
+/**
+ * ONLY used by ToU callback functions.
+ * This function will write out the Unicode substitution character (U+FFFD).
+ *
+ * @param args callback fromUnicode arguments
+ * @param offsetIndex the relative offset index from callback.
+ * @param err error status <TT>U_BUFFER_OVERFLOW</TT>
+ * @see ucnv_cbToUWriteUChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 ucnv_cbToUWriteSub (UConverterToUnicodeArgs *args,
+ int32_t offsetIndex,
+ UErrorCode * err);
+#endif
+
+#endif
diff --git a/intl/icu/source/common/unicode/ucnv_err.h b/intl/icu/source/common/unicode/ucnv_err.h
new file mode 100644
index 0000000000..c743e5614f
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucnv_err.h
@@ -0,0 +1,465 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2009, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+ *
+ *
+ * ucnv_err.h:
+ */
+
+/**
+ * \file
+ * \brief C API: UConverter predefined error callbacks
+ *
+ * <h2>Error Behaviour Functions</h2>
+ * Defines some error behaviour functions called by ucnv_{from,to}Unicode
+ * These are provided as part of ICU and many are stable, but they
+ * can also be considered only as an example of what can be done with
+ * callbacks. You may of course write your own.
+ *
+ * If you want to write your own, you may also find the functions from
+ * ucnv_cb.h useful when writing your own callbacks.
+ *
+ * These functions, although public, should NEVER be called directly.
+ * They should be used as parameters to the ucnv_setFromUCallback
+ * and ucnv_setToUCallback functions, to set the behaviour of a converter
+ * when it encounters ILLEGAL/UNMAPPED/INVALID sequences.
+ *
+ * usage example: 'STOP' doesn't need any context, but newContext
+ * could be set to something other than 'NULL' if needed. The available
+ * contexts in this header can modify the default behavior of the callback.
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setFromUCallBack(myConverter,
+ * UCNV_FROM_U_CALLBACK_STOP,
+ * NULL,
+ * &oldAction,
+ * &oldContext,
+ * &status);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Unicode -> Codepage. The behavior from Codepage to Unicode is not changed,
+ * and ucnv_setToUCallBack would need to be called in order to change
+ * that behavior too.
+ *
+ * Here is an example with a context:
+ *
+ * \code
+ * UErrorCode err = U_ZERO_ERROR;
+ * UConverter *myConverter = ucnv_open("ibm-949", &err);
+ * const void *oldContext;
+ * UConverterFromUCallback oldAction;
+ *
+ *
+ * if (U_SUCCESS(err))
+ * {
+ * ucnv_setToUCallBack(myConverter,
+ * UCNV_TO_U_CALLBACK_SUBSTITUTE,
+ * UCNV_SUB_STOP_ON_ILLEGAL,
+ * &oldAction,
+ * &oldContext,
+ * &status);
+ * }
+ * \endcode
+ *
+ * The code above tells "myConverter" to stop when it encounters an
+ * ILLEGAL/TRUNCATED/INVALID sequences when it is used to convert from
+ * Codepage -> Unicode. Any unmapped and legal characters will be
+ * substituted to be the default substitution character.
+ */
+
+#ifndef UCNV_ERR_H
+#define UCNV_ERR_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+/** Forward declaring the UConverter structure. @stable ICU 2.0 */
+struct UConverter;
+
+/** @stable ICU 2.0 */
+typedef struct UConverter UConverter;
+
+/**
+ * FROM_U, TO_U context options for sub callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SUB_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U, TO_U context options for skip callback
+ * @stable ICU 2.0
+ */
+#define UCNV_SKIP_STOP_ON_ILLEGAL "i"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to ICU (%UXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_ICU NULL
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to JAVA (\\uXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_JAVA "J"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to C (\\uXXXX \\UXXXXXXXX)
+ * TO_U_CALLBACK_ESCAPE option to escape the character value according to C (\\xXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_C "C"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Decimal escape \htmlonly(&amp;#DDDD;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_DEC "D"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * TO_U_CALLBACK_ESCAPE context option to escape the character value according to XML Hex escape \htmlonly(&amp;#xXXXX;)\endhtmlonly
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_XML_HEX "X"
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to Unicode (U+XXXXX)
+ * @stable ICU 2.0
+ */
+#define UCNV_ESCAPE_UNICODE "U"
+
+/**
+ * FROM_U_CALLBACK_ESCAPE context option to escape the code unit according to CSS2 conventions (\\HH..H<space>, that is,
+ * a backslash, 1..6 hex digits, and a space)
+ * @stable ICU 4.0
+ */
+#define UCNV_ESCAPE_CSS2 "S"
+
+/**
+ * The process condition code to be used with the callbacks.
+ * Codes which are greater than UCNV_IRREGULAR should be
+ * passed on to any chained callbacks.
+ * @stable ICU 2.0
+ */
+typedef enum {
+ UCNV_UNASSIGNED = 0, /**< The code point is unassigned.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_ILLEGAL = 1, /**< The code point is illegal. For example,
+ \\x81\\x2E is illegal in SJIS because \\x2E
+ is not a valid trail byte for the \\x81
+ lead byte.
+ Also, starting with Unicode 3.0.1, non-shortest byte sequences
+ in UTF-8 (like \\xC1\\xA1 instead of \\x61 for U+0061)
+ are also illegal, not just irregular.
+ The error code U_ILLEGAL_CHAR_FOUND will be set. */
+ UCNV_IRREGULAR = 2, /**< The codepoint is not a regular sequence in
+ the encoding. For example, \\xED\\xA0\\x80..\\xED\\xBF\\xBF
+ are irregular UTF-8 byte sequences for single surrogate
+ code points.
+ The error code U_INVALID_CHAR_FOUND will be set. */
+ UCNV_RESET = 3, /**< The callback is called with this reason when a
+ 'reset' has occurred. Callback should reset all
+ state. */
+ UCNV_CLOSE = 4, /**< Called when the converter is closed. The
+ callback should release any allocated memory.*/
+ UCNV_CLONE = 5 /**< Called when ucnv_safeClone() is called on the
+ converter. the pointer available as the
+ 'context' is an alias to the original converters'
+ context pointer. If the context must be owned
+ by the new converter, the callback must clone
+ the data and call ucnv_setFromUCallback
+ (or setToUCallback) with the correct pointer.
+ @stable ICU 2.2
+ */
+} UConverterCallbackReason;
+
+
+/**
+ * The structure for the fromUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct. @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const UChar *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
+ const UChar *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ char *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const char *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterFromUnicodeArgs;
+
+
+/**
+ * The structure for the toUnicode callback function parameter.
+ * @stable ICU 2.0
+ */
+typedef struct {
+ uint16_t size; /**< The size of this struct @stable ICU 2.0 */
+ UBool flush; /**< The internal state of converter will be reset and data flushed if set to true. @stable ICU 2.0 */
+ UConverter *converter; /**< Pointer to the converter that is opened and to which this struct is passed as an argument. @stable ICU 2.0 */
+ const char *source; /**< Pointer to the source source buffer. @stable ICU 2.0 */
+ const char *sourceLimit; /**< Pointer to the limit (end + 1) of source buffer. @stable ICU 2.0 */
+ UChar *target; /**< Pointer to the target buffer. @stable ICU 2.0 */
+ const UChar *targetLimit; /**< Pointer to the limit (end + 1) of target buffer. @stable ICU 2.0 */
+ int32_t *offsets; /**< Pointer to the buffer that receives the offsets. *offset = blah ; offset++;. @stable ICU 2.0 */
+} UConverterToUnicodeArgs;
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_STOP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ *
+ * @param context Pointer to the callback's private data
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err This should always be set to a failure status prior to calling.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_STOP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSIGNED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE, or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * current substitution string for the converter. This is the default
+ * callback.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @see ucnv_setSubstChars
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This From Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal codepoints
+ *
+ * @param context The function currently recognizes the callback options:
+ * <ul>
+ * <li>UCNV_ESCAPE_ICU: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format %UXXXX, e.g. "%uFFFE%u00AC%uC8FE").
+ * In the Event the converter doesn't support the characters {%,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * %UD84D%UDC56</li>
+ * <li>UCNV_ESCAPE_JAVA: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the Event the converter doesn't support the characters {\,u}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\uD84D\\uDC56</li>
+ * <li>UCNV_ESCAPE_C: Substitutes the ILLEGAL SEQUENCE with the hexadecimal
+ * representation in the format \\uXXXX, e.g. "\\uFFFE\\u00AC\\uC8FE").
+ * In the Event the converter doesn't support the characters {\,u,U}[A-F][0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \\U00023456</li>
+ * <li>UCNV_ESCAPE_XML_DEC: Substitutes the ILLEGAL SEQUENCE with the decimal
+ * representation in the format \htmlonly&amp;#DDDDDDDD;, e.g. "&amp;#65534;&amp;#172;&amp;#51454;")\endhtmlonly.
+ * In the Event the converter doesn't support the characters {&amp;,#}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * &amp;#144470; and Zero padding is ignored.</li>
+ * <li>UCNV_ESCAPE_XML_HEX:Substitutes the ILLEGAL SEQUENCE with the decimal
+ * representation in the format \htmlonly&amp;#xXXXX; e.g. "&amp;#xFFFE;&amp;#x00AC;&amp;#xC8FE;")\endhtmlonly.
+ * In the Event the converter doesn't support the characters {&,#,x}[0-9],
+ * it will substitute the illegal sequence with the substitution characters.
+ * Note that codeUnit(32bit int eg: unit of a surrogate pair) is represented as
+ * \htmlonly&amp;#x23456;\endhtmlonly</li>
+ * </ul>
+ * @param fromUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' UChars of the concerned Unicode sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param codePoint Single UChar32 (UTF-32) containing the concerend Unicode codepoint.
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_FROM_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterFromUnicodeArgs *fromUArgs,
+ const UChar* codeUnits,
+ int32_t length,
+ UChar32 codePoint,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback skips any ILLEGAL_SEQUENCE, or
+ * skips only UNASSIGNED_SEQUENCE depending on the context parameter
+ * simply ignoring those characters.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SKIP_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Skips any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SKIP (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE,or
+ * UNASSIGNED_SEQUENCE depending on context parameter, with the
+ * Unicode substitution character, U+FFFD.
+ *
+ * @param context The function currently recognizes the callback options:
+ * UCNV_SUB_STOP_ON_ILLEGAL: STOPS at the ILLEGAL_SEQUENCE,
+ * returning the error code back to the caller immediately.
+ * NULL: Substitutes any ILLEGAL_SEQUENCE
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_SUBSTITUTE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+/**
+ * DO NOT CALL THIS FUNCTION DIRECTLY!
+ * This To Unicode callback will Substitute the ILLEGAL SEQUENCE with the
+ * hexadecimal representation of the illegal bytes
+ * (in the format %XNN, e.g. "%XFF%X0A%XC8%X03").
+ *
+ * @param context This function currently recognizes the callback options:
+ * UCNV_ESCAPE_ICU, UCNV_ESCAPE_JAVA, UCNV_ESCAPE_C, UCNV_ESCAPE_XML_DEC,
+ * UCNV_ESCAPE_XML_HEX and UCNV_ESCAPE_UNICODE.
+ * @param toUArgs Information about the conversion in progress
+ * @param codeUnits Points to 'length' bytes of the concerned codepage sequence
+ * @param length Size (in bytes) of the concerned codepage sequence
+ * @param reason Defines the reason the callback was invoked
+ * @param err Return value will be set to success if the callback was handled,
+ * otherwise this value will be set to a failure status.
+ * @stable ICU 2.0
+ */
+
+U_CAPI void U_EXPORT2 UCNV_TO_U_CALLBACK_ESCAPE (
+ const void *context,
+ UConverterToUnicodeArgs *toUArgs,
+ const char* codeUnits,
+ int32_t length,
+ UConverterCallbackReason reason,
+ UErrorCode * err);
+
+#endif
+
+#endif
+
+/*UCNV_ERR_H*/
diff --git a/intl/icu/source/common/unicode/ucnvsel.h b/intl/icu/source/common/unicode/ucnvsel.h
new file mode 100644
index 0000000000..9373ec951b
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucnvsel.h
@@ -0,0 +1,193 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2008-2011, International Business Machines
+* Corporation, Google and others. All Rights Reserved.
+*
+*******************************************************************************
+*/
+/*
+ * Author : eldawy@google.com (Mohamed Eldawy)
+ * ucnvsel.h
+ *
+ * Purpose: To generate a list of encodings capable of handling
+ * a given Unicode text
+ *
+ * Started 09-April-2008
+ */
+
+#ifndef __ICU_UCNV_SEL_H__
+#define __ICU_UCNV_SEL_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_CONVERSION
+
+#include "unicode/uset.h"
+#include "unicode/utf16.h"
+#include "unicode/uenum.h"
+#include "unicode/ucnv.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Encoding/charset encoding selector
+ *
+ * A converter selector is built with a set of encoding/charset names
+ * and given an input string returns the set of names of the
+ * corresponding converters which can convert the string.
+ *
+ * A converter selector can be serialized into a buffer and reopened
+ * from the serialized form.
+ */
+
+struct UConverterSelector;
+/**
+ * @{
+ * Typedef for selector data structure.
+ */
+typedef struct UConverterSelector UConverterSelector;
+/** @} */
+
+/**
+ * Open a selector.
+ * If converterListSize is 0, build for all available converters.
+ * If excludedCodePoints is NULL, don't exclude any code points.
+ *
+ * @param converterList a pointer to encoding names needed to be involved.
+ * Can be NULL if converterListSize==0.
+ * The list and the names will be cloned, and the caller
+ * retains ownership of the original.
+ * @param converterListSize number of encodings in above list.
+ * If 0, builds a selector for all available converters.
+ * @param excludedCodePoints a set of code points to be excluded from consideration.
+ * That is, excluded code points in a string do not change
+ * the selection result. (They might be handled by a callback.)
+ * Use NULL to exclude nothing.
+ * @param whichSet what converter set to use? Use this to determine whether
+ * to consider only roundtrip mappings or also fallbacks.
+ * @param status an in/out ICU UErrorCode
+ * @return the new selector
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UConverterSelector* U_EXPORT2
+ucnvsel_open(const char* const* converterList, int32_t converterListSize,
+ const USet* excludedCodePoints,
+ const UConverterUnicodeSet whichSet, UErrorCode* status);
+
+/**
+ * Closes a selector.
+ * If any Enumerations were returned by ucnv_select*, they become invalid.
+ * They can be closed before or after calling ucnv_closeSelector,
+ * but should never be used after the selector is closed.
+ *
+ * @see ucnv_selectForString
+ * @see ucnv_selectForUTF8
+ *
+ * @param sel selector to close
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+ucnvsel_close(UConverterSelector *sel);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUConverterSelectorPointer
+ * "Smart pointer" class, closes a UConverterSelector via ucnvsel_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUConverterSelectorPointer, UConverterSelector, ucnvsel_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Open a selector from its serialized form.
+ * The buffer must remain valid and unchanged for the lifetime of the selector.
+ * This is much faster than creating a selector from scratch.
+ * Using a serialized form from a different machine (endianness/charset) is supported.
+ *
+ * @param buffer pointer to the serialized form of a converter selector;
+ * must be 32-bit-aligned
+ * @param length the capacity of this buffer (can be equal to or larger than
+ * the actual data length)
+ * @param status an in/out ICU UErrorCode
+ * @return the new selector
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UConverterSelector* U_EXPORT2
+ucnvsel_openFromSerialized(const void* buffer, int32_t length, UErrorCode* status);
+
+/**
+ * Serialize a selector into a linear buffer.
+ * The serialized form is portable to different machines.
+ *
+ * @param sel selector to consider
+ * @param buffer pointer to 32-bit-aligned memory to be filled with the
+ * serialized form of this converter selector
+ * @param bufferCapacity the capacity of this buffer
+ * @param status an in/out ICU UErrorCode
+ * @return the required buffer capacity to hold serialize data (even if the call fails
+ * with a U_BUFFER_OVERFLOW_ERROR, it will return the required capacity)
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+ucnvsel_serialize(const UConverterSelector* sel,
+ void* buffer, int32_t bufferCapacity, UErrorCode* status);
+
+/**
+ * Select converters that can map all characters in a UTF-16 string,
+ * ignoring the excluded code points.
+ *
+ * @param sel a selector
+ * @param s UTF-16 string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param status an in/out ICU UErrorCode
+ * @return an enumeration containing encoding names.
+ * The returned encoding names and their order will be the same as
+ * supplied when building the selector.
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnvsel_selectForString(const UConverterSelector* sel,
+ const UChar *s, int32_t length, UErrorCode *status);
+
+/**
+ * Select converters that can map all characters in a UTF-8 string,
+ * ignoring the excluded code points.
+ *
+ * @param sel a selector
+ * @param s UTF-8 string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param status an in/out ICU UErrorCode
+ * @return an enumeration containing encoding names.
+ * The returned encoding names and their order will be the same as
+ * supplied when building the selector.
+ *
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucnvsel_selectForUTF8(const UConverterSelector* sel,
+ const char *s, int32_t length, UErrorCode *status);
+
+#endif /* !UCONFIG_NO_CONVERSION */
+
+#endif /* __ICU_UCNV_SEL_H__ */
diff --git a/intl/icu/source/common/unicode/uconfig.h b/intl/icu/source/common/unicode/uconfig.h
new file mode 100644
index 0000000000..3818ca02ef
--- /dev/null
+++ b/intl/icu/source/common/unicode/uconfig.h
@@ -0,0 +1,466 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: uconfig.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep19
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UCONFIG_H__
+#define __UCONFIG_H__
+
+
+/*!
+ * \file
+ * \brief User-configurable settings
+ *
+ * Miscellaneous switches:
+ *
+ * A number of macros affect a variety of minor aspects of ICU.
+ * Most of them used to be defined elsewhere (e.g., in utypes.h or platform.h)
+ * and moved here to make them easier to find.
+ *
+ * Switches for excluding parts of ICU library code modules:
+ *
+ * Changing these macros allows building partial, smaller libraries for special purposes.
+ * By default, all modules are built.
+ * The switches are fairly coarse, controlling large modules.
+ * Basic services cannot be turned off.
+ *
+ * Building with any of these options does not guarantee that the
+ * ICU build process will completely work. It is recommended that
+ * the ICU libraries and data be built using the normal build.
+ * At that time you should remove the data used by those services.
+ * After building the ICU data library, you should rebuild the ICU
+ * libraries with these switches customized to your needs.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * If this switch is defined, ICU will attempt to load a header file named "uconfig_local.h"
+ * prior to determining default settings for uconfig variables.
+ *
+ * @internal ICU 4.0
+ */
+#if defined(UCONFIG_USE_LOCAL)
+#include "uconfig_local.h"
+#endif
+
+/**
+ * \def U_DEBUG
+ * Determines whether to include debugging code.
+ * Automatically set on Windows, but most compilers do not have
+ * related predefined macros.
+ * @internal
+ */
+#ifdef U_DEBUG
+ /* Use the predefined value. */
+#elif defined(_DEBUG)
+ /*
+ * _DEBUG is defined by Visual Studio debug compilation.
+ * Do *not* test for its NDEBUG macro: It is an orthogonal macro
+ * which disables assert().
+ */
+# define U_DEBUG 1
+# else
+# define U_DEBUG 0
+#endif
+
+/**
+ * Determines whether to enable auto cleanup of libraries.
+ * @internal
+ */
+#ifndef UCLN_NO_AUTO_CLEANUP
+#define UCLN_NO_AUTO_CLEANUP 1
+#endif
+
+/**
+ * \def U_DISABLE_RENAMING
+ * Determines whether to disable renaming or not.
+ * @internal
+ */
+#ifndef U_DISABLE_RENAMING
+#define U_DISABLE_RENAMING 0
+#endif
+
+/**
+ * \def U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ * Determines whether utypes.h includes utf.h, utf8.h, utf16.h and utf_old.h.
+ * utypes.h includes those headers if this macro is defined to 0.
+ * Otherwise, each those headers must be included explicitly when using one of their macros.
+ * Defaults to 0 for backward compatibility, except inside ICU.
+ * @stable ICU 49
+ */
+#ifdef U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+ /* Use the predefined value. */
+#elif defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || \
+ defined(U_IO_IMPLEMENTATION) || defined(U_LAYOUT_IMPLEMENTATION) || defined(U_LAYOUTEX_IMPLEMENTATION) || \
+ defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 1
+#else
+# define U_NO_DEFAULT_INCLUDE_UTF_HEADERS 0
+#endif
+
+/**
+ * \def U_OVERRIDE_CXX_ALLOCATION
+ * Determines whether to override new and delete.
+ * ICU is normally built such that all of its C++ classes, via their UMemory base,
+ * override operators new and delete to use its internal, customizable,
+ * non-exception-throwing memory allocation functions. (Default value 1 for this macro.)
+ *
+ * This is especially important when the application and its libraries use multiple heaps.
+ * For example, on Windows, this allows the ICU DLL to be used by
+ * applications that statically link the C Runtime library.
+ *
+ * @stable ICU 2.2
+ */
+#ifndef U_OVERRIDE_CXX_ALLOCATION
+#define U_OVERRIDE_CXX_ALLOCATION 1
+#endif
+
+/**
+ * \def U_ENABLE_TRACING
+ * Determines whether to enable tracing.
+ * @internal
+ */
+#ifndef U_ENABLE_TRACING
+#define U_ENABLE_TRACING 0
+#endif
+
+/**
+ * \def UCONFIG_ENABLE_PLUGINS
+ * Determines whether to enable ICU plugins.
+ * @internal
+ */
+#ifndef UCONFIG_ENABLE_PLUGINS
+#define UCONFIG_ENABLE_PLUGINS 0
+#endif
+
+/**
+ * \def U_ENABLE_DYLOAD
+ * Whether to enable Dynamic loading in ICU.
+ * @internal
+ */
+#ifndef U_ENABLE_DYLOAD
+#define U_ENABLE_DYLOAD 1
+#endif
+
+/**
+ * \def U_CHECK_DYLOAD
+ * Whether to test Dynamic loading as an OS capability.
+ * @internal
+ */
+#ifndef U_CHECK_DYLOAD
+#define U_CHECK_DYLOAD 1
+#endif
+
+/**
+ * \def U_DEFAULT_SHOW_DRAFT
+ * Do we allow ICU users to use the draft APIs by default?
+ * @internal
+ */
+#ifndef U_DEFAULT_SHOW_DRAFT
+#define U_DEFAULT_SHOW_DRAFT 1
+#endif
+
+/*===========================================================================*/
+/* Custom icu entry point renaming */
+/*===========================================================================*/
+
+/**
+ * \def U_HAVE_LIB_SUFFIX
+ * 1 if a custom library suffix is set.
+ * @internal
+ */
+#ifdef U_HAVE_LIB_SUFFIX
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME) || defined(U_IN_DOXYGEN)
+# define U_HAVE_LIB_SUFFIX 1
+#endif
+
+/**
+ * \def U_LIB_SUFFIX_C_NAME_STRING
+ * Defines the library suffix as a string with C syntax.
+ * @internal
+ */
+#ifdef U_LIB_SUFFIX_C_NAME_STRING
+ /* Use the predefined value. */
+#elif defined(U_LIB_SUFFIX_C_NAME)
+# define CONVERT_TO_STRING(s) #s
+# define U_LIB_SUFFIX_C_NAME_STRING CONVERT_TO_STRING(U_LIB_SUFFIX_C_NAME)
+#else
+# define U_LIB_SUFFIX_C_NAME_STRING ""
+#endif
+
+/* common/i18n library switches --------------------------------------------- */
+
+/**
+ * \def UCONFIG_ONLY_COLLATION
+ * This switch turns off modules that are not needed for collation.
+ *
+ * It does not turn off legacy conversion because that is necessary
+ * for ICU to work on EBCDIC platforms (for the default converter).
+ * If you want "only collation" and do not build for EBCDIC,
+ * then you can define UCONFIG_NO_CONVERSION or UCONFIG_NO_LEGACY_CONVERSION to 1 as well.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_ONLY_COLLATION
+# define UCONFIG_ONLY_COLLATION 0
+#endif
+
+#if UCONFIG_ONLY_COLLATION
+ /* common library */
+# define UCONFIG_NO_BREAK_ITERATION 1
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_NO_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_FORMATTING 1
+# define UCONFIG_NO_TRANSLITERATION 1
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 1
+#endif
+
+/* common library switches -------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_FILE_IO
+ * This switch turns off all file access in the common library
+ * where file access is only used for data loading.
+ * ICU data must then be provided in the form of a data DLL (or with an
+ * equivalent way to link to the data residing in an executable,
+ * as in building a combined library with both the common library's code and
+ * the data), or via udata_setCommonData().
+ * Application data must be provided via udata_setAppData() or by using
+ * "open" functions that take pointers to data, for example ucol_openBinary().
+ *
+ * File access is not used at all in the i18n library.
+ *
+ * File access cannot be turned off for the icuio library or for the ICU
+ * test suites and ICU tools.
+ *
+ * @stable ICU 3.6
+ */
+#ifndef UCONFIG_NO_FILE_IO
+# define UCONFIG_NO_FILE_IO 0
+#endif
+
+#if UCONFIG_NO_FILE_IO && defined(U_TIMEZONE_FILES_DIR)
+# error Contradictory file io switches in uconfig.h.
+#endif
+
+/**
+ * \def UCONFIG_NO_CONVERSION
+ * ICU will not completely build (compiling the tools fails) with this
+ * switch turned on.
+ * This switch turns off all converters.
+ *
+ * You may want to use this together with U_CHARSET_IS_UTF8 defined to 1
+ * in utypes.h if char* strings in your environment are always in UTF-8.
+ *
+ * @stable ICU 3.2
+ * @see U_CHARSET_IS_UTF8
+ */
+#ifndef UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_CONVERSION 0
+#endif
+
+#if UCONFIG_NO_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 1
+#endif
+
+/**
+ * \def UCONFIG_ONLY_HTML_CONVERSION
+ * This switch turns off all of the converters NOT listed in
+ * the HTML encoding standard:
+ * http://www.w3.org/TR/encoding/#names-and-labels
+ *
+ * This is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 55
+ */
+#ifndef UCONFIG_ONLY_HTML_CONVERSION
+# define UCONFIG_ONLY_HTML_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_LEGACY_CONVERSION
+ * This switch turns off all converters except for
+ * - Unicode charsets (UTF-7/8/16/32, CESU-8, SCSU, BOCU-1)
+ * - US-ASCII
+ * - ISO-8859-1
+ *
+ * Turning off legacy conversion is not possible on EBCDIC platforms
+ * because they need ibm-37 or ibm-1047 default converters.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_LEGACY_CONVERSION
+# define UCONFIG_NO_LEGACY_CONVERSION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_NORMALIZATION
+ * This switch turns off normalization.
+ * It implies turning off several other services as well, for example
+ * collation and IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_NORMALIZATION
+# define UCONFIG_NO_NORMALIZATION 0
+#endif
+
+/**
+ * \def UCONFIG_USE_ML_PHRASE_BREAKING
+ * This switch turns on BudouX ML phrase-based line breaking, rather than using the dictionary.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_USE_ML_PHRASE_BREAKING
+# define UCONFIG_USE_ML_PHRASE_BREAKING 0
+#endif
+
+#if UCONFIG_NO_NORMALIZATION
+ /* common library */
+ /* ICU 50 CJK dictionary BreakIterator uses normalization */
+# define UCONFIG_NO_BREAK_ITERATION 1
+ /* IDNA (UTS #46) is implemented via normalization */
+# define UCONFIG_NO_IDNA 1
+
+ /* i18n library */
+# if UCONFIG_ONLY_COLLATION
+# error Contradictory collation switches in uconfig.h.
+# endif
+# define UCONFIG_NO_COLLATION 1
+# define UCONFIG_NO_TRANSLITERATION 1
+#endif
+
+/**
+ * \def UCONFIG_NO_BREAK_ITERATION
+ * This switch turns off break iteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_BREAK_ITERATION
+# define UCONFIG_NO_BREAK_ITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_IDNA
+ * This switch turns off IDNA.
+ *
+ * @stable ICU 2.6
+ */
+#ifndef UCONFIG_NO_IDNA
+# define UCONFIG_NO_IDNA 0
+#endif
+
+/**
+ * \def UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+ * Determines the default UMessagePatternApostropheMode.
+ * See the documentation for that enum.
+ *
+ * @stable ICU 4.8
+ */
+#ifndef UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE
+# define UCONFIG_MSGPAT_DEFAULT_APOSTROPHE_MODE UMSGPAT_APOS_DOUBLE_OPTIONAL
+#endif
+
+/**
+ * \def UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+ * On platforms where U_PLATFORM_HAS_WIN32_API is true, this switch determines
+ * if the Windows platform APIs are used for LCID<->Locale Name conversions.
+ * Otherwise, only the built-in ICU tables are used.
+ *
+ * @internal ICU 64
+ */
+#ifndef UCONFIG_USE_WINDOWS_LCID_MAPPING_API
+# define UCONFIG_USE_WINDOWS_LCID_MAPPING_API 1
+#endif
+
+/* i18n library switches ---------------------------------------------------- */
+
+/**
+ * \def UCONFIG_NO_COLLATION
+ * This switch turns off collation and collation-based string search.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_COLLATION
+# define UCONFIG_NO_COLLATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_FORMATTING
+ * This switch turns off formatting and calendar/timezone services.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_FORMATTING
+# define UCONFIG_NO_FORMATTING 0
+#endif
+
+/**
+ * \def UCONFIG_NO_TRANSLITERATION
+ * This switch turns off transliteration.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_TRANSLITERATION
+# define UCONFIG_NO_TRANSLITERATION 0
+#endif
+
+/**
+ * \def UCONFIG_NO_REGULAR_EXPRESSIONS
+ * This switch turns off regular expressions.
+ *
+ * @stable ICU 2.4
+ */
+#ifndef UCONFIG_NO_REGULAR_EXPRESSIONS
+# define UCONFIG_NO_REGULAR_EXPRESSIONS 0
+#endif
+
+/**
+ * \def UCONFIG_NO_SERVICE
+ * This switch turns off service registration.
+ *
+ * @stable ICU 3.2
+ */
+#ifndef UCONFIG_NO_SERVICE
+# define UCONFIG_NO_SERVICE 0
+#endif
+
+/**
+ * \def UCONFIG_HAVE_PARSEALLINPUT
+ * This switch turns on the "parse all input" attribute. Binary incompatible.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_HAVE_PARSEALLINPUT
+# define UCONFIG_HAVE_PARSEALLINPUT 1
+#endif
+
+/**
+ * \def UCONFIG_NO_FILTERED_BREAK_ITERATION
+ * This switch turns off filtered break iteration code.
+ *
+ * @internal
+ */
+#ifndef UCONFIG_NO_FILTERED_BREAK_ITERATION
+# define UCONFIG_NO_FILTERED_BREAK_ITERATION 0
+#endif
+
+#endif // __UCONFIG_H__
diff --git a/intl/icu/source/common/unicode/ucpmap.h b/intl/icu/source/common/unicode/ucpmap.h
new file mode 100644
index 0000000000..a740bd160f
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucpmap.h
@@ -0,0 +1,158 @@
+// © 2018 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucpmap.h
+// created: 2018sep03 Markus W. Scherer
+
+#ifndef __UCPMAP_H__
+#define __UCPMAP_H__
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: This file defines an abstract map from Unicode code points to integer values.
+ *
+ * @see UCPMap
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ */
+
+/**
+ * Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
+ *
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ * @stable ICU 63
+ */
+typedef struct UCPMap UCPMap;
+
+/**
+ * Selectors for how ucpmap_getRange() etc. should report value ranges overlapping with surrogates.
+ * Most users should use UCPMAP_RANGE_NORMAL.
+ *
+ * @see ucpmap_getRange
+ * @see ucptrie_getRange
+ * @see umutablecptrie_getRange
+ * @stable ICU 63
+ */
+enum UCPMapRangeOption {
+ /**
+ * ucpmap_getRange() enumerates all same-value ranges as stored in the map.
+ * Most users should use this option.
+ * @stable ICU 63
+ */
+ UCPMAP_RANGE_NORMAL,
+ /**
+ * ucpmap_getRange() enumerates all same-value ranges as stored in the map,
+ * except that lead surrogates (U+D800..U+DBFF) are treated as having the
+ * surrogateValue, which is passed to getRange() as a separate parameter.
+ * The surrogateValue is not transformed via filter().
+ * See U_IS_LEAD(c).
+ *
+ * Most users should use UCPMAP_RANGE_NORMAL instead.
+ *
+ * This option is useful for maps that map surrogate code *units* to
+ * special values optimized for UTF-16 string processing
+ * or for special error behavior for unpaired surrogates,
+ * but those values are not to be associated with the lead surrogate code *points*.
+ * @stable ICU 63
+ */
+ UCPMAP_RANGE_FIXED_LEAD_SURROGATES,
+ /**
+ * ucpmap_getRange() enumerates all same-value ranges as stored in the map,
+ * except that all surrogates (U+D800..U+DFFF) are treated as having the
+ * surrogateValue, which is passed to getRange() as a separate parameter.
+ * The surrogateValue is not transformed via filter().
+ * See U_IS_SURROGATE(c).
+ *
+ * Most users should use UCPMAP_RANGE_NORMAL instead.
+ *
+ * This option is useful for maps that map surrogate code *units* to
+ * special values optimized for UTF-16 string processing
+ * or for special error behavior for unpaired surrogates,
+ * but those values are not to be associated with the lead surrogate code *points*.
+ * @stable ICU 63
+ */
+ UCPMAP_RANGE_FIXED_ALL_SURROGATES
+};
+#ifndef U_IN_DOXYGEN
+typedef enum UCPMapRangeOption UCPMapRangeOption;
+#endif
+
+/**
+ * Returns the value for a code point as stored in the map, with range checking.
+ * Returns an implementation-defined error value if c is not in the range 0..U+10FFFF.
+ *
+ * @param map the map
+ * @param c the code point
+ * @return the map value,
+ * or an implementation-defined error value if the code point is not in the range 0..U+10FFFF
+ * @stable ICU 63
+ */
+U_CAPI uint32_t U_EXPORT2
+ucpmap_get(const UCPMap *map, UChar32 c);
+
+/**
+ * Callback function type: Modifies a map value.
+ * Optionally called by ucpmap_getRange()/ucptrie_getRange()/umutablecptrie_getRange().
+ * The modified value will be returned by the getRange function.
+ *
+ * Can be used to ignore some of the value bits,
+ * make a filter for one of several values,
+ * return a value index computed from the map value, etc.
+ *
+ * @param context an opaque pointer, as passed into the getRange function
+ * @param value a value from the map
+ * @return the modified value
+ * @stable ICU 63
+ */
+typedef uint32_t U_CALLCONV
+UCPMapValueFilter(const void *context, uint32_t value);
+
+/**
+ * Returns the last code point such that all those from start to there have the same value.
+ * Can be used to efficiently iterate over all same-value ranges in a map.
+ * (This is normally faster than iterating over code points and get()ting each value,
+ * but much slower than a data structure that stores ranges directly.)
+ *
+ * If the UCPMapValueFilter function pointer is not NULL, then
+ * the value to be delivered is passed through that function, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * Example:
+ * \code
+ * UChar32 start = 0, end;
+ * uint32_t value;
+ * while ((end = ucpmap_getRange(map, start, UCPMAP_RANGE_NORMAL, 0,
+ * NULL, NULL, &value)) >= 0) {
+ * // Work with the range start..end and its value.
+ * start = end + 1;
+ * }
+ * \endcode
+ *
+ * @param map the map
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
+ * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
+ * @param filter a pointer to a function that may modify the map data value,
+ * or NULL if the values from the map are to be used unmodified
+ * @param context an opaque pointer that is passed on to the filter function
+ * @param pValue if not NULL, receives the value that every code point start..end has;
+ * may have been modified by filter(context, map value)
+ * if that function pointer is not NULL
+ * @return the range end code point, or -1 if start is not a valid code point
+ * @stable ICU 63
+ */
+U_CAPI UChar32 U_EXPORT2
+ucpmap_getRange(const UCPMap *map, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/ucptrie.h b/intl/icu/source/common/unicode/ucptrie.h
new file mode 100644
index 0000000000..dadef79c51
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucptrie.h
@@ -0,0 +1,645 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// ucptrie.h (modified from utrie2.h)
+// created: 2017dec29 Markus W. Scherer
+
+#ifndef __UCPTRIE_H__
+#define __UCPTRIE_H__
+
+#include "unicode/utypes.h"
+#include "unicode/ucpmap.h"
+#include "unicode/utf8.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: This file defines an immutable Unicode code point trie.
+ *
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ */
+
+#ifndef U_IN_DOXYGEN
+/** @internal */
+typedef union UCPTrieData {
+ /** @internal */
+ const void *ptr0;
+ /** @internal */
+ const uint16_t *ptr16;
+ /** @internal */
+ const uint32_t *ptr32;
+ /** @internal */
+ const uint8_t *ptr8;
+} UCPTrieData;
+#endif
+
+/**
+ * Immutable Unicode code point trie structure.
+ * Fast, reasonably compact, map from Unicode code points (U+0000..U+10FFFF) to integer values.
+ * For details see https://icu.unicode.org/design/struct/utrie
+ *
+ * Do not access UCPTrie fields directly; use public functions and macros.
+ * Functions are easy to use: They support all trie types and value widths.
+ *
+ * When performance is really important, macros provide faster access.
+ * Most macros are specific to either "fast" or "small" tries, see UCPTrieType.
+ * There are "fast" macros for special optimized use cases.
+ *
+ * The macros will return bogus values, or may crash, if used on the wrong type or value width.
+ *
+ * @see UMutableCPTrie
+ * @stable ICU 63
+ */
+struct UCPTrie {
+#ifndef U_IN_DOXYGEN
+ /** @internal */
+ const uint16_t *index;
+ /** @internal */
+ UCPTrieData data;
+
+ /** @internal */
+ int32_t indexLength;
+ /** @internal */
+ int32_t dataLength;
+ /** Start of the last range which ends at U+10FFFF. @internal */
+ UChar32 highStart;
+ /** highStart>>12 @internal */
+ uint16_t shifted12HighStart;
+
+ /** @internal */
+ int8_t type; // UCPTrieType
+ /** @internal */
+ int8_t valueWidth; // UCPTrieValueWidth
+
+ /** padding/reserved @internal */
+ uint32_t reserved32;
+ /** padding/reserved @internal */
+ uint16_t reserved16;
+
+ /**
+ * Internal index-3 null block offset.
+ * Set to an impossibly high value (e.g., 0xffff) if there is no dedicated index-3 null block.
+ * @internal
+ */
+ uint16_t index3NullOffset;
+ /**
+ * Internal data null block offset, not shifted.
+ * Set to an impossibly high value (e.g., 0xfffff) if there is no dedicated data null block.
+ * @internal
+ */
+ int32_t dataNullOffset;
+ /** @internal */
+ uint32_t nullValue;
+
+#ifdef UCPTRIE_DEBUG
+ /** @internal */
+ const char *name;
+#endif
+#endif
+};
+#ifndef U_IN_DOXYGEN
+typedef struct UCPTrie UCPTrie;
+#endif
+
+/**
+ * Selectors for the type of a UCPTrie.
+ * Different trade-offs for size vs. speed.
+ *
+ * @see umutablecptrie_buildImmutable
+ * @see ucptrie_openFromBinary
+ * @see ucptrie_getType
+ * @stable ICU 63
+ */
+enum UCPTrieType {
+ /**
+ * For ucptrie_openFromBinary() to accept any type.
+ * ucptrie_getType() will return the actual type.
+ * @stable ICU 63
+ */
+ UCPTRIE_TYPE_ANY = -1,
+ /**
+ * Fast/simple/larger BMP data structure. Use functions and "fast" macros.
+ * @stable ICU 63
+ */
+ UCPTRIE_TYPE_FAST,
+ /**
+ * Small/slower BMP data structure. Use functions and "small" macros.
+ * @stable ICU 63
+ */
+ UCPTRIE_TYPE_SMALL
+};
+#ifndef U_IN_DOXYGEN
+typedef enum UCPTrieType UCPTrieType;
+#endif
+
+/**
+ * Selectors for the number of bits in a UCPTrie data value.
+ *
+ * @see umutablecptrie_buildImmutable
+ * @see ucptrie_openFromBinary
+ * @see ucptrie_getValueWidth
+ * @stable ICU 63
+ */
+enum UCPTrieValueWidth {
+ /**
+ * For ucptrie_openFromBinary() to accept any data value width.
+ * ucptrie_getValueWidth() will return the actual data value width.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_ANY = -1,
+ /**
+ * The trie stores 16 bits per data value.
+ * It returns them as unsigned values 0..0xffff=65535.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_16,
+ /**
+ * The trie stores 32 bits per data value.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_32,
+ /**
+ * The trie stores 8 bits per data value.
+ * It returns them as unsigned values 0..0xff=255.
+ * @stable ICU 63
+ */
+ UCPTRIE_VALUE_BITS_8
+};
+#ifndef U_IN_DOXYGEN
+typedef enum UCPTrieValueWidth UCPTrieValueWidth;
+#endif
+
+/**
+ * Opens a trie from its binary form, stored in 32-bit-aligned memory.
+ * Inverse of ucptrie_toBinary().
+ *
+ * The memory must remain valid and unchanged as long as the trie is used.
+ * You must ucptrie_close() the trie once you are done using it.
+ *
+ * @param type selects the trie type; results in an
+ * U_INVALID_FORMAT_ERROR if it does not match the binary data;
+ * use UCPTRIE_TYPE_ANY to accept any type
+ * @param valueWidth selects the number of bits in a data value; results in an
+ * U_INVALID_FORMAT_ERROR if it does not match the binary data;
+ * use UCPTRIE_VALUE_BITS_ANY to accept any data value width
+ * @param data a pointer to 32-bit-aligned memory containing the binary data of a UCPTrie
+ * @param length the number of bytes available at data;
+ * can be more than necessary
+ * @param pActualLength receives the actual number of bytes at data taken up by the trie data;
+ * can be NULL
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the trie
+ *
+ * @see umutablecptrie_open
+ * @see umutablecptrie_buildImmutable
+ * @see ucptrie_toBinary
+ * @stable ICU 63
+ */
+U_CAPI UCPTrie * U_EXPORT2
+ucptrie_openFromBinary(UCPTrieType type, UCPTrieValueWidth valueWidth,
+ const void *data, int32_t length, int32_t *pActualLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Closes a trie and releases associated memory.
+ *
+ * @param trie the trie
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+ucptrie_close(UCPTrie *trie);
+
+/**
+ * Returns the trie type.
+ *
+ * @param trie the trie
+ * @return the trie type
+ * @see ucptrie_openFromBinary
+ * @see UCPTRIE_TYPE_ANY
+ * @stable ICU 63
+ */
+U_CAPI UCPTrieType U_EXPORT2
+ucptrie_getType(const UCPTrie *trie);
+
+/**
+ * Returns the number of bits in a trie data value.
+ *
+ * @param trie the trie
+ * @return the number of bits in a trie data value
+ * @see ucptrie_openFromBinary
+ * @see UCPTRIE_VALUE_BITS_ANY
+ * @stable ICU 63
+ */
+U_CAPI UCPTrieValueWidth U_EXPORT2
+ucptrie_getValueWidth(const UCPTrie *trie);
+
+/**
+ * Returns the value for a code point as stored in the trie, with range checking.
+ * Returns the trie error value if c is not in the range 0..U+10FFFF.
+ *
+ * Easier to use than UCPTRIE_FAST_GET() and similar macros but slower.
+ * Easier to use because, unlike the macros, this function works on all UCPTrie
+ * objects, for all types and value widths.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @return the trie value,
+ * or the trie error value if the code point is not in the range 0..U+10FFFF
+ * @stable ICU 63
+ */
+U_CAPI uint32_t U_EXPORT2
+ucptrie_get(const UCPTrie *trie, UChar32 c);
+
+/**
+ * Returns the last code point such that all those from start to there have the same value.
+ * Can be used to efficiently iterate over all same-value ranges in a trie.
+ * (This is normally faster than iterating over code points and get()ting each value,
+ * but much slower than a data structure that stores ranges directly.)
+ *
+ * If the UCPMapValueFilter function pointer is not NULL, then
+ * the value to be delivered is passed through that function, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * Example:
+ * \code
+ * UChar32 start = 0, end;
+ * uint32_t value;
+ * while ((end = ucptrie_getRange(trie, start, UCPMAP_RANGE_NORMAL, 0,
+ * NULL, NULL, &value)) >= 0) {
+ * // Work with the range start..end and its value.
+ * start = end + 1;
+ * }
+ * \endcode
+ *
+ * @param trie the trie
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
+ * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
+ * @param filter a pointer to a function that may modify the trie data value,
+ * or NULL if the values from the trie are to be used unmodified
+ * @param context an opaque pointer that is passed on to the filter function
+ * @param pValue if not NULL, receives the value that every code point start..end has;
+ * may have been modified by filter(context, trie value)
+ * if that function pointer is not NULL
+ * @return the range end code point, or -1 if start is not a valid code point
+ * @stable ICU 63
+ */
+U_CAPI UChar32 U_EXPORT2
+ucptrie_getRange(const UCPTrie *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+/**
+ * Writes a memory-mappable form of the trie into 32-bit aligned memory.
+ * Inverse of ucptrie_openFromBinary().
+ *
+ * @param trie the trie
+ * @param data a pointer to 32-bit-aligned memory to be filled with the trie data;
+ * can be NULL if capacity==0
+ * @param capacity the number of bytes available at data, or 0 for pure preflighting
+ * @param pErrorCode an in/out ICU UErrorCode;
+ * U_BUFFER_OVERFLOW_ERROR if the capacity is too small
+ * @return the number of bytes written or (if buffer overflow) needed for the trie
+ *
+ * @see ucptrie_openFromBinary()
+ * @stable ICU 63
+ */
+U_CAPI int32_t U_EXPORT2
+ucptrie_toBinary(const UCPTrie *trie, void *data, int32_t capacity, UErrorCode *pErrorCode);
+
+/**
+ * Macro parameter value for a trie with 16-bit data values.
+ * Use the name of this macro as a "dataAccess" parameter in other macros.
+ * Do not use this macro in any other way.
+ *
+ * @see UCPTRIE_VALUE_BITS_16
+ * @stable ICU 63
+ */
+#define UCPTRIE_16(trie, i) ((trie)->data.ptr16[i])
+
+/**
+ * Macro parameter value for a trie with 32-bit data values.
+ * Use the name of this macro as a "dataAccess" parameter in other macros.
+ * Do not use this macro in any other way.
+ *
+ * @see UCPTRIE_VALUE_BITS_32
+ * @stable ICU 63
+ */
+#define UCPTRIE_32(trie, i) ((trie)->data.ptr32[i])
+
+/**
+ * Macro parameter value for a trie with 8-bit data values.
+ * Use the name of this macro as a "dataAccess" parameter in other macros.
+ * Do not use this macro in any other way.
+ *
+ * @see UCPTRIE_VALUE_BITS_8
+ * @stable ICU 63
+ */
+#define UCPTRIE_8(trie, i) ((trie)->data.ptr8[i])
+
+/**
+ * Returns a trie value for a code point, with range checking.
+ * Returns the trie error value if c is not in the range 0..U+10FFFF.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point
+ * @return The code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_CP_INDEX(trie, 0xffff, c))
+
+/**
+ * Returns a 16-bit trie value for a code point, with range checking.
+ * Returns the trie error value if c is not in the range U+0000..U+10FFFF.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_SMALL
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point
+ * @return The code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_SMALL_GET(trie, dataAccess, c) \
+ dataAccess(trie, _UCPTRIE_CP_INDEX(trie, UCPTRIE_SMALL_MAX, c))
+
+/**
+ * UTF-16: Reads the next code point (UChar32 c, out), post-increments src,
+ * and gets a value from the trie.
+ * Sets the trie error value if c is an unpaired surrogate.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param limit (const UChar *, in) the limit pointer for the text, or NULL if NUL-terminated
+ * @param c (UChar32, out) variable for the code point
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U16_NEXT(trie, dataAccess, src, limit, c, result) UPRV_BLOCK_MACRO_BEGIN { \
+ (c) = *(src)++; \
+ int32_t __index; \
+ if (!U16_IS_SURROGATE(c)) { \
+ __index = _UCPTRIE_FAST_INDEX(trie, c); \
+ } else { \
+ uint16_t __c2; \
+ if (U16_IS_SURROGATE_LEAD(c) && (src) != (limit) && U16_IS_TRAIL(__c2 = *(src))) { \
+ ++(src); \
+ (c) = U16_GET_SUPPLEMENTARY((c), __c2); \
+ __index = _UCPTRIE_SMALL_INDEX(trie, c); \
+ } else { \
+ __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
+ } \
+ } \
+ (result) = dataAccess(trie, __index); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * UTF-16: Reads the previous code point (UChar32 c, out), pre-decrements src,
+ * and gets a value from the trie.
+ * Sets the trie error value if c is an unpaired surrogate.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param start (const UChar *, in) the start pointer for the text
+ * @param src (const UChar *, in/out) the source text pointer
+ * @param c (UChar32, out) variable for the code point
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U16_PREV(trie, dataAccess, start, src, c, result) UPRV_BLOCK_MACRO_BEGIN { \
+ (c) = *--(src); \
+ int32_t __index; \
+ if (!U16_IS_SURROGATE(c)) { \
+ __index = _UCPTRIE_FAST_INDEX(trie, c); \
+ } else { \
+ uint16_t __c2; \
+ if (U16_IS_SURROGATE_TRAIL(c) && (src) != (start) && U16_IS_LEAD(__c2 = *((src) - 1))) { \
+ --(src); \
+ (c) = U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ __index = _UCPTRIE_SMALL_INDEX(trie, c); \
+ } else { \
+ __index = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; \
+ } \
+ } \
+ (result) = dataAccess(trie, __index); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * UTF-8: Post-increments src and gets a value from the trie.
+ * Sets the trie error value for an ill-formed byte sequence.
+ *
+ * Unlike UCPTRIE_FAST_U16_NEXT() this UTF-8 macro does not provide the code point
+ * because it would be more work to do so and is often not needed.
+ * If the trie value differs from the error value, then the byte sequence is well-formed,
+ * and the code point can be assembled without revalidation.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param src (const char *, in/out) the source text pointer
+ * @param limit (const char *, in) the limit pointer for the text (must not be NULL)
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U8_NEXT(trie, dataAccess, src, limit, result) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __lead = (uint8_t)*(src)++; \
+ if (!U8_IS_SINGLE(__lead)) { \
+ uint8_t __t1, __t2, __t3; \
+ if ((src) != (limit) && \
+ (__lead >= 0xe0 ? \
+ __lead < 0xf0 ? /* U+0800..U+FFFF except surrogates */ \
+ U8_LEAD3_T1_BITS[__lead &= 0xf] & (1 << ((__t1 = *(src)) >> 5)) && \
+ ++(src) != (limit) && (__t2 = *(src) - 0x80) <= 0x3f && \
+ (__lead = ((int32_t)(trie)->index[(__lead << 6) + (__t1 & 0x3f)]) + __t2, 1) \
+ : /* U+10000..U+10FFFF */ \
+ (__lead -= 0xf0) <= 4 && \
+ U8_LEAD4_T1_BITS[(__t1 = *(src)) >> 4] & (1 << __lead) && \
+ (__lead = (__lead << 6) | (__t1 & 0x3f), ++(src) != (limit)) && \
+ (__t2 = *(src) - 0x80) <= 0x3f && \
+ ++(src) != (limit) && (__t3 = *(src) - 0x80) <= 0x3f && \
+ (__lead = __lead >= (trie)->shifted12HighStart ? \
+ (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
+ ucptrie_internalSmallU8Index((trie), __lead, __t2, __t3), 1) \
+ : /* U+0080..U+07FF */ \
+ __lead >= 0xc2 && (__t1 = *(src) - 0x80) <= 0x3f && \
+ (__lead = (int32_t)(trie)->index[__lead & 0x1f] + __t1, 1))) { \
+ ++(src); \
+ } else { \
+ __lead = (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET; /* ill-formed*/ \
+ } \
+ } \
+ (result) = dataAccess(trie, __lead); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * UTF-8: Pre-decrements src and gets a value from the trie.
+ * Sets the trie error value for an ill-formed byte sequence.
+ *
+ * Unlike UCPTRIE_FAST_U16_PREV() this UTF-8 macro does not provide the code point
+ * because it would be more work to do so and is often not needed.
+ * If the trie value differs from the error value, then the byte sequence is well-formed,
+ * and the code point can be assembled without revalidation.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param start (const char *, in) the start pointer for the text
+ * @param src (const char *, in/out) the source text pointer
+ * @param result (out) variable for the trie lookup result
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_U8_PREV(trie, dataAccess, start, src, result) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __index = (uint8_t)*--(src); \
+ if (!U8_IS_SINGLE(__index)) { \
+ __index = ucptrie_internalU8PrevIndex((trie), __index, (const uint8_t *)(start), \
+ (const uint8_t *)(src)); \
+ (src) -= __index & 7; \
+ __index >>= 3; \
+ } \
+ (result) = dataAccess(trie, __index); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Returns a trie value for an ASCII code point, without range checking.
+ *
+ * @param trie (const UCPTrie *, in) the trie (of either fast or small type)
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point; must be U+0000..U+007F
+ * @return The ASCII code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_ASCII_GET(trie, dataAccess, c) dataAccess(trie, c)
+
+/**
+ * Returns a trie value for a BMP code point (U+0000..U+FFFF), without range checking.
+ * Can be used to look up a value for a UTF-16 code unit if other parts of
+ * the string processing check for surrogates.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point, must be U+0000..U+FFFF
+ * @return The BMP code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_BMP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_FAST_INDEX(trie, c))
+
+/**
+ * Returns a trie value for a supplementary code point (U+10000..U+10FFFF),
+ * without range checking.
+ *
+ * @param trie (const UCPTrie *, in) the trie; must have type UCPTRIE_TYPE_FAST
+ * @param dataAccess UCPTRIE_16, UCPTRIE_32, or UCPTRIE_8 according to the trie’s value width
+ * @param c (UChar32, in) the input code point, must be U+10000..U+10FFFF
+ * @return The supplementary code point's trie value.
+ * @stable ICU 63
+ */
+#define UCPTRIE_FAST_SUPP_GET(trie, dataAccess, c) dataAccess(trie, _UCPTRIE_SMALL_INDEX(trie, c))
+
+/* Internal definitions ----------------------------------------------------- */
+
+#ifndef U_IN_DOXYGEN
+
+/**
+ * Internal implementation constants.
+ * These are needed for the API macros, but users should not use these directly.
+ * @internal
+ */
+enum {
+ /** @internal */
+ UCPTRIE_FAST_SHIFT = 6,
+
+ /** Number of entries in a data block for code points below the fast limit. 64=0x40 @internal */
+ UCPTRIE_FAST_DATA_BLOCK_LENGTH = 1 << UCPTRIE_FAST_SHIFT,
+
+ /** Mask for getting the lower bits for the in-fast-data-block offset. @internal */
+ UCPTRIE_FAST_DATA_MASK = UCPTRIE_FAST_DATA_BLOCK_LENGTH - 1,
+
+ /** @internal */
+ UCPTRIE_SMALL_MAX = 0xfff,
+
+ /**
+ * Offset from dataLength (to be subtracted) for fetching the
+ * value returned for out-of-range code points and ill-formed UTF-8/16.
+ * @internal
+ */
+ UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET = 1,
+ /**
+ * Offset from dataLength (to be subtracted) for fetching the
+ * value returned for code points highStart..U+10FFFF.
+ * @internal
+ */
+ UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET = 2
+};
+
+/* Internal functions and macros -------------------------------------------- */
+// Do not conditionalize with #ifndef U_HIDE_INTERNAL_API, needed for public API
+
+/** @internal */
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalSmallIndex(const UCPTrie *trie, UChar32 c);
+
+/** @internal */
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalSmallU8Index(const UCPTrie *trie, int32_t lt1, uint8_t t2, uint8_t t3);
+
+/**
+ * Internal function for part of the UCPTRIE_FAST_U8_PREVxx() macro implementations.
+ * Do not call directly.
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+ucptrie_internalU8PrevIndex(const UCPTrie *trie, UChar32 c,
+ const uint8_t *start, const uint8_t *src);
+
+/** Internal trie getter for a code point below the fast limit. Returns the data index. @internal */
+#define _UCPTRIE_FAST_INDEX(trie, c) \
+ ((int32_t)(trie)->index[(c) >> UCPTRIE_FAST_SHIFT] + ((c) & UCPTRIE_FAST_DATA_MASK))
+
+/** Internal trie getter for a code point at or above the fast limit. Returns the data index. @internal */
+#define _UCPTRIE_SMALL_INDEX(trie, c) \
+ ((c) >= (trie)->highStart ? \
+ (trie)->dataLength - UCPTRIE_HIGH_VALUE_NEG_DATA_OFFSET : \
+ ucptrie_internalSmallIndex(trie, c))
+
+/**
+ * Internal trie getter for a code point, with checking that c is in U+0000..10FFFF.
+ * Returns the data index.
+ * @internal
+ */
+#define _UCPTRIE_CP_INDEX(trie, fastMax, c) \
+ ((uint32_t)(c) <= (uint32_t)(fastMax) ? \
+ _UCPTRIE_FAST_INDEX(trie, c) : \
+ (uint32_t)(c) <= 0x10ffff ? \
+ _UCPTRIE_SMALL_INDEX(trie, c) : \
+ (trie)->dataLength - UCPTRIE_ERROR_VALUE_NEG_DATA_OFFSET)
+
+U_CDECL_END
+
+#endif // U_IN_DOXYGEN
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUCPTriePointer
+ * "Smart pointer" class, closes a UCPTrie via ucptrie_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 63
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUCPTriePointer, UCPTrie, ucptrie_close);
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+
+#endif
diff --git a/intl/icu/source/common/unicode/ucurr.h b/intl/icu/source/common/unicode/ucurr.h
new file mode 100644
index 0000000000..5589e79990
--- /dev/null
+++ b/intl/icu/source/common/unicode/ucurr.h
@@ -0,0 +1,466 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef _UCURR_H_
+#define _UCURR_H_
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Encapsulates information about a currency.
+ *
+ * The ucurr API encapsulates information about a currency, as defined by
+ * ISO 4217. A currency is represented by a 3-character string
+ * containing its ISO 4217 code. This API can return various data
+ * necessary the proper display of a currency:
+ *
+ * <ul><li>A display symbol, for a specific locale
+ * <li>The number of fraction digits to display
+ * <li>A rounding increment
+ * </ul>
+ *
+ * The <tt>DecimalFormat</tt> class uses these data to display
+ * currencies.
+ * @author Alan Liu
+ * @since ICU 2.2
+ */
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * Currency Usage used for Decimal Format
+ * @stable ICU 54
+ */
+enum UCurrencyUsage {
+ /**
+ * a setting to specify currency usage which determines currency digit
+ * and rounding for standard usage, for example: "50.00 NT$"
+ * used as DEFAULT value
+ * @stable ICU 54
+ */
+ UCURR_USAGE_STANDARD=0,
+ /**
+ * a setting to specify currency usage which determines currency digit
+ * and rounding for cash usage, for example: "50 NT$"
+ * @stable ICU 54
+ */
+ UCURR_USAGE_CASH=1,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One higher than the last enum UCurrencyUsage constant.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UCURR_USAGE_COUNT=2
+#endif // U_HIDE_DEPRECATED_API
+};
+/** Currency Usage used for Decimal Format */
+typedef enum UCurrencyUsage UCurrencyUsage;
+
+/**
+ * Finds a currency code for the given locale.
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3. If 0,
+ * currency couldn't be found or the input values are
+ * invalid.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_forLocale(const char* locale,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_getName().
+ *
+ * @see ucurr_getName
+ * @stable ICU 2.6
+ */
+typedef enum UCurrNameStyle {
+ /**
+ * Selector for ucurr_getName indicating a symbolic name for a
+ * currency, such as "$" for USD.
+ * @stable ICU 2.6
+ */
+ UCURR_SYMBOL_NAME,
+
+ /**
+ * Selector for ucurr_getName indicating the long name for a
+ * currency, such as "US Dollar" for USD.
+ * @stable ICU 2.6
+ */
+ UCURR_LONG_NAME,
+
+ /**
+ * Selector for getName() indicating the narrow currency symbol.
+ * The narrow currency symbol is similar to the regular currency
+ * symbol, but it always takes the shortest form: for example,
+ * "$" instead of "US$" for USD in en-CA.
+ *
+ * @stable ICU 61
+ */
+ UCURR_NARROW_SYMBOL_NAME,
+
+ /**
+ * Selector for getName() indicating the formal currency symbol.
+ * The formal currency symbol is similar to the regular currency
+ * symbol, but it always takes the form used in formal settings
+ * such as banking; for example, "NT$" instead of "$" for TWD in zh-TW.
+ *
+ * @stable ICU 68
+ */
+ UCURR_FORMAL_SYMBOL_NAME,
+
+ /**
+ * Selector for getName() indicating the variant currency symbol.
+ * The variant symbol for a currency is an alternative symbol
+ * that is not necessarily as widely used as the regular symbol.
+ *
+ * @stable ICU 68
+ */
+ UCURR_VARIANT_SYMBOL_NAME
+
+} UCurrNameStyle;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * @stable ICU 2.6
+ */
+typedef const void* UCurrRegistryKey;
+
+/**
+ * Register an (existing) ISO 4217 currency code for the given locale.
+ * Only the country code and the two variants EURO and PRE_EURO are
+ * recognized.
+ * @param isoCode the three-letter ISO 4217 currency code
+ * @param locale the locale for which to register this currency code
+ * @param status the in/out status code
+ * @return a registry key that can be used to unregister this currency code, or NULL
+ * if there was an error.
+ * @stable ICU 2.6
+ */
+U_CAPI UCurrRegistryKey U_EXPORT2
+ucurr_register(const UChar* isoCode,
+ const char* locale,
+ UErrorCode* status);
+/**
+ * Unregister the previously-registered currency definitions using the
+ * URegistryKey returned from ucurr_register. Key becomes invalid after
+ * a successful call and should not be used again. Any currency
+ * that might have been hidden by the original ucurr_register call is
+ * restored.
+ * @param key the registry key returned by a previous call to ucurr_register
+ * @param status the in/out status code, no special meanings are assigned
+ * @return true if the currency for this key was successfully unregistered
+ * @stable ICU 2.6
+ */
+U_CAPI UBool U_EXPORT2
+ucurr_unregister(UCurrRegistryKey key, UErrorCode* status);
+#endif /* UCONFIG_NO_SERVICE */
+
+/**
+ * Returns the display name for the given currency in the
+ * given locale. For example, the display name for the USD
+ * currency object in the en_US locale is "$".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param nameStyle selector for which kind of name to return
+ * @param isChoiceFormat always set to false, or can be NULL;
+ * display names are static strings;
+ * since ICU 4.4, ChoiceFormat patterns are no longer supported
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars. If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned.
+ * @stable ICU 2.6
+ */
+U_CAPI const UChar* U_EXPORT2
+ucurr_getName(const UChar* currency,
+ const char* locale,
+ UCurrNameStyle nameStyle,
+ UBool* isChoiceFormat,
+ int32_t* len,
+ UErrorCode* ec);
+
+/**
+ * Returns the plural name for the given currency in the
+ * given locale. For example, the plural name for the USD
+ * currency object in the en_US locale is "US dollar" or "US dollars".
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param locale locale in which to display currency
+ * @param isChoiceFormat always set to false, or can be NULL;
+ * display names are static strings;
+ * since ICU 4.4, ChoiceFormat patterns are no longer supported
+ * @param pluralCount plural count
+ * @param len fill-in parameter to receive length of result
+ * @param ec error code
+ * @return pointer to display string of 'len' UChars. If the resource
+ * data contains no entry for 'currency', then 'currency' itself is
+ * returned.
+ * @stable ICU 4.2
+ */
+U_CAPI const UChar* U_EXPORT2
+ucurr_getPluralName(const UChar* currency,
+ const char* locale,
+ UBool* isChoiceFormat,
+ const char* pluralCount,
+ int32_t* len,
+ UErrorCode* ec);
+
+/**
+ * Returns the number of the number of fraction digits that should
+ * be displayed for the given currency.
+ * This is equivalent to ucurr_getDefaultFractionDigitsForUsage(currency,UCURR_USAGE_STANDARD,ec);
+ *
+ * Important: The number of fraction digits for a given currency is NOT
+ * guaranteed to be constant across versions of ICU or CLDR. For example,
+ * do NOT use this value as a mechanism for deciding the magnitude used
+ * to store currency values in a database. You should use this value for
+ * display purposes only.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return a non-negative number of fraction digits to be
+ * displayed, or 0 if there is an error
+ * @stable ICU 3.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_getDefaultFractionDigits(const UChar* currency,
+ UErrorCode* ec);
+
+/**
+ * Returns the number of the number of fraction digits that should
+ * be displayed for the given currency with usage.
+ *
+ * Important: The number of fraction digits for a given currency is NOT
+ * guaranteed to be constant across versions of ICU or CLDR. For example,
+ * do NOT use this value as a mechanism for deciding the magnitude used
+ * to store currency values in a database. You should use this value for
+ * display purposes only.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param usage enum usage for the currency
+ * @param ec input-output error code
+ * @return a non-negative number of fraction digits to be
+ * displayed, or 0 if there is an error
+ * @stable ICU 54
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_getDefaultFractionDigitsForUsage(const UChar* currency,
+ const UCurrencyUsage usage,
+ UErrorCode* ec);
+
+/**
+ * Returns the rounding increment for the given currency, or 0.0 if no
+ * rounding is done by the currency.
+ * This is equivalent to ucurr_getRoundingIncrementForUsage(currency,UCURR_USAGE_STANDARD,ec);
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param ec input-output error code
+ * @return the non-negative rounding increment, or 0.0 if none,
+ * or 0.0 if there is an error
+ * @stable ICU 3.0
+ */
+U_CAPI double U_EXPORT2
+ucurr_getRoundingIncrement(const UChar* currency,
+ UErrorCode* ec);
+
+/**
+ * Returns the rounding increment for the given currency, or 0.0 if no
+ * rounding is done by the currency given usage.
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @param usage enum usage for the currency
+ * @param ec input-output error code
+ * @return the non-negative rounding increment, or 0.0 if none,
+ * or 0.0 if there is an error
+ * @stable ICU 54
+ */
+U_CAPI double U_EXPORT2
+ucurr_getRoundingIncrementForUsage(const UChar* currency,
+ const UCurrencyUsage usage,
+ UErrorCode* ec);
+
+/**
+ * Selector constants for ucurr_openCurrencies().
+ *
+ * @see ucurr_openCurrencies
+ * @stable ICU 3.2
+ */
+typedef enum UCurrCurrencyType {
+ /**
+ * Select all ISO-4217 currency codes.
+ * @stable ICU 3.2
+ */
+ UCURR_ALL = INT32_MAX,
+ /**
+ * Select only ISO-4217 commonly used currency codes.
+ * These currencies can be found in common use, and they usually have
+ * bank notes or coins associated with the currency code.
+ * This does not include fund codes, precious metals and other
+ * various ISO-4217 codes limited to special financial products.
+ * @stable ICU 3.2
+ */
+ UCURR_COMMON = 1,
+ /**
+ * Select ISO-4217 uncommon currency codes.
+ * These codes respresent fund codes, precious metals and other
+ * various ISO-4217 codes limited to special financial products.
+ * A fund code is a monetary resource associated with a currency.
+ * @stable ICU 3.2
+ */
+ UCURR_UNCOMMON = 2,
+ /**
+ * Select only deprecated ISO-4217 codes.
+ * These codes are no longer in general public use.
+ * @stable ICU 3.2
+ */
+ UCURR_DEPRECATED = 4,
+ /**
+ * Select only non-deprecated ISO-4217 codes.
+ * These codes are in general public use.
+ * @stable ICU 3.2
+ */
+ UCURR_NON_DEPRECATED = 8
+} UCurrCurrencyType;
+
+/**
+ * Provides a UEnumeration object for listing ISO-4217 codes.
+ * @param currType You can use one of several UCurrCurrencyType values for this
+ * variable. You can also | (or) them together to get a specific list of
+ * currencies. Most people will want to use the (UCURR_COMMON|UCURR_NON_DEPRECATED) value to
+ * get a list of current currencies.
+ * @param pErrorCode Error code
+ * @stable ICU 3.2
+ */
+U_CAPI UEnumeration * U_EXPORT2
+ucurr_openISOCurrencies(uint32_t currType, UErrorCode *pErrorCode);
+
+/**
+ * Queries if the given ISO 4217 3-letter code is available on the specified date range.
+ *
+ * Note: For checking availability of a currency on a specific date, specify the date on both 'from' and 'to'
+ *
+ * When 'from' is U_DATE_MIN and 'to' is U_DATE_MAX, this method checks if the specified currency is available any time.
+ * If 'from' and 'to' are same UDate value, this method checks if the specified currency is available on that date.
+ *
+ * @param isoCode
+ * The ISO 4217 3-letter code.
+ *
+ * @param from
+ * The lower bound of the date range, inclusive. When 'from' is U_DATE_MIN, check the availability
+ * of the currency any date before 'to'
+ *
+ * @param to
+ * The upper bound of the date range, inclusive. When 'to' is U_DATE_MAX, check the availability of
+ * the currency any date after 'from'
+ *
+ * @param errorCode
+ * ICU error code
+ *
+ * @return true if the given ISO 4217 3-letter code is supported on the specified date range.
+ *
+ * @stable ICU 4.8
+ */
+U_CAPI UBool U_EXPORT2
+ucurr_isAvailable(const UChar* isoCode,
+ UDate from,
+ UDate to,
+ UErrorCode* errorCode);
+
+/**
+ * Finds the number of valid currency codes for the
+ * given locale and date.
+ * @param locale the locale for which to retrieve the
+ * currency count.
+ * @param date the date for which to retrieve the
+ * currency count for the given locale.
+ * @param ec error code
+ * @return the number of currency codes for the
+ * given locale and date. If 0, currency
+ * codes couldn't be found for the input
+ * values are invalid.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_countCurrencies(const char* locale,
+ UDate date,
+ UErrorCode* ec);
+
+/**
+ * Finds a currency code for the given locale and date
+ * @param locale the locale for which to retrieve a currency code.
+ * Currency can be specified by the "currency" keyword
+ * in which case it overrides the default currency code
+ * @param date the date for which to retrieve a currency code for
+ * the given locale.
+ * @param index the index within the available list of currency codes
+ * for the given locale on the given date.
+ * @param buff fill in buffer. Can be NULL for preflighting.
+ * @param buffCapacity capacity of the fill in buffer. Can be 0 for
+ * preflighting. If it is non-zero, the buff parameter
+ * must not be NULL.
+ * @param ec error code
+ * @return length of the currency string. It should always be 3.
+ * If 0, currency couldn't be found or the input values are
+ * invalid.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_forLocaleAndDate(const char* locale,
+ UDate date,
+ int32_t index,
+ UChar* buff,
+ int32_t buffCapacity,
+ UErrorCode* ec);
+
+/**
+ * Given a key and a locale, returns an array of string values in a preferred
+ * order that would make a difference. These are all and only those values where
+ * the open (creation) of the service with the locale formed from the input locale
+ * plus input keyword and that value has different behavior than creation with the
+ * input locale alone.
+ * @param key one of the keys supported by this service. For now, only
+ * "currency" is supported.
+ * @param locale the locale
+ * @param commonlyUsed if set to true it will return only commonly used values
+ * with the given locale in preferred order. Otherwise,
+ * it will return all the available values for the locale.
+ * @param status error status
+ * @return a string enumeration over keyword values for the given key and the locale.
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ucurr_getKeywordValuesForLocale(const char* key,
+ const char* locale,
+ UBool commonlyUsed,
+ UErrorCode* status);
+
+/**
+ * Returns the ISO 4217 numeric code for the currency.
+ * <p>Note: If the ISO 4217 numeric code is not assigned for the currency or
+ * the currency is unknown, this function returns 0.
+ *
+ * @param currency null-terminated 3-letter ISO 4217 code
+ * @return The ISO 4217 numeric code of the currency
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+ucurr_getNumericCode(const UChar* currency);
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/intl/icu/source/common/unicode/udata.h b/intl/icu/source/common/unicode/udata.h
new file mode 100644
index 0000000000..4cda255010
--- /dev/null
+++ b/intl/icu/source/common/unicode/udata.h
@@ -0,0 +1,440 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: udata.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct25
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UDATA_H__
+#define __UDATA_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: Data loading interface
+ *
+ * <h2>Information about data loading interface</h2>
+ *
+ * This API is used to find and efficiently load data for ICU and applications
+ * using ICU. It provides an abstract interface that specifies a data type and
+ * name to find and load the data. Normally this API is used by other ICU APIs
+ * to load required data out of the ICU data library, but it can be used to
+ * load data out of other places.
+ *
+ * See the User Guide Data Management chapter.
+ */
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Character used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR '-'
+
+/**
+ * String used to separate package names from tree names
+ * @internal ICU 3.0
+ */
+#define U_TREE_SEPARATOR_STRING "-"
+
+/**
+ * Character used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_CHAR '/'
+
+/**
+ * String used to separate parts of entry names
+ * @internal ICU 3.0
+ */
+#define U_TREE_ENTRY_SEP_STRING "/"
+
+/**
+ * Alias for standard ICU data
+ * @internal ICU 3.0
+ */
+#define U_ICUDATA_ALIAS "ICUDATA"
+
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * UDataInfo contains the properties about the requested data.
+ * This is meta data.
+ *
+ * <p>This structure may grow in the future, indicated by the
+ * <code>size</code> field.</p>
+ *
+ * <p>ICU data must be at least 8-aligned, and should be 16-aligned.
+ * The UDataInfo struct begins 4 bytes after the start of the data item,
+ * so it is 4-aligned.
+ *
+ * <p>The platform data property fields help determine if a data
+ * file can be efficiently used on a given machine.
+ * The particular fields are of importance only if the data
+ * is affected by the properties - if there is integer data
+ * with word sizes > 1 byte, char* text, or UChar* text.</p>
+ *
+ * <p>The implementation for the <code>udata_open[Choice]()</code>
+ * functions may reject data based on the value in <code>isBigEndian</code>.
+ * No other field is used by the <code>udata</code> API implementation.</p>
+ *
+ * <p>The <code>dataFormat</code> may be used to identify
+ * the kind of data, e.g. a converter table.</p>
+ *
+ * <p>The <code>formatVersion</code> field should be used to
+ * make sure that the format can be interpreted.
+ * It may be a good idea to check only for the one or two highest
+ * of the version elements to allow the data memory to
+ * get more or somewhat rearranged contents, for as long
+ * as the using code can still interpret the older contents.</p>
+ *
+ * <p>The <code>dataVersion</code> field is intended to be a
+ * common place to store the source version of the data;
+ * for data from the Unicode character database, this could
+ * reflect the Unicode version.</p>
+ *
+ * @stable ICU 2.0
+ */
+typedef struct {
+ /** sizeof(UDataInfo)
+ * @stable ICU 2.0 */
+ uint16_t size;
+
+ /** unused, set to 0
+ * @stable ICU 2.0*/
+ uint16_t reservedWord;
+
+ /* platform data properties */
+ /** 0 for little-endian machine, 1 for big-endian
+ * @stable ICU 2.0 */
+ uint8_t isBigEndian;
+
+ /** see U_CHARSET_FAMILY values in utypes.h
+ * @stable ICU 2.0*/
+ uint8_t charsetFamily;
+
+ /** sizeof(UChar), one of { 1, 2, 4 }
+ * @stable ICU 2.0*/
+ uint8_t sizeofUChar;
+
+ /** unused, set to 0
+ * @stable ICU 2.0*/
+ uint8_t reservedByte;
+
+ /** data format identifier
+ * @stable ICU 2.0*/
+ uint8_t dataFormat[4];
+
+ /** versions: [0] major [1] minor [2] milli [3] micro
+ * @stable ICU 2.0*/
+ uint8_t formatVersion[4];
+
+ /** versions: [0] major [1] minor [2] milli [3] micro
+ * @stable ICU 2.0*/
+ uint8_t dataVersion[4];
+} UDataInfo;
+
+/* API for reading data -----------------------------------------------------*/
+
+/**
+ * Forward declaration of the data memory type.
+ * @stable ICU 2.0
+ */
+typedef struct UDataMemory UDataMemory;
+
+/**
+ * Callback function for udata_openChoice().
+ * @param context parameter passed into <code>udata_openChoice()</code>.
+ * @param type The type of the data as passed into <code>udata_openChoice()</code>.
+ * It may be <code>NULL</code>.
+ * @param name The name of the data as passed into <code>udata_openChoice()</code>.
+ * @param pInfo A pointer to the <code>UDataInfo</code> structure
+ * of data that has been loaded and will be returned
+ * by <code>udata_openChoice()</code> if this function
+ * returns <code>true</code>.
+ * @return true if the current data memory is acceptable
+ * @stable ICU 2.0
+ */
+typedef UBool U_CALLCONV
+UDataMemoryIsAcceptable(void *context,
+ const char *type, const char *name,
+ const UDataInfo *pInfo);
+
+
+/**
+ * Convenience function.
+ * This function works the same as <code>udata_openChoice</code>
+ * except that any data that matches the type and name
+ * is assumed to be acceptable.
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ * if an error occurs. Call <code>udata_getMemory()</code>
+ * to get a pointer to the actual data.
+ *
+ * @see udata_openChoice
+ * @stable ICU 2.0
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_open(const char *path, const char *type, const char *name,
+ UErrorCode *pErrorCode);
+
+/**
+ * Data loading function.
+ * This function is used to find and load efficiently data for
+ * ICU and applications using ICU.
+ * It provides an abstract interface that allows to specify a data
+ * type and name to find and load the data.
+ *
+ * <p>The implementation depends on platform properties and user preferences
+ * and may involve loading shared libraries (DLLs), mapping
+ * files into memory, or fopen()/fread() files.
+ * It may also involve using static memory or database queries etc.
+ * Several or all data items may be combined into one entity
+ * (DLL, memory-mappable file).</p>
+ *
+ * <p>The data is always preceded by a header that includes
+ * a <code>UDataInfo</code> structure.
+ * The caller's <code>isAcceptable()</code> function is called to make
+ * sure that the data is useful. It may be called several times if it
+ * rejects the data and there is more than one location with data
+ * matching the type and name.</p>
+ *
+ * <p>If <code>path==NULL</code>, then ICU data is loaded.
+ * Otherwise, it is separated into a basename and a basename-less directory string.
+ * The basename is used as the data package name, and the directory is
+ * logically prepended to the ICU data directory string.</p>
+ *
+ * <p>For details about ICU data loading see the User Guide
+ * Data Management chapter. (https://unicode-org.github.io/icu/userguide/icu_data/)</p>
+ *
+ * @param path Specifies an absolute path and/or a basename for the
+ * finding of the data in the file system.
+ * <code>NULL</code> for ICU data.
+ * @param type A string that specifies the type of data to be loaded.
+ * For example, resource bundles are loaded with type "res",
+ * conversion tables with type "cnv".
+ * This may be <code>NULL</code> or empty.
+ * @param name A string that specifies the name of the data.
+ * @param isAcceptable This function is called to verify that loaded data
+ * is useful for the client code. If it returns false
+ * for all data items, then <code>udata_openChoice()</code>
+ * will return with an error.
+ * @param context Arbitrary parameter to be passed into isAcceptable.
+ * @param pErrorCode An ICU UErrorCode parameter. It must not be <code>NULL</code>.
+ * @return A pointer (handle) to a data memory object, or <code>NULL</code>
+ * if an error occurs. Call <code>udata_getMemory()</code>
+ * to get a pointer to the actual data.
+ * @stable ICU 2.0
+ */
+U_CAPI UDataMemory * U_EXPORT2
+udata_openChoice(const char *path, const char *type, const char *name,
+ UDataMemoryIsAcceptable *isAcceptable, void *context,
+ UErrorCode *pErrorCode);
+
+/**
+ * Close the data memory.
+ * This function must be called to allow the system to
+ * release resources associated with this data memory.
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_close(UDataMemory *pData);
+
+/**
+ * Get the pointer to the actual data inside the data memory.
+ * The data is read-only.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ *
+ * @param pData The pointer to data memory object
+ * @stable ICU 2.0
+ */
+U_CAPI const void * U_EXPORT2
+udata_getMemory(UDataMemory *pData);
+
+/**
+ * Get the information from the data memory header.
+ * This allows to get access to the header containing
+ * platform data properties etc. which is not part of
+ * the data itself and can therefore not be accessed
+ * via the pointer that <code>udata_getMemory()</code> returns.
+ *
+ * @param pData pointer to the data memory object
+ * @param pInfo pointer to a UDataInfo object;
+ * its <code>size</code> field must be set correctly,
+ * typically to <code>sizeof(UDataInfo)</code>.
+ *
+ * <code>*pInfo</code> will be filled with the UDataInfo structure
+ * in the data memory object. If this structure is smaller than
+ * <code>pInfo->size</code>, then the <code>size</code> will be
+ * adjusted and only part of the structure will be filled.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_getInfo(UDataMemory *pData, UDataInfo *pInfo);
+
+/**
+ * This function bypasses the normal ICU data loading process and
+ * allows you to force ICU's system data to come out of a user-specified
+ * area in memory.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See https://unicode-org.github.io/icu/userguide/icu_data
+ *
+ * The format of this data is that of the icu common data file, as is
+ * generated by the pkgdata tool with mode=common or mode=dll.
+ * You can read in a whole common mode file and pass the address to the start of the
+ * data, or (with the appropriate link options) pass in the pointer to
+ * the data that has been loaded from a dll by the operating system,
+ * as shown in this code:
+ *
+ * extern const char U_IMPORT U_ICUDATA_ENTRY_POINT [];
+ * // U_ICUDATA_ENTRY_POINT is same as entry point specified to pkgdata tool
+ * UErrorCode status = U_ZERO_ERROR;
+ *
+ * udata_setCommonData(&U_ICUDATA_ENTRY_POINT, &status);
+ *
+ * It is important that the declaration be as above. The entry point
+ * must not be declared as an extern void*.
+ *
+ * Starting with ICU 4.4, it is possible to set several data packages,
+ * one per call to this function.
+ * udata_open() will look for data in the multiple data packages in the order
+ * in which they were set.
+ * The position of the linked-in or default-name ICU .data package in the
+ * search list depends on when the first data item is loaded that is not contained
+ * in the already explicitly set packages.
+ * If data was loaded implicitly before the first call to this function
+ * (for example, via opening a converter, constructing a UnicodeString
+ * from default-codepage data, using formatting or collation APIs, etc.),
+ * then the default data will be first in the list.
+ *
+ * This function has no effect on application (non ICU) data. See udata_setAppData()
+ * for similar functionality for application data.
+ *
+ * @param data pointer to ICU common data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_setCommonData(const void *data, UErrorCode *err);
+
+
+/**
+ * This function bypasses the normal ICU data loading process for application-specific
+ * data and allows you to force the it to come out of a user-specified
+ * pointer.
+ *
+ * ICU data must be at least 8-aligned, and should be 16-aligned.
+ * See https://unicode-org.github.io/icu/userguide/icu_data
+ *
+ * The format of this data is that of the icu common data file, like 'icudt26l.dat'
+ * or the corresponding shared library (DLL) file.
+ * The application must read in or otherwise construct an image of the data and then
+ * pass the address of it to this function.
+ *
+ *
+ * Warning: setAppData will set a U_USING_DEFAULT_WARNING code if
+ * data with the specified path that has already been opened, or
+ * if setAppData with the same path has already been called.
+ * Any such calls to setAppData will have no effect.
+ *
+ *
+ * @param packageName the package name by which the application will refer
+ * to (open) this data
+ * @param data pointer to the data
+ * @param err outgoing error status <code>U_USING_DEFAULT_WARNING, U_UNSUPPORTED_ERROR</code>
+ * @see udata_setCommonData
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+udata_setAppData(const char *packageName, const void *data, UErrorCode *err);
+
+/**
+ * Possible settings for udata_setFileAccess()
+ * @see udata_setFileAccess
+ * @stable ICU 3.4
+ */
+typedef enum UDataFileAccess {
+ /** ICU looks for data in single files first, then in packages. (default) @stable ICU 3.4 */
+ UDATA_FILES_FIRST,
+ /** An alias for the default access mode. @stable ICU 3.4 */
+ UDATA_DEFAULT_ACCESS = UDATA_FILES_FIRST,
+ /** ICU only loads data from packages, not from single files. @stable ICU 3.4 */
+ UDATA_ONLY_PACKAGES,
+ /** ICU loads data from packages first, and only from single files
+ if the data cannot be found in a package. @stable ICU 3.4 */
+ UDATA_PACKAGES_FIRST,
+ /** ICU does not access the file system for data loading. @stable ICU 3.4 */
+ UDATA_NO_FILES,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Number of real UDataFileAccess values.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UDATA_FILE_ACCESS_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} UDataFileAccess;
+
+/**
+ * This function may be called to control how ICU loads data. It must be called
+ * before any ICU data is loaded, including application data loaded with
+ * ures/ResourceBundle or udata APIs. This function is not multithread safe.
+ * The results of calling it while other threads are loading data are undefined.
+ * @param access The type of file access to be used
+ * @param status Error code.
+ * @see UDataFileAccess
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+udata_setFileAccess(UDataFileAccess access, UErrorCode *status);
+
+U_CDECL_END
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUDataMemoryPointer
+ * "Smart pointer" class, closes a UDataMemory via udata_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUDataMemoryPointer, UDataMemory, udata_close);
+
+U_NAMESPACE_END
+
+#endif // U_SHOW_CPLUSPLUS_API
+
+#endif
diff --git a/intl/icu/source/common/unicode/udisplaycontext.h b/intl/icu/source/common/unicode/udisplaycontext.h
new file mode 100644
index 0000000000..6e14217980
--- /dev/null
+++ b/intl/icu/source/common/unicode/udisplaycontext.h
@@ -0,0 +1,173 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*****************************************************************************************
+* Copyright (C) 2014-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*****************************************************************************************
+*/
+
+#ifndef UDISPLAYCONTEXT_H
+#define UDISPLAYCONTEXT_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * \file
+ * \brief C API: Display context types (enum values)
+ */
+
+/**
+ * Display context types, for getting values of a particular setting.
+ * Note, the specific numeric values are internal and may change.
+ * @stable ICU 51
+ */
+enum UDisplayContextType {
+ /**
+ * Type to retrieve the dialect handling setting, e.g.
+ * UDISPCTX_STANDARD_NAMES or UDISPCTX_DIALECT_NAMES.
+ * @stable ICU 51
+ */
+ UDISPCTX_TYPE_DIALECT_HANDLING = 0,
+ /**
+ * Type to retrieve the capitalization context setting, e.g.
+ * UDISPCTX_CAPITALIZATION_NONE, UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE, etc.
+ * @stable ICU 51
+ */
+ UDISPCTX_TYPE_CAPITALIZATION = 1,
+ /**
+ * Type to retrieve the display length setting, e.g.
+ * UDISPCTX_LENGTH_FULL, UDISPCTX_LENGTH_SHORT.
+ * @stable ICU 54
+ */
+ UDISPCTX_TYPE_DISPLAY_LENGTH = 2,
+ /**
+ * Type to retrieve the substitute handling setting, e.g.
+ * UDISPCTX_SUBSTITUTE, UDISPCTX_NO_SUBSTITUTE.
+ * @stable ICU 58
+ */
+ UDISPCTX_TYPE_SUBSTITUTE_HANDLING = 3
+};
+/**
+* @stable ICU 51
+*/
+typedef enum UDisplayContextType UDisplayContextType;
+
+/**
+ * Display context settings.
+ * Note, the specific numeric values are internal and may change.
+ * @stable ICU 51
+ */
+enum UDisplayContext {
+ /**
+ * ================================
+ * DIALECT_HANDLING can be set to one of UDISPCTX_STANDARD_NAMES or
+ * UDISPCTX_DIALECT_NAMES. Use UDisplayContextType UDISPCTX_TYPE_DIALECT_HANDLING
+ * to get the value.
+ */
+ /**
+ * A possible setting for DIALECT_HANDLING:
+ * use standard names when generating a locale name,
+ * e.g. en_GB displays as 'English (United Kingdom)'.
+ * @stable ICU 51
+ */
+ UDISPCTX_STANDARD_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 0,
+ /**
+ * A possible setting for DIALECT_HANDLING:
+ * use dialect names, when generating a locale name,
+ * e.g. en_GB displays as 'British English'.
+ * @stable ICU 51
+ */
+ UDISPCTX_DIALECT_NAMES = (UDISPCTX_TYPE_DIALECT_HANDLING<<8) + 1,
+ /**
+ * ================================
+ * CAPITALIZATION can be set to one of UDISPCTX_CAPITALIZATION_NONE,
+ * UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE,
+ * UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU, or
+ * UDISPCTX_CAPITALIZATION_FOR_STANDALONE.
+ * Use UDisplayContextType UDISPCTX_TYPE_CAPITALIZATION to get the value.
+ */
+ /**
+ * The capitalization context to be used is unknown (this is the default value).
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_NONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 0,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for the middle of a sentence.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_MIDDLE_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 1,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for the beginning of a sentence.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_BEGINNING_OF_SENTENCE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 2,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for a user-interface list or menu item.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_UI_LIST_OR_MENU = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 3,
+ /**
+ * The capitalization context if a date, date symbol or display name is to be
+ * formatted with capitalization appropriate for stand-alone usage such as an
+ * isolated name on a calendar page.
+ * @stable ICU 51
+ */
+ UDISPCTX_CAPITALIZATION_FOR_STANDALONE = (UDISPCTX_TYPE_CAPITALIZATION<<8) + 4,
+ /**
+ * ================================
+ * DISPLAY_LENGTH can be set to one of UDISPCTX_LENGTH_FULL or
+ * UDISPCTX_LENGTH_SHORT. Use UDisplayContextType UDISPCTX_TYPE_DISPLAY_LENGTH
+ * to get the value.
+ */
+ /**
+ * A possible setting for DISPLAY_LENGTH:
+ * use full names when generating a locale name,
+ * e.g. "United States" for US.
+ * @stable ICU 54
+ */
+ UDISPCTX_LENGTH_FULL = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 0,
+ /**
+ * A possible setting for DISPLAY_LENGTH:
+ * use short names when generating a locale name,
+ * e.g. "U.S." for US.
+ * @stable ICU 54
+ */
+ UDISPCTX_LENGTH_SHORT = (UDISPCTX_TYPE_DISPLAY_LENGTH<<8) + 1,
+ /**
+ * ================================
+ * SUBSTITUTE_HANDLING can be set to one of UDISPCTX_SUBSTITUTE or
+ * UDISPCTX_NO_SUBSTITUTE. Use UDisplayContextType UDISPCTX_TYPE_SUBSTITUTE_HANDLING
+ * to get the value.
+ */
+ /**
+ * A possible setting for SUBSTITUTE_HANDLING:
+ * Returns a fallback value (e.g., the input code) when no data is available.
+ * This is the default value.
+ * @stable ICU 58
+ */
+ UDISPCTX_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 0,
+ /**
+ * A possible setting for SUBSTITUTE_HANDLING:
+ * Returns a null value with error code set to U_ILLEGAL_ARGUMENT_ERROR when no
+ * data is available.
+ * @stable ICU 58
+ */
+ UDISPCTX_NO_SUBSTITUTE = (UDISPCTX_TYPE_SUBSTITUTE_HANDLING<<8) + 1
+
+};
+/**
+* @stable ICU 51
+*/
+typedef enum UDisplayContext UDisplayContext;
+
+#endif /* #if !UCONFIG_NO_FORMATTING */
+
+#endif
diff --git a/intl/icu/source/common/unicode/uenum.h b/intl/icu/source/common/unicode/uenum.h
new file mode 100644
index 0000000000..d9c893e06d
--- /dev/null
+++ b/intl/icu/source/common/unicode/uenum.h
@@ -0,0 +1,209 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uenum.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:2
+*
+* created on: 2002jul08
+* created by: Vladimir Weinstein
+*/
+
+#ifndef __UENUM_H
+#define __UENUM_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+
+U_NAMESPACE_BEGIN
+class StringEnumeration;
+U_NAMESPACE_END
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: String Enumeration
+ */
+
+/**
+ * An enumeration object.
+ * For usage in C programs.
+ * @stable ICU 2.2
+ */
+struct UEnumeration;
+/** structure representing an enumeration object instance @stable ICU 2.2 */
+typedef struct UEnumeration UEnumeration;
+
+/**
+ * Disposes of resources in use by the iterator. If en is NULL,
+ * does nothing. After this call, any char* or UChar* pointer
+ * returned by uenum_unext() or uenum_next() is invalid.
+ * @param en UEnumeration structure pointer
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uenum_close(UEnumeration* en);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUEnumerationPointer
+ * "Smart pointer" class, closes a UEnumeration via uenum_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUEnumerationPointer, UEnumeration, uenum_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns the number of elements that the iterator traverses. If
+ * the iterator is out-of-sync with its service, status is set to
+ * U_ENUM_OUT_OF_SYNC_ERROR.
+ * This is a convenience function. It can end up being very
+ * expensive as all the items might have to be pre-fetched (depending
+ * on the type of data being traversed). Use with caution and only
+ * when necessary.
+ * @param en UEnumeration structure pointer
+ * @param status error code, can be U_ENUM_OUT_OF_SYNC_ERROR if the
+ * iterator is out of sync.
+ * @return number of elements in the iterator
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+uenum_count(UEnumeration* en, UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a char* string,
+ * it is converted to UChar* with the invariant converter.
+ * The result is terminated by (UChar)0.
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_CAPI const UChar* U_EXPORT2
+uenum_unext(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Returns the next element in the iterator's list. If there are
+ * no more elements, returns NULL. If the iterator is out-of-sync
+ * with its service, status is set to U_ENUM_OUT_OF_SYNC_ERROR and
+ * NULL is returned. If the native service string is a UChar*
+ * string, it is converted to char* with the invariant converter.
+ * The result is terminated by (char)0. If the conversion fails
+ * (because a character cannot be converted) then status is set to
+ * U_INVARIANT_CONVERSION_ERROR and the return value is undefined
+ * (but non-NULL).
+ * @param en the iterator object
+ * @param resultLength pointer to receive the length of the result
+ * (not including the terminating \\0).
+ * If the pointer is NULL it is ignored.
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service. Set to
+ * U_INVARIANT_CONVERSION_ERROR if the underlying native string is
+ * UChar* and conversion to char* with the invariant converter
+ * fails. This error pertains only to current string, so iteration
+ * might be able to continue successfully.
+ * @return a pointer to the string. The string will be
+ * zero-terminated. The return pointer is owned by this iterator
+ * and must not be deleted by the caller. The pointer is valid
+ * until the next call to any uenum_... method, including
+ * uenum_next() or uenum_unext(). When all strings have been
+ * traversed, returns NULL.
+ * @stable ICU 2.2
+ */
+U_CAPI const char* U_EXPORT2
+uenum_next(UEnumeration* en,
+ int32_t* resultLength,
+ UErrorCode* status);
+
+/**
+ * Resets the iterator to the current list of service IDs. This
+ * re-establishes sync with the service and rewinds the iterator
+ * to start at the first element.
+ * @param en the iterator object
+ * @param status the error code, set to U_ENUM_OUT_OF_SYNC_ERROR if
+ * the iterator is out of sync with its service.
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uenum_reset(UEnumeration* en, UErrorCode* status);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Given a StringEnumeration, wrap it in a UEnumeration. The
+ * StringEnumeration is adopted; after this call, the caller must not
+ * delete it (regardless of error status).
+ * @param adopted the C++ StringEnumeration to be wrapped in a UEnumeration.
+ * @param ec the error code.
+ * @return a UEnumeration wrapping the adopted StringEnumeration.
+ * @stable ICU 4.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openFromStringEnumeration(icu::StringEnumeration* adopted, UErrorCode* ec);
+
+#endif
+
+/**
+ * Given an array of const UChar* strings, return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openUCharStringsEnumeration
+ * @param strings array of const UChar* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory.
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openUCharStringsEnumeration(const UChar* const strings[], int32_t count,
+ UErrorCode* ec);
+
+/**
+ * Given an array of const char* strings (invariant chars only), return a UEnumeration. String pointers from 0..count-1 must not be null.
+ * Do not free or modify either the string array or the characters it points to until this object has been destroyed with uenum_close.
+ * \snippet test/cintltst/uenumtst.c uenum_openCharStringsEnumeration
+ * @param strings array of char* strings (each null terminated). All storage is owned by the caller.
+ * @param count length of the array
+ * @param ec error code
+ * @return the new UEnumeration object. Caller is responsible for calling uenum_close to free memory
+ * @see uenum_close
+ * @stable ICU 50
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uenum_openCharStringsEnumeration(const char* const strings[], int32_t count,
+ UErrorCode* ec);
+
+#endif
diff --git a/intl/icu/source/common/unicode/uidna.h b/intl/icu/source/common/unicode/uidna.h
new file mode 100644
index 0000000000..24a81ceadd
--- /dev/null
+++ b/intl/icu/source/common/unicode/uidna.h
@@ -0,0 +1,776 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: uidna.h
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003feb1
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __UIDNA_H__
+#define __UIDNA_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_IDNA
+
+#include <stdbool.h>
+#include "unicode/parseerr.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Internationalizing Domain Names in Applications (IDNA)
+ *
+ * IDNA2008 is implemented according to UTS #46, see the IDNA C++ class in idna.h.
+ *
+ * The C API functions which do take a UIDNA * service object pointer
+ * implement UTS #46 and IDNA2008.
+ *
+ * IDNA2003 is obsolete.
+ * The C API functions which do not take a service object pointer
+ * implement IDNA2003. They are all deprecated.
+ */
+
+/*
+ * IDNA option bit set values.
+ */
+enum {
+ /**
+ * Default options value: None of the other options are set.
+ * For use in static worker and factory methods.
+ * @stable ICU 2.6
+ */
+ UIDNA_DEFAULT=0,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * Option to allow unassigned code points in domain names and labels.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the UTS46 implementation.
+ * (UTS #46 disallows unassigned code points.)
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+ UIDNA_ALLOW_UNASSIGNED=1,
+#endif /* U_HIDE_DEPRECATED_API */
+ /**
+ * Option to check whether the input conforms to the STD3 ASCII rules,
+ * for example the restriction of labels to LDH characters
+ * (ASCII Letters, Digits and Hyphen-Minus).
+ * For use in static worker and factory methods.
+ * @stable ICU 2.6
+ */
+ UIDNA_USE_STD3_RULES=2,
+ /**
+ * IDNA option to check for whether the input conforms to the BiDi rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (IDNA2003 always performs a BiDi check.)
+ * @stable ICU 4.6
+ */
+ UIDNA_CHECK_BIDI=4,
+ /**
+ * IDNA option to check for whether the input conforms to the CONTEXTJ rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (The CONTEXTJ check is new in IDNA2008.)
+ * @stable ICU 4.6
+ */
+ UIDNA_CHECK_CONTEXTJ=8,
+ /**
+ * IDNA option for nontransitional processing in ToASCII().
+ * For use in static worker and factory methods.
+ * <p>By default, ToASCII() uses transitional processing.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
+ * @stable ICU 4.6
+ */
+ UIDNA_NONTRANSITIONAL_TO_ASCII=0x10,
+ /**
+ * IDNA option for nontransitional processing in ToUnicode().
+ * For use in static worker and factory methods.
+ * <p>By default, ToUnicode() uses transitional processing.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (This is only relevant for compatibility of newer IDNA implementations with IDNA2003.)
+ * @stable ICU 4.6
+ */
+ UIDNA_NONTRANSITIONAL_TO_UNICODE=0x20,
+ /**
+ * IDNA option to check for whether the input conforms to the CONTEXTO rules.
+ * For use in static worker and factory methods.
+ * <p>This option is ignored by the IDNA2003 implementation.
+ * (The CONTEXTO check is new in IDNA2008.)
+ * <p>This is for use by registries for IDNA2008 conformance.
+ * UTS #46 does not require the CONTEXTO check.
+ * @stable ICU 49
+ */
+ UIDNA_CHECK_CONTEXTO=0x40
+};
+
+/**
+ * Opaque C service object type for the new IDNA API.
+ * @stable ICU 4.6
+ */
+struct UIDNA;
+typedef struct UIDNA UIDNA; /**< C typedef for struct UIDNA. @stable ICU 4.6 */
+
+/**
+ * Returns a UIDNA instance which implements UTS #46.
+ * Returns an unmodifiable instance, owned by the caller.
+ * Cache it for multiple operations, and uidna_close() it when done.
+ * The instance is thread-safe, that is, it can be used concurrently.
+ *
+ * For details about the UTS #46 implementation see the IDNA C++ class in idna.h.
+ *
+ * @param options Bit set to modify the processing and error checking.
+ * See option bit set values in uidna.h.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the UTS #46 UIDNA instance, if successful
+ * @stable ICU 4.6
+ */
+U_CAPI UIDNA * U_EXPORT2
+uidna_openUTS46(uint32_t options, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UIDNA instance.
+ * @param idna UIDNA instance to be closed
+ * @stable ICU 4.6
+ */
+U_CAPI void U_EXPORT2
+uidna_close(UIDNA *idna);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUIDNAPointer
+ * "Smart pointer" class, closes a UIDNA via uidna_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.6
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUIDNAPointer, UIDNA, uidna_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Output container for IDNA processing errors.
+ * Initialize with UIDNA_INFO_INITIALIZER:
+ * \code
+ * UIDNAInfo info = UIDNA_INFO_INITIALIZER;
+ * int32_t length = uidna_nameToASCII(..., &info, &errorCode);
+ * if(U_SUCCESS(errorCode) && info.errors!=0) { ... }
+ * \endcode
+ * @stable ICU 4.6
+ */
+typedef struct UIDNAInfo {
+ /** sizeof(UIDNAInfo) @stable ICU 4.6 */
+ int16_t size;
+ /**
+ * Set to true if transitional and nontransitional processing produce different results.
+ * For details see C++ IDNAInfo::isTransitionalDifferent().
+ * @stable ICU 4.6
+ */
+ UBool isTransitionalDifferent;
+ UBool reservedB3; /**< Reserved field, do not use. @internal */
+ /**
+ * Bit set indicating IDNA processing errors. 0 if no errors.
+ * See UIDNA_ERROR_... constants.
+ * @stable ICU 4.6
+ */
+ uint32_t errors;
+ int32_t reservedI2; /**< Reserved field, do not use. @internal */
+ int32_t reservedI3; /**< Reserved field, do not use. @internal */
+} UIDNAInfo;
+
+/**
+ * Static initializer for a UIDNAInfo struct.
+ * @stable ICU 4.6
+ */
+#define UIDNA_INFO_INITIALIZER { \
+ (int16_t)sizeof(UIDNAInfo), \
+ false, false, \
+ 0, 0, 0 }
+
+/**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * If any processing step fails, then pInfo->errors will be non-zero and
+ * the result might not be an ASCII string.
+ * The label might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToASCII(const UIDNA *idna,
+ const UChar *label, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * If any processing step fails, then pInfo->errors will be non-zero.
+ * The label might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToUnicode(const UIDNA *idna,
+ const UChar *label, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * If any processing step fails, then pInfo->errors will be non-zero and
+ * the result might not be an ASCII string.
+ * The domain name might be modified according to the types of errors.
+ * Labels with severe errors will be left in (or turned into) their Unicode form.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToASCII(const UIDNA *idna,
+ const UChar *name, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * If any processing step fails, then pInfo->errors will be non-zero.
+ * The domain name might be modified according to the types of errors.
+ *
+ * The UErrorCode indicates an error only in exceptional cases,
+ * such as a U_MEMORY_ALLOCATION_ERROR.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToUnicode(const UIDNA *idna,
+ const UChar *name, int32_t length,
+ UChar *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/* UTF-8 versions of the processing methods --------------------------------- */
+
+/**
+ * Converts a single domain name label into its ASCII form for DNS lookup.
+ * UTF-8 version of uidna_labelToASCII(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToASCII_UTF8(const UIDNA *idna,
+ const char *label, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a single domain name label into its Unicode form for human-readable display.
+ * UTF-8 version of uidna_labelToUnicode(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param label Input domain name label
+ * @param length Label length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_labelToUnicodeUTF8(const UIDNA *idna,
+ const char *label, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its ASCII form for DNS lookup.
+ * UTF-8 version of uidna_nameToASCII(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToASCII_UTF8(const UIDNA *idna,
+ const char *name, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/**
+ * Converts a whole domain name into its Unicode form for human-readable display.
+ * UTF-8 version of uidna_nameToUnicode(), same behavior.
+ *
+ * @param idna UIDNA instance
+ * @param name Input domain name
+ * @param length Domain name length, or -1 if NUL-terminated
+ * @param dest Destination string buffer
+ * @param capacity Destination buffer capacity
+ * @param pInfo Output container of IDNA processing details.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return destination string length
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+uidna_nameToUnicodeUTF8(const UIDNA *idna,
+ const char *name, int32_t length,
+ char *dest, int32_t capacity,
+ UIDNAInfo *pInfo, UErrorCode *pErrorCode);
+
+/*
+ * IDNA error bit set values.
+ * When a domain name or label fails a processing step or does not meet the
+ * validity criteria, then one or more of these error bits are set.
+ */
+enum {
+ /**
+ * A non-final domain name label (or the whole domain name) is empty.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_EMPTY_LABEL=1,
+ /**
+ * A domain name label is longer than 63 bytes.
+ * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
+ * This is only checked in ToASCII operations, and only if the output label is all-ASCII.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LABEL_TOO_LONG=2,
+ /**
+ * A domain name is longer than 255 bytes in its storage form.
+ * (See STD13/RFC1034 3.1. Name space specifications and terminology.)
+ * This is only checked in ToASCII operations, and only if the output domain name is all-ASCII.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_DOMAIN_NAME_TOO_LONG=4,
+ /**
+ * A label starts with a hyphen-minus ('-').
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LEADING_HYPHEN=8,
+ /**
+ * A label ends with a hyphen-minus ('-').
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_TRAILING_HYPHEN=0x10,
+ /**
+ * A label contains hyphen-minus ('-') in the third and fourth positions.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_HYPHEN_3_4=0x20,
+ /**
+ * A label starts with a combining mark.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LEADING_COMBINING_MARK=0x40,
+ /**
+ * A label or domain name contains disallowed characters.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_DISALLOWED=0x80,
+ /**
+ * A label starts with "xn--" but does not contain valid Punycode.
+ * That is, an xn-- label failed Punycode decoding.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_PUNYCODE=0x100,
+ /**
+ * A label contains a dot=full stop.
+ * This can occur in an input string for a single-label function.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_LABEL_HAS_DOT=0x200,
+ /**
+ * An ACE label does not contain a valid label string.
+ * The label was successfully ACE (Punycode) decoded but the resulting
+ * string had severe validation errors. For example,
+ * it might contain characters that are not allowed in ACE labels,
+ * or it might not be normalized.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_INVALID_ACE_LABEL=0x400,
+ /**
+ * A label does not meet the IDNA BiDi requirements (for right-to-left characters).
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_BIDI=0x800,
+ /**
+ * A label does not meet the IDNA CONTEXTJ requirements.
+ * @stable ICU 4.6
+ */
+ UIDNA_ERROR_CONTEXTJ=0x1000,
+ /**
+ * A label does not meet the IDNA CONTEXTO requirements for punctuation characters.
+ * Some punctuation characters "Would otherwise have been DISALLOWED"
+ * but are allowed in certain contexts. (RFC 5892)
+ * @stable ICU 49
+ */
+ UIDNA_ERROR_CONTEXTO_PUNCTUATION=0x2000,
+ /**
+ * A label does not meet the IDNA CONTEXTO requirements for digits.
+ * Arabic-Indic Digits (U+066x) must not be mixed with Extended Arabic-Indic Digits (U+06Fx).
+ * @stable ICU 49
+ */
+ UIDNA_ERROR_CONTEXTO_DIGITS=0x4000
+};
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/* IDNA2003 API ------------------------------------------------------------- */
+
+/**
+ * IDNA2003: This function implements the ToASCII operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * ASCII names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
+ *
+ * IDNA2003 API Overview:
+ *
+ * The uidna_ API implements the IDNA protocol as defined in the IDNA RFC
+ * (http://www.ietf.org/rfc/rfc3490.txt).
+ * The RFC defines 2 operations: ToASCII and ToUnicode. Domain name labels
+ * containing non-ASCII code points are processed by the
+ * ToASCII operation before passing it to resolver libraries. Domain names
+ * that are obtained from resolver libraries are processed by the
+ * ToUnicode operation before displaying the domain name to the user.
+ * IDNA requires that implementations process input strings with Nameprep
+ * (http://www.ietf.org/rfc/rfc3491.txt),
+ * which is a profile of Stringprep (http://www.ietf.org/rfc/rfc3454.txt),
+ * and then with Punycode (http://www.ietf.org/rfc/rfc3492.txt).
+ * Implementations of IDNA MUST fully implement Nameprep and Punycode;
+ * neither Nameprep nor Punycode are optional.
+ * The input and output of ToASCII and ToUnicode operations are Unicode
+ * and are designed to be chainable, i.e., applying ToASCII or ToUnicode operations
+ * multiple times to an input string will yield the same result as applying the operation
+ * once.
+ * ToUnicode(ToUnicode(ToUnicode...(ToUnicode(string)))) == ToUnicode(string)
+ * ToASCII(ToASCII(ToASCII...(ToASCII(string))) == ToASCII(string).
+ *
+ * @param src Input UChar array containing label in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_toASCII(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * IDNA2003: This function implements the ToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on <b>single labels</b> before sending it to something that expects
+ * Unicode names. A label is an individual part of a domain name. Labels are usually
+ * separated by dots; for e.g. "www.example.com" is composed of 3 labels "www","example", and "com".
+ *
+ * @param src Input UChar array containing ASCII (ACE encoded) label.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output Converted UChar array containing Unicode equivalent of label.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_ERROR error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points. <b> Note: </b> This option is
+ * required on toUnicode operation because the RFC mandates
+ * verification of decoded ACE input by applying toASCII and comparing
+ * its output with source
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_toUnicode(const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+
+/**
+ * IDNA2003: Convenience function that implements the IDNToASCII operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ * It is important to note that this operation can fail. If it fails, then the input
+ * domain name cannot be used as an Internationalized Domain Name and the application
+ * should have methods defined to deal with the failure.
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in Unicode.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array with ASCII (ACE encoded) IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_IDNToASCII( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * IDNA2003: Convenience function that implements the IDNToUnicode operation as defined in the IDNA RFC.
+ * This operation is done on complete domain names, e.g: "www.example.com".
+ *
+ * <b>Note:</b> IDNA RFC specifies that a conformant application should divide a domain name
+ * into separate labels, decide whether to apply allowUnassigned and useSTD3ASCIIRules on each,
+ * and then convert. This function does not offer that level of granularity. The options once
+ * set will apply to all labels in the domain name
+ *
+ * @param src Input UChar array containing IDN in ASCII (ACE encoded) form.
+ * @param srcLength Number of UChars in src, or -1 if NUL-terminated.
+ * @param dest Output UChar array containing Unicode equivalent of source IDN.
+ * @param destCapacity Size of dest.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The length of the result string, if successful - or in case of a buffer overflow,
+ * in which case it will be greater than destCapacity.
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_IDNToUnicode( const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status);
+
+/**
+ * IDNA2003: Compare two IDN strings for equivalence.
+ * This function splits the domain names into labels and compares them.
+ * According to IDN RFC, whenever two labels are compared, they are
+ * considered equal if and only if their ASCII forms (obtained by
+ * applying toASCII) match using an case-insensitive ASCII comparison.
+ * Two domain names are considered a match if and only if all labels
+ * match regardless of whether label separators match.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ * @param options A bit set of options:
+ *
+ * - UIDNA_DEFAULT Use default options, i.e., do not process unassigned code points
+ * and do not use STD3 ASCII rules
+ * If unassigned code points are found the operation fails with
+ * U_UNASSIGNED_CODE_POINT_FOUND error code.
+ *
+ * - UIDNA_ALLOW_UNASSIGNED Unassigned values can be converted to ASCII for query operations
+ * If this option is set, the unassigned code points are in the input
+ * are treated as normal Unicode code points.
+ *
+ * - UIDNA_USE_STD3_RULES Use STD3 ASCII rules for host name syntax restrictions
+ * If this option is set and the input does not satisfy STD3 rules,
+ * the operation will fail with U_IDNA_STD3_ASCII_RULES_ERROR
+ *
+ * @param status ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ * @deprecated ICU 55 Use UTS #46 instead via uidna_openUTS46() or class IDNA.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+uidna_compare( const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ int32_t options,
+ UErrorCode* status);
+
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/intl/icu/source/common/unicode/uiter.h b/intl/icu/source/common/unicode/uiter.h
new file mode 100644
index 0000000000..be232c774d
--- /dev/null
+++ b/intl/icu/source/common/unicode/uiter.h
@@ -0,0 +1,709 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2011 International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uiter.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jan18
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITER_H__
+#define __UITER_H__
+
+/**
+ * \file
+ * \brief C API: Unicode Character Iteration
+ *
+ * @see UCharIterator
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+ U_NAMESPACE_BEGIN
+
+ class CharacterIterator;
+ class Replaceable;
+
+ U_NAMESPACE_END
+#endif
+
+U_CDECL_BEGIN
+
+struct UCharIterator;
+typedef struct UCharIterator UCharIterator; /**< C typedef for struct UCharIterator. @stable ICU 2.1 */
+
+/**
+ * Origin constants for UCharIterator.getIndex() and UCharIterator.move().
+ * @see UCharIteratorMove
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef enum UCharIteratorOrigin {
+ UITER_START, UITER_CURRENT, UITER_LIMIT, UITER_ZERO, UITER_LENGTH
+} UCharIteratorOrigin;
+
+/** Constants for UCharIterator. @stable ICU 2.6 */
+enum {
+ /**
+ * Constant value that may be returned by UCharIteratorMove
+ * indicating that the final UTF-16 index is not known, but that the move succeeded.
+ * This can occur when moving relative to limit or length, or
+ * when moving relative to the current index after a setState()
+ * when the current UTF-16 index is not known.
+ *
+ * It would be very inefficient to have to count from the beginning of the text
+ * just to get the current/limit/length index after moving relative to it.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ *
+ * @stable ICU 2.6
+ */
+ UITER_UNKNOWN_INDEX=-2
+};
+
+
+/**
+ * Constant for UCharIterator getState() indicating an error or
+ * an unknown state.
+ * Returned by uiter_getState()/UCharIteratorGetState
+ * when an error occurs.
+ * Also, some UCharIterator implementations may not be able to return
+ * a valid state for each position. This will be clearly documented
+ * for each such iterator (none of the public ones here).
+ *
+ * @stable ICU 2.6
+ */
+#define UITER_NO_STATE ((uint32_t)0xffffffff)
+
+/**
+ * Function type declaration for UCharIterator.getIndex().
+ *
+ * Gets the current position, or the start or limit of the
+ * iteration range.
+ *
+ * This function may perform slowly for UITER_CURRENT after setState() was called,
+ * or for UITER_LENGTH, because an iterator implementation may have to count
+ * UChars if the underlying storage is not UTF-16.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param origin get the 0, start, limit, length, or current index
+ * @return the requested index, or U_SENTINEL in an error condition
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorGetIndex(UCharIterator *iter, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.move().
+ *
+ * Use iter->move(iter, index, UITER_ZERO) like CharacterIterator::setIndex(index).
+ *
+ * Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ * Out of bounds movement will be pinned to the start or limit.
+ *
+ * This function may perform slowly for moving relative to UITER_LENGTH
+ * because an iterator implementation may have to count the rest of the
+ * UChars if the native storage is not UTF-16.
+ *
+ * When moving relative to the limit or length, or
+ * relative to the current position after setState() was called,
+ * move() may return UITER_UNKNOWN_INDEX (-2) to avoid an inefficient
+ * determination of the actual UTF-16 index.
+ * The actual index can be determined with getIndex(UITER_CURRENT)
+ * which will count the UChars if necessary.
+ * See UITER_UNKNOWN_INDEX for details.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param delta can be positive, zero, or negative
+ * @param origin move relative to the 0, start, limit, length, or current index
+ * @return the new index, or U_SENTINEL on an error condition,
+ * or UITER_UNKNOWN_INDEX when the index is not known.
+ *
+ * @see UCharIteratorOrigin
+ * @see UCharIterator
+ * @see UITER_UNKNOWN_INDEX
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorMove(UCharIterator *iter, int32_t delta, UCharIteratorOrigin origin);
+
+/**
+ * Function type declaration for UCharIterator.hasNext().
+ *
+ * Check if current() and next() can still
+ * return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether current() and next() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.hasPrevious().
+ *
+ * Check if previous() can still return another code unit.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return boolean value for whether previous() can still return another code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UBool U_CALLCONV
+UCharIteratorHasPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.current().
+ *
+ * Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorCurrent(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.next().
+ *
+ * Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code unit (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorNext(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.previous().
+ *
+ * Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code unit (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef UChar32 U_CALLCONV
+UCharIteratorPrevious(UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.reservedFn().
+ * Reserved for future use.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param something some integer argument
+ * @return some integer
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+typedef int32_t U_CALLCONV
+UCharIteratorReserved(UCharIterator *iter, int32_t something);
+
+/**
+ * Function type declaration for UCharIterator.getState().
+ *
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * It is recommended that the state value be calculated to be as small as
+ * is feasible. For strings with limited lengths, fewer than 32 bits may
+ * be sufficient.
+ *
+ * This is used together with setState()/UCharIteratorSetState
+ * to save and restore the iterator position more efficiently than with
+ * getIndex()/move().
+ *
+ * The iterator state is defined as a uint32_t value because it is designed
+ * for use in ucol_nextSortKeyPart() which provides 32 bits to store the state
+ * of the character iterator.
+ *
+ * With some UCharIterator implementations (e.g., UTF-8),
+ * getting and setting the UTF-16 index with existing functions
+ * (getIndex(UITER_CURRENT) followed by move(pos, UITER_ZERO)) is possible but
+ * relatively slow because the iterator has to "walk" from a known index
+ * to the requested one.
+ * This takes more time the farther it needs to go.
+ *
+ * An opaque state value allows an iterator implementation to provide
+ * an internal index (UTF-8: the source byte array index) for
+ * fast, constant-time restoration.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+typedef uint32_t U_CALLCONV
+UCharIteratorGetState(const UCharIterator *iter);
+
+/**
+ * Function type declaration for UCharIterator.setState().
+ *
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * The iterator object need not be the same one as for which getState() was called,
+ * but it must be of the same type (set up using the same uiter_setXYZ function)
+ * and it must iterate over the same string
+ * (binary identical regardless of memory address).
+ * For more about the state word see UCharIteratorGetState.
+ *
+ * After calling setState(), a getIndex(UITER_CURRENT) may be slow because
+ * the UTF-16 index may not be restored as well, but the iterator can deliver
+ * the correct text contents and move relative to the current position
+ * without performance degradation.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @stable ICU 2.6
+ */
+typedef void U_CALLCONV
+UCharIteratorSetState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+
+/**
+ * C API for code unit iteration.
+ * This can be used as a C wrapper around
+ * CharacterIterator, Replaceable, or implemented using simple strings, etc.
+ *
+ * There are two roles for using UCharIterator:
+ *
+ * A "provider" sets the necessary function pointers and controls the "protected"
+ * fields of the UCharIterator structure. A "provider" passes a UCharIterator
+ * into C APIs that need a UCharIterator as an abstract, flexible string interface.
+ *
+ * Implementations of such C APIs are "callers" of UCharIterator functions;
+ * they only use the "public" function pointers and never access the "protected"
+ * fields directly.
+ *
+ * The current() and next() functions only check the current index against the
+ * limit, and previous() only checks the current index against the start,
+ * to see if the iterator already reached the end of the iteration range.
+ *
+ * The assumption - in all iterators - is that the index is moved via the API,
+ * which means it won't go out of bounds, or the index is modified by
+ * user code that knows enough about the iterator implementation to set valid
+ * index values.
+ *
+ * UCharIterator functions return code unit values 0..0xffff,
+ * or U_SENTINEL if the iteration bounds are reached.
+ *
+ * @stable ICU 2.1
+ */
+struct UCharIterator {
+ /**
+ * (protected) Pointer to string or wrapped object or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ const void *context;
+
+ /**
+ * (protected) Length of string or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t length;
+
+ /**
+ * (protected) Start index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t start;
+
+ /**
+ * (protected) Current index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t index;
+
+ /**
+ * (protected) Limit index or similar.
+ * Not used by caller.
+ * @stable ICU 2.1
+ */
+ int32_t limit;
+
+ /**
+ * (protected) Used by UTF-8 iterators and possibly others.
+ * @stable ICU 2.1
+ */
+ int32_t reservedField;
+
+ /**
+ * (public) Returns the current position or the
+ * start or limit index of the iteration range.
+ *
+ * @see UCharIteratorGetIndex
+ * @stable ICU 2.1
+ */
+ UCharIteratorGetIndex *getIndex;
+
+ /**
+ * (public) Moves the current position relative to the start or limit of the
+ * iteration range, or relative to the current position itself.
+ * The movement is expressed in numbers of code units forward
+ * or backward by specifying a positive or negative delta.
+ *
+ * @see UCharIteratorMove
+ * @stable ICU 2.1
+ */
+ UCharIteratorMove *move;
+
+ /**
+ * (public) Check if current() and next() can still
+ * return another code unit.
+ *
+ * @see UCharIteratorHasNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasNext *hasNext;
+
+ /**
+ * (public) Check if previous() can still return another code unit.
+ *
+ * @see UCharIteratorHasPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorHasPrevious *hasPrevious;
+
+ /**
+ * (public) Return the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorCurrent
+ * @stable ICU 2.1
+ */
+ UCharIteratorCurrent *current;
+
+ /**
+ * (public) Return the code unit at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @see UCharIteratorNext
+ * @stable ICU 2.1
+ */
+ UCharIteratorNext *next;
+
+ /**
+ * (public) Decrement the index and return the code unit from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @see UCharIteratorPrevious
+ * @stable ICU 2.1
+ */
+ UCharIteratorPrevious *previous;
+
+ /**
+ * (public) Reserved for future use. Currently NULL.
+ *
+ * @see UCharIteratorReserved
+ * @stable ICU 2.1
+ */
+ UCharIteratorReserved *reservedFn;
+
+ /**
+ * (public) Return the state of the iterator, to be restored later with setState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorGet
+ * @stable ICU 2.6
+ */
+ UCharIteratorGetState *getState;
+
+ /**
+ * (public) Restore the iterator state from the state word from a call
+ * to getState().
+ * This function pointer is NULL if the iterator does not implement it.
+ *
+ * @see UCharIteratorSet
+ * @stable ICU 2.6
+ */
+ UCharIteratorSetState *setState;
+};
+
+/**
+ * Helper function for UCharIterator to get the code point
+ * at the current index.
+ *
+ * Return the code point that includes the code unit at the current position,
+ * or U_SENTINEL if there is none (index is at the limit).
+ * If the current code unit is a lead or trail surrogate,
+ * then the following or preceding surrogate is used to form
+ * the code point value.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point
+ *
+ * @see UCharIterator
+ * @see U16_GET
+ * @see UnicodeString::char32At()
+ * @stable ICU 2.1
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_current32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the next code point.
+ *
+ * Return the code point at the current index and increment
+ * the index (post-increment, like s[i++]),
+ * or return U_SENTINEL if there is none (index is at the limit).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the current code point (and post-increment the current index)
+ *
+ * @see UCharIterator
+ * @see U16_NEXT
+ * @stable ICU 2.1
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_next32(UCharIterator *iter);
+
+/**
+ * Helper function for UCharIterator to get the previous code point.
+ *
+ * Decrement the index and return the code point from there
+ * (pre-decrement, like s[--i]),
+ * or return U_SENTINEL if there is none (index is at the start).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the previous code point (after pre-decrementing the current index)
+ *
+ * @see UCharIterator
+ * @see U16_PREV
+ * @stable ICU 2.1
+ */
+U_CAPI UChar32 U_EXPORT2
+uiter_previous32(UCharIterator *iter);
+
+/**
+ * Get the "state" of the iterator in the form of a single 32-bit word.
+ * This is a convenience function that calls iter->getState(iter)
+ * if iter->getState is not NULL;
+ * if it is NULL or any other error occurs, then UITER_NO_STATE is returned.
+ *
+ * Some UCharIterator implementations may not be able to return
+ * a valid state for each position, in which case they return UITER_NO_STATE instead.
+ * This will be clearly documented for each such iterator (none of the public ones here).
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @return the state word
+ *
+ * @see UCharIterator
+ * @see UCharIteratorGetState
+ * @see UITER_NO_STATE
+ * @stable ICU 2.6
+ */
+U_CAPI uint32_t U_EXPORT2
+uiter_getState(const UCharIterator *iter);
+
+/**
+ * Restore the "state" of the iterator using a state word from a getState() call.
+ * This is a convenience function that calls iter->setState(iter, state, pErrorCode)
+ * if iter->setState is not NULL; if it is NULL, then U_UNSUPPORTED_ERROR is set.
+ *
+ * @param iter the UCharIterator structure ("this pointer")
+ * @param state the state word from a getState() call
+ * on a same-type, same-string iterator
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @see UCharIterator
+ * @see UCharIteratorSetState
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uiter_setState(UCharIterator *iter, uint32_t state, UErrorCode *pErrorCode);
+
+/**
+ * Set up a UCharIterator to iterate over a string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the string s
+ * with iteration boundaries start=index=0 and length=limit=string length.
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length.
+ * The length field will be ignored.
+ *
+ * The string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s String to iterate over
+ * @param length Length of s, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+uiter_setString(UCharIterator *iter, const UChar *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-16BE string
+ * (byte vector with a big-endian pair of bytes per UChar).
+ *
+ * Everything works just like with a normal UChar iterator (uiter_setString),
+ * except that UChars are assembled from byte pairs,
+ * and that the length argument here indicates an even number of bytes.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-16BE string to iterate over
+ * @param length Length of s as an even number of bytes, or -1 if NUL-terminated
+ * (NUL means pair of 0 bytes at even index from s)
+ *
+ * @see UCharIterator
+ * @see uiter_setString
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uiter_setUTF16BE(UCharIterator *iter, const char *s, int32_t length);
+
+/**
+ * Set up a UCharIterator to iterate over a UTF-8 string.
+ *
+ * Sets the UCharIterator function pointers for iteration over the UTF-8 string s
+ * with UTF-8 iteration boundaries 0 and length.
+ * The implementation counts the UTF-16 index on the fly and
+ * lazily evaluates the UTF-16 length of the text.
+ *
+ * The start field is used as the UTF-8 offset, the limit field as the UTF-8 length.
+ * When the reservedField is not 0, then it contains a supplementary code point
+ * and the UTF-16 index is between the two corresponding surrogates.
+ * At that point, the UTF-8 index is behind that code point.
+ *
+ * The UTF-8 string pointer s is set into UCharIterator.context without copying
+ * or reallocating the string contents.
+ *
+ * getState() returns a state value consisting of
+ * - the current UTF-8 source byte index (bits 31..1)
+ * - a flag (bit 0) that indicates whether the UChar position is in the middle
+ * of a surrogate pair
+ * (from a 4-byte UTF-8 sequence for the corresponding supplementary code point)
+ *
+ * getState() cannot also encode the UTF-16 index in the state value.
+ * move(relative to limit or length), or
+ * move(relative to current) after setState(), may return UITER_UNKNOWN_INDEX.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param s UTF-8 string to iterate over
+ * @param length Length of s in bytes, or -1 if NUL-terminated
+ *
+ * @see UCharIterator
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uiter_setUTF8(UCharIterator *iter, const char *s, int32_t length);
+
+#if U_SHOW_CPLUSPLUS_API
+
+/**
+ * Set up a UCharIterator to wrap around a C++ CharacterIterator.
+ *
+ * Sets the UCharIterator function pointers for iteration using the
+ * CharacterIterator charIter.
+ *
+ * The CharacterIterator pointer charIter is set into UCharIterator.context
+ * without copying or cloning the CharacterIterator object.
+ * The other "protected" UCharIterator fields are set to 0 and will be ignored.
+ * The iteration index and boundaries are controlled by the CharacterIterator.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param charIter CharacterIterator to wrap
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+uiter_setCharacterIterator(UCharIterator *iter, icu::CharacterIterator *charIter);
+
+/**
+ * Set up a UCharIterator to iterate over a C++ Replaceable.
+ *
+ * Sets the UCharIterator function pointers for iteration over the
+ * Replaceable rep with iteration boundaries start=index=0 and
+ * length=limit=rep->length().
+ * The "provider" may set the start, index, and limit values at any time
+ * within the range 0..length=rep->length().
+ * The length field will be ignored.
+ *
+ * The Replaceable pointer rep is set into UCharIterator.context without copying
+ * or cloning/reallocating the Replaceable object.
+ *
+ * getState() simply returns the current index.
+ * move() will always return the final index.
+ *
+ * @param iter UCharIterator structure to be set for iteration
+ * @param rep Replaceable to iterate over
+ *
+ * @see UCharIterator
+ * @stable ICU 2.1
+ */
+U_CAPI void U_EXPORT2
+uiter_setReplaceable(UCharIterator *iter, const icu::Replaceable *rep);
+
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/uldnames.h b/intl/icu/source/common/unicode/uldnames.h
new file mode 100644
index 0000000000..47b047ece9
--- /dev/null
+++ b/intl/icu/source/common/unicode/uldnames.h
@@ -0,0 +1,307 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2016, International Business Machines Corporation and
+* others. All Rights Reserved.
+*******************************************************************************
+*/
+
+#ifndef __ULDNAMES_H__
+#define __ULDNAMES_H__
+
+/**
+ * \file
+ * \brief C API: Provides display names of Locale ids and their components.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/uscript.h"
+#include "unicode/udisplaycontext.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * Enum used in LocaleDisplayNames::createInstance.
+ * @stable ICU 4.4
+ */
+typedef enum {
+ /**
+ * Use standard names when generating a locale name,
+ * e.g. en_GB displays as 'English (United Kingdom)'.
+ * @stable ICU 4.4
+ */
+ ULDN_STANDARD_NAMES = 0,
+ /**
+ * Use dialect names, when generating a locale name,
+ * e.g. en_GB displays as 'British English'.
+ * @stable ICU 4.4
+ */
+ ULDN_DIALECT_NAMES
+} UDialectHandling;
+
+/**
+ * Opaque C service object type for the locale display names API
+ * @stable ICU 4.4
+ */
+struct ULocaleDisplayNames;
+
+/**
+ * C typedef for struct ULocaleDisplayNames.
+ * @stable ICU 4.4
+ */
+typedef struct ULocaleDisplayNames ULocaleDisplayNames;
+
+#if !UCONFIG_NO_FORMATTING
+
+/**
+ * Returns an instance of LocaleDisplayNames that returns names
+ * formatted for the provided locale, using the provided
+ * dialectHandling. The usual value for dialectHandling is
+ * ULOC_STANDARD_NAMES.
+ *
+ * @param locale the display locale
+ * @param dialectHandling how to select names for locales
+ * @return a ULocaleDisplayNames instance
+ * @param pErrorCode the status code
+ * @stable ICU 4.4
+ */
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_open(const char * locale,
+ UDialectHandling dialectHandling,
+ UErrorCode *pErrorCode);
+
+/**
+ * Closes a ULocaleDisplayNames instance obtained from uldn_open().
+ * @param ldn the ULocaleDisplayNames instance to be closed
+ * @stable ICU 4.4
+ */
+U_CAPI void U_EXPORT2
+uldn_close(ULocaleDisplayNames *ldn);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalULocaleDisplayNamesPointer
+ * "Smart pointer" class, closes a ULocaleDisplayNames via uldn_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalULocaleDisplayNamesPointer, ULocaleDisplayNames, uldn_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/* getters for state */
+
+/**
+ * Returns the locale used to determine the display names. This is
+ * not necessarily the same locale passed to {@link #uldn_open}.
+ * @param ldn the LocaleDisplayNames instance
+ * @return the display locale
+ * @stable ICU 4.4
+ */
+U_CAPI const char * U_EXPORT2
+uldn_getLocale(const ULocaleDisplayNames *ldn);
+
+/**
+ * Returns the dialect handling used in the display names.
+ * @param ldn the LocaleDisplayNames instance
+ * @return the dialect handling enum
+ * @stable ICU 4.4
+ */
+U_CAPI UDialectHandling U_EXPORT2
+uldn_getDialectHandling(const ULocaleDisplayNames *ldn);
+
+/* names for entire locales */
+
+/**
+ * Returns the display name of the provided locale.
+ * @param ldn the LocaleDisplayNames instance
+ * @param locale the locale whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_localeDisplayName(const ULocaleDisplayNames *ldn,
+ const char *locale,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/* names for components of a locale */
+
+/**
+ * Returns the display name of the provided language code.
+ * @param ldn the LocaleDisplayNames instance
+ * @param lang the language code whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_languageDisplayName(const ULocaleDisplayNames *ldn,
+ const char *lang,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided script.
+ * @param ldn the LocaleDisplayNames instance
+ * @param script the script whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_scriptDisplayName(const ULocaleDisplayNames *ldn,
+ const char *script,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided script code.
+ * @param ldn the LocaleDisplayNames instance
+ * @param scriptCode the script code whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_scriptCodeDisplayName(const ULocaleDisplayNames *ldn,
+ UScriptCode scriptCode,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided region code.
+ * @param ldn the LocaleDisplayNames instance
+ * @param region the region code whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_regionDisplayName(const ULocaleDisplayNames *ldn,
+ const char *region,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided variant
+ * @param ldn the LocaleDisplayNames instance
+ * @param variant the variant whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_variantDisplayName(const ULocaleDisplayNames *ldn,
+ const char *variant,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided locale key
+ * @param ldn the LocaleDisplayNames instance
+ * @param key the locale key whose display name to return
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_keyDisplayName(const ULocaleDisplayNames *ldn,
+ const char *key,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the display name of the provided value (used with the provided key).
+ * @param ldn the LocaleDisplayNames instance
+ * @param key the locale key
+ * @param value the locale key's value
+ * @param result receives the display name
+ * @param maxResultSize the size of the result buffer
+ * @param pErrorCode the status code
+ * @return the actual buffer size needed for the display name. If it's
+ * greater than maxResultSize, the returned name will be truncated.
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+uldn_keyValueDisplayName(const ULocaleDisplayNames *ldn,
+ const char *key,
+ const char *value,
+ UChar *result,
+ int32_t maxResultSize,
+ UErrorCode *pErrorCode);
+
+/**
+* Returns an instance of LocaleDisplayNames that returns names formatted
+* for the provided locale, using the provided UDisplayContext settings.
+*
+* @param locale The display locale
+* @param contexts List of one or more context settings (e.g. for dialect
+* handling, capitalization, etc.
+* @param length Number of items in the contexts list
+* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
+* a failure status, the function will do nothing; otherwise this will be
+* updated with any new status from the function.
+* @return a ULocaleDisplayNames instance
+* @stable ICU 51
+*/
+U_CAPI ULocaleDisplayNames * U_EXPORT2
+uldn_openForContext(const char * locale, UDisplayContext *contexts,
+ int32_t length, UErrorCode *pErrorCode);
+
+/**
+* Returns the UDisplayContext value for the specified UDisplayContextType.
+* @param ldn the ULocaleDisplayNames instance
+* @param type the UDisplayContextType whose value to return
+* @param pErrorCode Pointer to UErrorCode input/output status. If at entry this indicates
+* a failure status, the function will do nothing; otherwise this will be
+* updated with any new status from the function.
+* @return the UDisplayContextValue for the specified type.
+* @stable ICU 51
+*/
+U_CAPI UDisplayContext U_EXPORT2
+uldn_getContext(const ULocaleDisplayNames *ldn, UDisplayContextType type,
+ UErrorCode *pErrorCode);
+
+#endif /* !UCONFIG_NO_FORMATTING */
+#endif /* __ULDNAMES_H__ */
diff --git a/intl/icu/source/common/unicode/uloc.h b/intl/icu/source/common/unicode/uloc.h
new file mode 100644
index 0000000000..21179c1b62
--- /dev/null
+++ b/intl/icu/source/common/unicode/uloc.h
@@ -0,0 +1,1393 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ULOC.H
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 08/22/98 stephen JDK 1.2 sync.
+* 12/08/98 rtg New C API for Locale
+* 03/30/99 damiba overhaul
+* 03/31/99 helena Javadoc for uloc functions.
+* 04/15/99 Madhu Updated Javadoc
+********************************************************************************
+*/
+
+#ifndef ULOC_H
+#define ULOC_H
+
+#include "unicode/utypes.h"
+#include "unicode/uenum.h"
+
+/**
+ * \file
+ * \brief C API: Locale ID functionality similar to C++ class Locale
+ *
+ * <h2> ULoc C API for Locale </h2>
+ * A <code>Locale</code> represents a specific geographical, political,
+ * or cultural region. An operation that requires a <code>Locale</code> to perform
+ * its task is called <em>locale-sensitive</em> and uses the <code>Locale</code>
+ * to tailor information for the user. For example, displaying a number
+ * is a locale-sensitive operation--the number should be formatted
+ * according to the customs/conventions of the user's native country,
+ * region, or culture. In the C APIs, a locales is simply a const char string.
+ *
+ * <P>
+ * You create a <code>Locale</code> with one of the three options listed below.
+ * Each of the component is separated by '_' in the locale string.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * newLanguage
+ *
+ * newLanguage + newCountry
+ *
+ * newLanguage + newCountry + newVariant
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * The first option is a valid <STRONG>ISO
+ * Language Code.</STRONG> These codes are the lower-case two-letter
+ * codes as defined by ISO-639.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href ="http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt">
+ * http://www.ics.uci.edu/pub/ietf/http/related/iso639.txt</a>
+ *
+ * <P>
+ * The second option includes an additional <STRONG>ISO Country
+ * Code.</STRONG> These codes are the upper-case two-letter codes
+ * as defined by ISO-3166.
+ * You can find a full list of these codes at a number of sites, such as:
+ * <BR><a href="http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html">
+ * http://www.chemie.fu-berlin.de/diverse/doc/ISO_3166.html</a>
+ *
+ * <P>
+ * The third option requires another additional information--the
+ * <STRONG>Variant.</STRONG>
+ * The Variant codes are vendor and browser-specific.
+ * For example, use WIN for Windows, MAC for Macintosh, and POSIX for POSIX.
+ * Where there are two variants, separate them with an underscore, and
+ * put the most important one first. For
+ * example, a Traditional Spanish collation might be referenced, with
+ * "ES", "ES", "Traditional_WIN".
+ *
+ * <P>
+ * Because a <code>Locale</code> is just an identifier for a region,
+ * no validity check is performed when you specify a <code>Locale</code>.
+ * If you want to see whether particular resources are available for the
+ * <code>Locale</code> you asked for, you must query those resources. For
+ * example, ask the <code>UNumberFormat</code> for the locales it supports
+ * using its <code>getAvailable</code> method.
+ * <BR><STRONG>Note:</STRONG> When you ask for a resource for a particular
+ * locale, you get back the best available match, not necessarily
+ * precisely what you asked for. For more information, look at
+ * <code>UResourceBundle</code>.
+ *
+ * <P>
+ * The <code>Locale</code> provides a number of convenient constants
+ * that you can use to specify the commonly used
+ * locales. For example, the following refers to a locale
+ * for the United States:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * ULOC_US
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <P>
+ * Once you've specified a locale you can query it for information about
+ * itself. Use <code>uloc_getCountry</code> to get the ISO Country Code and
+ * <code>uloc_getLanguage</code> to get the ISO Language Code. You can
+ * use <code>uloc_getDisplayCountry</code> to get the
+ * name of the country suitable for displaying to the user. Similarly,
+ * you can use <code>uloc_getDisplayLanguage</code> to get the name of
+ * the language suitable for displaying to the user. Interestingly,
+ * the <code>uloc_getDisplayXXX</code> methods are themselves locale-sensitive
+ * and have two versions: one that uses the default locale and one
+ * that takes a locale as an argument and displays the name or country in
+ * a language appropriate to that locale.
+ *
+ * <P>
+ * The ICU provides a number of services that perform locale-sensitive
+ * operations. For example, the <code>unum_xxx</code> functions format
+ * numbers, currency, or percentages in a locale-sensitive manner.
+ * </P>
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * UErrorCode success = U_ZERO_ERROR;
+ * UNumberFormat *nf;
+ * const char* myLocale = "fr_FR";
+ *
+ * nf = unum_open( UNUM_DEFAULT, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, NULL, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, NULL, success );
+ * unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * Each of these methods has two variants; one with an explicit locale
+ * and one without; the latter using the default locale.
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ *
+ * nf = unum_open( UNUM_DEFAULT, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_CURRENCY, myLocale, success );
+ * unum_close(nf);
+ * nf = unum_open( UNUM_PERCENT, myLocale, success );
+ * unum_close(nf);
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * A <code>Locale</code> is the mechanism for identifying the kind of services
+ * (<code>UNumberFormat</code>) that you would like to get. The locale is
+ * <STRONG>just</STRONG> a mechanism for identifying these services.
+ *
+ * <P>
+ * Each international service that performs locale-sensitive operations
+ * allows you
+ * to get all the available objects of that type. You can sift
+ * through these objects by language, country, or variant,
+ * and use the display names to present a menu to the user.
+ * For example, you can create a menu of all the collation objects
+ * suitable for a given language. Such classes implement these
+ * three class methods:
+ * \htmlonly<blockquote>\endhtmlonly
+ * <pre>
+ * \code
+ * const char* uloc_getAvailable(int32_t index);
+ * int32_t uloc_countAvailable();
+ * int32_t
+ * uloc_getDisplayName(const char* localeID,
+ * const char* inLocaleID,
+ * UChar* result,
+ * int32_t maxResultSize,
+ * UErrorCode* err);
+ *
+ * \endcode
+ * </pre>
+ * \htmlonly</blockquote>\endhtmlonly
+ * <P>
+ * Concerning POSIX/RFC1766 Locale IDs,
+ * the getLanguage/getCountry/getVariant/getName functions do understand
+ * the POSIX type form of language_COUNTRY.ENCODING\@VARIANT
+ * and if there is not an ICU-stype variant, uloc_getVariant() for example
+ * will return the one listed after the \@at sign. As well, the hyphen
+ * "-" is recognized as a country/variant separator similarly to RFC1766.
+ * So for example, "en-us" will be interpreted as en_US.
+ * As a result, uloc_getName() is far from a no-op, and will have the
+ * effect of converting POSIX/RFC1766 IDs into ICU form, although it does
+ * NOT map any of the actual codes (i.e. russian->ru) in any way.
+ * Applications should call uloc_getName() at the point where a locale ID
+ * is coming from an external source (user entry, OS, web browser)
+ * and pass the resulting string to other ICU functions. For example,
+ * don't use de-de\@EURO as an argument to resourcebundle.
+ *
+ * @see UResourceBundle
+ */
+
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_CHINESE "zh"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ENGLISH "en"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_FRENCH "fr"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_GERMAN "de"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_ITALIAN "it"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_JAPANESE "ja"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_KOREAN "ko"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_SIMPLIFIED_CHINESE "zh_CN"
+/** Useful constant for this language. @stable ICU 2.0 */
+#define ULOC_TRADITIONAL_CHINESE "zh_TW"
+
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA "en_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CANADA_FRENCH "fr_CA"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_CHINA "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_PRC "zh_CN"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_FRANCE "fr_FR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_GERMANY "de_DE"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_ITALY "it_IT"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_JAPAN "ja_JP"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_KOREA "ko_KR"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_TAIWAN "zh_TW"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_UK "en_GB"
+/** Useful constant for this country/region. @stable ICU 2.0 */
+#define ULOC_US "en_US"
+
+/**
+ * Useful constant for the maximum size of the language part of a locale ID.
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_LANG_CAPACITY 12
+
+/**
+ * Useful constant for the maximum size of the country part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.0
+ */
+#define ULOC_COUNTRY_CAPACITY 4
+/**
+ * Useful constant for the maximum size of the whole locale ID
+ * (including the terminating NULL and all keywords).
+ * @stable ICU 2.0
+ */
+#define ULOC_FULLNAME_CAPACITY 157
+
+/**
+ * Useful constant for the maximum size of the script part of a locale ID
+ * (including the terminating NULL).
+ * @stable ICU 2.8
+ */
+#define ULOC_SCRIPT_CAPACITY 6
+
+/**
+ * Useful constant for the maximum size of keywords in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORDS_CAPACITY 96
+
+/**
+ * Useful constant for the maximum total size of keywords and their values in a locale
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_AND_VALUES_CAPACITY 100
+
+/**
+ * Invariant character separating keywords from the locale string
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_SEPARATOR '@'
+
+/**
+ * Unicode code point for '@' separating keywords from the locale string.
+ * @see ULOC_KEYWORD_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_SEPARATOR_UNICODE 0x40
+
+/**
+ * Invariant character for assigning value to a keyword
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ASSIGN '='
+
+/**
+ * Unicode code point for '=' for assigning value to a keyword.
+ * @see ULOC_KEYWORD_ASSIGN
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ASSIGN_UNICODE 0x3D
+
+/**
+ * Invariant character separating keywords
+ * @stable ICU 2.8
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR ';'
+
+/**
+ * Unicode code point for ';' separating keywords
+ * @see ULOC_KEYWORD_ITEM_SEPARATOR
+ * @stable ICU 4.6
+ */
+#define ULOC_KEYWORD_ITEM_SEPARATOR_UNICODE 0x3B
+
+/**
+ * Constants for *_getLocale()
+ * Allow user to select whether she wants information on
+ * requested, valid or actual locale.
+ * For example, a collator for "en_US_CALIFORNIA" was
+ * requested. In the current state of ICU (2.0),
+ * the requested locale is "en_US_CALIFORNIA",
+ * the valid locale is "en_US" (most specific locale supported by ICU)
+ * and the actual locale is "root" (the collation data comes unmodified
+ * from the UCA)
+ * The locale is considered supported by ICU if there is a core ICU bundle
+ * for that locale (although it may be empty).
+ * @stable ICU 2.1
+ */
+typedef enum {
+ /** This is locale the data actually comes from
+ * @stable ICU 2.1
+ */
+ ULOC_ACTUAL_LOCALE = 0,
+ /** This is the most specific locale supported by ICU
+ * @stable ICU 2.1
+ */
+ ULOC_VALID_LOCALE = 1,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /** This is the requested locale
+ * @deprecated ICU 2.8
+ */
+ ULOC_REQUESTED_LOCALE = 2,
+
+ /**
+ * One more than the highest normal ULocDataLocaleType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ ULOC_DATA_LOCALE_TYPE_LIMIT = 3
+#endif // U_HIDE_DEPRECATED_API
+} ULocDataLocaleType;
+
+#ifndef U_HIDE_SYSTEM_API
+/**
+ * Gets ICU's default locale.
+ * The returned string is a snapshot in time, and will remain valid
+ * and unchanged even when uloc_setDefault() is called.
+ * The returned storage is owned by ICU, and must not be altered or deleted
+ * by the caller.
+ *
+ * @return the ICU default locale
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getDefault(void);
+
+/**
+ * Sets ICU's default locale.
+ * By default (without calling this function), ICU's default locale will be based
+ * on information obtained from the underlying system environment.
+ * <p>
+ * Changes to ICU's default locale do not propagate back to the
+ * system environment.
+ * <p>
+ * Changes to ICU's default locale to not affect any ICU services that
+ * may already be open based on the previous default locale value.
+ *
+ * @param localeID the new ICU default locale. A value of NULL will try to get
+ * the system's default locale.
+ * @param status the error information if the setting of default locale fails
+ * @system
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+uloc_setDefault(const char* localeID,
+ UErrorCode* status);
+#endif /* U_HIDE_SYSTEM_API */
+
+/**
+ * Gets the language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param language the language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than languageCapacity, the returned language code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getLanguage(const char* localeID,
+ char* language,
+ int32_t languageCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the script code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @param script the language code for localeID
+ * @param scriptCapacity the size of the language buffer to store the
+ * language code with
+ * @param err error information if retrieving the language code failed
+ * @return the actual buffer size needed for the language code. If it's greater
+ * than scriptCapacity, the returned language code will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getScript(const char* localeID,
+ char* script,
+ int32_t scriptCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the country code for the specified locale.
+ *
+ * @param localeID the locale to get the country code with
+ * @param country the country code for localeID
+ * @param countryCapacity the size of the country buffer to store the
+ * country code with
+ * @param err error information if retrieving the country code failed
+ * @return the actual buffer size needed for the country code. If it's greater
+ * than countryCapacity, the returned country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getCountry(const char* localeID,
+ char* country,
+ int32_t countryCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the variant code for the specified locale.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param variant the variant code for localeID
+ * @param variantCapacity the size of the variant buffer to store the
+ * variant code with
+ * @param err error information if retrieving the variant code failed
+ * @return the actual buffer size needed for the variant code. If it's greater
+ * than variantCapacity, the returned variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getVariant(const char* localeID,
+ char* variant,
+ int32_t variantCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the ICU locale ID to
+ * a certain extent. Upper and lower case are set as needed.
+ * It does NOT map aliased names in any way.
+ * See the top of this header file.
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the full name for the specified locale.
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name the full name for localeID
+ * @param nameCapacity the size of the name buffer to store the
+ * full name with
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_canonicalize(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets the ISO language code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO language code with
+ * @return language the ISO language code for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getISO3Language(const char* localeID);
+
+
+/**
+ * Gets the ISO country code for the specified locale.
+ *
+ * @param localeID the locale to get the ISO country code with
+ * @return country the ISO country code for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getISO3Country(const char* localeID);
+
+/**
+ * Gets the Win32 LCID value for the specified locale.
+ * If the ICU locale is not recognized by Windows, 0 will be returned.
+ *
+ * LCIDs were deprecated with Windows Vista and Microsoft recommends
+ * that developers use BCP47 style tags instead (uloc_toLanguageTag).
+ *
+ * @param localeID the locale to get the Win32 LCID value with
+ * @return country the Win32 LCID for localeID
+ * @stable ICU 2.0
+ */
+U_CAPI uint32_t U_EXPORT2
+uloc_getLCID(const char* localeID);
+
+/**
+ * Gets the language name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the ISO language code with
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "Anglais",
+ * while passing Locale::getGerman() for inLocale would result
+ * in "Englisch".
+ * @param language the displayable language code for localeID
+ * @param languageCapacity the size of the language buffer to store the
+ * displayable language code with.
+ * @param status error information if retrieving the displayable language code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * language code is placed into language as fallback.
+ * @return the actual buffer size needed for the displayable language code. If
+ * it's greater than languageCapacity, the returned language
+ * code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayLanguage(const char* locale,
+ const char* displayLocale,
+ UChar* language,
+ int32_t languageCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the script name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable script code with. NULL may be
+ * used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "", while
+ * passing Locale::getGerman() for inLocale would result in "".
+ * NULL may be used to specify the default.
+ * @param script the displayable script for the localeID.
+ * @param scriptCapacity the size of the script buffer to store the displayable
+ * script code with.
+ * @param status error information if retrieving the displayable script code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * script code is placed into script as fallback.
+ * @return the actual buffer size needed for the displayable script code. If
+ * it's greater than scriptCapacity, the returned displayable
+ * script code will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayScript(const char* locale,
+ const char* displayLocale,
+ UChar* script,
+ int32_t scriptCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the country name suitable for display for the specified locale.
+ * Warning: this is for the region part of a valid locale ID; it cannot just be
+ * the region code (like "FR"). To get the display name for a region alone, or
+ * for other options, use ULocaleDisplayNames instead.
+ *
+ * @param locale the locale to get the displayable country code with. NULL may
+ * be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "Anglais",
+ * while passing Locale::getGerman() for inLocale would result
+ * in "Englisch". NULL may be used to specify the default.
+ * @param country the displayable country code for localeID.
+ * @param countryCapacity the size of the country buffer to store the
+ * displayable country code with.
+ * @param status error information if retrieving the displayable country code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * country code is placed into country as fallback.
+ * @return the actual buffer size needed for the displayable country code. If
+ * it's greater than countryCapacity, the returned displayable
+ * country code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayCountry(const char* locale,
+ const char* displayLocale,
+ UChar* country,
+ int32_t countryCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Gets the variant name suitable for display for the specified locale.
+ *
+ * @param locale the locale to get the displayable variant code with. NULL may
+ * be used to specify the default.
+ * @param displayLocale Specifies the locale to be used to display the name. In
+ * other words, if the locale's language code is "en", passing
+ * Locale::getFrench() for inLocale would result in "Anglais",
+ * while passing Locale::getGerman() for inLocale would result
+ * in "Englisch". NULL may be used to specify the default.
+ * @param variant the displayable variant code for localeID.
+ * @param variantCapacity the size of the variant buffer to store the
+ * displayable variant code with.
+ * @param status error information if retrieving the displayable variant code
+ * failed. U_USING_DEFAULT_WARNING indicates that no data was
+ * found from the locale resources and a case canonicalized
+ * variant code is placed into variant as fallback.
+ * @return the actual buffer size needed for the displayable variant code. If
+ * it's greater than variantCapacity, the returned displayable
+ * variant code will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayVariant(const char* locale,
+ const char* displayLocale,
+ UChar* variant,
+ int32_t variantCapacity,
+ UErrorCode* status);
+
+/**
+ * Gets the keyword name suitable for display for the specified locale. E.g:
+ * for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for the keyword collation.
+ * Usage:
+ * <code>
+ * UErrorCode status = U_ZERO_ERROR;
+ * const char* keyword =NULL;
+ * int32_t keywordLen = 0;
+ * int32_t keywordCount = 0;
+ * UChar displayKeyword[256];
+ * int32_t displayKeywordLen = 0;
+ * UEnumeration* keywordEnum = uloc_openKeywords("de_DE@collation=PHONEBOOK;calendar=TRADITIONAL", &status);
+ * for(keywordCount = uenum_count(keywordEnum, &status); keywordCount > 0 ; keywordCount--){
+ * if(U_FAILURE(status)){
+ * ...something went wrong so handle the error...
+ * break;
+ * }
+ * // the uenum_next returns NUL terminated string
+ * keyword = uenum_next(keywordEnum, &keywordLen, &status);
+ * displayKeywordLen = uloc_getDisplayKeyword(keyword, "en_US", displayKeyword, 256);
+ * ... do something interesting .....
+ * }
+ * uenum_close(keywordEnum);
+ * </code>
+ * @param keyword The keyword whose display string needs to be returned.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and should not indicate failure on entry.
+ * U_USING_DEFAULT_WARNING indicates that no data was found from the locale
+ * resources and the keyword is placed into dest as fallback.
+ * @return the actual buffer size needed for the displayable variant code.
+ * @see #uloc_openKeywords
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeyword(const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the value of the keyword suitable for display for the specified locale.
+ * E.g: for the locale string de_DE\@collation=PHONEBOOK, this API gets the display
+ * string for PHONEBOOK, in the display locale, when "collation" is specified as the keyword.
+ *
+ * @param locale The locale to get the displayable variant code with. NULL may be used to specify the default.
+ * @param keyword The keyword for whose value should be used.
+ * @param displayLocale Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param dest the buffer to which the displayable keyword should be written.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param status error information if retrieving the displayable string failed.
+ * Should not be NULL and must not indicate failure on entry.
+ * U_USING_DEFAULT_WARNING indicates that no data was found from the locale
+ * resources and the value of the keyword is placed into dest as fallback.
+ * @return the actual buffer size needed for the displayable variant code.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayKeywordValue( const char* locale,
+ const char* keyword,
+ const char* displayLocale,
+ UChar* dest,
+ int32_t destCapacity,
+ UErrorCode* status);
+/**
+ * Gets the full name suitable for display for the specified locale.
+ *
+ * @param localeID the locale to get the displayable name with. NULL may be used to specify the default.
+ * @param inLocaleID Specifies the locale to be used to display the name. In other words,
+ * if the locale's language code is "en", passing Locale::getFrench() for
+ * inLocale would result in "Anglais", while passing Locale::getGerman()
+ * for inLocale would result in "Englisch". NULL may be used to specify the default.
+ * @param result the displayable name for localeID
+ * @param maxResultSize the size of the name buffer to store the
+ * displayable full name with
+ * @param err error information if retrieving the displayable name failed
+ * @return the actual buffer size needed for the displayable name. If it's greater
+ * than maxResultSize, the returned displayable name will be truncated.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getDisplayName(const char* localeID,
+ const char* inLocaleID,
+ UChar* result,
+ int32_t maxResultSize,
+ UErrorCode* err);
+
+
+/**
+ * Gets the specified locale from a list of available locales.
+ *
+ * This method corresponds to uloc_openAvailableByType called with the
+ * ULOC_AVAILABLE_DEFAULT type argument.
+ *
+ * The return value is a pointer to an item of a locale name array. Both this
+ * array and the pointers it contains are owned by ICU and should not be
+ * deleted or written through by the caller. The locale name is terminated by
+ * a null pointer.
+ *
+ * @param n the specific locale name index of the available locale list;
+ * should not exceed the number returned by uloc_countAvailable.
+ * @return a specified locale name of all available locales
+ * @stable ICU 2.0
+ */
+U_CAPI const char* U_EXPORT2
+uloc_getAvailable(int32_t n);
+
+/**
+ * Gets the size of the all available locale list.
+ *
+ * @return the size of the locale list
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2 uloc_countAvailable(void);
+
+/**
+ * Types for uloc_getAvailableByType and uloc_countAvailableByType.
+ *
+ * @stable ICU 65
+ */
+typedef enum ULocAvailableType {
+ /**
+ * Locales that return data when passed to ICU APIs,
+ * but not including legacy or alias locales.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_DEFAULT,
+
+ /**
+ * Legacy or alias locales that return data when passed to ICU APIs.
+ * Examples of supported legacy or alias locales:
+ *
+ * - iw (alias to he)
+ * - mo (alias to ro)
+ * - zh_CN (alias to zh_Hans_CN)
+ * - sr_BA (alias to sr_Cyrl_BA)
+ * - ars (alias to ar_SA)
+ *
+ * The locales in this set are disjoint from the ones in
+ * ULOC_AVAILABLE_DEFAULT. To get both sets at the same time, use
+ * ULOC_AVAILABLE_WITH_LEGACY_ALIASES.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_ONLY_LEGACY_ALIASES,
+
+ /**
+ * The union of the locales in ULOC_AVAILABLE_DEFAULT and
+ * ULOC_AVAILABLE_ONLY_LEGACY_ALIAS.
+ *
+ * @stable ICU 65
+ */
+ ULOC_AVAILABLE_WITH_LEGACY_ALIASES,
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ ULOC_AVAILABLE_COUNT
+#endif /* U_HIDE_INTERNAL_API */
+} ULocAvailableType;
+
+/**
+ * Gets a list of available locales according to the type argument, allowing
+ * the user to access different sets of supported locales in ICU.
+ *
+ * The returned UEnumeration must be closed by the caller.
+ *
+ * @param type Type choice from ULocAvailableType.
+ * @param status Set if an error occurred.
+ * @return a UEnumeration owned by the caller, or nullptr on failure.
+ * @stable ICU 65
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openAvailableByType(ULocAvailableType type, UErrorCode* status);
+
+/**
+ *
+ * Gets a list of all available 2-letter language codes defined in ISO 639,
+ * plus additional 3-letter codes determined to be useful for locale generation as
+ * defined by Unicode CLDR. This is a pointer
+ * to an array of pointers to arrays of char. All of these pointers are owned
+ * by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available language codes
+ * @stable ICU 2.0
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOLanguages(void);
+
+/**
+ *
+ * Gets a list of all available 2-letter country codes defined in ISO 639. This is a
+ * pointer to an array of pointers to arrays of char. All of these pointers are
+ * owned by ICU-- do not delete them, and do not write through them. The array is
+ * terminated with a null pointer.
+ * @return a list of all available country codes
+ * @stable ICU 2.0
+ */
+U_CAPI const char* const* U_EXPORT2
+uloc_getISOCountries(void);
+
+/**
+ * Truncate the locale ID string to get the parent locale ID.
+ * Copies the part of the string before the last underscore.
+ * The parent locale ID will be an empty string if there is no
+ * underscore, or if there is only one underscore at localeID[0].
+ *
+ * @param localeID Input locale ID string.
+ * @param parent Output string buffer for the parent locale ID.
+ * @param parentCapacity Size of the output buffer.
+ * @param err A UErrorCode value.
+ * @return The length of the parent locale ID.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getParent(const char* localeID,
+ char* parent,
+ int32_t parentCapacity,
+ UErrorCode* err);
+
+
+
+
+/**
+ * Gets the full name for the specified locale, like uloc_getName(),
+ * but without keywords.
+ *
+ * Note: This has the effect of 'canonicalizing' the string to
+ * a certain extent. Upper and lower case are set as needed,
+ * and if the components were in 'POSIX' format they are changed to
+ * ICU format. It does NOT map aliased names in any way.
+ * See the top of this header file.
+ *
+ * This API strips off the keyword part, so "de_DE\@collation=phonebook"
+ * will become "de_DE".
+ * This API supports preflighting.
+ *
+ * @param localeID the locale to get the full name with
+ * @param name fill in buffer for the name without keywords.
+ * @param nameCapacity capacity of the fill in buffer.
+ * @param err error information if retrieving the full name failed
+ * @return the actual buffer size needed for the full name. If it's greater
+ * than nameCapacity, the returned full name will be truncated.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getBaseName(const char* localeID,
+ char* name,
+ int32_t nameCapacity,
+ UErrorCode* err);
+
+/**
+ * Gets an enumeration of keywords for the specified locale. Enumeration
+ * must get disposed of by the client using uenum_close function.
+ *
+ * @param localeID the locale to get the variant code with
+ * @param status error information if retrieving the keywords failed
+ * @return enumeration of keywords or NULL if there are no keywords.
+ * @stable ICU 2.8
+ */
+U_CAPI UEnumeration* U_EXPORT2
+uloc_openKeywords(const char* localeID,
+ UErrorCode* status);
+
+/**
+ * Get the value for a keyword. Locale name does not need to be normalized.
+ *
+ * @param localeID locale name containing the keyword ("de_DE@currency=EURO;collation=PHONEBOOK")
+ * @param keywordName name of the keyword for which we want the value; must not be
+ * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
+ * @param buffer receiving buffer
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code: e.g. buffer not big enough or ill-formed localeID
+ * or keywordName parameters.
+ * @return the length of keyword value
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getKeywordValue(const char* localeID,
+ const char* keywordName,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+
+/**
+ * Sets or removes the value of the specified keyword.
+ *
+ * For removing all keywords, use uloc_getBaseName().
+ *
+ * NOTE: Unlike almost every other ICU function which takes a
+ * buffer, this function will NOT truncate the output text, and will
+ * not update the buffer with unterminated text setting a status of
+ * U_STRING_NOT_TERMINATED_WARNING. If a BUFFER_OVERFLOW_ERROR is received,
+ * it means a terminated version of the updated locale ID would not fit
+ * in the buffer, and the original buffer is untouched. This is done to
+ * prevent incorrect or possibly even malformed locales from being generated
+ * and used.
+ *
+ * @param keywordName name of the keyword to be set; must not be
+ * NULL or empty, and must consist only of [A-Za-z0-9]. Case insensitive.
+ * @param keywordValue value of the keyword to be set. If 0-length or
+ * NULL, will result in the keyword being removed; no error is given if
+ * that keyword does not exist. Otherwise, must consist only of
+ * [A-Za-z0-9] and [/_+-].
+ * @param buffer input buffer containing well-formed locale ID to be
+ * modified.
+ * @param bufferCapacity capacity of receiving buffer
+ * @param status containing error code: e.g. buffer not big enough
+ * or ill-formed keywordName or keywordValue parameters, or ill-formed
+ * locale ID in buffer on input.
+ * @return the length needed for the buffer
+ * @see uloc_getKeywordValue
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_setKeywordValue(const char* keywordName,
+ const char* keywordValue,
+ char* buffer, int32_t bufferCapacity,
+ UErrorCode* status);
+
+/**
+ * Returns whether the locale's script is written right-to-left.
+ * If there is no script subtag, then the likely script is used, see uloc_addLikelySubtags().
+ * If no likely script is known, then false is returned.
+ *
+ * A script is right-to-left according to the CLDR script metadata
+ * which corresponds to whether the script's letters have Bidi_Class=R or AL.
+ *
+ * Returns true for "ar" and "en-Hebr", false for "zh" and "fa-Cyrl".
+ *
+ * @param locale input locale ID
+ * @return true if the locale's script is written right-to-left
+ * @stable ICU 54
+ */
+U_CAPI UBool U_EXPORT2
+uloc_isRightToLeft(const char *locale);
+
+/**
+ * enums for the return value for the character and line orientation
+ * functions.
+ * @stable ICU 4.0
+ */
+typedef enum {
+ ULOC_LAYOUT_LTR = 0, /* left-to-right. */
+ ULOC_LAYOUT_RTL = 1, /* right-to-left. */
+ ULOC_LAYOUT_TTB = 2, /* top-to-bottom. */
+ ULOC_LAYOUT_BTT = 3, /* bottom-to-top. */
+ ULOC_LAYOUT_UNKNOWN
+} ULayoutType;
+
+/**
+ * Get the layout character orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for characters.
+ * @stable ICU 4.0
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getCharacterOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Get the layout line orientation for the specified locale.
+ *
+ * @param localeId locale name
+ * @param status Error status
+ * @return an enum indicating the layout orientation for lines.
+ * @stable ICU 4.0
+ */
+U_CAPI ULayoutType U_EXPORT2
+uloc_getLineOrientation(const char* localeId,
+ UErrorCode *status);
+
+/**
+ * Output values which uloc_acceptLanguage() writes to the 'outResult' parameter.
+ *
+ * @see uloc_acceptLanguageFromHTTP
+ * @see uloc_acceptLanguage
+ * @stable ICU 3.2
+ */
+typedef enum {
+ /**
+ * No exact match was found.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_FAILED = 0,
+ /**
+ * An exact match was found.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_VALID = 1,
+ /**
+ * A fallback was found. For example, the Accept-Language list includes 'ja_JP'
+ * and is matched with available locale 'ja'.
+ * @stable ICU 3.2
+ */
+ ULOC_ACCEPT_FALLBACK = 2 /* */
+} UAcceptResult;
+
+/**
+ * Based on a HTTP header from a web browser and a list of available locales,
+ * determine an acceptable locale for the user.
+ *
+ * This is a thin wrapper over C++ class LocaleMatcher.
+ *
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param httpAcceptLanguage - "Accept-Language:" header as per HTTP.
+ * @param availableLocales - list of available locales to match
+ * @param status ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguageFromHTTP(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult,
+ const char *httpAcceptLanguage,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+/**
+ * Based on a list of available locales,
+ * determine an acceptable locale for the user.
+ *
+ * This is a thin wrapper over C++ class LocaleMatcher.
+ *
+ * @param result - buffer to accept the result locale
+ * @param resultAvailable the size of the result buffer.
+ * @param outResult - An out parameter that contains the fallback status
+ * @param acceptList - list of acceptable languages
+ * @param acceptListCount - count of acceptList items
+ * @param availableLocales - list of available locales to match
+ * @param status ICU error code. Its input value must pass the U_SUCCESS() test,
+ * or else the function returns immediately. Check for U_FAILURE()
+ * on output or use with function chaining. (See User Guide for details.)
+ * @return length needed for the locale.
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_acceptLanguage(char *result, int32_t resultAvailable,
+ UAcceptResult *outResult, const char **acceptList,
+ int32_t acceptListCount,
+ UEnumeration* availableLocales,
+ UErrorCode *status);
+
+
+/**
+ * Gets the ICU locale ID for the specified Win32 LCID value.
+ *
+ * @param hostID the Win32 LCID to translate
+ * @param locale the output buffer for the ICU locale ID, which will be NUL-terminated
+ * if there is room.
+ * @param localeCapacity the size of the output buffer
+ * @param status an error is returned if the LCID is unrecognized or the output buffer
+ * is too small
+ * @return actual the actual size of the locale ID, not including NUL-termination
+ * @stable ICU 3.8
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_getLocaleForLCID(uint32_t hostID, char *locale, int32_t localeCapacity,
+ UErrorCode *status);
+
+
+/**
+ * Add the likely subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the maximal form, or there is no data available
+ * for maximization, it will be copied to the output buffer. For example,
+ * "und-Zzzz" cannot be maximized, since there is no reasonable maximization.
+ *
+ * Examples:
+ *
+ * "en" maximizes to "en_Latn_US"
+ *
+ * "de" maximizes to "de_Latn_US"
+ *
+ * "sr" maximizes to "sr_Cyrl_RS"
+ *
+ * "sh" maximizes to "sr_Latn_RS" (Note this will not reverse.)
+ *
+ * "zh_Hani" maximizes to "zh_Hans_CN" (Note this will not reverse.)
+ *
+ * @param localeID The locale to maximize
+ * @param maximizedLocaleID The maximized locale
+ * @param maximizedLocaleIDCapacity The capacity of the maximizedLocaleID buffer
+ * @param err Error information if maximizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the maximized locale. If it's
+ * greater than maximizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_addLikelySubtags(const char* localeID,
+ char* maximizedLocaleID,
+ int32_t maximizedLocaleIDCapacity,
+ UErrorCode* err);
+
+
+/**
+ * Minimize the subtags for a provided locale ID, per the algorithm described
+ * in the following CLDR technical report:
+ *
+ * http://www.unicode.org/reports/tr35/#Likely_Subtags
+ *
+ * If localeID is already in the minimal form, or there is no data available
+ * for minimization, it will be copied to the output buffer. Since the
+ * minimization algorithm relies on proper maximization, see the comments
+ * for uloc_addLikelySubtags for reasons why there might not be any data.
+ *
+ * Examples:
+ *
+ * "en_Latn_US" minimizes to "en"
+ *
+ * "de_Latn_US" minimizes to "de"
+ *
+ * "sr_Cyrl_RS" minimizes to "sr"
+ *
+ * "zh_Hant_TW" minimizes to "zh_TW" (The region is preferred to the
+ * script, and minimizing to "zh" would imply "zh_Hans_CN".)
+ *
+ * @param localeID The locale to minimize
+ * @param minimizedLocaleID The minimized locale
+ * @param minimizedLocaleIDCapacity The capacity of the minimizedLocaleID buffer
+ * @param err Error information if minimizing the locale failed. If the length
+ * of the localeID and the null-terminator is greater than the maximum allowed size,
+ * or the localeId is not well-formed, the error code is U_ILLEGAL_ARGUMENT_ERROR.
+ * @return The actual buffer size needed for the minimized locale. If it's
+ * greater than minimizedLocaleIDCapacity, the returned ID will be truncated.
+ * On error, the return value is -1.
+ * @stable ICU 4.0
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_minimizeSubtags(const char* localeID,
+ char* minimizedLocaleID,
+ int32_t minimizedLocaleIDCapacity,
+ UErrorCode* err);
+
+/**
+ * Returns a locale ID for the specified BCP47 language tag string.
+ * If the specified language tag contains any ill-formed subtags,
+ * the first such subtag and all following subtags are ignored.
+ * <p>
+ * This implements the 'Language-Tag' production of BCP 47, and so
+ * supports legacy language tags (marked as “Type: grandfathered” in BCP 47)
+ * (regular and irregular) as well as private use language tags.
+ *
+ * Private use tags are represented as 'x-whatever',
+ * and legacy tags are converted to their canonical replacements where they exist.
+ *
+ * Note that a few legacy tags have no modern replacement;
+ * these will be converted using the fallback described in
+ * the first paragraph, so some information might be lost.
+ *
+ * @param langtag the input BCP47 language tag.
+ * @param localeID the output buffer receiving a locale ID for the
+ * specified BCP47 language tag.
+ * @param localeIDCapacity the size of the locale ID output buffer.
+ * @param parsedLength if not NULL, successfully parsed length
+ * for the input language tag is set.
+ * @param err error information if receiving the locald ID
+ * failed.
+ * @return the length of the locale ID.
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_forLanguageTag(const char* langtag,
+ char* localeID,
+ int32_t localeIDCapacity,
+ int32_t* parsedLength,
+ UErrorCode* err);
+
+/**
+ * Returns a well-formed language tag for this locale ID.
+ * <p>
+ * <b>Note</b>: When <code>strict</code> is false, any locale
+ * fields which do not satisfy the BCP47 syntax requirement will
+ * be omitted from the result. When <code>strict</code> is
+ * true, this function sets U_ILLEGAL_ARGUMENT_ERROR to the
+ * <code>err</code> if any locale fields do not satisfy the
+ * BCP47 syntax requirement.
+ * @param localeID the input locale ID
+ * @param langtag the output buffer receiving BCP47 language
+ * tag for the locale ID.
+ * @param langtagCapacity the size of the BCP47 language tag
+ * output buffer.
+ * @param strict boolean value indicating if the function returns
+ * an error for an ill-formed input locale ID.
+ * @param err error information if receiving the language
+ * tag failed.
+ * @return The length of the BCP47 language tag.
+ * @stable ICU 4.2
+ */
+U_CAPI int32_t U_EXPORT2
+uloc_toLanguageTag(const char* localeID,
+ char* langtag,
+ int32_t langtagCapacity,
+ UBool strict,
+ UErrorCode* err);
+
+/**
+ * Converts the specified keyword (legacy key, or BCP 47 Unicode locale
+ * extension key) to the equivalent BCP 47 Unicode locale extension key.
+ * For example, BCP 47 Unicode locale extension key "co" is returned for
+ * the input keyword "collation".
+ * <p>
+ * When the specified keyword is unknown, but satisfies the BCP syntax,
+ * then the pointer to the input keyword itself will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleKey("ZZ")</code> returns "ZZ".
+ *
+ * @param keyword the input locale keyword (either legacy key
+ * such as "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @return the well-formed BCP 47 Unicode locale extension key,
+ * or NULL if the specified locale keyword cannot be
+ * mapped to a well-formed BCP 47 Unicode locale extension
+ * key.
+ * @see uloc_toLegacyKey
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (legacy type, or BCP 47
+ * Unicode locale extension type) to the well-formed BCP 47 Unicode locale
+ * extension type for the specified keyword (category). For example, BCP 47
+ * Unicode locale extension type "phonebk" is returned for the input
+ * keyword value "phonebook", with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of the BCP 47 Unicode locale extension type,
+ * or when the specified keyword allows 'variable' type and the specified
+ * value satisfies the syntax, then the pointer to the input type value itself
+ * will be returned.
+ * For example,
+ * <code>uloc_toUnicodeLocaleType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toUnicodeLocaleType("variableTop", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy key such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either legacy type
+ * such as "phonebook" or BCP 47 Unicode locale extension
+ * type such as "phonebk").
+ * @return the well-formed BCP47 Unicode locale extension type,
+ * or NULL if the locale keyword value cannot be mapped to
+ * a well-formed BCP 47 Unicode locale extension type.
+ * @see uloc_toLegacyType
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toUnicodeLocaleType(const char* keyword, const char* value);
+
+/**
+ * Converts the specified keyword (BCP 47 Unicode locale extension key, or
+ * legacy key) to the legacy key. For example, legacy key "collation" is
+ * returned for the input BCP 47 Unicode locale extension key "co".
+ *
+ * @param keyword the input locale keyword (either BCP 47 Unicode locale
+ * extension key or legacy key).
+ * @return the well-formed legacy key, or NULL if the specified
+ * keyword cannot be mapped to a well-formed legacy key.
+ * @see toUnicodeLocaleKey
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyKey(const char* keyword);
+
+/**
+ * Converts the specified keyword value (BCP 47 Unicode locale extension type,
+ * or legacy type or type alias) to the canonical legacy type. For example,
+ * the legacy type "phonebook" is returned for the input BCP 47 Unicode
+ * locale extension type "phonebk" with the keyword "collation" (or "co").
+ * <p>
+ * When the specified keyword is not recognized, but the specified value
+ * satisfies the syntax of legacy key, or when the specified keyword
+ * allows 'variable' type and the specified value satisfies the syntax,
+ * then the pointer to the input type value itself will be returned.
+ * For example,
+ * <code>uloc_toLegacyType("Foo", "Bar")</code> returns "Bar",
+ * <code>uloc_toLegacyType("vt", "00A4")</code> returns "00A4".
+ *
+ * @param keyword the locale keyword (either legacy keyword such as
+ * "collation" or BCP 47 Unicode locale extension
+ * key such as "co").
+ * @param value the locale keyword value (either BCP 47 Unicode locale
+ * extension type such as "phonebk" or legacy keyword value
+ * such as "phonebook").
+ * @return the well-formed legacy type, or NULL if the specified
+ * keyword value cannot be mapped to a well-formed legacy
+ * type.
+ * @see toUnicodeLocaleType
+ * @stable ICU 54
+ */
+U_CAPI const char* U_EXPORT2
+uloc_toLegacyType(const char* keyword, const char* value);
+
+#endif /*_ULOC*/
diff --git a/intl/icu/source/common/unicode/umachine.h b/intl/icu/source/common/unicode/umachine.h
new file mode 100644
index 0000000000..545abef595
--- /dev/null
+++ b/intl/icu/source/common/unicode/umachine.h
@@ -0,0 +1,459 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: umachine.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*
+* This file defines basic types and constants for ICU to be
+* platform-independent. umachine.h and utf.h are included into
+* utypes.h to provide all the general definitions for ICU.
+* All of these definitions used to be in utypes.h before
+* the UTF-handling macros made this unmaintainable.
+*/
+
+#ifndef __UMACHINE_H__
+#define __UMACHINE_H__
+
+
+/**
+ * \file
+ * \brief Basic types and constants for UTF
+ *
+ * <h2> Basic types and constants for UTF </h2>
+ * This file defines basic types and constants for utf.h to be
+ * platform-independent. umachine.h and utf.h are included into
+ * utypes.h to provide all the general definitions for ICU.
+ * All of these definitions used to be in utypes.h before
+ * the UTF-handling macros made this unmaintainable.
+ *
+ */
+/*==========================================================================*/
+/* Include platform-dependent definitions */
+/* which are contained in the platform-specific file platform.h */
+/*==========================================================================*/
+
+#include "unicode/ptypes.h" /* platform.h is included in ptypes.h */
+
+/*
+ * ANSI C headers:
+ * stddef.h defines wchar_t
+ */
+#include <stdbool.h>
+#include <stddef.h>
+
+/*==========================================================================*/
+/* For C wrappers, we use the symbol U_CAPI. */
+/* This works properly if the includer is C or C++. */
+/* Functions are declared U_CAPI return-type U_EXPORT2 function-name()... */
+/*==========================================================================*/
+
+/**
+ * \def U_CFUNC
+ * This is used in a declaration of a library private ICU C function.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_BEGIN
+ * This is used to begin a declaration of a library private ICU C API.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_CDECL_END
+ * This is used to end a declaration of a library private ICU C API
+ * @stable ICU 2.4
+ */
+
+#ifdef __cplusplus
+# define U_CFUNC extern "C"
+# define U_CDECL_BEGIN extern "C" {
+# define U_CDECL_END }
+#else
+# define U_CFUNC extern
+# define U_CDECL_BEGIN
+# define U_CDECL_END
+#endif
+
+#ifndef U_ATTRIBUTE_DEPRECATED
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for GCC specific attributes
+ * @internal
+ */
+#if U_GCC_MAJOR_MINOR >= 302
+# define U_ATTRIBUTE_DEPRECATED __attribute__ ((deprecated))
+/**
+ * \def U_ATTRIBUTE_DEPRECATED
+ * This is used for Visual C++ specific attributes
+ * @internal
+ */
+#elif defined(_MSC_VER) && (_MSC_VER >= 1400)
+# define U_ATTRIBUTE_DEPRECATED __declspec(deprecated)
+#else
+# define U_ATTRIBUTE_DEPRECATED
+#endif
+#endif
+
+/** This is used to declare a function as a public ICU C API @stable ICU 2.0*/
+#define U_CAPI U_CFUNC U_EXPORT
+/** Obsolete/same as U_CAPI; was used to declare a function as a stable public ICU C API*/
+#define U_STABLE U_CAPI
+/** Obsolete/same as U_CAPI; was used to declare a function as a draft public ICU C API */
+#define U_DRAFT U_CAPI
+/** This is used to declare a function as a deprecated public ICU C API */
+#define U_DEPRECATED U_CAPI U_ATTRIBUTE_DEPRECATED
+/** Obsolete/same as U_CAPI; was used to declare a function as an obsolete public ICU C API */
+#define U_OBSOLETE U_CAPI
+/** Obsolete/same as U_CAPI; was used to declare a function as an internal ICU C API */
+#define U_INTERNAL U_CAPI
+
+// Before ICU 65, function-like, multi-statement ICU macros were just defined as
+// series of statements wrapped in { } blocks and the caller could choose to
+// either treat them as if they were actual functions and end the invocation
+// with a trailing ; creating an empty statement after the block or else omit
+// this trailing ; using the knowledge that the macro would expand to { }.
+//
+// But doing so doesn't work well with macros that look like functions and
+// compiler warnings about empty statements (ICU-20601) and ICU 65 therefore
+// switches to the standard solution of wrapping such macros in do { } while.
+//
+// This will however break existing code that depends on being able to invoke
+// these macros without a trailing ; so to be able to remain compatible with
+// such code the wrapper is itself defined as macros so that it's possible to
+// build ICU 65 and later with the old macro behaviour, like this:
+//
+// export CPPFLAGS='-DUPRV_BLOCK_MACRO_BEGIN="" -DUPRV_BLOCK_MACRO_END=""'
+// runConfigureICU ...
+//
+
+/**
+ * \def UPRV_BLOCK_MACRO_BEGIN
+ * Defined as the "do" keyword by default.
+ * @internal
+ */
+#ifndef UPRV_BLOCK_MACRO_BEGIN
+#define UPRV_BLOCK_MACRO_BEGIN do
+#endif
+
+/**
+ * \def UPRV_BLOCK_MACRO_END
+ * Defined as "while (false)" by default.
+ * @internal
+ */
+#ifndef UPRV_BLOCK_MACRO_END
+#define UPRV_BLOCK_MACRO_END while (false)
+#endif
+
+/*==========================================================================*/
+/* limits for int32_t etc., like in POSIX inttypes.h */
+/*==========================================================================*/
+
+#ifndef INT8_MIN
+/** The smallest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MIN ((int8_t)(-128))
+#endif
+#ifndef INT16_MIN
+/** The smallest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MIN ((int16_t)(-32767-1))
+#endif
+#ifndef INT32_MIN
+/** The smallest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MIN ((int32_t)(-2147483647-1))
+#endif
+
+#ifndef INT8_MAX
+/** The largest value an 8 bit signed integer can hold @stable ICU 2.0 */
+# define INT8_MAX ((int8_t)(127))
+#endif
+#ifndef INT16_MAX
+/** The largest value a 16 bit signed integer can hold @stable ICU 2.0 */
+# define INT16_MAX ((int16_t)(32767))
+#endif
+#ifndef INT32_MAX
+/** The largest value a 32 bit signed integer can hold @stable ICU 2.0 */
+# define INT32_MAX ((int32_t)(2147483647))
+#endif
+
+#ifndef UINT8_MAX
+/** The largest value an 8 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT8_MAX ((uint8_t)(255U))
+#endif
+#ifndef UINT16_MAX
+/** The largest value a 16 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT16_MAX ((uint16_t)(65535U))
+#endif
+#ifndef UINT32_MAX
+/** The largest value a 32 bit unsigned integer can hold @stable ICU 2.0 */
+# define UINT32_MAX ((uint32_t)(4294967295U))
+#endif
+
+#if defined(U_INT64_T_UNAVAILABLE)
+# error int64_t is required for decimal format and rule-based number format.
+#else
+# ifndef INT64_C
+/**
+ * Provides a platform independent way to specify a signed 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides INT64_C
+ * @stable ICU 2.8
+ */
+# define INT64_C(c) c ## LL
+# endif
+# ifndef UINT64_C
+/**
+ * Provides a platform independent way to specify an unsigned 64-bit integer constant.
+ * note: may be wrong for some 64 bit platforms - ensure your compiler provides UINT64_C
+ * @stable ICU 2.8
+ */
+# define UINT64_C(c) c ## ULL
+# endif
+# ifndef U_INT64_MIN
+/** The smallest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MIN ((int64_t)(INT64_C(-9223372036854775807)-1))
+# endif
+# ifndef U_INT64_MAX
+/** The largest value a 64 bit signed integer can hold @stable ICU 2.8 */
+# define U_INT64_MAX ((int64_t)(INT64_C(9223372036854775807)))
+# endif
+# ifndef U_UINT64_MAX
+/** The largest value a 64 bit unsigned integer can hold @stable ICU 2.8 */
+# define U_UINT64_MAX ((uint64_t)(UINT64_C(18446744073709551615)))
+# endif
+#endif
+
+/*==========================================================================*/
+/* Boolean data type */
+/*==========================================================================*/
+
+/**
+ * The ICU boolean type, a signed-byte integer.
+ * ICU-specific for historical reasons: The C and C++ standards used to not define type bool.
+ * Also provides a fixed type definition, as opposed to
+ * type bool whose details (e.g., sizeof) may vary by compiler and between C and C++.
+ *
+ * @stable ICU 2.0
+ */
+typedef int8_t UBool;
+
+/**
+ * \def U_DEFINE_FALSE_AND_TRUE
+ * Normally turns off defining macros FALSE=0 & TRUE=1 in public ICU headers.
+ * These obsolete macros sometimes break compilation of other code that
+ * defines enum constants or similar with these names.
+ * C++ has long defined bool/false/true.
+ * C99 also added definitions for these, although as macros; see stdbool.h.
+ *
+ * You may transitionally define U_DEFINE_FALSE_AND_TRUE=1 if you need time to migrate code.
+ *
+ * @internal ICU 68
+ */
+#ifdef U_DEFINE_FALSE_AND_TRUE
+ // Use the predefined value.
+#else
+ // Default to avoiding collision with non-macro definitions of FALSE & TRUE.
+# define U_DEFINE_FALSE_AND_TRUE 0
+#endif
+
+#if U_DEFINE_FALSE_AND_TRUE || defined(U_IN_DOXYGEN)
+#ifndef TRUE
+/**
+ * The TRUE value of a UBool.
+ *
+ * @deprecated ICU 68 Use standard "true" instead.
+ */
+# define TRUE 1
+#endif
+#ifndef FALSE
+/**
+ * The FALSE value of a UBool.
+ *
+ * @deprecated ICU 68 Use standard "false" instead.
+ */
+# define FALSE 0
+#endif
+#endif // U_DEFINE_FALSE_AND_TRUE
+
+/*==========================================================================*/
+/* Unicode data types */
+/*==========================================================================*/
+
+/* wchar_t-related definitions -------------------------------------------- */
+
+/*
+ * \def U_WCHAR_IS_UTF16
+ * Defined if wchar_t uses UTF-16.
+ *
+ * @stable ICU 2.0
+ */
+/*
+ * \def U_WCHAR_IS_UTF32
+ * Defined if wchar_t uses UTF-32.
+ *
+ * @stable ICU 2.0
+ */
+#if !defined(U_WCHAR_IS_UTF16) && !defined(U_WCHAR_IS_UTF32)
+# ifdef __STDC_ISO_10646__
+# if (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# elif (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif defined __UCS2__
+# if (U_PF_OS390 <= U_PLATFORM && U_PLATFORM <= U_PF_OS400) && (U_SIZEOF_WCHAR_T==2)
+# define U_WCHAR_IS_UTF16
+# endif
+# elif defined(__UCS4__) || (U_PLATFORM == U_PF_OS400 && defined(__UTF32__))
+# if (U_SIZEOF_WCHAR_T==4)
+# define U_WCHAR_IS_UTF32
+# endif
+# elif U_PLATFORM_IS_DARWIN_BASED || (U_SIZEOF_WCHAR_T==4 && U_PLATFORM_IS_LINUX_BASED)
+# define U_WCHAR_IS_UTF32
+# elif U_PLATFORM_HAS_WIN32_API
+# define U_WCHAR_IS_UTF16
+# endif
+#endif
+
+/* UChar and UChar32 definitions -------------------------------------------- */
+
+/** Number of bytes in a UChar (always 2). @stable ICU 2.0 */
+#define U_SIZEOF_UCHAR 2
+
+/**
+ * \def U_CHAR16_IS_TYPEDEF
+ * If 1, then char16_t is a typedef and not a real type (yet)
+ * @internal
+ */
+#if defined(_MSC_VER) && (_MSC_VER < 1900)
+// Versions of Visual Studio/MSVC below 2015 do not support char16_t as a real type,
+// and instead use a typedef. https://msdn.microsoft.com/library/bb531344.aspx
+# define U_CHAR16_IS_TYPEDEF 1
+#else
+# define U_CHAR16_IS_TYPEDEF 0
+#endif
+
+
+/**
+ * \var UChar
+ *
+ * The base type for UTF-16 code units and pointers.
+ * Unsigned 16-bit integer.
+ * Starting with ICU 59, C++ API uses char16_t directly, while C API continues to use UChar.
+ *
+ * UChar is configurable by defining the macro UCHAR_TYPE
+ * on the preprocessor or compiler command line:
+ * -DUCHAR_TYPE=uint16_t or -DUCHAR_TYPE=wchar_t (if U_SIZEOF_WCHAR_T==2) etc.
+ * (The UCHAR_TYPE can also be \#defined earlier in this file, for outside the ICU library code.)
+ * This is for transitional use from application code that uses uint16_t or wchar_t for UTF-16.
+ *
+ * The default is UChar=char16_t.
+ *
+ * C++11 defines char16_t as bit-compatible with uint16_t, but as a distinct type.
+ *
+ * In C, char16_t is a simple typedef of uint_least16_t.
+ * ICU requires uint_least16_t=uint16_t for data memory mapping.
+ * On macOS, char16_t is not available because the uchar.h standard header is missing.
+ *
+ * @stable ICU 4.4
+ */
+
+#if 1
+ // #if 1 is normal. UChar defaults to char16_t in C++.
+ // For configuration testing of UChar=uint16_t temporarily change this to #if 0.
+ // The intltest Makefile #defines UCHAR_TYPE=char16_t,
+ // so we only #define it to uint16_t if it is undefined so far.
+#elif !defined(UCHAR_TYPE)
+# define UCHAR_TYPE uint16_t
+#endif
+
+#if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Inside the ICU library code, never configurable.
+ typedef char16_t UChar;
+#elif defined(UCHAR_TYPE)
+ typedef UCHAR_TYPE UChar;
+#elif U_CPLUSPLUS_VERSION != 0
+ typedef char16_t UChar; // C++
+#else
+ typedef uint16_t UChar; // C
+#endif
+
+/**
+ * \var OldUChar
+ * Default ICU 58 definition of UChar.
+ * A base type for UTF-16 code units and pointers.
+ * Unsigned 16-bit integer.
+ *
+ * Define OldUChar to be wchar_t if that is 16 bits wide.
+ * If wchar_t is not 16 bits wide, then define UChar to be uint16_t.
+ *
+ * This makes the definition of OldUChar platform-dependent
+ * but allows direct string type compatibility with platforms with
+ * 16-bit wchar_t types.
+ *
+ * This is how UChar was defined in ICU 58, for transition convenience.
+ * Exception: ICU 58 UChar was defined to UCHAR_TYPE if that macro was defined.
+ * The current UChar responds to UCHAR_TYPE but OldUChar does not.
+ *
+ * @stable ICU 59
+ */
+#if U_SIZEOF_WCHAR_T==2
+ typedef wchar_t OldUChar;
+#elif defined(__CHAR16_TYPE__)
+ typedef __CHAR16_TYPE__ OldUChar;
+#else
+ typedef uint16_t OldUChar;
+#endif
+
+/**
+ * Define UChar32 as a type for single Unicode code points.
+ * UChar32 is a signed 32-bit integer (same as int32_t).
+ *
+ * The Unicode code point range is 0..0x10ffff.
+ * All other values (negative or >=0x110000) are illegal as Unicode code points.
+ * They may be used as sentinel values to indicate "done", "error"
+ * or similar non-code point conditions.
+ *
+ * Before ICU 2.4 (Jitterbug 2146), UChar32 was defined
+ * to be wchar_t if that is 32 bits wide (wchar_t may be signed or unsigned)
+ * or else to be uint32_t.
+ * That is, the definition of UChar32 was platform-dependent.
+ *
+ * @see U_SENTINEL
+ * @stable ICU 2.4
+ */
+typedef int32_t UChar32;
+
+/**
+ * This value is intended for sentinel values for APIs that
+ * (take or) return single code points (UChar32).
+ * It is outside of the Unicode code point range 0..0x10ffff.
+ *
+ * For example, a "done" or "error" value in a new API
+ * could be indicated with U_SENTINEL.
+ *
+ * ICU APIs designed before ICU 2.4 usually define service-specific "done"
+ * values, mostly 0xffff.
+ * Those may need to be distinguished from
+ * actual U+ffff text contents by calling functions like
+ * CharacterIterator::hasNext() or UnicodeString::length().
+ *
+ * @return -1
+ * @see UChar32
+ * @stable ICU 2.4
+ */
+#define U_SENTINEL (-1)
+
+#include "unicode/urename.h"
+
+#endif
diff --git a/intl/icu/source/common/unicode/umisc.h b/intl/icu/source/common/unicode/umisc.h
new file mode 100644
index 0000000000..4e9dda7450
--- /dev/null
+++ b/intl/icu/source/common/unicode/umisc.h
@@ -0,0 +1,62 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2006, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+* file name: umisc.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999oct15
+* created by: Markus W. Scherer
+*/
+
+#ifndef UMISC_H
+#define UMISC_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Miscellaneous definitions
+ *
+ * This file contains miscellaneous definitions for the C APIs.
+ */
+
+U_CDECL_BEGIN
+
+/** A struct representing a range of text containing a specific field
+ * @stable ICU 2.0
+ */
+typedef struct UFieldPosition {
+ /**
+ * The field
+ * @stable ICU 2.0
+ */
+ int32_t field;
+ /**
+ * The start of the text range containing field
+ * @stable ICU 2.0
+ */
+ int32_t beginIndex;
+ /**
+ * The limit of the text range containing field
+ * @stable ICU 2.0
+ */
+ int32_t endIndex;
+} UFieldPosition;
+
+#if !UCONFIG_NO_SERVICE
+/**
+ * Opaque type returned by registerInstance, registerFactory and unregister for service registration.
+ * @stable ICU 2.6
+ */
+typedef const void* URegistryKey;
+#endif
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/umutablecptrie.h b/intl/icu/source/common/unicode/umutablecptrie.h
new file mode 100644
index 0000000000..d60fd61819
--- /dev/null
+++ b/intl/icu/source/common/unicode/umutablecptrie.h
@@ -0,0 +1,240 @@
+// © 2017 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+// umutablecptrie.h (split out of ucptrie.h)
+// created: 2018jan24 Markus W. Scherer
+
+#ifndef __UMUTABLECPTRIE_H__
+#define __UMUTABLECPTRIE_H__
+
+#include "unicode/utypes.h"
+
+#include "unicode/ucpmap.h"
+#include "unicode/ucptrie.h"
+#include "unicode/utf8.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+U_CDECL_BEGIN
+
+/**
+ * \file
+ * \brief C API: This file defines a mutable Unicode code point trie.
+ *
+ * @see UCPTrie
+ * @see UMutableCPTrie
+ */
+
+/**
+ * Mutable Unicode code point trie.
+ * Fast map from Unicode code points (U+0000..U+10FFFF) to 32-bit integer values.
+ * For details see https://icu.unicode.org/design/struct/utrie
+ *
+ * Setting values (especially ranges) and lookup is fast.
+ * The mutable trie is only somewhat space-efficient.
+ * It builds a compacted, immutable UCPTrie.
+ *
+ * This trie can be modified while iterating over its contents.
+ * For example, it is possible to merge its values with those from another
+ * set of ranges (e.g., another mutable or immutable trie):
+ * Iterate over those source ranges; for each of them iterate over this trie;
+ * add the source value into the value of each trie range.
+ *
+ * @see UCPTrie
+ * @see umutablecptrie_buildImmutable
+ * @stable ICU 63
+ */
+typedef struct UMutableCPTrie UMutableCPTrie;
+
+/**
+ * Creates a mutable trie that initially maps each Unicode code point to the same value.
+ * It uses 32-bit data values until umutablecptrie_buildImmutable() is called.
+ * umutablecptrie_buildImmutable() takes a valueWidth parameter which
+ * determines the number of bits in the data value in the resulting UCPTrie.
+ * You must umutablecptrie_close() the trie once you are done using it.
+ *
+ * @param initialValue the initial value that is set for all code points
+ * @param errorValue the value for out-of-range code points and ill-formed UTF-8/16
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the trie
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_open(uint32_t initialValue, uint32_t errorValue, UErrorCode *pErrorCode);
+
+/**
+ * Clones a mutable trie.
+ * You must umutablecptrie_close() the clone once you are done using it.
+ *
+ * @param other the trie to clone
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the trie clone
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_clone(const UMutableCPTrie *other, UErrorCode *pErrorCode);
+
+/**
+ * Closes a mutable trie and releases associated memory.
+ *
+ * @param trie the trie
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+umutablecptrie_close(UMutableCPTrie *trie);
+
+/**
+ * Creates a mutable trie with the same contents as the UCPMap.
+ * You must umutablecptrie_close() the mutable trie once you are done using it.
+ *
+ * @param map the source map
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the mutable trie
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_fromUCPMap(const UCPMap *map, UErrorCode *pErrorCode);
+
+/**
+ * Creates a mutable trie with the same contents as the immutable one.
+ * You must umutablecptrie_close() the mutable trie once you are done using it.
+ *
+ * @param trie the immutable trie
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @return the mutable trie
+ * @stable ICU 63
+ */
+U_CAPI UMutableCPTrie * U_EXPORT2
+umutablecptrie_fromUCPTrie(const UCPTrie *trie, UErrorCode *pErrorCode);
+
+/**
+ * Returns the value for a code point as stored in the trie.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @return the value
+ * @stable ICU 63
+ */
+U_CAPI uint32_t U_EXPORT2
+umutablecptrie_get(const UMutableCPTrie *trie, UChar32 c);
+
+/**
+ * Returns the last code point such that all those from start to there have the same value.
+ * Can be used to efficiently iterate over all same-value ranges in a trie.
+ * (This is normally faster than iterating over code points and get()ting each value,
+ * but much slower than a data structure that stores ranges directly.)
+ *
+ * The trie can be modified between calls to this function.
+ *
+ * If the UCPMapValueFilter function pointer is not NULL, then
+ * the value to be delivered is passed through that function, and the return value is the end
+ * of the range where all values are modified to the same actual value.
+ * The value is unchanged if that function pointer is NULL.
+ *
+ * See the same-signature ucptrie_getRange() for a code sample.
+ *
+ * @param trie the trie
+ * @param start range start
+ * @param option defines whether surrogates are treated normally,
+ * or as having the surrogateValue; usually UCPMAP_RANGE_NORMAL
+ * @param surrogateValue value for surrogates; ignored if option==UCPMAP_RANGE_NORMAL
+ * @param filter a pointer to a function that may modify the trie data value,
+ * or NULL if the values from the trie are to be used unmodified
+ * @param context an opaque pointer that is passed on to the filter function
+ * @param pValue if not NULL, receives the value that every code point start..end has;
+ * may have been modified by filter(context, trie value)
+ * if that function pointer is not NULL
+ * @return the range end code point, or -1 if start is not a valid code point
+ * @stable ICU 63
+ */
+U_CAPI UChar32 U_EXPORT2
+umutablecptrie_getRange(const UMutableCPTrie *trie, UChar32 start,
+ UCPMapRangeOption option, uint32_t surrogateValue,
+ UCPMapValueFilter *filter, const void *context, uint32_t *pValue);
+
+/**
+ * Sets a value for a code point.
+ *
+ * @param trie the trie
+ * @param c the code point
+ * @param value the value
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+umutablecptrie_set(UMutableCPTrie *trie, UChar32 c, uint32_t value, UErrorCode *pErrorCode);
+
+/**
+ * Sets a value for each code point [start..end].
+ * Faster and more space-efficient than setting the value for each code point separately.
+ *
+ * @param trie the trie
+ * @param start the first code point to get the value
+ * @param end the last code point to get the value (inclusive)
+ * @param value the value
+ * @param pErrorCode an in/out ICU UErrorCode
+ * @stable ICU 63
+ */
+U_CAPI void U_EXPORT2
+umutablecptrie_setRange(UMutableCPTrie *trie,
+ UChar32 start, UChar32 end,
+ uint32_t value, UErrorCode *pErrorCode);
+
+/**
+ * Compacts the data and builds an immutable UCPTrie according to the parameters.
+ * After this, the mutable trie will be empty.
+ *
+ * The mutable trie stores 32-bit values until buildImmutable() is called.
+ * If values shorter than 32 bits are to be stored in the immutable trie,
+ * then the upper bits are discarded.
+ * For example, when the mutable trie contains values 0x81, -0x7f, and 0xa581,
+ * and the value width is 8 bits, then each of these is stored as 0x81
+ * and the immutable trie will return that as an unsigned value.
+ * (Some implementations may want to make productive temporary use of the upper bits
+ * until buildImmutable() discards them.)
+ *
+ * Not every possible set of mappings can be built into a UCPTrie,
+ * because of limitations resulting from speed and space optimizations.
+ * Every Unicode assigned character can be mapped to a unique value.
+ * Typical data yields data structures far smaller than the limitations.
+ *
+ * It is possible to construct extremely unusual mappings that exceed the data structure limits.
+ * In such a case this function will fail with a U_INDEX_OUTOFBOUNDS_ERROR.
+ *
+ * @param trie the trie trie
+ * @param type selects the trie type
+ * @param valueWidth selects the number of bits in a trie data value; if smaller than 32 bits,
+ * then the values stored in the trie will be truncated first
+ * @param pErrorCode an in/out ICU UErrorCode
+ *
+ * @see umutablecptrie_fromUCPTrie
+ * @stable ICU 63
+ */
+U_CAPI UCPTrie * U_EXPORT2
+umutablecptrie_buildImmutable(UMutableCPTrie *trie, UCPTrieType type, UCPTrieValueWidth valueWidth,
+ UErrorCode *pErrorCode);
+
+U_CDECL_END
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUMutableCPTriePointer
+ * "Smart pointer" class, closes a UMutableCPTrie via umutablecptrie_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 63
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUMutableCPTriePointer, UMutableCPTrie, umutablecptrie_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#endif
diff --git a/intl/icu/source/common/unicode/unifilt.h b/intl/icu/source/common/unicode/unifilt.h
new file mode 100644
index 0000000000..0fcaf4b789
--- /dev/null
+++ b/intl/icu/source/common/unicode/unifilt.h
@@ -0,0 +1,136 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1999-2010, International Business Machines Corporation and others.
+* All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 11/17/99 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIFILT_H
+#define UNIFILT_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/unifunct.h"
+#include "unicode/unimatch.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Filter
+ */
+
+U_NAMESPACE_BEGIN
+
+/**
+ * U_ETHER is used to represent character values for positions outside
+ * a range. For example, transliterator uses this to represent
+ * characters outside the range contextStart..contextLimit-1. This
+ * allows explicit matching by rules and UnicodeSets of text outside a
+ * defined range.
+ * @stable ICU 3.0
+ */
+#define U_ETHER ((char16_t)0xFFFF)
+
+/**
+ *
+ * <code>UnicodeFilter</code> defines a protocol for selecting a
+ * subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
+ * Currently, filters are used in conjunction with classes like
+ * {@link Transliterator} to only process selected characters through a
+ * transformation.
+ *
+ * <p>Note: UnicodeFilter currently stubs out two pure virtual methods
+ * of its base class, UnicodeMatcher. These methods are toPattern()
+ * and matchesIndexValue(). This is done so that filter classes that
+ * are not actually used as matchers -- specifically, those in the
+ * UnicodeFilterLogic component, and those in tests -- can continue to
+ * work without defining these methods. As long as a filter is not
+ * used in an RBT during real transliteration, these methods will not
+ * be called. However, this breaks the UnicodeMatcher base class
+ * protocol, and it is not a correct solution.
+ *
+ * <p>In the future we may revisit the UnicodeMatcher / UnicodeFilter
+ * hierarchy and either redesign it, or simply remove the stubs in
+ * UnicodeFilter and force subclasses to implement the full
+ * UnicodeMatcher protocol.
+ *
+ * @see UnicodeFilterLogic
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeFilter : public UnicodeFunctor, public UnicodeMatcher {
+
+public:
+ /**
+ * Destructor
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeFilter();
+
+ /**
+ * Clones this object polymorphically.
+ * The caller owns the result and should delete it when done.
+ * @return clone, or nullptr if an error occurred
+ * @stable ICU 2.4
+ */
+ virtual UnicodeFilter* clone() const override = 0;
+
+ /**
+ * Returns <tt>true</tt> for characters that are in the selected
+ * subset. In other words, if a character is <b>to be
+ * filtered</b>, then <tt>contains()</tt> returns
+ * <b><tt>false</tt></b>.
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 c) const = 0;
+
+ /**
+ * UnicodeFunctor API. Cast 'this' to a UnicodeMatcher* pointer
+ * and return the pointer.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeMatcher* toMatcher() const override;
+
+ /**
+ * Implement UnicodeMatcher API.
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) override;
+
+ /**
+ * UnicodeFunctor API. Nothing to do.
+ * @stable ICU 2.4
+ */
+ virtual void setData(const TransliterationRuleData*) override;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+protected:
+
+ /*
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
+ * @stable ICU 2.0
+ */
+/* UnicodeFilter();*/
+};
+
+/*inline UnicodeFilter::UnicodeFilter() {}*/
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/unifunct.h b/intl/icu/source/common/unicode/unifunct.h
new file mode 100644
index 0000000000..8751302494
--- /dev/null
+++ b/intl/icu/source/common/unicode/unifunct.h
@@ -0,0 +1,132 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2005, International Business Machines Corporation
+* and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 01/14/2002 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIFUNCT_H
+#define UNIFUNCT_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Functor
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeMatcher;
+class UnicodeReplacer;
+class TransliterationRuleData;
+
+/**
+ * <code>UnicodeFunctor</code> is an abstract base class for objects
+ * that perform match and/or replace operations on Unicode strings.
+ * @author Alan Liu
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeFunctor : public UObject {
+
+public:
+
+ /**
+ * Destructor
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeFunctor();
+
+ /**
+ * Return a copy of this object. All UnicodeFunctor objects
+ * have to support cloning in order to allow classes using
+ * UnicodeFunctor to implement cloning.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeFunctor* clone() const = 0;
+
+ /**
+ * Cast 'this' to a UnicodeMatcher* pointer and return the
+ * pointer, or null if this is not a UnicodeMatcher*. Subclasses
+ * that mix in UnicodeMatcher as a base class must override this.
+ * This protocol is required because a pointer to a UnicodeFunctor
+ * cannot be cast to a pointer to a UnicodeMatcher, since
+ * UnicodeMatcher is a mixin that does not derive from
+ * UnicodeFunctor.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeMatcher* toMatcher() const;
+
+ /**
+ * Cast 'this' to a UnicodeReplacer* pointer and return the
+ * pointer, or null if this is not a UnicodeReplacer*. Subclasses
+ * that mix in UnicodeReplacer as a base class must override this.
+ * This protocol is required because a pointer to a UnicodeFunctor
+ * cannot be cast to a pointer to a UnicodeReplacer, since
+ * UnicodeReplacer is a mixin that does not derive from
+ * UnicodeFunctor.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeReplacer* toReplacer() const;
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID().
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Returns a unique class ID <b>polymorphically</b>. This method
+ * is to implement a simple version of RTTI, since not all C++
+ * compilers support genuine RTTI. Polymorphic operator==() and
+ * clone() methods call this method.
+ *
+ * <p>Concrete subclasses of UnicodeFunctor should use the macro
+ * UOBJECT_DEFINE_RTTI_IMPLEMENTATION from uobject.h to
+ * provide definitions getStaticClassID and getDynamicClassID.
+ *
+ * @return The class ID for this object. All objects of a given
+ * class have the same class ID. Objects of other classes have
+ * different class IDs.
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID(void) const override = 0;
+
+ /**
+ * Set the data object associated with this functor. The data
+ * object provides context for functor-to-standin mapping. This
+ * method is required when assigning a functor to a different data
+ * object. This function MAY GO AWAY later if the architecture is
+ * changed to pass data object pointers through the API.
+ * @internal ICU 2.1
+ */
+ virtual void setData(const TransliterationRuleData*) = 0;
+
+protected:
+
+ /**
+ * Since this class has pure virtual functions,
+ * a constructor can't be used.
+ * @stable ICU 2.0
+ */
+ /*UnicodeFunctor();*/
+
+};
+
+/*inline UnicodeFunctor::UnicodeFunctor() {}*/
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/unimatch.h b/intl/icu/source/common/unicode/unimatch.h
new file mode 100644
index 0000000000..302332f455
--- /dev/null
+++ b/intl/icu/source/common/unicode/unimatch.h
@@ -0,0 +1,168 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+* Copyright (C) 2001-2005, International Business Machines Corporation and others. All Rights Reserved.
+**********************************************************************
+* Date Name Description
+* 07/18/01 aliu Creation.
+**********************************************************************
+*/
+#ifndef UNIMATCH_H
+#define UNIMATCH_H
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Matcher
+ */
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+class Replaceable;
+class UnicodeString;
+class UnicodeSet;
+
+/**
+ * Constants returned by <code>UnicodeMatcher::matches()</code>
+ * indicating the degree of match.
+ * @stable ICU 2.4
+ */
+enum UMatchDegree {
+ /**
+ * Constant returned by <code>matches()</code> indicating a
+ * mismatch between the text and this matcher. The text contains
+ * a character which does not match, or the text does not contain
+ * all desired characters for a non-incremental match.
+ * @stable ICU 2.4
+ */
+ U_MISMATCH,
+
+ /**
+ * Constant returned by <code>matches()</code> indicating a
+ * partial match between the text and this matcher. This value is
+ * only returned for incremental match operations. All characters
+ * of the text match, but more characters are required for a
+ * complete match. Alternatively, for variable-length matchers,
+ * all characters of the text match, and if more characters were
+ * supplied at limit, they might also match.
+ * @stable ICU 2.4
+ */
+ U_PARTIAL_MATCH,
+
+ /**
+ * Constant returned by <code>matches()</code> indicating a
+ * complete match between the text and this matcher. For an
+ * incremental variable-length match, this value is returned if
+ * the given text matches, and it is known that additional
+ * characters would not alter the extent of the match.
+ * @stable ICU 2.4
+ */
+ U_MATCH
+};
+
+/**
+ * <code>UnicodeMatcher</code> defines a protocol for objects that can
+ * match a range of characters in a Replaceable string.
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeMatcher /* not : public UObject because this is an interface/mixin class */ {
+
+public:
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeMatcher();
+
+ /**
+ * Return a UMatchDegree value indicating the degree of match for
+ * the given text at the given offset. Zero, one, or more
+ * characters may be matched.
+ *
+ * Matching in the forward direction is indicated by limit >
+ * offset. Characters from offset forwards to limit-1 will be
+ * considered for matching.
+ *
+ * Matching in the reverse direction is indicated by limit <
+ * offset. Characters from offset backwards to limit+1 will be
+ * considered for matching.
+ *
+ * If limit == offset then the only match possible is a zero
+ * character match (which subclasses may implement if desired).
+ *
+ * As a side effect, advance the offset parameter to the limit of
+ * the matched substring. In the forward direction, this will be
+ * the index of the last matched character plus one. In the
+ * reverse direction, this will be the index of the last matched
+ * character minus one.
+ *
+ * <p>Note: This method is not const because some classes may
+ * modify their state as the result of a match.
+ *
+ * @param text the text to be matched
+ * @param offset on input, the index into text at which to begin
+ * matching. On output, the limit of the matched text. The
+ * number of matched characters is the output value of offset
+ * minus the input value. Offset should always point to the
+ * HIGH SURROGATE (leading code unit) of a pair of surrogates,
+ * both on entry and upon return.
+ * @param limit the limit index of text to be matched. Greater
+ * than offset for a forward direction match, less than offset for
+ * a backward direction match. The last character to be
+ * considered for matching will be text.charAt(limit-1) in the
+ * forward direction or text.charAt(limit+1) in the backward
+ * direction.
+ * @param incremental if true, then assume further characters may
+ * be inserted at limit and check for partial matching. Otherwise
+ * assume the text as given is complete.
+ * @return a match degree value indicating a full match, a partial
+ * match, or a mismatch. If incremental is false then
+ * U_PARTIAL_MATCH should never be returned.
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) = 0;
+
+ /**
+ * Returns a string representation of this matcher. If the result of
+ * calling this function is passed to the appropriate parser, it
+ * will produce another matcher that is equal to this one.
+ * @param result the string to receive the pattern. Previous
+ * contents will be deleted.
+ * @param escapeUnprintable if true then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeString& toPattern(UnicodeString& result,
+ UBool escapeUnprintable = false) const = 0;
+
+ /**
+ * Returns true if this matcher will match a character c, where c
+ * & 0xFF == v, at offset, in the forward direction (with limit >
+ * offset). This is used by <tt>RuleBasedTransliterator</tt> for
+ * indexing.
+ * @stable ICU 2.4
+ */
+ virtual UBool matchesIndexValue(uint8_t v) const = 0;
+
+ /**
+ * Union the set of all characters that may be matched by this object
+ * into the given set.
+ * @param toUnionTo the set into which to union the source characters
+ * @stable ICU 2.4
+ */
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const = 0;
+};
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/uniset.h b/intl/icu/source/common/unicode/uniset.h
new file mode 100644
index 0000000000..84774d9f36
--- /dev/null
+++ b/intl/icu/source/common/unicode/uniset.h
@@ -0,0 +1,1793 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 1999-2016, International Business Machines Corporation
+* and others. All Rights Reserved.
+***************************************************************************
+* Date Name Description
+* 10/20/99 alan Creation.
+***************************************************************************
+*/
+
+#ifndef UNICODESET_H
+#define UNICODESET_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/ucpmap.h"
+#include "unicode/unifilt.h"
+#include "unicode/unistr.h"
+#include "unicode/uset.h"
+
+/**
+ * \file
+ * \brief C++ API: Unicode Set
+ */
+
+U_NAMESPACE_BEGIN
+
+// Forward Declarations.
+class BMPSet;
+class ParsePosition;
+class RBBIRuleScanner;
+class SymbolTable;
+class UnicodeSetStringSpan;
+class UVector;
+class RuleCharacterIterator;
+
+/**
+ * A mutable set of Unicode characters and multicharacter strings. Objects of this class
+ * represent <em>character classes</em> used in regular expressions.
+ * A character specifies a subset of Unicode code points. Legal
+ * code points are U+0000 to U+10FFFF, inclusive.
+ *
+ * <p>The UnicodeSet class is not designed to be subclassed.
+ *
+ * <p><code>UnicodeSet</code> supports two APIs. The first is the
+ * <em>operand</em> API that allows the caller to modify the value of
+ * a <code>UnicodeSet</code> object. It conforms to Java 2's
+ * <code>java.util.Set</code> interface, although
+ * <code>UnicodeSet</code> does not actually implement that
+ * interface. All methods of <code>Set</code> are supported, with the
+ * modification that they take a character range or single character
+ * instead of an <code>Object</code>, and they take a
+ * <code>UnicodeSet</code> instead of a <code>Collection</code>. The
+ * operand API may be thought of in terms of boolean logic: a boolean
+ * OR is implemented by <code>add</code>, a boolean AND is implemented
+ * by <code>retain</code>, a boolean XOR is implemented by
+ * <code>complement</code> taking an argument, and a boolean NOT is
+ * implemented by <code>complement</code> with no argument. In terms
+ * of traditional set theory function names, <code>add</code> is a
+ * union, <code>retain</code> is an intersection, <code>remove</code>
+ * is an asymmetric difference, and <code>complement</code> with no
+ * argument is a set complement with respect to the superset range
+ * <code>MIN_VALUE-MAX_VALUE</code>
+ *
+ * <p>The second API is the
+ * <code>applyPattern()</code>/<code>toPattern()</code> API from the
+ * <code>java.text.Format</code>-derived classes. Unlike the
+ * methods that add characters, add categories, and control the logic
+ * of the set, the method <code>applyPattern()</code> sets all
+ * attributes of a <code>UnicodeSet</code> at once, based on a
+ * string pattern.
+ *
+ * <p><b>Pattern syntax</b></p>
+ *
+ * Patterns are accepted by the constructors and the
+ * <code>applyPattern()</code> methods and returned by the
+ * <code>toPattern()</code> method. These patterns follow a syntax
+ * similar to that employed by version 8 regular expression character
+ * classes. Here are some simple examples:
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <table>
+ * <tr align="top">
+ * <td nowrap valign="top" align="left"><code>[]</code></td>
+ * <td valign="top">No characters</td>
+ * </tr><tr align="top">
+ * <td nowrap valign="top" align="left"><code>[a]</code></td>
+ * <td valign="top">The character 'a'</td>
+ * </tr><tr align="top">
+ * <td nowrap valign="top" align="left"><code>[ae]</code></td>
+ * <td valign="top">The characters 'a' and 'e'</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[a-e]</code></td>
+ * <td valign="top">The characters 'a' through 'e' inclusive, in Unicode code
+ * point order</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[\\u4E01]</code></td>
+ * <td valign="top">The character U+4E01</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[a{ab}{ac}]</code></td>
+ * <td valign="top">The character 'a' and the multicharacter strings &quot;ab&quot; and
+ * &quot;ac&quot;</td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="left"><code>[\\p{Lu}]</code></td>
+ * <td valign="top">All characters in the general category Uppercase Letter</td>
+ * </tr>
+ * </table>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * Any character may be preceded by a backslash in order to remove any special
+ * meaning. White space characters, as defined by UCharacter.isWhitespace(), are
+ * ignored, unless they are escaped.
+ *
+ * <p>Property patterns specify a set of characters having a certain
+ * property as defined by the Unicode standard. Both the POSIX-like
+ * "[:Lu:]" and the Perl-like syntax "\\p{Lu}" are recognized. For a
+ * complete list of supported property patterns, see the User's Guide
+ * for UnicodeSet at
+ * <a href="https://unicode-org.github.io/icu/userguide/strings/unicodeset">
+ * https://unicode-org.github.io/icu/userguide/strings/unicodeset</a>.
+ * Actual determination of property data is defined by the underlying
+ * Unicode database as implemented by UCharacter.
+ *
+ * <p>Patterns specify individual characters, ranges of characters, and
+ * Unicode property sets. When elements are concatenated, they
+ * specify their union. To complement a set, place a '^' immediately
+ * after the opening '['. Property patterns are inverted by modifying
+ * their delimiters; "[:^foo]" and "\\P{foo}". In any other location,
+ * '^' has no special meaning.
+ *
+ * <p>Since ICU 70, "[^...]", "[:^foo]", "\\P{foo}", and "[:binaryProperty=No:]"
+ * perform a “code point complement” (all code points minus the original set),
+ * removing all multicharacter strings,
+ * equivalent to <code>.complement().removeAllStrings()</code>.
+ * The complement() API function continues to perform a
+ * symmetric difference with all code points and thus retains all multicharacter strings.
+ *
+ * <p>Ranges are indicated by placing two a '-' between two
+ * characters, as in "a-z". This specifies the range of all
+ * characters from the left to the right, in Unicode order. If the
+ * left character is greater than or equal to the
+ * right character it is a syntax error. If a '-' occurs as the first
+ * character after the opening '[' or '[^', or if it occurs as the
+ * last character before the closing ']', then it is taken as a
+ * literal. Thus "[a\-b]", "[-ab]", and "[ab-]" all indicate the same
+ * set of three characters, 'a', 'b', and '-'.
+ *
+ * <p>Sets may be intersected using the '&' operator or the asymmetric
+ * set difference may be taken using the '-' operator, for example,
+ * "[[:L:]&[\\u0000-\\u0FFF]]" indicates the set of all Unicode letters
+ * with values less than 4096. Operators ('&' and '|') have equal
+ * precedence and bind left-to-right. Thus
+ * "[[:L:]-[a-z]-[\\u0100-\\u01FF]]" is equivalent to
+ * "[[[:L:]-[a-z]]-[\\u0100-\\u01FF]]". This only really matters for
+ * difference; intersection is commutative.
+ *
+ * <table>
+ * <tr valign=top><td nowrap><code>[a]</code><td>The set containing 'a'
+ * <tr valign=top><td nowrap><code>[a-z]</code><td>The set containing 'a'
+ * through 'z' and all letters in between, in Unicode order
+ * <tr valign=top><td nowrap><code>[^a-z]</code><td>The set containing
+ * all characters but 'a' through 'z',
+ * that is, U+0000 through 'a'-1 and 'z'+1 through U+10FFFF
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>][<em>pat2</em>]]</code>
+ * <td>The union of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]&[<em>pat2</em>]]</code>
+ * <td>The intersection of sets specified by <em>pat1</em> and <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[[<em>pat1</em>]-[<em>pat2</em>]]</code>
+ * <td>The asymmetric difference of sets specified by <em>pat1</em> and
+ * <em>pat2</em>
+ * <tr valign=top><td nowrap><code>[:Lu:] or \\p{Lu}</code>
+ * <td>The set of characters having the specified
+ * Unicode property; in
+ * this case, Unicode uppercase letters
+ * <tr valign=top><td nowrap><code>[:^Lu:] or \\P{Lu}</code>
+ * <td>The set of characters <em>not</em> having the given
+ * Unicode property
+ * </table>
+ *
+ * <p><b>Formal syntax</b></p>
+ *
+ * \htmlonly<blockquote>\endhtmlonly
+ * <table>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>pattern :=&nbsp; </code></td>
+ * <td valign="top"><code>('[' '^'? item* ']') |
+ * property</code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>item :=&nbsp; </code></td>
+ * <td valign="top"><code>char | (char '-' char) | pattern-expr<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>pattern-expr :=&nbsp; </code></td>
+ * <td valign="top"><code>pattern | pattern-expr pattern |
+ * pattern-expr op pattern<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>op :=&nbsp; </code></td>
+ * <td valign="top"><code>'&amp;' | '-'<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>special :=&nbsp; </code></td>
+ * <td valign="top"><code>'[' | ']' | '-'<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>char :=&nbsp; </code></td>
+ * <td valign="top"><em>any character that is not</em><code> special<br>
+ * | ('\' </code><em>any character</em><code>)<br>
+ * | ('\\u' hex hex hex hex)<br>
+ * </code></td>
+ * </tr>
+ * <tr align="top">
+ * <td nowrap valign="top" align="right"><code>hex :=&nbsp; </code></td>
+ * <td valign="top"><code>'0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' |<br>
+ * &nbsp;&nbsp;&nbsp;&nbsp;'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f'</code></td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top" align="right"><code>property :=&nbsp; </code></td>
+ * <td valign="top"><em>a Unicode property set pattern</em></td>
+ * </tr>
+ * </table>
+ * <br>
+ * <table border="1">
+ * <tr>
+ * <td>Legend: <table>
+ * <tr>
+ * <td nowrap valign="top"><code>a := b</code></td>
+ * <td width="20" valign="top">&nbsp; </td>
+ * <td valign="top"><code>a</code> may be replaced by <code>b</code> </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a?</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">zero or one instance of <code>a</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a*</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">one or more instances of <code>a</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>a | b</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">either <code>a</code> or <code>b</code><br>
+ * </td>
+ * </tr>
+ * <tr>
+ * <td nowrap valign="top"><code>'a'</code></td>
+ * <td valign="top"></td>
+ * <td valign="top">the literal string between the quotes </td>
+ * </tr>
+ * </table>
+ * </td>
+ * </tr>
+ * </table>
+ * \htmlonly</blockquote>\endhtmlonly
+ *
+ * <p>Note:
+ * - Most UnicodeSet methods do not take a UErrorCode parameter because
+ * there are usually very few opportunities for failure other than a shortage
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
+ * and the error code as the last parameter (ICU convention) would prevent
+ * the use of default parameter values.
+ * Instead, such methods set the UnicodeSet into a "bogus" state
+ * (see isBogus()) if an error occurs.
+ *
+ * @author Alan Liu
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeSet final : public UnicodeFilter {
+private:
+ /**
+ * Enough for sets with few ranges.
+ * For example, White_Space has 10 ranges, list length 21.
+ */
+ static constexpr int32_t INITIAL_CAPACITY = 25;
+ // fFlags constant
+ static constexpr uint8_t kIsBogus = 1; // This set is bogus (i.e. not valid)
+
+ UChar32* list = stackList; // MUST be terminated with HIGH
+ int32_t capacity = INITIAL_CAPACITY; // capacity of list
+ int32_t len = 1; // length of list used; 1 <= len <= capacity
+ uint8_t fFlags = 0; // Bit flag (see constants above)
+
+ BMPSet *bmpSet = nullptr; // The set is frozen iff either bmpSet or stringSpan is not nullptr.
+ UChar32* buffer = nullptr; // internal buffer, may be nullptr
+ int32_t bufferCapacity = 0; // capacity of buffer
+
+ /**
+ * The pattern representation of this set. This may not be the
+ * most economical pattern. It is the pattern supplied to
+ * applyPattern(), with variables substituted and whitespace
+ * removed. For sets constructed without applyPattern(), or
+ * modified using the non-pattern API, this string will be empty,
+ * indicating that toPattern() must generate a pattern
+ * representation from the inversion list.
+ */
+ char16_t *pat = nullptr;
+ int32_t patLen = 0;
+
+ UVector* strings = nullptr; // maintained in sorted order
+ UnicodeSetStringSpan *stringSpan = nullptr;
+
+ /**
+ * Initial list array.
+ * Avoids some heap allocations, and list is never nullptr.
+ * Increases the object size a bit.
+ */
+ UChar32 stackList[INITIAL_CAPACITY];
+
+public:
+ /**
+ * Determine if this object contains a valid set.
+ * A bogus set has no value. It is different from an empty set.
+ * It can be used to indicate that no set value is available.
+ *
+ * @return true if the set is bogus/invalid, false otherwise
+ * @see setToBogus()
+ * @stable ICU 4.0
+ */
+ inline UBool isBogus(void) const;
+
+ /**
+ * Make this UnicodeSet object invalid.
+ * The string will test true with isBogus().
+ *
+ * A bogus set has no value. It is different from an empty set.
+ * It can be used to indicate that no set value is available.
+ *
+ * This utility function is used throughout the UnicodeSet
+ * implementation to indicate that a UnicodeSet operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * @see isBogus()
+ * @stable ICU 4.0
+ */
+ void setToBogus();
+
+public:
+
+ enum {
+ /**
+ * Minimum value that can be stored in a UnicodeSet.
+ * @stable ICU 2.4
+ */
+ MIN_VALUE = 0,
+
+ /**
+ * Maximum value that can be stored in a UnicodeSet.
+ * @stable ICU 2.4
+ */
+ MAX_VALUE = 0x10ffff
+ };
+
+ //----------------------------------------------------------------
+ // Constructors &c
+ //----------------------------------------------------------------
+
+public:
+
+ /**
+ * Constructs an empty set.
+ * @stable ICU 2.0
+ */
+ UnicodeSet();
+
+ /**
+ * Constructs a set containing the given range. If <code>end <
+ * start</code> then an empty set is created.
+ *
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 2.4
+ */
+ UnicodeSet(UChar32 start, UChar32 end);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * @internal
+ */
+ enum ESerialization {
+ kSerialized /* result of serialize() */
+ };
+
+ /**
+ * Constructs a set from the output of serialize().
+ *
+ * @param buffer the 16 bit array
+ * @param bufferLen the original length returned from serialize()
+ * @param serialization the value 'kSerialized'
+ * @param status error code
+ *
+ * @internal
+ */
+ UnicodeSet(const uint16_t buffer[], int32_t bufferLen,
+ ESerialization serialization, UErrorCode &status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Constructs a set from the given pattern. See the class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @stable ICU 2.0
+ */
+ UnicodeSet(const UnicodeString& pattern,
+ UErrorCode& status);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Constructs a set from the given pattern. See the class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be nullptr
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @internal
+ */
+ UnicodeSet(const UnicodeString& pattern,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Constructs a set from the given pattern. See the class description
+ * for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param pos on input, the position in pattern at which to start parsing.
+ * On output, the position after the last character parsed.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * @param symbols a symbol table mapping variable names to values
+ * and stand-in characters to UnicodeSets; may be nullptr
+ * @param status input-output error code
+ * @stable ICU 2.8
+ */
+ UnicodeSet(const UnicodeString& pattern, ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Constructs a set that is identical to the given UnicodeSet.
+ * @stable ICU 2.0
+ */
+ UnicodeSet(const UnicodeSet& o);
+
+ /**
+ * Destructs the set.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeSet();
+
+ /**
+ * Assigns this object to be a copy of another.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ UnicodeSet& operator=(const UnicodeSet& o);
+
+ /**
+ * Compares the specified object with this set for equality. Returns
+ * <tt>true</tt> if the two sets
+ * have the same size, and every member of the specified set is
+ * contained in this set (or equivalently, every member of this set is
+ * contained in the specified set).
+ *
+ * @param o set to be compared for equality with this set.
+ * @return <tt>true</tt> if the specified set is equal to this set.
+ * @stable ICU 2.0
+ */
+ virtual bool operator==(const UnicodeSet& o) const;
+
+ /**
+ * Compares the specified object with this set for equality. Returns
+ * <tt>true</tt> if the specified set is not equal to this set.
+ * @stable ICU 2.0
+ */
+ inline bool operator!=(const UnicodeSet& o) const;
+
+ /**
+ * Returns a copy of this object. All UnicodeFunctor objects have
+ * to support cloning in order to allow classes using
+ * UnicodeFunctors, such as Transliterator, to implement cloning.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use cloneAsThawed() for a mutable clone of a frozen set.
+ * @see cloneAsThawed
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet* clone() const override;
+
+ /**
+ * Returns the hash code value for this set.
+ *
+ * @return the hash code value for this set.
+ * @see Object#hashCode()
+ * @stable ICU 2.0
+ */
+ virtual int32_t hashCode(void) const;
+
+ /**
+ * Get a UnicodeSet pointer from a USet
+ *
+ * @param uset a USet (the ICU plain C type for UnicodeSet)
+ * @return the corresponding UnicodeSet pointer.
+ *
+ * @stable ICU 4.2
+ */
+ inline static UnicodeSet *fromUSet(USet *uset);
+
+ /**
+ * Get a UnicodeSet pointer from a const USet
+ *
+ * @param uset a const USet (the ICU plain C type for UnicodeSet)
+ * @return the corresponding UnicodeSet pointer.
+ *
+ * @stable ICU 4.2
+ */
+ inline static const UnicodeSet *fromUSet(const USet *uset);
+
+ /**
+ * Produce a USet * pointer for this UnicodeSet.
+ * USet is the plain C type for UnicodeSet
+ *
+ * @return a USet pointer for this UnicodeSet
+ * @stable ICU 4.2
+ */
+ inline USet *toUSet();
+
+
+ /**
+ * Produce a const USet * pointer for this UnicodeSet.
+ * USet is the plain C type for UnicodeSet
+ *
+ * @return a const USet pointer for this UnicodeSet
+ * @stable ICU 4.2
+ */
+ inline const USet * toUSet() const;
+
+
+ //----------------------------------------------------------------
+ // Freezable API
+ //----------------------------------------------------------------
+
+ /**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @return true/false for whether the set has been frozen
+ * @see freeze
+ * @see cloneAsThawed
+ * @stable ICU 3.8
+ */
+ inline UBool isFrozen() const;
+
+ /**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * contains() and span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @return this set.
+ * @see isFrozen
+ * @see cloneAsThawed
+ * @stable ICU 3.8
+ */
+ UnicodeSet *freeze();
+
+ /**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @return the mutable clone
+ * @see freeze
+ * @see isFrozen
+ * @stable ICU 3.8
+ */
+ UnicodeSet *cloneAsThawed() const;
+
+ //----------------------------------------------------------------
+ // Public API
+ //----------------------------------------------------------------
+
+ /**
+ * Make this object represent the range `start - end`.
+ * If `start > end` then this object is set to an empty range.
+ * A frozen set will not be modified.
+ *
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 2.4
+ */
+ UnicodeSet& set(UChar32 start, UChar32 end);
+
+ /**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ * @stable ICU 2.4
+ */
+ static UBool resemblesPattern(const UnicodeString& pattern,
+ int32_t pos);
+
+ /**
+ * Modifies this set to represent the set specified by the given
+ * pattern, ignoring Unicode Pattern_White_Space characters.
+ * See the class description for the syntax of the pattern language.
+ * A frozen set will not be modified.
+ * @param pattern a string specifying what characters are in the set
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * <em> Empties the set passed before applying the pattern.</em>
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ UErrorCode& status);
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * Modifies this set to represent the set specified by the given
+ * pattern, optionally ignoring Unicode Pattern_White_Space characters.
+ * See the class description for the syntax of the pattern language.
+ * A frozen set will not be modified.
+ * @param pattern a string specifying what characters are in the set
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be nullptr
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ *<em> Empties the set passed before applying the pattern.</em>
+ * @return a reference to this
+ * @internal
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+#endif /* U_HIDE_INTERNAL_API */
+
+ /**
+ * Parses the given pattern, starting at the given position. The
+ * character at pattern.charAt(pos.getIndex()) must be '[', or the
+ * parse fails. Parsing continues until the corresponding closing
+ * ']'. If a syntax error is encountered between the opening and
+ * closing brace, the parse fails. Upon return from a successful
+ * parse, the ParsePosition is updated to point to the character
+ * following the closing ']', and a StringBuffer containing a
+ * pairs list for the parsed pattern is returned. This method calls
+ * itself recursively to parse embedded subpatterns.
+ *<em> Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ *
+ * @param pattern the string containing the pattern to be parsed.
+ * The portion of the string from pos.getIndex(), which must be a
+ * '[', to the corresponding closing ']', is parsed.
+ * @param pos upon entry, the position at which to being parsing.
+ * The character at pattern.charAt(pos.getIndex()) must be a '['.
+ * Upon return from a successful parse, pos.getIndex() is either
+ * the character after the closing ']' of the parsed pattern, or
+ * pattern.length() if the closing ']' is the last character of
+ * the pattern string.
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * @param symbols a symbol table mapping variable names to
+ * values and stand-ins to UnicodeSets; may be nullptr
+ * @param status returns <code>U_ILLEGAL_ARGUMENT_ERROR</code> if the pattern
+ * contains a syntax error.
+ * @return a reference to this
+ * @stable ICU 2.8
+ */
+ UnicodeSet& applyPattern(const UnicodeString& pattern,
+ ParsePosition& pos,
+ uint32_t options,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ /**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a UnicodeSet constructor, it
+ * will produce another set that is equal to this one.
+ * A frozen set will not be modified.
+ * @param result the string to receive the rules. Previous
+ * contents will be deleted.
+ * @param escapeUnprintable if true then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeString& toPattern(UnicodeString& result,
+ UBool escapeUnprintable = false) const override;
+
+ /**
+ * Modifies this set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @return a reference to this set
+ *
+ * @stable ICU 2.4
+ */
+ UnicodeSet& applyIntPropertyValue(UProperty prop,
+ int32_t value,
+ UErrorCode& ec);
+
+ /**
+ * Modifies this set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param prop a property alias, either short or long. The name is matched
+ * loosely. See PropertyAliases.txt for names and a description of loose
+ * matching. If the value string is empty, then this string is interpreted
+ * as either a General_Category value alias, a Script value alias, a binary
+ * property alias, or a special ID. Special IDs are matched loosely and
+ * correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param value a value alias, either short or long. The name is matched
+ * loosely. See PropertyValueAliases.txt for names and a description of
+ * loose matching. In addition to aliases listed, numeric values and
+ * canonical combining classes may be expressed numerically, e.g., ("nv",
+ * "0.5") or ("ccc", "220"). The value string may also be empty.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @return a reference to this set
+ *
+ * @stable ICU 2.4
+ */
+ UnicodeSet& applyPropertyAlias(const UnicodeString& prop,
+ const UnicodeString& value,
+ UErrorCode& ec);
+
+ /**
+ * Returns the number of elements in this set (its cardinality).
+ * Note than the elements of a set may include both individual
+ * codepoints and strings.
+ *
+ * This is slower than getRangeCount() because
+ * it counts the code points of all ranges.
+ *
+ * @return the number of elements in this set (its cardinality).
+ * @stable ICU 2.0
+ * @see getRangeCount
+ */
+ virtual int32_t size(void) const;
+
+ /**
+ * Returns <tt>true</tt> if this set contains no elements.
+ *
+ * @return <tt>true</tt> if this set contains no elements.
+ * @stable ICU 2.0
+ */
+ virtual UBool isEmpty(void) const;
+
+ /**
+ * @return true if this set contains multi-character strings or the empty string.
+ * @stable ICU 70
+ */
+ UBool hasStrings() const;
+
+ /**
+ * Returns true if this set contains the given character.
+ * This function works faster with a frozen set.
+ * @param c character to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 c) const override;
+
+ /**
+ * Returns true if this set contains every character
+ * of the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ * @stable ICU 2.0
+ */
+ virtual UBool contains(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns <tt>true</tt> if this set contains the given
+ * multicharacter string.
+ * @param s string to be checked for containment
+ * @return <tt>true</tt> if this set contains the specified string
+ * @stable ICU 2.4
+ */
+ UBool contains(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains all the characters and strings
+ * of the given set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ virtual UBool containsAll(const UnicodeSet& c) const;
+
+ /**
+ * Returns true if this set contains all the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsAll(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains none of the characters
+ * of the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains none of the characters and strings
+ * of the given set.
+ * @param c set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(const UnicodeSet& c) const;
+
+ /**
+ * Returns true if this set contains none of the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 2.4
+ */
+ UBool containsNone(const UnicodeString& s) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * in the given range.
+ * @param start first character, inclusive, of the range
+ * @param end last character, inclusive, of the range
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(UChar32 start, UChar32 end) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * and strings of the given set.
+ * @param s The set to be checked for containment
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(const UnicodeSet& s) const;
+
+ /**
+ * Returns true if this set contains one or more of the characters
+ * of the given string.
+ * @param s string containing characters to be checked for containment
+ * @return true if the condition is met
+ * @stable ICU 2.4
+ */
+ inline UBool containsSome(const UnicodeString& s) const;
+
+ /**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the end of the substring of the input string according to the USetSpanCondition.
+ * Same as <code>start+span(s.getBuffer()+start, s.length()-start, spanCondition)</code>
+ * after pinning start to 0<=start<=s.length().
+ * @param s the string
+ * @param start the start index in the string for the span operation
+ * @param spanCondition specifies the containment condition
+ * @return the exclusive end of the substring according to the spanCondition;
+ * the substring s.tempSubStringBetween(start, end) fulfills the spanCondition
+ * @stable ICU 4.4
+ * @see USetSpanCondition
+ */
+ inline int32_t span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the substring of the input string according to the USetSpanCondition.
+ * Same as <code>spanBack(s.getBuffer(), limit, spanCondition)</code>
+ * after pinning limit to 0<=end<=s.length().
+ * @param s the string
+ * @param limit the exclusive-end index in the string for the span operation
+ * (use s.length() or INT32_MAX for spanning back from the end of the string)
+ * @param spanCondition specifies the containment condition
+ * @return the start of the substring according to the spanCondition;
+ * the substring s.tempSubStringBetween(start, limit) fulfills the spanCondition
+ * @stable ICU 4.4
+ * @see USetSpanCondition
+ */
+ inline int32_t spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+ int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const;
+
+ /**
+ * Implement UnicodeMatcher::matches()
+ * @stable ICU 2.4
+ */
+ virtual UMatchDegree matches(const Replaceable& text,
+ int32_t& offset,
+ int32_t limit,
+ UBool incremental) override;
+
+private:
+ /**
+ * Returns the longest match for s in text at the given position.
+ * If limit > start then match forward from start+1 to limit
+ * matching all characters except s.charAt(0). If limit < start,
+ * go backward starting from start-1 matching all characters
+ * except s.charAt(s.length()-1). This method assumes that the
+ * first character, text.charAt(start), matches s, so it does not
+ * check it.
+ * @param text the text to match
+ * @param start the first character to match. In the forward
+ * direction, text.charAt(start) is matched against s.charAt(0).
+ * In the reverse direction, it is matched against
+ * s.charAt(s.length()-1).
+ * @param limit the limit offset for matching, either last+1 in
+ * the forward direction, or last-1 in the reverse direction,
+ * where last is the index of the last character to match.
+ * @param s
+ * @return If part of s matches up to the limit, return |limit -
+ * start|. If all of s matches before reaching the limit, return
+ * s.length(). If there is a mismatch between s and text, return
+ * 0
+ */
+ static int32_t matchRest(const Replaceable& text,
+ int32_t start, int32_t limit,
+ const UnicodeString& s);
+
+ /**
+ * Returns the smallest value i such that c < list[i]. Caller
+ * must ensure that c is a legal value or this method will enter
+ * an infinite loop. This method performs a binary search.
+ * @param c a character in the range MIN_VALUE..MAX_VALUE
+ * inclusive
+ * @return the smallest integer i in the range 0..len-1,
+ * inclusive, such that c < list[i]
+ */
+ int32_t findCodePoint(UChar32 c) const;
+
+public:
+
+ /**
+ * Implementation of UnicodeMatcher API. Union the set of all
+ * characters that may be matched by this object into the given
+ * set.
+ * @param toUnionTo the set into which to union the source characters
+ * @stable ICU 2.4
+ */
+ virtual void addMatchSetTo(UnicodeSet& toUnionTo) const override;
+
+ /**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * <code>charAt()</code>.
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 2.4
+ */
+ int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range for characters, returns (UChar32)-1.
+ * The inverse of this method is <code>indexOf()</code>.
+ *
+ * For iteration, this is slower than UnicodeSetIterator or
+ * getRangeCount()/getRangeStart()/getRangeEnd(),
+ * because for each call it skips linearly over <code>index</code>
+ * characters in the ranges.
+ *
+ * @param index an index from 0..size()-1
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 2.4
+ */
+ UChar32 charAt(int32_t index) const;
+
+ /**
+ * Adds the specified range to this set if it is not already
+ * present. If this set already contains the specified range,
+ * the call leaves this set unchanged. If <code>start > end</code>
+ * then an empty range is added, leaving the set unchanged.
+ * This is equivalent to a boolean logic OR, or a set UNION.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be added
+ * to this set.
+ * @param end last character, inclusive, of range to be added
+ * to this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& add(UChar32 start, UChar32 end);
+
+ /**
+ * Adds the specified character to this set if it is not already
+ * present. If this set already contains the specified character,
+ * the call leaves this set unchanged.
+ * A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
+ * @stable ICU 2.0
+ */
+ UnicodeSet& add(UChar32 c);
+
+ /**
+ * Adds the specified multicharacter to this set if it is not already
+ * present. If this set already contains the multicharacter,
+ * the call leaves this set unchanged.
+ * Thus "ch" => {"ch"}
+ * A frozen set will not be modified.
+ *
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& add(const UnicodeString& s);
+
+ private:
+ /**
+ * @return a code point IF the string consists of a single one.
+ * otherwise returns -1.
+ * @param s string to test
+ */
+ static int32_t getSingleCP(const UnicodeString& s);
+
+ void _add(const UnicodeString& s);
+
+ public:
+ /**
+ * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& addAll(const UnicodeString& s);
+
+ /**
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& retainAll(const UnicodeString& s);
+
+ /**
+ * Complement EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& complementAll(const UnicodeString& s);
+
+ /**
+ * Remove EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& removeAll(const UnicodeString& s);
+
+ /**
+ * Makes a set from a multicharacter string. Thus "ch" => {"ch"}
+ *
+ * @param s the source string
+ * @return a newly created set containing the given string.
+ * The caller owns the return object and is responsible for deleting it.
+ * @stable ICU 2.4
+ */
+ static UnicodeSet* U_EXPORT2 createFrom(const UnicodeString& s);
+
+
+ /**
+ * Makes a set from each of the characters in the string. Thus "ch" => {"c", "h"}
+ * @param s the source string
+ * @return a newly created set containing the given characters
+ * The caller owns the return object and is responsible for deleting it.
+ * @stable ICU 2.4
+ */
+ static UnicodeSet* U_EXPORT2 createFromAll(const UnicodeString& s);
+
+ /**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& retain(UChar32 start, UChar32 end);
+
+
+ /**
+ * Retain the specified character from this set if it is present.
+ * A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
+ * @stable ICU 2.0
+ */
+ UnicodeSet& retain(UChar32 c);
+
+ /**
+ * Retains only the specified string from this set if it is present.
+ * Upon return this set will be empty if it did not contain s, or
+ * will only contain s if it did contain s.
+ * A frozen set will not be modified.
+ *
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 69
+ */
+ UnicodeSet& retain(const UnicodeString &s);
+
+ /**
+ * Removes the specified range from this set if it is present.
+ * The set will not contain the specified range once the call
+ * returns. If <code>start > end</code> then an empty range is
+ * removed, leaving the set unchanged.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range to be removed
+ * from this set.
+ * @param end last character, inclusive, of range to be removed
+ * from this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& remove(UChar32 start, UChar32 end);
+
+ /**
+ * Removes the specified character from this set if it is present.
+ * The set will not contain the specified range once the call
+ * returns.
+ * A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
+ * @stable ICU 2.0
+ */
+ UnicodeSet& remove(UChar32 c);
+
+ /**
+ * Removes the specified string from this set if it is present.
+ * The set will not contain the specified character once the call
+ * returns.
+ * A frozen set will not be modified.
+ * @param s the source string
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& remove(const UnicodeString& s);
+
+ /**
+ * This is equivalent to
+ * <code>complement(MIN_VALUE, MAX_VALUE)</code>.
+ *
+ * <strong>Note:</strong> This performs a symmetric difference with all code points
+ * <em>and thus retains all multicharacter strings</em>.
+ * In order to achieve a “code point complement” (all code points minus this set),
+ * the easiest is to <code>.complement().removeAllStrings()</code>.
+ *
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& complement();
+
+ /**
+ * Complements the specified range in this set. Any character in
+ * the range will be removed if it is in this set, or will be
+ * added if it is not in this set. If <code>start > end</code>
+ * then an empty range is complemented, leaving the set unchanged.
+ * This is equivalent to a boolean logic XOR.
+ * A frozen set will not be modified.
+ *
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& complement(UChar32 start, UChar32 end);
+
+ /**
+ * Complements the specified character in this set. The character
+ * will be removed if it is in this set, or will be added if it is
+ * not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param c the character (code point)
+ * @return this object, for chaining
+ * @stable ICU 2.0
+ */
+ UnicodeSet& complement(UChar32 c);
+
+ /**
+ * Complement the specified string in this set.
+ * The string will be removed if it is in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param s the string to complement
+ * @return this object, for chaining
+ * @stable ICU 2.4
+ */
+ UnicodeSet& complement(const UnicodeString& s);
+
+ /**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param c set whose elements are to be added to this set.
+ * @see #add(UChar32, UChar32)
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& addAll(const UnicodeSet& c);
+
+ /**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements this set will retain.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& retainAll(const UnicodeSet& c);
+
+ /**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements will be removed from
+ * this set.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& removeAll(const UnicodeSet& c);
+
+ /**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param c set that defines which elements will be xor'ed from
+ * this set.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet& complementAll(const UnicodeSet& c);
+
+ /**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @stable ICU 2.0
+ */
+ virtual UnicodeSet& clear(void);
+
+ /**
+ * Close this set over the given attribute. For the attribute
+ * USET_CASE_INSENSITIVE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param attribute bitmask for attributes to close over.
+ * Valid options:
+ * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * Unrelated options bits are ignored.
+ * @return a reference to this set.
+ * @stable ICU 4.2
+ */
+ UnicodeSet& closeOver(int32_t attribute);
+
+ /**
+ * Remove all strings from this set.
+ *
+ * @return a reference to this set.
+ * @stable ICU 4.2
+ */
+ virtual UnicodeSet &removeAllStrings();
+
+ /**
+ * Iteration method that returns the number of ranges contained in
+ * this set.
+ * @see #getRangeStart
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual int32_t getRangeCount(void) const;
+
+ /**
+ * Iteration method that returns the first character in the
+ * specified range of this set.
+ * @see #getRangeCount
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getRangeStart(int32_t index) const;
+
+ /**
+ * Iteration method that returns the last character in the
+ * specified range of this set.
+ * @see #getRangeStart
+ * @see #getRangeEnd
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getRangeEnd(int32_t index) const;
+
+ /**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array has following format (each line is one 16-bit
+ * integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be nullptr only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param ec error code. Will be set to U_INDEX_OUTOFBOUNDS_ERROR
+ * if n+2*m > 0x7FFF. Will be set to U_BUFFER_OVERFLOW_ERROR if
+ * n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+ int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode& ec) const;
+
+ /**
+ * Reallocate this objects internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ * @stable ICU 2.4
+ */
+ virtual UnicodeSet& compact();
+
+ /**
+ * Return the class ID for this class. This is useful only for
+ * comparing to a return value from getDynamicClassID(). For example:
+ * <pre>
+ * . Base* polymorphic_pointer = createPolymorphicObject();
+ * . if (polymorphic_pointer->getDynamicClassID() ==
+ * . Derived::getStaticClassID()) ...
+ * </pre>
+ * @return The class ID for all objects of this class.
+ * @stable ICU 2.0
+ */
+ static UClassID U_EXPORT2 getStaticClassID(void);
+
+ /**
+ * Implement UnicodeFunctor API.
+ *
+ * @return The class ID for this object. All objects of a given
+ * class have the same class ID. Objects of other classes have
+ * different class IDs.
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID(void) const override;
+
+private:
+
+ // Private API for the USet API
+
+ friend class USetAccess;
+
+ const UnicodeString* getString(int32_t index) const;
+
+ //----------------------------------------------------------------
+ // RuleBasedTransliterator support
+ //----------------------------------------------------------------
+
+private:
+
+ /**
+ * Returns <tt>true</tt> if this set contains any character whose low byte
+ * is the given value. This is used by <tt>RuleBasedTransliterator</tt> for
+ * indexing.
+ */
+ virtual UBool matchesIndexValue(uint8_t v) const override;
+
+private:
+ friend class RBBIRuleScanner;
+
+ //----------------------------------------------------------------
+ // Implementation: Clone as thawed (see ICU4J Freezable)
+ //----------------------------------------------------------------
+
+ UnicodeSet(const UnicodeSet& o, UBool /* asThawed */);
+ UnicodeSet& copyFrom(const UnicodeSet& o, UBool asThawed);
+
+ //----------------------------------------------------------------
+ // Implementation: Pattern parsing
+ //----------------------------------------------------------------
+
+ void applyPatternIgnoreSpace(const UnicodeString& pattern,
+ ParsePosition& pos,
+ const SymbolTable* symbols,
+ UErrorCode& status);
+
+ void applyPattern(RuleCharacterIterator& chars,
+ const SymbolTable* symbols,
+ UnicodeString& rebuiltPat,
+ uint32_t options,
+ UnicodeSet& (UnicodeSet::*caseClosure)(int32_t attribute),
+ int32_t depth,
+ UErrorCode& ec);
+
+ void closeOverCaseInsensitive(bool simple);
+ void closeOverAddCaseMappings();
+
+ //----------------------------------------------------------------
+ // Implementation: Utility methods
+ //----------------------------------------------------------------
+
+ static int32_t nextCapacity(int32_t minCapacity);
+
+ bool ensureCapacity(int32_t newLen);
+
+ bool ensureBufferCapacity(int32_t newLen);
+
+ void swapBuffers(void);
+
+ UBool allocateStrings(UErrorCode &status);
+ int32_t stringsSize() const;
+ UBool stringsContains(const UnicodeString &s) const;
+
+ UnicodeString& _toPattern(UnicodeString& result,
+ UBool escapeUnprintable) const;
+
+ UnicodeString& _generatePattern(UnicodeString& result,
+ UBool escapeUnprintable) const;
+
+ static void _appendToPat(UnicodeString& buf, const UnicodeString& s, UBool escapeUnprintable);
+
+ static void _appendToPat(UnicodeString& buf, UChar32 c, UBool escapeUnprintable);
+
+ static void _appendToPat(UnicodeString &result, UChar32 start, UChar32 end,
+ UBool escapeUnprintable);
+
+ //----------------------------------------------------------------
+ // Implementation: Fundamental operators
+ //----------------------------------------------------------------
+
+ void exclusiveOr(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ void add(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ void retain(const UChar32* other, int32_t otherLen, int8_t polarity);
+
+ /**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a property set pattern [:foo:], \\p{foo}, or
+ * \\P{foo}, or \\N{name}.
+ */
+ static UBool resemblesPropertyPattern(const UnicodeString& pattern,
+ int32_t pos);
+
+ static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
+ int32_t iterOpts);
+
+ /**
+ * Parse the given property pattern at the given parse position
+ * and set this UnicodeSet to the result.
+ *
+ * The original design document is out of date, but still useful.
+ * Ignore the property and value names:
+ * https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/unicodeset_properties.html
+ *
+ * Recognized syntax:
+ *
+ * [:foo:] [:^foo:] - white space not allowed within "[:" or ":]"
+ * \\p{foo} \\P{foo} - white space not allowed within "\\p" or "\\P"
+ * \\N{name} - white space not allowed within "\\N"
+ *
+ * Other than the above restrictions, Unicode Pattern_White_Space characters are ignored.
+ * Case is ignored except in "\\p" and "\\P" and "\\N". In 'name' leading
+ * and trailing space is deleted, and internal runs of whitespace
+ * are collapsed to a single space.
+ *
+ * We support binary properties, enumerated properties, and the
+ * following non-enumerated properties:
+ *
+ * Numeric_Value
+ * Name
+ * Unicode_1_Name
+ *
+ * @param pattern the pattern string
+ * @param ppos on entry, the position at which to begin parsing.
+ * This should be one of the locations marked '^':
+ *
+ * [:blah:] \\p{blah} \\P{blah} \\N{name}
+ * ^ % ^ % ^ % ^ %
+ *
+ * On return, the position after the last character parsed, that is,
+ * the locations marked '%'. If the parse fails, ppos is returned
+ * unchanged.
+ * @param ec status
+ * @return a reference to this.
+ */
+ UnicodeSet& applyPropertyPattern(const UnicodeString& pattern,
+ ParsePosition& ppos,
+ UErrorCode &ec);
+
+ void applyPropertyPattern(RuleCharacterIterator& chars,
+ UnicodeString& rebuiltPat,
+ UErrorCode& ec);
+
+ /**
+ * A filter that returns true if the given code point should be
+ * included in the UnicodeSet being constructed.
+ */
+ typedef UBool (*Filter)(UChar32 codePoint, void* context);
+
+ /**
+ * Given a filter, set this UnicodeSet to the code points
+ * contained by that filter. The filter MUST be
+ * property-conformant. That is, if it returns value v for one
+ * code point, then it must return v for all affiliated code
+ * points, as defined by the inclusions list. See
+ * getInclusions().
+ * src is a UPropertySource value.
+ */
+ void applyFilter(Filter filter,
+ void* context,
+ const UnicodeSet* inclusions,
+ UErrorCode &status);
+
+ /**
+ * Set the new pattern to cache.
+ */
+ void setPattern(const UnicodeString& newPat) {
+ setPattern(newPat.getBuffer(), newPat.length());
+ }
+ void setPattern(const char16_t *newPat, int32_t newPatLen);
+ /**
+ * Release existing cached pattern.
+ */
+ void releasePattern();
+
+ friend class UnicodeSetIterator;
+};
+
+
+
+inline bool UnicodeSet::operator!=(const UnicodeSet& o) const {
+ return !operator==(o);
+}
+
+inline UBool UnicodeSet::isFrozen() const {
+ return (UBool)(bmpSet!=nullptr || stringSpan!=nullptr);
+}
+
+inline UBool UnicodeSet::containsSome(UChar32 start, UChar32 end) const {
+ return !containsNone(start, end);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeSet& s) const {
+ return !containsNone(s);
+}
+
+inline UBool UnicodeSet::containsSome(const UnicodeString& s) const {
+ return !containsNone(s);
+}
+
+inline UBool UnicodeSet::isBogus() const {
+ return (UBool)(fFlags & kIsBogus);
+}
+
+inline UnicodeSet *UnicodeSet::fromUSet(USet *uset) {
+ return reinterpret_cast<UnicodeSet *>(uset);
+}
+
+inline const UnicodeSet *UnicodeSet::fromUSet(const USet *uset) {
+ return reinterpret_cast<const UnicodeSet *>(uset);
+}
+
+inline USet *UnicodeSet::toUSet() {
+ return reinterpret_cast<USet *>(this);
+}
+
+inline const USet *UnicodeSet::toUSet() const {
+ return reinterpret_cast<const USet *>(this);
+}
+
+inline int32_t UnicodeSet::span(const UnicodeString &s, int32_t start, USetSpanCondition spanCondition) const {
+ int32_t sLength=s.length();
+ if(start<0) {
+ start=0;
+ } else if(start>sLength) {
+ start=sLength;
+ }
+ return start+span(s.getBuffer()+start, sLength-start, spanCondition);
+}
+
+inline int32_t UnicodeSet::spanBack(const UnicodeString &s, int32_t limit, USetSpanCondition spanCondition) const {
+ int32_t sLength=s.length();
+ if(limit<0) {
+ limit=0;
+ } else if(limit>sLength) {
+ limit=sLength;
+ }
+ return spanBack(s.getBuffer(), limit, spanCondition);
+}
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/unistr.h b/intl/icu/source/common/unicode/unistr.h
new file mode 100644
index 0000000000..4074e8d07b
--- /dev/null
+++ b/intl/icu/source/common/unicode/unistr.h
@@ -0,0 +1,4784 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1998-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File unistr.h
+*
+* Modification History:
+*
+* Date Name Description
+* 09/25/98 stephen Creation.
+* 11/11/98 stephen Changed per 11/9 code review.
+* 04/20/99 stephen Overhauled per 4/16 code review.
+* 11/18/99 aliu Made to inherit from Replaceable. Added method
+* handleReplaceBetween(); other methods unchanged.
+* 06/25/01 grhoten Remove dependency on iostream.
+******************************************************************************
+*/
+
+#ifndef UNISTR_H
+#define UNISTR_H
+
+/**
+ * \file
+ * \brief C++ API: Unicode String
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include <cstddef>
+#include "unicode/char16ptr.h"
+#include "unicode/rep.h"
+#include "unicode/std_string.h"
+#include "unicode/stringpiece.h"
+#include "unicode/bytestream.h"
+
+struct UConverter; // unicode/ucnv.h
+
+#ifndef USTRING_H
+/**
+ * \ingroup ustring_ustrlen
+ * @param s Pointer to sequence of UChars.
+ * @return Length of sequence.
+ */
+U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
+#endif
+
+U_NAMESPACE_BEGIN
+
+#if !UCONFIG_NO_BREAK_ITERATION
+class BreakIterator; // unicode/brkiter.h
+#endif
+class Edits;
+
+U_NAMESPACE_END
+
+// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
+/**
+ * Internal string case mapping function type.
+ * All error checking must be done.
+ * src and dest must not overlap.
+ * @internal
+ */
+typedef int32_t U_CALLCONV
+UStringCaseMapper(int32_t caseLocale, uint32_t options,
+#if !UCONFIG_NO_BREAK_ITERATION
+ icu::BreakIterator *iter,
+#endif
+ char16_t *dest, int32_t destCapacity,
+ const char16_t *src, int32_t srcLength,
+ icu::Edits *edits,
+ UErrorCode &errorCode);
+
+U_NAMESPACE_BEGIN
+
+class Locale; // unicode/locid.h
+class StringCharacterIterator;
+class UnicodeStringAppendable; // unicode/appendable.h
+
+/* The <iostream> include has been moved to unicode/ustream.h */
+
+/**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * @stable ICU 3.2
+ */
+#define US_INV icu::UnicodeString::kInvariant
+
+/**
+ * Unicode String literals in C++.
+ *
+ * Note: these macros are not recommended for new code.
+ * Prior to the availability of C++11 and u"unicode string literals",
+ * these macros were provided for portability and efficiency when
+ * initializing UnicodeStrings from literals.
+ *
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * `NUL`, must be specified as a constant.
+ * @stable ICU 2.0
+ */
+#if !U_CHAR16_IS_TYPEDEF
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
+#else
+# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
+#endif
+
+/**
+ * Unicode String literals in C++.
+ * Dependent on the platform properties, different UnicodeString
+ * constructors should be used to create a UnicodeString object from
+ * a string literal.
+ * The macros are defined for improved performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * The string parameter must be a C string literal.
+ * @stable ICU 2.0
+ */
+#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
+
+/**
+ * \def UNISTR_FROM_CHAR_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(char16_t) and UnicodeString(UChar32)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_CHAR_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_CHAR_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_CHAR_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_FROM_STRING_EXPLICIT
+ * This can be defined to be empty or "explicit".
+ * If explicit, then the UnicodeString(const char *) and UnicodeString(const char16_t *)
+ * constructors are marked as explicit, preventing their inadvertent use.
+ *
+ * In particular, this helps prevent accidentally depending on ICU conversion code
+ * by passing a string literal into an API with a const UnicodeString & parameter.
+ * @stable ICU 49
+ */
+#ifndef UNISTR_FROM_STRING_EXPLICIT
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
+ // Auto-"explicit" in ICU library code.
+# define UNISTR_FROM_STRING_EXPLICIT explicit
+# else
+ // Empty by default for source code compatibility.
+# define UNISTR_FROM_STRING_EXPLICIT
+# endif
+#endif
+
+/**
+ * \def UNISTR_OBJECT_SIZE
+ * Desired sizeof(UnicodeString) in bytes.
+ * It should be a multiple of sizeof(pointer) to avoid unusable space for padding.
+ * The object size may want to be a multiple of 16 bytes,
+ * which is a common granularity for heap allocation.
+ *
+ * Any space inside the object beyond sizeof(vtable pointer) + 2
+ * is available for storing short strings inside the object.
+ * The bigger the object, the longer a string that can be stored inside the object,
+ * without additional heap allocation.
+ *
+ * Depending on a platform's pointer size, pointer alignment requirements,
+ * and struct padding, the compiler will usually round up sizeof(UnicodeString)
+ * to 4 * sizeof(pointer) (or 3 * sizeof(pointer) for P128 data models),
+ * to hold the fields for heap-allocated strings.
+ * Such a minimum size also ensures that the object is easily large enough
+ * to hold at least 2 char16_ts, for one supplementary code point (U16_MAX_LENGTH).
+ *
+ * sizeof(UnicodeString) >= 48 should work for all known platforms.
+ *
+ * For example, on a 64-bit machine where sizeof(vtable pointer) is 8,
+ * sizeof(UnicodeString) = 64 would leave space for
+ * (64 - sizeof(vtable pointer) - 2) / U_SIZEOF_UCHAR = (64 - 8 - 2) / 2 = 27
+ * char16_ts stored inside the object.
+ *
+ * The minimum object size on a 64-bit machine would be
+ * 4 * sizeof(pointer) = 4 * 8 = 32 bytes,
+ * and the internal buffer would hold up to 11 char16_ts in that case.
+ *
+ * @see U16_MAX_LENGTH
+ * @stable ICU 56
+ */
+#ifndef UNISTR_OBJECT_SIZE
+# define UNISTR_OBJECT_SIZE 64
+#endif
+
+/**
+ * UnicodeString is a string class that stores Unicode characters directly and provides
+ * similar functionality as the Java String and StringBuffer/StringBuilder classes.
+ * It is a concrete implementation of the abstract class Replaceable (for transliteration).
+ *
+ * The UnicodeString equivalent of std::string’s clear() is remove().
+ *
+ * A UnicodeString may "alias" an external array of characters
+ * (that is, point to it, rather than own the array)
+ * whose lifetime must then at least match the lifetime of the aliasing object.
+ * This aliasing may be preserved when returning a UnicodeString by value,
+ * depending on the compiler and the function implementation,
+ * via Return Value Optimization (RVO) or the move assignment operator.
+ * (However, the copy assignment operator does not preserve aliasing.)
+ * For details see the description of storage models at the end of the class API docs
+ * and in the User Guide chapter linked from there.
+ *
+ * The UnicodeString class is not suitable for subclassing.
+ *
+ * For an overview of Unicode strings in C and C++ see the
+ * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#strings-in-cc).
+ *
+ * In ICU, a Unicode string consists of 16-bit Unicode *code units*.
+ * A Unicode character may be stored with either one code unit
+ * (the most common case) or with a matched pair of special code units
+ * ("surrogates"). The data type for code units is char16_t.
+ * For single-character handling, a Unicode character code *point* is a value
+ * in the range 0..0x10ffff. ICU uses the UChar32 type for code points.
+ *
+ * Indexes and offsets into and lengths of strings always count code units, not code points.
+ * This is the same as with multi-byte char* strings in traditional string handling.
+ * Operations on partial strings typically do not test for code point boundaries.
+ * If necessary, the user needs to take care of such boundaries by testing for the code unit
+ * values or by using functions like
+ * UnicodeString::getChar32Start() and UnicodeString::getChar32Limit()
+ * (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).
+ *
+ * UnicodeString methods are more lenient with regard to input parameter values
+ * than other ICU APIs. In particular:
+ * - If indexes are out of bounds for a UnicodeString object
+ * (< 0 or > length()) then they are "pinned" to the nearest boundary.
+ * - If the buffer passed to an insert/append/replace operation is owned by the
+ * target object, e.g., calling str.append(str), an extra copy may take place
+ * to ensure safety.
+ * - If primitive string pointer values (e.g., const char16_t * or char *)
+ * for input strings are nullptr, then those input string parameters are treated
+ * as if they pointed to an empty string.
+ * However, this is *not* the case for char * parameters for charset names
+ * or other IDs.
+ * - Most UnicodeString methods do not take a UErrorCode parameter because
+ * there are usually very few opportunities for failure other than a shortage
+ * of memory, error codes in low-level C++ string methods would be inconvenient,
+ * and the error code as the last parameter (ICU convention) would prevent
+ * the use of default parameter values.
+ * Instead, such methods set the UnicodeString into a "bogus" state
+ * (see isBogus()) if an error occurs.
+ *
+ * In string comparisons, two UnicodeString objects that are both "bogus"
+ * compare equal (to be transitive and prevent endless loops in sorting),
+ * and a "bogus" string compares less than any non-"bogus" one.
+ *
+ * Const UnicodeString methods are thread-safe. Multiple threads can use
+ * const methods on the same UnicodeString object simultaneously,
+ * but non-const methods must not be called concurrently (in multiple threads)
+ * with any other (const or non-const) methods.
+ *
+ * Similarly, const UnicodeString & parameters are thread-safe.
+ * One object may be passed in as such a parameter concurrently in multiple threads.
+ * This includes the const UnicodeString & parameters for
+ * copy construction, assignment, and cloning.
+ *
+ * UnicodeString uses several storage methods.
+ * String contents can be stored inside the UnicodeString object itself,
+ * in an allocated and shared buffer, or in an outside buffer that is "aliased".
+ * Most of this is done transparently, but careful aliasing in particular provides
+ * significant performance improvements.
+ * Also, the internal buffer is accessible via special functions.
+ * For details see the
+ * [User Guide Strings chapter](https://unicode-org.github.io/icu/userguide/strings#maximizing-performance-with-the-unicodestring-storage-model).
+ *
+ * @see utf.h
+ * @see CharacterIterator
+ * @stable ICU 2.0
+ */
+class U_COMMON_API UnicodeString : public Replaceable
+{
+public:
+
+ /**
+ * Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor
+ * which constructs a Unicode string from an invariant-character char * string.
+ * Use the macro US_INV instead of the full qualification for this value.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ enum EInvariant {
+ /**
+ * @see EInvariant
+ * @stable ICU 3.2
+ */
+ kInvariant
+ };
+
+ //========================================
+ // Read-only operations
+ //========================================
+
+ /* Comparison - bitwise only - for international comparison use collation */
+
+ /**
+ * Equality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if `text` contains the same characters as this one,
+ * false otherwise.
+ * @stable ICU 2.0
+ */
+ inline bool operator== (const UnicodeString& text) const;
+
+ /**
+ * Inequality operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return false if `text` contains the same characters as this one,
+ * true otherwise.
+ * @stable ICU 2.0
+ */
+ inline bool operator!= (const UnicodeString& text) const;
+
+ /**
+ * Greater than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * greater than the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator> (const UnicodeString& text) const;
+
+ /**
+ * Less than operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * less than the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator< (const UnicodeString& text) const;
+
+ /**
+ * Greater than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * greater than or equal to the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator>= (const UnicodeString& text) const;
+
+ /**
+ * Less than or equal operator. Performs only bitwise comparison.
+ * @param text The UnicodeString to compare to this one.
+ * @return true if the characters in this are bitwise
+ * less than or equal to the characters in `text`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool operator<= (const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString to
+ * the characters in `text`.
+ * @param text The UnicodeString to compare to this one.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `text`, -1 if the characters in
+ * this are bitwise less than the characters in `text`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `text`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the characters
+ * in the **entire string** `text`.
+ * (The parameters "start" and "length" are not applied to the other text "text".)
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters of text to compare.
+ * @param text the other text to be compared against this string.
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `text`, -1 if the characters in
+ * this are bitwise less than the characters in `text`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `text`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& text) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare.
+ * @param srcText the text to be compared
+ * @param srcStart the offset into `srcText` to start comparison
+ * @param srcLength the number of characters in `src` to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcText`, -1 if the characters in
+ * this are bitwise less than the characters in `srcText`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcText`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in this UnicodeString with the first
+ * `srcLength` characters in `srcChars`.
+ * @param srcChars The characters to compare to this UnicodeString.
+ * @param srcLength the number of characters in `srcChars` to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcChars`, -1 if the characters in
+ * this are bitwise less than the characters in `srcChars`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcChars`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the first
+ * `length` characters in `srcChars`
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters to compare.
+ * @param srcChars the characters to be compared
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcChars`, -1 if the characters in
+ * this are bitwise less than the characters in `srcChars`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcChars`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `start + length`) with the characters
+ * in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * @param start the offset at which the compare operation begins
+ * @param length the number of characters in this to compare
+ * @param srcChars the characters to be compared
+ * @param srcStart the offset into `srcChars` to start comparison
+ * @param srcLength the number of characters in `srcChars` to compare
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcChars`, -1 if the characters in
+ * this are bitwise less than the characters in `srcChars`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcChars`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare the characters bitwise in the range
+ * [`start`, `limit`) with the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcLimit`).
+ * @param start the offset at which the compare operation begins
+ * @param limit the offset immediately following the compare operation
+ * @param srcText the text to be compared
+ * @param srcStart the offset into `srcText` to start comparison
+ * @param srcLimit the offset into `srcText` to limit comparison
+ * @return The result of bitwise character comparison: 0 if this
+ * contains the same characters as `srcText`, -1 if the characters in
+ * this are bitwise less than the characters in `srcText`, +1 if the
+ * characters in this are bitwise greater than the characters
+ * in `srcText`.
+ * @stable ICU 2.0
+ */
+ inline int8_t compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param text Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(const UnicodeString& text) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const char16_t *srcChars) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrder(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Compare two Unicode strings in code point order.
+ * The result may be different from the results of compare(), operator<, etc.
+ * if supplementary characters are present:
+ *
+ * In UTF-16, supplementary characters (with code points U+10000 and above) are
+ * stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff,
+ * which means that they compare as less than some other BMP characters like U+feff.
+ * This function compares Unicode strings in code point order.
+ * If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * this string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+ inline int8_t compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(text.foldCase(options)).
+ *
+ * @param text Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param length The number of code units from this string to compare.
+ * @param srcChars A pointer to another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLength The number of code units from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ /**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to this->foldCase(options).compareBetween(text.foldCase(options)).
+ *
+ * @param start The start offset in this string at which the compare operation begins.
+ * @param limit The offset after the last code unit from this string to compare.
+ * @param srcText Another string to compare this one to.
+ * @param srcStart The start offset in that string at which the compare operation begins.
+ * @param srcLimit The offset after the last code unit from that string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+ inline int8_t caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const;
+
+ /**
+ * Determine if this starts with the characters in `text`
+ * @param text The text to match.
+ * @return true if this starts with the characters in `text`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this starts with the characters in `srcText`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcText The text to match.
+ * @param srcStart the offset into `srcText` to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @return true if this starts with the characters in `text`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this starts with the characters in `srcChars`
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in `srcChars`
+ * @return true if this starts with the characters in `srcChars`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `srcChars`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into `srcText` to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @return true if this ends with the characters in `srcChars`, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool startsWith(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `text`
+ * @param text The text to match.
+ * @return true if this ends with the characters in `text`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& text) const;
+
+ /**
+ * Determine if this ends with the characters in `srcText`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcText The text to match.
+ * @param srcStart the offset into `srcText` to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @return true if this ends with the characters in `text`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `srcChars`
+ * @param srcChars The characters to match.
+ * @param srcLength the number of characters in `srcChars`
+ * @return true if this ends with the characters in `srcChars`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(ConstChar16Ptr srcChars,
+ int32_t srcLength) const;
+
+ /**
+ * Determine if this ends with the characters in `srcChars`
+ * in the range [`srcStart`, `srcStart + srcLength`).
+ * @param srcChars The characters to match.
+ * @param srcStart the offset into `srcText` to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @return true if this ends with the characters in `srcChars`,
+ * false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool endsWith(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+
+ /* Searching - bitwise only */
+
+ /**
+ * Locate in this the first occurrence of the characters in `text`,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in `text`
+ * starting at offset `start`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `text`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into `srcText` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the characters in
+ * `srcChars`
+ * starting at offset `start`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(const char16_t *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars`
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into `srcChars` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t indexOf(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point `c`,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(char16_t c) const;
+
+ /**
+ * Locate in this the first occurrence of the code point `c`,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point `c`,
+ * starting at offset `start`, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(char16_t c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the code point `c`
+ * starting at offset `start`, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the first occurrence of the BMP code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the first occurrence of the code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t indexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in `text`,
+ * using bitwise comparison.
+ * @param text The text to search for.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in `text`
+ * starting at offset `start`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `text`, using bitwise comparison.
+ * @param text The text to search for.
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcText The text to search for.
+ * @param srcStart the offset into `srcText` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcText` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the characters in `srcChars`
+ * starting at offset `start`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(const char16_t *srcChars,
+ int32_t srcLength,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars`, using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcLength the number of characters in `srcChars`
+ * @param start The offset at which searching will start.
+ * @param length The number of characters to search
+ * @return The offset into this of the start of `srcChars`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence in the range
+ * [`start`, `start + length`) of the characters
+ * in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`),
+ * using bitwise comparison.
+ * @param srcChars The text to search for.
+ * @param srcStart the offset into `srcChars` at which
+ * to start matching
+ * @param srcLength the number of characters in `srcChars` to match
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of the start of `text`,
+ * or -1 if not found.
+ * @stable ICU 2.0
+ */
+ int32_t lastIndexOf(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point `c`,
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(char16_t c) const;
+
+ /**
+ * Locate in this the last occurrence of the code point `c`,
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point `c`
+ * starting at offset `start`, using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(char16_t c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the code point `c`
+ * starting at offset `start`, using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start The offset at which searching will start.
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start) const;
+
+ /**
+ * Locate in this the last occurrence of the BMP code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ * @param c The code unit to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ /**
+ * Locate in this the last occurrence of the code point `c`
+ * in the range [`start`, `start + length`),
+ * using bitwise comparison.
+ *
+ * @param c The code point to search for.
+ * @param start the offset into this at which to start matching
+ * @param length the number of characters in this to search
+ * @return The offset into this of `c`, or -1 if not found.
+ * @stable ICU 2.0
+ */
+ inline int32_t lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+
+ /* Character access */
+
+ /**
+ * Return the code unit at offset `offset`.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset `offset`
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ inline char16_t charAt(int32_t offset) const;
+
+ /**
+ * Return the code unit at offset `offset`.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * @return the code unit at offset `offset`
+ * @stable ICU 2.0
+ */
+ inline char16_t operator[] (int32_t offset) const;
+
+ /**
+ * Return the code point that contains the code unit
+ * at offset `offset`.
+ * If the offset is not valid (0..length()-1) then U+ffff is returned.
+ * @param offset a valid offset into the text
+ * that indicates the text offset of any of the code units
+ * that will be assembled into a code point (21-bit value) and returned
+ * @return the code point of text at `offset`
+ * or 0xffff if the offset is not valid for this string
+ * @stable ICU 2.0
+ */
+ UChar32 char32At(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points to the beginning of a Unicode character.
+ * The offset that is passed in points to
+ * any code unit of a code point,
+ * while the returned offset will point to the first code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points to a second surrogate
+ * of a surrogate pair, then the returned offset will point
+ * to the first surrogate.
+ * @param offset a valid offset into one code point of the text
+ * @return offset of the first code unit of the same code point
+ * @see U16_SET_CP_START
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Start(int32_t offset) const;
+
+ /**
+ * Adjust a random-access offset so that
+ * it points behind a Unicode character.
+ * The offset that is passed in points behind
+ * any code unit of a code point,
+ * while the returned offset will point behind the last code unit
+ * of the same code point.
+ * In UTF-16, if the input offset points behind the first surrogate
+ * (i.e., to the second surrogate)
+ * of a surrogate pair, then the returned offset will point
+ * behind the second surrogate (i.e., to the first surrogate).
+ * @param offset a valid offset after any code unit of a code point of the text
+ * @return offset of the first code unit after the same code point
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.0
+ */
+ int32_t getChar32Limit(int32_t offset) const;
+
+ /**
+ * Move the code unit index along the string by delta code points.
+ * Interpret the input index as a code unit-based offset into the string,
+ * move the index forward or backward by delta code points, and
+ * return the resulting index.
+ * The input index should point to the first code unit of a code point,
+ * if there is more than one.
+ *
+ * Both input and output indexes are code unit-based as for all
+ * string indexes/offsets in ICU (and other libraries, like MBCS char*).
+ * If delta<0 then the index is moved backward (toward the start of the string).
+ * If delta>0 then the index is moved forward (toward the end of the string).
+ *
+ * This behaves like CharacterIterator::move32(delta, kCurrent).
+ *
+ * Behavior for out-of-bounds indexes:
+ * `moveIndex32` pins the input index to 0..length(), i.e.,
+ * if the input index<0 then it is pinned to 0;
+ * if it is index>length() then it is pinned to length().
+ * Afterwards, the index is moved by `delta` code points
+ * forward or backward,
+ * but no further backward than to 0 and no further forward than to length().
+ * The resulting index return value will be in between 0 and length(), inclusively.
+ *
+ * Examples:
+ * \code
+ * // s has code points 'a' U+10000 'b' U+10ffff U+2029
+ * UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
+ *
+ * // initial index: position of U+10000
+ * int32_t index=1;
+ *
+ * // the following examples will all result in index==4, position of U+10ffff
+ *
+ * // skip 2 code points from some position in the string
+ * index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
+ *
+ * // go to the 3rd code point from the start of s (0-based)
+ * index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
+ *
+ * // go to the next-to-last code point of s
+ * index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff
+ * \endcode
+ *
+ * @param index input code unit index
+ * @param delta (signed) code point count to move the index forward or backward
+ * in the string
+ * @return the resulting code unit index
+ * @stable ICU 2.0
+ */
+ int32_t moveIndex32(int32_t index, int32_t delta) const;
+
+ /* Substring extraction */
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into the array `dst`,
+ * beginning at `dstStart`.
+ * If the string aliases to `dst` itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param start offset of first character which will be copied into the array
+ * @param length the number of characters to extract
+ * @param dst array in which to copy characters. The length of `dst`
+ * must be at least (`dstStart + length`).
+ * @param dstStart the offset in `dst` where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ Char16Ptr dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the contents of the string into dest.
+ * This is a convenience function that
+ * checks if there is enough space in dest,
+ * extracts the entire string if possible,
+ * and NUL-terminates dest if possible.
+ *
+ * If the string fits into dest but cannot be NUL-terminated
+ * (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING.
+ * If the string itself does not fit into dest
+ * (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ *
+ * If the string aliases to `dest` itself as an external buffer,
+ * then extract() will not copy the contents.
+ *
+ * @param dest Destination string buffer.
+ * @param destCapacity Number of char16_ts available at dest.
+ * @param errorCode ICU error code.
+ * @return length()
+ * @stable ICU 2.0
+ */
+ int32_t
+ extract(Char16Ptr dest, int32_t destCapacity,
+ UErrorCode &errorCode) const;
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into the UnicodeString
+ * `target`.
+ * @param start offset of first character which will be copied
+ * @param length the number of characters to extract
+ * @param target UnicodeString into which to copy characters.
+ * @stable ICU 2.0
+ */
+ inline void extract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ /**
+ * Copy the characters in the range [`start`, `limit`)
+ * into the array `dst`, beginning at `dstStart`.
+ * @param start offset of first character which will be copied into the array
+ * @param limit offset immediately following the last character to be copied
+ * @param dst array in which to copy characters. The length of `dst`
+ * must be at least (`dstStart + (limit - start)`).
+ * @param dstStart the offset in `dst` where the first character
+ * will be extracted
+ * @stable ICU 2.0
+ */
+ inline void extractBetween(int32_t start,
+ int32_t limit,
+ char16_t *dst,
+ int32_t dstStart = 0) const;
+
+ /**
+ * Copy the characters in the range [`start`, `limit`)
+ * into the UnicodeString `target`. Replaceable API.
+ * @param start offset of first character which will be copied
+ * @param limit offset immediately following the last character to be copied
+ * @param target UnicodeString into which to copy characters.
+ * @stable ICU 2.0
+ */
+ virtual void extractBetween(int32_t start,
+ int32_t limit,
+ UnicodeString& target) const override;
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + startLength`) into an array of characters.
+ * All characters must be invariant (see utypes.h).
+ * Use US_INV as the last, signature-distinguishing parameter.
+ *
+ * This function does not write any more than `targetCapacity`
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction, can be nullptr
+ * if targetLength is 0
+ * @param targetCapacity the length of the target buffer
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 3.2
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ int32_t targetCapacity,
+ enum EInvariant inv) const;
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into an array of characters
+ * in the platform's default codepage.
+ * This function does not write any more than `targetLength`
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * If `target` is nullptr, then the number of bytes required for
+ * `target` is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength) const;
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into an array of characters
+ * in a specified codepage.
+ * The output string is NUL-terminated.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If `target` is nullptr, then the number of bytes required for
+ * `target` is returned. It is assumed that the target is big enough
+ * to fit all of the characters.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ inline int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ const char *codepage = 0) const;
+
+ /**
+ * Copy the characters in the range
+ * [`start`, `start + length`) into an array of characters
+ * in a specified codepage.
+ * This function does not write any more than `targetLength`
+ * characters but returns the length of the entire output string
+ * so that one can allocate a larger buffer and call the function again
+ * if necessary.
+ * The output string is NUL-terminated if possible.
+ *
+ * Recommendation: For invariant-character strings use
+ * extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @param start offset of first character which will be copied
+ * @param startLength the number of characters to extract
+ * @param target the target buffer for extraction
+ * @param targetLength the length of the target buffer
+ * @param codepage the desired codepage for the characters. 0 has
+ * the special meaning of the default codepage
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * If `target` is nullptr, then the number of bytes required for
+ * `target` is returned.
+ * @return the output string length, not including the terminating NUL
+ * @stable ICU 2.0
+ */
+ int32_t extract(int32_t start,
+ int32_t startLength,
+ char *target,
+ uint32_t targetLength,
+ const char *codepage) const;
+
+ /**
+ * Convert the UnicodeString into a codepage string using an existing UConverter.
+ * The output string is NUL-terminated if possible.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are extracted.
+ *
+ * @param dest destination string buffer, can be nullptr if destCapacity==0
+ * @param destCapacity the number of chars available at dest
+ * @param cnv the converter object to be used (ucnv_resetFromUnicode() will be called),
+ * or nullptr for the default converter
+ * @param errorCode normal ICU error code
+ * @return the length of the output string, not counting the terminating NUL;
+ * if the length is greater than destCapacity, then the string will not fit
+ * and a buffer of the indicated length would need to be passed in
+ * @stable ICU 2.0
+ */
+ int32_t extract(char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+#endif
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Unlike the substring constructor and setTo() functions,
+ * the object returned here will be a read-only alias (using getBuffer())
+ * rather than copying the text.
+ * As a result, this substring operation is much faster but requires
+ * that the original string not be modified or deleted during the lifetime
+ * of the returned substring object.
+ * @param start offset of the first character visible in the substring
+ * @param length length of the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Create a temporary substring for the specified range.
+ * Same as tempSubString(start, length) except that the substring range
+ * is specified as a (start, limit) pair (with an exclusive limit index)
+ * rather than a (start, length) pair.
+ * @param start offset of the first character visible in the substring
+ * @param limit offset immediately following the last character visible in the substring
+ * @return a read-only alias UnicodeString object for the substring
+ * @stable ICU 4.4
+ */
+ inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
+
+ /**
+ * Convert the UnicodeString to UTF-8 and write the result
+ * to a ByteSink. This is called by toUTF8String().
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF8WithSub().
+ *
+ * @param sink A ByteSink to which the UTF-8 version of the string is written.
+ * sink.Flush() is called at the end.
+ * @stable ICU 4.2
+ * @see toUTF8String
+ */
+ void toUTF8(ByteSink &sink) const;
+
+ /**
+ * Convert the UnicodeString to UTF-8 and append the result
+ * to a standard string.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls toUTF8().
+ *
+ * @param result A standard string (or a compatible object)
+ * to which the UTF-8 version of the string is appended.
+ * @return The string object.
+ * @stable ICU 4.2
+ * @see toUTF8
+ */
+ template<typename StringClass>
+ StringClass &toUTF8String(StringClass &result) const {
+ StringByteSink<StringClass> sbs(&result, length());
+ toUTF8(sbs);
+ return result;
+ }
+
+ /**
+ * Convert the UnicodeString to UTF-32.
+ * Unpaired surrogates are replaced with U+FFFD.
+ * Calls u_strToUTF32WithSub().
+ *
+ * @param utf32 destination string buffer, can be nullptr if capacity==0
+ * @param capacity the number of UChar32s available at utf32
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The length of the UTF-32 string.
+ * @see fromUTF32
+ * @stable ICU 4.2
+ */
+ int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
+
+ /* Length operations */
+
+ /**
+ * Return the length of the UnicodeString object.
+ * The length is the number of char16_t code units are in the UnicodeString.
+ * If you want the number of code points, please use countChar32().
+ * @return the length of the UnicodeString object
+ * @see countChar32
+ * @stable ICU 2.0
+ */
+ inline int32_t length(void) const;
+
+ /**
+ * Count Unicode code points in the length char16_t code units of the string.
+ * A code point may occupy either one or two char16_t code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of moveIndex32().
+ *
+ * @param start the index of the first code unit to check
+ * @param length the number of char16_t code units to check
+ * @return the number of code points in the specified code units
+ * @see length
+ * @stable ICU 2.0
+ */
+ int32_t
+ countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
+
+ /**
+ * Check if the length char16_t code units of the string
+ * contain more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in this part of the string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length
+ * falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (countChar32(start, length)>number).
+ * A Unicode code point may occupy either one or two char16_t code units.
+ *
+ * @param start the index of the first code unit to check (0 for the entire string)
+ * @param length the number of char16_t code units to check
+ * (use INT32_MAX for the entire string; remember that start/length
+ * values are pinned)
+ * @param number The number of code points in the (sub)string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @see countChar32
+ * @see u_strHasMoreChar32Than
+ * @stable ICU 2.4
+ */
+ UBool
+ hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
+
+ /**
+ * Determine if this string is empty.
+ * @return true if this string contains 0 characters, false otherwise.
+ * @stable ICU 2.0
+ */
+ inline UBool isEmpty(void) const;
+
+ /**
+ * Return the capacity of the internal buffer of the UnicodeString object.
+ * This is useful together with the getBuffer functions.
+ * See there for details.
+ *
+ * @return the number of char16_ts available in the internal buffer
+ * @see getBuffer
+ * @stable ICU 2.0
+ */
+ inline int32_t getCapacity(void) const;
+
+ /* Other operations */
+
+ /**
+ * Generate a hash code for this object.
+ * @return The hash code of this UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline int32_t hashCode(void) const;
+
+ /**
+ * Determine if this object contains a valid string.
+ * A bogus string has no value. It is different from an empty string,
+ * although in both cases isEmpty() returns true and length() returns 0.
+ * setToBogus() and isBogus() can be used to indicate that no string value is available.
+ * For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and
+ * length() returns 0.
+ *
+ * @return true if the string is bogus/invalid, false otherwise
+ * @see setToBogus()
+ * @stable ICU 2.0
+ */
+ inline UBool isBogus(void) const;
+
+
+ //========================================
+ // Write operations
+ //========================================
+
+ /* Assignment operations */
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the characters from `srcText`.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * By contrast, the fastCopyFrom() function implements the old,
+ * more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
+ * @param srcText The text containing the characters to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ * @see fastCopyFrom
+ */
+ UnicodeString &operator=(const UnicodeString &srcText);
+
+ /**
+ * Almost the same as the assignment operator.
+ * Replace the characters in this UnicodeString
+ * with the characters from `srcText`.
+ *
+ * This function works the same as the assignment operator
+ * for all strings except for ones that are readonly aliases.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * This function implements the old, more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * The fastCopyFrom function must be used only if it is known that the lifetime of
+ * this UnicodeString does not exceed the lifetime of the aliased buffer
+ * including its contents, for example for strings from resource bundles
+ * or aliases to string constants.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
+ * @param src The text containing the characters to replace.
+ * @return a reference to this
+ * @stable ICU 2.4
+ */
+ UnicodeString &fastCopyFrom(const UnicodeString &src);
+
+ /**
+ * Move assignment operator; might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * The behavior is undefined if *this and src are the same object.
+ * @param src source string
+ * @return *this
+ * @stable ICU 56
+ */
+ UnicodeString &operator=(UnicodeString &&src) noexcept;
+
+ /**
+ * Swap strings.
+ * @param other other string
+ * @stable ICU 56
+ */
+ void swap(UnicodeString &other) noexcept;
+
+ /**
+ * Non-member UnicodeString swap function.
+ * @param s1 will get s2's contents and state
+ * @param s2 will get s1's contents and state
+ * @stable ICU 56
+ */
+ friend inline void U_EXPORT2
+ swap(UnicodeString &s1, UnicodeString &s2) noexcept {
+ s1.swap(s2);
+ }
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code unit `ch`.
+ * @param ch the code unit to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (char16_t ch);
+
+ /**
+ * Assignment operator. Replace the characters in this UnicodeString
+ * with the code point `ch`.
+ * @param ch the code point to replace
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator= (UChar32 ch);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcText.length()`).
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @return a reference to this
+ * @stable ICU 2.2
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart);
+
+ /**
+ * Set the text in the UnicodeString object to the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in the
+ * replace string.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Set the text in the UnicodeString object to the characters in
+ * `srcText`.
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const UnicodeString& srcText);
+
+ /**
+ * Set the characters in the UnicodeString object to the characters
+ * in `srcChars`. `srcChars` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(const char16_t *srcChars,
+ int32_t srcLength);
+
+ /**
+ * Set the characters in the UnicodeString object to the code unit
+ * `srcChar`.
+ * @param srcChar the code unit which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(char16_t srcChar);
+
+ /**
+ * Set the characters in the UnicodeString object to the code point
+ * `srcChar`.
+ * @param srcChar the code point which becomes the UnicodeString's character
+ * content
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& setTo(UChar32 srcChar);
+
+ /**
+ * Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if `text` is `NUL`-terminated.
+ * This must be true if `textLength==-1`.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in `text` to alias.
+ * If -1, then this constructor will determine the length
+ * by calling `u_strlen()`.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(UBool isTerminated,
+ ConstChar16Ptr text,
+ int32_t textLength);
+
+ /**
+ * Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in `buffer` to alias.
+ * @param buffCapacity The size of `buffer` in char16_ts.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString &setTo(char16_t *buffer,
+ int32_t buffLength,
+ int32_t buffCapacity);
+
+ /**
+ * Make this UnicodeString object invalid.
+ * The string will test true with isBogus().
+ *
+ * A bogus string has no value. It is different from an empty string.
+ * It can be used to indicate that no string value is available.
+ * getBuffer() and getTerminatedBuffer() return nullptr, and
+ * length() returns 0.
+ *
+ * This utility function is used throughout the UnicodeString
+ * implementation to indicate that a UnicodeString operation failed,
+ * and may be used in other functions,
+ * especially but not exclusively when such functions do not
+ * take a UErrorCode for simplicity.
+ *
+ * The following methods, and no others, will clear a string object's bogus flag:
+ * - remove()
+ * - remove(0, INT32_MAX)
+ * - truncate(0)
+ * - operator=() (assignment operator)
+ * - setTo(...)
+ *
+ * The simplest ways to turn a bogus string into an empty one
+ * is to use the remove() function.
+ * Examples for other functions that are equivalent to "set to empty string":
+ * \code
+ * if(s.isBogus()) {
+ * s.remove(); // set to an empty string (remove all), or
+ * s.remove(0, INT32_MAX); // set to an empty string (remove all), or
+ * s.truncate(0); // set to an empty string (complete truncation), or
+ * s=UnicodeString(); // assign an empty string, or
+ * s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
+ * s.setTo(u"", 0); // set to an empty C Unicode string
+ * }
+ * \endcode
+ *
+ * @see isBogus()
+ * @stable ICU 2.0
+ */
+ void setToBogus();
+
+ /**
+ * Set the character at the specified offset to the specified character.
+ * @param offset A valid offset into the text of the character to set
+ * @param ch The new character
+ * @return A reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& setCharAt(int32_t offset,
+ char16_t ch);
+
+
+ /* Append operations */
+
+ /**
+ * Append operator. Append the code unit `ch` to the UnicodeString
+ * object.
+ * @param ch the code unit to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (char16_t ch);
+
+ /**
+ * Append operator. Append the code point `ch` to the UnicodeString
+ * object.
+ * @param ch the code point to be appended
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (UChar32 ch);
+
+ /**
+ * Append operator. Append the characters in `srcText` to the
+ * UnicodeString object. `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& operator+= (const UnicodeString& srcText);
+
+ /**
+ * Append the characters
+ * in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`) to the
+ * UnicodeString object at offset `start`. `srcText`
+ * is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in
+ * the append string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in `srcText` to the UnicodeString object.
+ * `srcText` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const UnicodeString& srcText);
+
+ /**
+ * Append the characters in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`) to the UnicodeString
+ * object at offset
+ * `start`. `srcChars` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcChars` in
+ * the append string; can be -1 if `srcChars` is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Append the characters in `srcChars` to the UnicodeString object
+ * at offset `start`. `srcChars` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in `srcChars`;
+ * can be -1 if `srcChars` is NUL-terminated
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(ConstChar16Ptr srcChars,
+ int32_t srcLength);
+
+ /**
+ * Append the code unit `srcChar` to the UnicodeString object.
+ * @param srcChar the code unit to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& append(char16_t srcChar);
+
+ /**
+ * Append the code point `srcChar` to the UnicodeString object.
+ * @param srcChar the code point to append
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& append(UChar32 srcChar);
+
+
+ /* Insert operations */
+
+ /**
+ * Insert the characters in `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
+ * object at offset `start`. `srcText` is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in
+ * the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in `srcText` into the UnicodeString object
+ * at offset `start`. `srcText` is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const UnicodeString& srcText);
+
+ /**
+ * Insert the characters in `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`) into the UnicodeString
+ * object at offset `start`. `srcChars` is not modified.
+ * @param start the offset at which the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcChars`
+ * in the insert string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Insert the characters in `srcChars` into the UnicodeString object
+ * at offset `start`. `srcChars` is not modified.
+ * @param start the offset where the insertion begins
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ ConstChar16Ptr srcChars,
+ int32_t srcLength);
+
+ /**
+ * Insert the code unit `srcChar` into the UnicodeString object at
+ * offset `start`.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code unit to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ char16_t srcChar);
+
+ /**
+ * Insert the code point `srcChar` into the UnicodeString object at
+ * offset `start`.
+ * @param start the offset at which the insertion occurs
+ * @param srcChar the code point to insert
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& insert(int32_t start,
+ UChar32 srcChar);
+
+
+ /* Replace operations */
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the characters in
+ * `srcText` in the range
+ * [`srcStart`, `srcStart + srcLength`).
+ * `srcText` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcText` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcText` in
+ * the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`)
+ * with the characters in `srcText`. `srcText` is
+ * not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the characters in
+ * `srcChars` in the range
+ * [`srcStart`, `srcStart + srcLength`). `srcChars`
+ * is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLength the number of characters in `srcChars`
+ * in the replace string
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the characters in
+ * `srcChars`. `srcChars` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param length number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChars the source for the new characters
+ * @param srcLength the number of Unicode characters in srcChars
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ ConstChar16Ptr srcChars,
+ int32_t srcLength);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the code unit
+ * `srcChar`.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChar the new code unit
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replace(int32_t start,
+ int32_t length,
+ char16_t srcChar);
+
+ /**
+ * Replace the characters in the range
+ * [`start`, `start + length`) with the code point
+ * `srcChar`.
+ * @param start the offset at which the replace operation begins
+ * @param length the number of characters to replace. The character at
+ * `start + length` is not modified.
+ * @param srcChar the new code point
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
+
+ /**
+ * Replace the characters in the range [`start`, `limit`)
+ * with the characters in `srcText`. `srcText` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText);
+
+ /**
+ * Replace the characters in the range [`start`, `limit`)
+ * with the characters in `srcText` in the range
+ * [`srcStart`, `srcLimit`). `srcText` is not modified.
+ * @param start the offset at which the replace operation begins
+ * @param limit the offset immediately following the replace range
+ * @param srcText the source for the new characters
+ * @param srcStart the offset into `srcChars` where new characters
+ * will be obtained
+ * @param srcLimit the offset immediately following the range to copy
+ * in `srcText`
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit);
+
+ /**
+ * Replace a substring of this object with the given text.
+ * @param start the beginning index, inclusive; `0 <= start <= limit`.
+ * @param limit the ending index, exclusive; `start <= limit <= length()`.
+ * @param text the text to replace characters `start` to `limit - 1`
+ * @stable ICU 2.0
+ */
+ virtual void handleReplaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& text) override;
+
+ /**
+ * Replaceable API
+ * @return true if it has MetaData
+ * @stable ICU 2.4
+ */
+ virtual UBool hasMetaData() const override;
+
+ /**
+ * Copy a substring of this object, retaining attribute (out-of-band)
+ * information. This method is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * @param start the beginning index, inclusive; `0 <= start <= limit`.
+ * @param limit the ending index, exclusive; `start <= limit <= length()`.
+ * @param dest the destination index. The characters from
+ * `start..limit-1` will be copied to `dest`.
+ * Implementations of this method may assume that `dest <= start ||
+ * dest >= limit`.
+ * @stable ICU 2.0
+ */
+ virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
+
+ /* Search and replace operations */
+
+ /**
+ * Replace all occurrences of characters in oldText with the characters
+ * in newText
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText with characters
+ * in newText
+ * in the range [`start`, `start + length`).
+ * @param start the start of the range in which replace will performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param newText the text containing the replacement text
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText);
+
+ /**
+ * Replace all occurrences of characters in oldText in the range
+ * [`oldStart`, `oldStart + oldLength`) with the characters
+ * in newText in the range
+ * [`newStart`, `newStart + newLength`)
+ * in the range [`start`, `start + length`).
+ * @param start the start of the range in which replace will performed
+ * @param length the length of the range in which replace will be performed
+ * @param oldText the text containing the search text
+ * @param oldStart the start of the search range in `oldText`
+ * @param oldLength the length of the search range in `oldText`
+ * @param newText the text containing the replacement text
+ * @param newStart the start of the replacement range in `newText`
+ * @param newLength the length of the replacement range in `newText`
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& findAndReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& oldText,
+ int32_t oldStart,
+ int32_t oldLength,
+ const UnicodeString& newText,
+ int32_t newStart,
+ int32_t newLength);
+
+
+ /* Remove operations */
+
+ /**
+ * Removes all characters from the UnicodeString object and clears the bogus flag.
+ * This is the UnicodeString equivalent of std::string’s clear().
+ *
+ * @return a reference to this
+ * @see setToBogus
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove();
+
+ /**
+ * Remove the characters in the range
+ * [`start`, `start + length`) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param length the number of characters to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& remove(int32_t start,
+ int32_t length = (int32_t)INT32_MAX);
+
+ /**
+ * Remove the characters in the range
+ * [`start`, `limit`) from the UnicodeString object.
+ * @param start the offset of the first character to remove
+ * @param limit the offset immediately following the range to remove
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& removeBetween(int32_t start,
+ int32_t limit = (int32_t)INT32_MAX);
+
+ /**
+ * Retain only the characters in the range
+ * [`start`, `limit`) from the UnicodeString object.
+ * Removes characters before `start` and at and after `limit`.
+ * @param start the offset of the first character to retain
+ * @param limit the offset immediately following the range to retain
+ * @return a reference to this
+ * @stable ICU 4.4
+ */
+ inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
+
+ /* Length operations */
+
+ /**
+ * Pad the start of this UnicodeString with the character `padChar`.
+ * If the length of this UnicodeString is less than targetLength,
+ * length() - targetLength copies of padChar will be added to the
+ * beginning of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return true if the text was padded, false otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padLeading(int32_t targetLength,
+ char16_t padChar = 0x0020);
+
+ /**
+ * Pad the end of this UnicodeString with the character `padChar`.
+ * If the length of this UnicodeString is less than targetLength,
+ * length() - targetLength copies of padChar will be added to the
+ * end of this UnicodeString.
+ * @param targetLength the desired length of the string
+ * @param padChar the character to use for padding. Defaults to
+ * space (U+0020)
+ * @return true if the text was padded, false otherwise.
+ * @stable ICU 2.0
+ */
+ UBool padTrailing(int32_t targetLength,
+ char16_t padChar = 0x0020);
+
+ /**
+ * Truncate this UnicodeString to the `targetLength`.
+ * @param targetLength the desired length of this UnicodeString.
+ * @return true if the text was truncated, false otherwise
+ * @stable ICU 2.0
+ */
+ inline UBool truncate(int32_t targetLength);
+
+ /**
+ * Trims leading and trailing whitespace from this UnicodeString.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ UnicodeString& trim(void);
+
+
+ /* Miscellaneous operations */
+
+ /**
+ * Reverse this UnicodeString in place.
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(void);
+
+ /**
+ * Reverse the range [`start`, `start + length`) in
+ * this UnicodeString.
+ * @param start the start of the range to reverse
+ * @param length the number of characters to to reverse
+ * @return a reference to this
+ * @stable ICU 2.0
+ */
+ inline UnicodeString& reverse(int32_t start,
+ int32_t length);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(void);
+
+ /**
+ * Convert the characters in this to UPPER CASE following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toUpper(const Locale& locale);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * the default locale.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(void);
+
+ /**
+ * Convert the characters in this to lower case following the conventions of
+ * a specific locale.
+ * @param locale The locale containing the conventions to use.
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString& toLower(const Locale& locale);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+ /**
+ * Titlecase this string, convenience function using the default locale.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter);
+
+ /**
+ * Titlecase this string.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @return A reference to this.
+ * @stable ICU 2.1
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
+
+ /**
+ * Titlecase this string, with options.
+ *
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others. (This can be modified with options.)
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (0), then a standard titlecase
+ * break iterator is opened.
+ * Otherwise the provided iterator is set to the string's text.
+ * @param locale The locale to consider.
+ * @param options Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE,
+ * U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED,
+ * U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.
+ * @return A reference to this.
+ * @stable ICU 3.8
+ */
+ UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
+
+#endif
+
+ /**
+ * Case-folds the characters in this string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ *
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @return A reference to this.
+ * @stable ICU 2.0
+ */
+ UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
+
+ //========================================
+ // Access to the internal buffer
+ //========================================
+
+ /**
+ * Get a read/write pointer to the internal buffer.
+ * The buffer is guaranteed to be large enough for at least minCapacity char16_ts,
+ * writable, and is still owned by the UnicodeString object.
+ * Calls to getBuffer(minCapacity) must not be nested, and
+ * must be matched with calls to releaseBuffer(newLength).
+ * If the string buffer was read-only or shared,
+ * then it will be reallocated and copied.
+ *
+ * An attempted nested call will return 0, and will not further modify the
+ * state of the UnicodeString object.
+ * It also returns 0 if the string is bogus.
+ *
+ * The actual capacity of the string buffer may be larger than minCapacity.
+ * getCapacity() returns the actual capacity.
+ * For many operations, the full capacity should be used to avoid reallocations.
+ *
+ * While the buffer is "open" between getBuffer(minCapacity)
+ * and releaseBuffer(newLength), the following applies:
+ * - The string length is set to 0.
+ * - Any read API call on the UnicodeString object will behave like on a 0-length string.
+ * - Any write API call on the UnicodeString object is disallowed and will have no effect.
+ * - You can read from and write to the returned buffer.
+ * - The previous string contents will still be in the buffer;
+ * if you want to use it, then you need to call length() before getBuffer(minCapacity).
+ * If the length() was greater than minCapacity, then any contents after minCapacity
+ * may be lost.
+ * The buffer contents is not NUL-terminated by getBuffer().
+ * If length() < getCapacity() then you can terminate it by writing a NUL
+ * at index length().
+ * - You must call releaseBuffer(newLength) before and in order to
+ * return to normal UnicodeString operation.
+ *
+ * @param minCapacity the minimum number of char16_ts that are to be available
+ * in the buffer, starting at the returned pointer;
+ * default to the current string capacity if minCapacity==-1
+ * @return a writable pointer to the internal string buffer,
+ * or nullptr if an error occurs (nested calls, out of memory)
+ *
+ * @see releaseBuffer
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ char16_t *getBuffer(int32_t minCapacity);
+
+ /**
+ * Release a read/write buffer on a UnicodeString object with an
+ * "open" getBuffer(minCapacity).
+ * This function must be called in a matched pair with getBuffer(minCapacity).
+ * releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".
+ *
+ * It will set the string length to newLength, at most to the current capacity.
+ * If newLength==-1 then it will set the length according to the
+ * first NUL in the buffer, or to the capacity if there is no NUL.
+ *
+ * After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.
+ *
+ * @param newLength the new length of the UnicodeString object;
+ * defaults to the current capacity if newLength is greater than that;
+ * if newLength==-1, it defaults to u_strlen(buffer) but not more than
+ * the current capacity of the string
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @stable ICU 2.0
+ */
+ void releaseBuffer(int32_t newLength=-1);
+
+ /**
+ * Get a read-only pointer to the internal buffer.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length() may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is (probably) not NUL-terminated.
+ * You can check if it is with
+ * `(s.length() < s.getCapacity() && buffer[s.length()]==0)`.
+ * (See getTerminatedBuffer().)
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or nullptr if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getTerminatedBuffer()
+ * @stable ICU 2.0
+ */
+ inline const char16_t *getBuffer() const;
+
+ /**
+ * Get a read-only pointer to the internal buffer,
+ * making sure that it is NUL-terminated.
+ * This can be called at any time on a valid UnicodeString.
+ *
+ * It returns 0 if the string is bogus, or
+ * during an "open" getBuffer(minCapacity), or if the buffer cannot
+ * be NUL-terminated (because memory allocation failed).
+ *
+ * It can be called as many times as desired.
+ * The pointer that it returns will remain valid until the UnicodeString object is modified,
+ * at which time the pointer is semantically invalidated and must not be used any more.
+ *
+ * The capacity of the buffer can be determined with getCapacity().
+ * The part after length()+1 may or may not be initialized and valid,
+ * depending on the history of the UnicodeString object.
+ *
+ * The buffer contents is guaranteed to be NUL-terminated.
+ * getTerminatedBuffer() may reallocate the buffer if a terminating NUL
+ * is written.
+ * For this reason, this function is not const, unlike getBuffer().
+ * Note that a UnicodeString may also contain NUL characters as part of its contents.
+ *
+ * The buffer may reside in read-only memory. Its contents must not
+ * be modified.
+ *
+ * @return a read-only pointer to the internal string buffer,
+ * or 0 if the string is empty or bogus
+ *
+ * @see getBuffer(int32_t minCapacity)
+ * @see getBuffer()
+ * @stable ICU 2.2
+ */
+ const char16_t *getTerminatedBuffer();
+
+ //========================================
+ // Constructors
+ //========================================
+
+ /** Construct an empty UnicodeString.
+ * @stable ICU 2.0
+ */
+ inline UnicodeString();
+
+ /**
+ * Construct a UnicodeString with capacity to hold `capacity` char16_ts
+ * @param capacity the number of char16_ts this UnicodeString should hold
+ * before a resize is necessary; if count is greater than 0 and count
+ * code points c take up more space than capacity, then capacity is adjusted
+ * accordingly.
+ * @param c is used to initially fill the string
+ * @param count specifies how many code points c are to be written in the
+ * string
+ * @stable ICU 2.0
+ */
+ UnicodeString(int32_t capacity, UChar32 c, int32_t count);
+
+ /**
+ * Single char16_t (code unit) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch);
+
+ /**
+ * Single UChar32 (code point) constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_CHAR_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param ch the character to place in the UnicodeString
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch);
+
+ /**
+ * char16_t* constructor.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text The characters to place in the UnicodeString. `text`
+ * must be NUL (U+0000) terminated.
+ * @stable ICU 2.0
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text);
+
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * uint16_t * constructor.
+ * Delegates to UnicodeString(const char16_t *).
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text NUL-terminated UTF-16 string
+ * @stable ICU 59
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
+ UnicodeString(ConstChar16Ptr(text)) {}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * wchar_t * constructor.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * Delegates to UnicodeString(const char16_t *).
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text NUL-terminated UTF-16 string
+ * @stable ICU 59
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
+ UnicodeString(ConstChar16Ptr(text)) {}
+#endif
+
+ /**
+ * nullptr_t constructor.
+ * Effectively the same as the default constructor, makes an empty string object.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param text nullptr
+ * @stable ICU 59
+ */
+ UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
+
+ /**
+ * char16_t* constructor.
+ * @param text The characters to place in the UnicodeString.
+ * @param textLength The number of Unicode characters in `text`
+ * to copy.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char16_t *text,
+ int32_t textLength);
+
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * uint16_t * constructor.
+ * Delegates to UnicodeString(const char16_t *, int32_t).
+ * @param text UTF-16 string
+ * @param textLength string length
+ * @stable ICU 59
+ */
+ UnicodeString(const uint16_t *text, int32_t textLength) :
+ UnicodeString(ConstChar16Ptr(text), textLength) {}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * wchar_t * constructor.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * Delegates to UnicodeString(const char16_t *, int32_t).
+ * @param text NUL-terminated UTF-16 string
+ * @param textLength string length
+ * @stable ICU 59
+ */
+ UnicodeString(const wchar_t *text, int32_t textLength) :
+ UnicodeString(ConstChar16Ptr(text), textLength) {}
+#endif
+
+ /**
+ * nullptr_t constructor.
+ * Effectively the same as the default constructor, makes an empty string object.
+ * @param text nullptr
+ * @param textLength ignored
+ * @stable ICU 59
+ */
+ inline UnicodeString(const std::nullptr_t text, int32_t textLength);
+
+ /**
+ * Readonly-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has copy-on-write semantics:
+ * When the string is modified, then the buffer is first copied into
+ * newly allocated memory.
+ * The aliased buffer is never modified.
+ *
+ * In an assignment to another UnicodeString, when using the copy constructor
+ * or the assignment operator, the text will be copied.
+ * When using fastCopyFrom(), the text will be aliased again,
+ * so that both strings then alias the same readonly-text.
+ *
+ * @param isTerminated specifies if `text` is `NUL`-terminated.
+ * This must be true if `textLength==-1`.
+ * @param text The characters to alias for the UnicodeString.
+ * @param textLength The number of Unicode characters in `text` to alias.
+ * If -1, then this constructor will determine the length
+ * by calling `u_strlen()`.
+ * @stable ICU 2.0
+ */
+ UnicodeString(UBool isTerminated,
+ ConstChar16Ptr text,
+ int32_t textLength);
+
+ /**
+ * Writable-aliasing char16_t* constructor.
+ * The text will be used for the UnicodeString object, but
+ * it will not be released when the UnicodeString is destroyed.
+ * This has write-through semantics:
+ * For as long as the capacity of the buffer is sufficient, write operations
+ * will directly affect the buffer. When more capacity is necessary, then
+ * a new buffer will be allocated and the contents copied as with regularly
+ * constructed strings.
+ * In an assignment to another UnicodeString, the buffer will be copied.
+ * The extract(Char16Ptr dst) function detects whether the dst pointer is the same
+ * as the string buffer itself and will in this case not copy the contents.
+ *
+ * @param buffer The characters to alias for the UnicodeString.
+ * @param buffLength The number of Unicode characters in `buffer` to alias.
+ * @param buffCapacity The size of `buffer` in char16_ts.
+ * @stable ICU 2.0
+ */
+ UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if !U_CHAR16_IS_TYPEDEF
+ /**
+ * Writable-aliasing uint16_t * constructor.
+ * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
+ * @param buffer writable buffer of/for UTF-16 text
+ * @param buffLength length of the current buffer contents
+ * @param buffCapacity buffer capacity
+ * @stable ICU 59
+ */
+ UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
+ UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
+#endif
+
+#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
+ /**
+ * Writable-aliasing wchar_t * constructor.
+ * (Only defined if U_SIZEOF_WCHAR_T==2.)
+ * Delegates to UnicodeString(const char16_t *, int32_t, int32_t).
+ * @param buffer writable buffer of/for UTF-16 text
+ * @param buffLength length of the current buffer contents
+ * @param buffCapacity buffer capacity
+ * @stable ICU 59
+ */
+ UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
+ UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
+#endif
+
+ /**
+ * Writable-aliasing nullptr_t constructor.
+ * Effectively the same as the default constructor, makes an empty string object.
+ * @param buffer nullptr
+ * @param buffLength ignored
+ * @param buffCapacity ignored
+ * @stable ICU 59
+ */
+ inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
+
+#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ *
+ * For ASCII (really "invariant character") strings it is more efficient to use
+ * the constructor that takes a US_INV (for its enum EInvariant).
+ * For ASCII (invariant-character) string literals, see UNICODE_STRING and
+ * UNICODE_STRING_SIMPLE.
+ *
+ * It is recommended to mark this constructor "explicit" by
+ * `-DUNISTR_FROM_STRING_EXPLICIT=explicit`
+ * on the compiler command line or similar.
+ * @param codepageData an array of bytes, null-terminated,
+ * in the platform's default codepage.
+ * @stable ICU 2.0
+ * @see UNICODE_STRING
+ * @see UNICODE_STRING_SIMPLE
+ */
+ UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
+
+ /**
+ * char* constructor.
+ * Uses the default converter (and thus depends on the ICU conversion code)
+ * unless U_CHARSET_IS_UTF8 is set to 1.
+ * @param codepageData an array of bytes in the platform's default codepage.
+ * @param dataLength The number of bytes in `codepageData`.
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength);
+
+#endif
+
+#if !UCONFIG_NO_CONVERSION
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes, null-terminated
+ * @param codepage the encoding of `codepageData`. The special
+ * value 0 for `codepage` indicates that the text is in the
+ * platform's default codepage.
+ *
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, const char *codepage);
+
+ /**
+ * char* constructor.
+ * @param codepageData an array of bytes.
+ * @param dataLength The number of bytes in `codepageData`.
+ * @param codepage the encoding of `codepageData`. The special
+ * value 0 for `codepage` indicates that the text is in the
+ * platform's default codepage.
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ * Recommendation: For invariant-character strings use the constructor
+ * UnicodeString(const char *src, int32_t length, enum EInvariant inv)
+ * because it avoids object code dependencies of UnicodeString on
+ * the conversion code.
+ *
+ * @stable ICU 2.0
+ */
+ UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
+
+ /**
+ * char * / UConverter constructor.
+ * This constructor uses an existing UConverter object to
+ * convert the codepage string to Unicode and construct a UnicodeString
+ * from that.
+ *
+ * The converter is reset at first.
+ * If the error code indicates a failure before this constructor is called,
+ * or if an error occurs during conversion or construction,
+ * then the string will be bogus.
+ *
+ * This function avoids the overhead of opening and closing a converter if
+ * multiple strings are constructed.
+ *
+ * @param src input codepage string
+ * @param srcLength length of the input string, can be -1 for NUL-terminated strings
+ * @param cnv converter object (ucnv_resetToUnicode() will be called),
+ * can be nullptr for the default converter
+ * @param errorCode normal ICU error code
+ * @stable ICU 2.0
+ */
+ UnicodeString(
+ const char *src, int32_t srcLength,
+ UConverter *cnv,
+ UErrorCode &errorCode);
+
+#endif
+
+ /**
+ * Constructs a Unicode string from an invariant-character char * string.
+ * About invariant characters see utypes.h.
+ * This constructor has no runtime dependency on conversion code and is
+ * therefore recommended over ones taking a charset name string
+ * (where the empty string "" indicates invariant-character conversion).
+ *
+ * Use the macro US_INV as the third, signature-distinguishing parameter.
+ *
+ * For example:
+ * \code
+ * void fn(const char *s) {
+ * UnicodeString ustr(s, -1, US_INV);
+ * // use ustr ...
+ * }
+ * \endcode
+ * @param src String using only invariant characters.
+ * @param textLength Length of src, or -1 if NUL-terminated.
+ * @param inv Signature-distinguishing parameter, use US_INV.
+ *
+ * @see US_INV
+ * @stable ICU 3.2
+ */
+ UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
+
+
+ /**
+ * Copy constructor.
+ *
+ * Starting with ICU 2.4, the assignment operator and the copy constructor
+ * allocate a new buffer and copy the buffer contents even for readonly aliases.
+ * By contrast, the fastCopyFrom() function implements the old,
+ * more efficient but less safe behavior
+ * of making this string also a readonly alias to the same buffer.
+ *
+ * If the source object has an "open" buffer from getBuffer(minCapacity),
+ * then the copy is an empty string.
+ *
+ * @param that The UnicodeString object to copy.
+ * @stable ICU 2.0
+ * @see fastCopyFrom
+ */
+ UnicodeString(const UnicodeString& that);
+
+ /**
+ * Move constructor; might leave src in bogus state.
+ * This string will have the same contents and state that the source string had.
+ * @param src source string
+ * @stable ICU 56
+ */
+ UnicodeString(UnicodeString &&src) noexcept;
+
+ /**
+ * 'Substring' constructor from tail of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into `src` at which to start copying.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart);
+
+ /**
+ * 'Substring' constructor from subrange of source string.
+ * @param src The UnicodeString object to copy.
+ * @param srcStart The offset into `src` at which to start copying.
+ * @param srcLength The number of characters from `src` to copy.
+ * @stable ICU 2.2
+ */
+ UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+
+ /**
+ * Clone this object, an instance of a subclass of Replaceable.
+ * Clones can be used concurrently in multiple threads.
+ * If a subclass does not implement clone(), or if an error occurs,
+ * then nullptr is returned.
+ * The caller must delete the clone.
+ *
+ * @return a clone of this object
+ *
+ * @see Replaceable::clone
+ * @see getDynamicClassID
+ * @stable ICU 2.6
+ */
+ virtual UnicodeString *clone() const override;
+
+ /** Destructor.
+ * @stable ICU 2.0
+ */
+ virtual ~UnicodeString();
+
+ /**
+ * Create a UnicodeString from a UTF-8 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF8WithSub().
+ *
+ * @param utf8 UTF-8 input string.
+ * Note that a StringPiece can be implicitly constructed
+ * from a std::string or a NUL-terminated const char * string.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF8
+ * @see toUTF8String
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF8(StringPiece utf8);
+
+ /**
+ * Create a UnicodeString from a UTF-32 string.
+ * Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string.
+ * Calls u_strFromUTF32WithSub().
+ *
+ * @param utf32 UTF-32 input string. Must not be nullptr.
+ * @param length Length of the input string, or -1 if NUL-terminated.
+ * @return A UnicodeString with equivalent UTF-16 contents.
+ * @see toUTF32
+ * @stable ICU 4.2
+ */
+ static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
+
+ /* Miscellaneous operations */
+
+ /**
+ * Unescape a string of characters and return a string containing
+ * the result. The following escape sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\" => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * This function is similar to u_unescape() but not identical to it.
+ * The latter takes a source char*, so it does escape recognition
+ * and also invariant conversion.
+ *
+ * @return a string with backslash escapes interpreted, or an
+ * empty string on error.
+ * @see UnicodeString#unescapeAt()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UnicodeString unescape() const;
+
+ /**
+ * Unescape a single escape sequence and return the represented
+ * character. See unescape() for a listing of the recognized escape
+ * sequences. The character at offset-1 is assumed (without
+ * checking) to be a backslash. If the escape sequence is
+ * ill-formed, or the offset is out of range, U_SENTINEL=-1 is
+ * returned.
+ *
+ * @param offset an input output parameter. On input, it is the
+ * offset into this string where the escape sequence is located,
+ * after the initial backslash. On output, it is advanced after the
+ * last character parsed. On error, it is not advanced at all.
+ * @return the character represented by the escape sequence at
+ * offset, or U_SENTINEL=-1 on error.
+ * @see UnicodeString#unescape()
+ * @see u_unescape()
+ * @see u_unescapeAt()
+ * @stable ICU 2.0
+ */
+ UChar32 unescapeAt(int32_t &offset) const;
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.2
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const override;
+
+ //========================================
+ // Implementation methods
+ //========================================
+
+protected:
+ /**
+ * Implement Replaceable::getLength() (see jitterbug 1027).
+ * @stable ICU 2.4
+ */
+ virtual int32_t getLength() const override;
+
+ /**
+ * The change in Replaceable to use virtual getCharAt() allows
+ * UnicodeString::charAt() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual char16_t getCharAt(int32_t offset) const override;
+
+ /**
+ * The change in Replaceable to use virtual getChar32At() allows
+ * UnicodeString::char32At() to be inline again (see jitterbug 709).
+ * @stable ICU 2.4
+ */
+ virtual UChar32 getChar32At(int32_t offset) const override;
+
+private:
+ // For char* constructors. Could be made public.
+ UnicodeString &setToUTF8(StringPiece utf8);
+ // For extract(char*).
+ // We could make a toUTF8(target, capacity, errorCode) public but not
+ // this version: New API will be cleaner if we make callers create substrings
+ // rather than having start+length on every method,
+ // and it should take a UErrorCode&.
+ int32_t
+ toUTF8(int32_t start, int32_t len,
+ char *target, int32_t capacity) const;
+
+ /**
+ * Internal string contents comparison, called by operator==.
+ * Requires: this & text not bogus and have same lengths.
+ */
+ UBool doEquals(const UnicodeString &text, int32_t len) const;
+
+ inline UBool
+ doEqualsSubstring(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ UBool doEqualsSubstring(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCompare(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ int8_t doCompareCodePointOrder(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const;
+
+ inline int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int8_t
+ doCaseCompare(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const;
+
+ int32_t doIndexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(char16_t c,
+ int32_t start,
+ int32_t length) const;
+
+ int32_t doLastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t length) const;
+
+ void doExtract(int32_t start,
+ int32_t length,
+ char16_t *dst,
+ int32_t dstStart) const;
+
+ inline void doExtract(int32_t start,
+ int32_t length,
+ UnicodeString& target) const;
+
+ inline char16_t doCharAt(int32_t offset) const;
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doReplace(int32_t start,
+ int32_t length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength);
+
+ UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
+ UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
+
+ UnicodeString& doReverse(int32_t start,
+ int32_t length);
+
+ // calculate hash code
+ int32_t doHashCode(void) const;
+
+ // get pointer to start of array
+ // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
+ inline char16_t* getArrayStart(void);
+ inline const char16_t* getArrayStart(void) const;
+
+ inline UBool hasShortLength() const;
+ inline int32_t getShortLength() const;
+
+ // A UnicodeString object (not necessarily its current buffer)
+ // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
+ inline UBool isWritable() const;
+
+ // Is the current buffer writable?
+ inline UBool isBufferWritable() const;
+
+ // None of the following does releaseArray().
+ inline void setZeroLength();
+ inline void setShortLength(int32_t len);
+ inline void setLength(int32_t len);
+ inline void setToEmpty();
+ inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
+
+ // allocate the array; result may be the stack buffer
+ // sets refCount to 1 if appropriate
+ // sets fArray, fCapacity, and flags
+ // sets length to 0
+ // returns boolean for success or failure
+ UBool allocate(int32_t capacity);
+
+ // release the array if owned
+ void releaseArray(void);
+
+ // turn a bogus string into an empty one
+ void unBogus();
+
+ // implements assignment operator, copy constructor, and fastCopyFrom()
+ UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
+
+ // Copies just the fields without memory management.
+ void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
+
+ // Pin start and limit to acceptable values.
+ inline void pinIndex(int32_t& start) const;
+ inline void pinIndices(int32_t& start,
+ int32_t& length) const;
+
+#if !UCONFIG_NO_CONVERSION
+
+ /* Internal extract() using UConverter. */
+ int32_t doExtract(int32_t start, int32_t length,
+ char *dest, int32_t destCapacity,
+ UConverter *cnv,
+ UErrorCode &errorCode) const;
+
+ /*
+ * Real constructor for converting from codepage data.
+ * It assumes that it is called with !fRefCounted.
+ *
+ * If `codepage==0`, then the default converter
+ * is used for the platform encoding.
+ * If `codepage` is an empty string (`""`),
+ * then a simple conversion is performed on the codepage-invariant
+ * subset ("invariant characters") of the platform encoding. See utypes.h.
+ */
+ void doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ const char *codepage);
+
+ /*
+ * Worker function for creating a UnicodeString from
+ * a codepage string using a UConverter.
+ */
+ void
+ doCodepageCreate(const char *codepageData,
+ int32_t dataLength,
+ UConverter *converter,
+ UErrorCode &status);
+
+#endif
+
+ /*
+ * This function is called when write access to the array
+ * is necessary.
+ *
+ * We need to make a copy of the array if
+ * the buffer is read-only, or
+ * the buffer is refCounted (shared), and refCount>1, or
+ * the buffer is too small.
+ *
+ * Return false if memory could not be allocated.
+ */
+ UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
+ int32_t growCapacity = -1,
+ UBool doCopyArray = true,
+ int32_t **pBufferToDelete = 0,
+ UBool forceClone = false);
+
+ /**
+ * Common function for UnicodeString case mappings.
+ * The stringCaseMapper has the same type UStringCaseMapper
+ * as in ustr_imp.h for ustrcase_map().
+ */
+ UnicodeString &
+ caseMap(int32_t caseLocale, uint32_t options,
+#if !UCONFIG_NO_BREAK_ITERATION
+ BreakIterator *iter,
+#endif
+ UStringCaseMapper *stringCaseMapper);
+
+ // ref counting
+ void addRef(void);
+ int32_t removeRef(void);
+ int32_t refCount(void) const;
+
+ // constants
+ enum {
+ /**
+ * Size of stack buffer for short strings.
+ * Must be at least U16_MAX_LENGTH for the single-code point constructor to work.
+ * @see UNISTR_OBJECT_SIZE
+ */
+ US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
+ kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
+ kInvalidHashCode=0, // invalid hash code
+ kEmptyHashCode=1, // hash code for empty string
+
+ // bit flag values for fLengthAndFlags
+ kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
+ kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
+ kRefCounted=4, // there is a refCount field before the characters in fArray
+ kBufferIsReadonly=8,// do not write to this buffer
+ kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
+ // and releaseBuffer(newLength) must be called
+ kAllStorageFlags=0x1f,
+
+ kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
+ kLength1=1<<kLengthShift,
+ kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
+ kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
+
+ // combined values for convenience
+ kShortString=kUsingStackBuffer,
+ kLongString=kRefCounted,
+ kReadonlyAlias=kBufferIsReadonly,
+ kWritableAlias=0
+ };
+
+ friend class UnicodeStringAppendable;
+
+ union StackBufferOrFields; // forward declaration necessary before friend declaration
+ friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
+
+ /*
+ * The following are all the class fields that are stored
+ * in each UnicodeString object.
+ * Note that UnicodeString has virtual functions,
+ * therefore there is an implicit vtable pointer
+ * as the first real field.
+ * The fields should be aligned such that no padding is necessary.
+ * On 32-bit machines, the size should be 32 bytes,
+ * on 64-bit machines (8-byte pointers), it should be 40 bytes.
+ *
+ * We use a hack to achieve this.
+ *
+ * With at least some compilers, each of the following is forced to
+ * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
+ * rounded up with additional padding if the fields do not already fit that requirement:
+ * - sizeof(class UnicodeString)
+ * - offsetof(UnicodeString, fUnion)
+ * - sizeof(fUnion)
+ * - sizeof(fStackFields)
+ *
+ * We optimize for the longest possible internal buffer for short strings.
+ * fUnion.fStackFields begins with 2 bytes for storage flags
+ * and the length of relatively short strings,
+ * followed by the buffer for short string contents.
+ * There is no padding inside fStackFields.
+ *
+ * Heap-allocated and aliased strings use fUnion.fFields.
+ * Both fStackFields and fFields must begin with the same fields for flags and short length,
+ * that is, those must have the same memory offsets inside the object,
+ * because the flags must be inspected in order to decide which half of fUnion is being used.
+ * We assume that the compiler does not reorder the fields.
+ *
+ * (Padding at the end of fFields is ok:
+ * As long as it is no larger than fStackFields, it is not wasted space.)
+ *
+ * For some of the history of the UnicodeString class fields layout, see
+ * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
+ * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
+ * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
+ */
+ // (implicit) *vtable;
+ union StackBufferOrFields {
+ // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
+ // Each struct of the union must begin with fLengthAndFlags.
+ struct {
+ int16_t fLengthAndFlags; // bit fields: see constants above
+ char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
+ } fStackFields;
+ struct {
+ int16_t fLengthAndFlags; // bit fields: see constants above
+ int32_t fLength; // number of characters in fArray if >127; else undefined
+ int32_t fCapacity; // capacity of fArray (in char16_ts)
+ // array pointer last to minimize padding for machines with P128 data model
+ // or pointer sizes that are not a power of 2
+ char16_t *fArray; // the Unicode data
+ } fFields;
+ } fUnion;
+};
+
+/**
+ * Create a new UnicodeString with the concatenation of two others.
+ *
+ * @param s1 The first string to be copied to the new one.
+ * @param s2 The second string to be copied to the new one, after s1.
+ * @return UnicodeString(s1).append(s2)
+ * @stable ICU 2.8
+ */
+U_COMMON_API UnicodeString U_EXPORT2
+operator+ (const UnicodeString &s1, const UnicodeString &s2);
+
+//========================================
+// Inline members
+//========================================
+
+//========================================
+// Privates
+//========================================
+
+inline void
+UnicodeString::pinIndex(int32_t& start) const
+{
+ // pin index
+ if(start < 0) {
+ start = 0;
+ } else if(start > length()) {
+ start = length();
+ }
+}
+
+inline void
+UnicodeString::pinIndices(int32_t& start,
+ int32_t& _length) const
+{
+ // pin indices
+ int32_t len = length();
+ if(start < 0) {
+ start = 0;
+ } else if(start > len) {
+ start = len;
+ }
+ if(_length < 0) {
+ _length = 0;
+ } else if(_length > (len - start)) {
+ _length = (len - start);
+ }
+}
+
+inline char16_t*
+UnicodeString::getArrayStart() {
+ return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
+ fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
+}
+
+inline const char16_t*
+UnicodeString::getArrayStart() const {
+ return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
+ fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
+}
+
+//========================================
+// Default constructor
+//========================================
+
+inline
+UnicodeString::UnicodeString() {
+ fUnion.fStackFields.fLengthAndFlags=kShortString;
+}
+
+inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
+ fUnion.fStackFields.fLengthAndFlags=kShortString;
+}
+
+inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
+ fUnion.fStackFields.fLengthAndFlags=kShortString;
+}
+
+inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
+ fUnion.fStackFields.fLengthAndFlags=kShortString;
+}
+
+//========================================
+// Read-only implementation methods
+//========================================
+inline UBool
+UnicodeString::hasShortLength() const {
+ return fUnion.fFields.fLengthAndFlags>=0;
+}
+
+inline int32_t
+UnicodeString::getShortLength() const {
+ // fLengthAndFlags must be non-negative -> short length >= 0
+ // and arithmetic or logical shift does not matter.
+ return fUnion.fFields.fLengthAndFlags>>kLengthShift;
+}
+
+inline int32_t
+UnicodeString::length() const {
+ return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
+}
+
+inline int32_t
+UnicodeString::getCapacity() const {
+ return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
+ US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
+}
+
+inline int32_t
+UnicodeString::hashCode() const
+{ return doHashCode(); }
+
+inline UBool
+UnicodeString::isBogus() const
+{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
+
+inline UBool
+UnicodeString::isWritable() const
+{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
+
+inline UBool
+UnicodeString::isBufferWritable() const
+{
+ return (UBool)(
+ !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
+ (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
+}
+
+inline const char16_t *
+UnicodeString::getBuffer() const {
+ if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
+ return nullptr;
+ } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
+ return fUnion.fStackFields.fBuffer;
+ } else {
+ return fUnion.fFields.fArray;
+ }
+}
+
+//========================================
+// Read-only alias methods
+//========================================
+inline int8_t
+UnicodeString::doCompare(int32_t start,
+ int32_t thisLength,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{
+ if(srcText.isBogus()) {
+ return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+ }
+}
+
+inline UBool
+UnicodeString::doEqualsSubstring(int32_t start,
+ int32_t thisLength,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{
+ if(srcText.isBogus()) {
+ return isBogus();
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+ }
+}
+
+inline bool
+UnicodeString::operator== (const UnicodeString& text) const
+{
+ if(isBogus()) {
+ return text.isBogus();
+ } else {
+ int32_t len = length(), textLength = text.length();
+ return !text.isBogus() && len == textLength && doEquals(text, len);
+ }
+}
+
+inline bool
+UnicodeString::operator!= (const UnicodeString& text) const
+{ return (! operator==(text)); }
+
+inline UBool
+UnicodeString::operator> (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == 1; }
+
+inline UBool
+UnicodeString::operator< (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) == -1; }
+
+inline UBool
+UnicodeString::operator>= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != -1; }
+
+inline UBool
+UnicodeString::operator<= (const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()) != 1; }
+
+inline int8_t
+UnicodeString::compare(const UnicodeString& text) const
+{ return doCompare(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText) const
+{ return doCompare(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compare(ConstChar16Ptr srcChars,
+ int32_t srcLength) const
+{ return doCompare(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompare(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars) const
+{ return doCompare(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compare(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const
+{ return doCompare(start, limit - start,
+ srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCompareCodePointOrder(int32_t start,
+ int32_t thisLength,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{
+ if(srcText.isBogus()) {
+ return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
+ }
+}
+
+inline int8_t
+UnicodeString::compareCodePointOrder(const UnicodeString& text) const
+{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText) const
+{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
+ int32_t srcLength) const
+{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars) const
+{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
+
+inline int8_t
+UnicodeString::compareCodePointOrder(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
+
+inline int8_t
+UnicodeString::compareCodePointOrderBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit) const
+{ return doCompareCodePointOrder(start, limit - start,
+ srcText, srcStart, srcLimit - srcStart); }
+
+inline int8_t
+UnicodeString::doCaseCompare(int32_t start,
+ int32_t thisLength,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const
+{
+ if(srcText.isBogus()) {
+ return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
+ } else {
+ srcText.pinIndices(srcStart, srcLength);
+ return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
+ }
+}
+
+inline int8_t
+UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
+ return doCaseCompare(0, length(), text, 0, text.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UnicodeString &srcText,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcChars, 0, _length, options);
+}
+
+inline int8_t
+UnicodeString::caseCompare(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength,
+ uint32_t options) const {
+ return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
+}
+
+inline int8_t
+UnicodeString::caseCompareBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString &srcText,
+ int32_t srcStart,
+ int32_t srcLimit,
+ uint32_t options) const {
+ return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{
+ if(!srcText.isBogus()) {
+ srcText.pinIndices(srcStart, srcLength);
+ if(srcLength > 0) {
+ return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+ }
+ }
+ return -1;
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text) const
+{ return indexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+ int32_t start) const {
+ pinIndex(start);
+ return indexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t _length) const
+{ return indexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(const char16_t *srcChars,
+ int32_t srcLength,
+ int32_t start) const {
+ pinIndex(start);
+ return indexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{ return indexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(char16_t c,
+ int32_t start,
+ int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+ int32_t start,
+ int32_t _length) const
+{ return doIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::indexOf(char16_t c) const
+{ return doIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c) const
+{ return indexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::indexOf(char16_t c,
+ int32_t start) const {
+ pinIndex(start);
+ return doIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::indexOf(UChar32 c,
+ int32_t start) const {
+ pinIndex(start);
+ return indexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const char16_t *srcChars,
+ int32_t srcLength,
+ int32_t start) const {
+ pinIndex(start);
+ return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength,
+ int32_t start,
+ int32_t _length) const
+{
+ if(!srcText.isBogus()) {
+ srcText.pinIndices(srcStart, srcLength);
+ if(srcLength > 0) {
+ return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
+ }
+ }
+ return -1;
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+ int32_t start,
+ int32_t _length) const
+{ return lastIndexOf(text, 0, text.length(), start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text,
+ int32_t start) const {
+ pinIndex(start);
+ return lastIndexOf(text, 0, text.length(), start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(const UnicodeString& text) const
+{ return lastIndexOf(text, 0, text.length(), 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(char16_t c,
+ int32_t start,
+ int32_t _length) const
+{ return doLastIndexOf(c, start, _length); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+ int32_t start,
+ int32_t _length) const {
+ return doLastIndexOf(c, start, _length);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(char16_t c) const
+{ return doLastIndexOf(c, 0, length()); }
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c) const {
+ return lastIndexOf(c, 0, length());
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(char16_t c,
+ int32_t start) const {
+ pinIndex(start);
+ return doLastIndexOf(c, start, length() - start);
+}
+
+inline int32_t
+UnicodeString::lastIndexOf(UChar32 c,
+ int32_t start) const {
+ pinIndex(start);
+ return lastIndexOf(c, start, length() - start);
+}
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& text) const
+{ return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
+
+inline UBool
+UnicodeString::startsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const
+{ return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
+
+inline UBool
+UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(toUCharPtr(srcChars));
+ }
+ return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
+}
+
+inline UBool
+UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(toUCharPtr(srcChars));
+ }
+ return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
+}
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& text) const
+{ return doEqualsSubstring(length() - text.length(), text.length(),
+ text, 0, text.length()); }
+
+inline UBool
+UnicodeString::endsWith(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength) const {
+ srcText.pinIndices(srcStart, srcLength);
+ return doEqualsSubstring(length() - srcLength, srcLength,
+ srcText, srcStart, srcLength);
+}
+
+inline UBool
+UnicodeString::endsWith(ConstChar16Ptr srcChars,
+ int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(toUCharPtr(srcChars));
+ }
+ return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
+}
+
+inline UBool
+UnicodeString::endsWith(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength) const {
+ if(srcLength < 0) {
+ srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
+ }
+ return doEqualsSubstring(length() - srcLength, srcLength,
+ srcChars, srcStart, srcLength);
+}
+
+//========================================
+// replace
+//========================================
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText)
+{ return doReplace(start, _length, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, _length, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ ConstChar16Ptr srcChars,
+ int32_t srcLength)
+{ return doReplace(start, _length, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::replace(int32_t start,
+ int32_t _length,
+ char16_t srcChar)
+{ return doReplace(start, _length, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText)
+{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::replaceBetween(int32_t start,
+ int32_t limit,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLimit)
+{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(const UnicodeString& oldText,
+ const UnicodeString& newText)
+{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
+ newText, 0, newText.length()); }
+
+inline UnicodeString&
+UnicodeString::findAndReplace(int32_t start,
+ int32_t _length,
+ const UnicodeString& oldText,
+ const UnicodeString& newText)
+{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
+ newText, 0, newText.length()); }
+
+// ============================
+// extract
+// ============================
+inline void
+UnicodeString::doExtract(int32_t start,
+ int32_t _length,
+ UnicodeString& target) const
+{ target.replace(0, target.length(), *this, start, _length); }
+
+inline void
+UnicodeString::extract(int32_t start,
+ int32_t _length,
+ Char16Ptr target,
+ int32_t targetStart) const
+{ doExtract(start, _length, target, targetStart); }
+
+inline void
+UnicodeString::extract(int32_t start,
+ int32_t _length,
+ UnicodeString& target) const
+{ doExtract(start, _length, target); }
+
+#if !UCONFIG_NO_CONVERSION
+
+inline int32_t
+UnicodeString::extract(int32_t start,
+ int32_t _length,
+ char *dst,
+ const char *codepage) const
+
+{
+ // This dstSize value will be checked explicitly
+ return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
+}
+
+#endif
+
+inline void
+UnicodeString::extractBetween(int32_t start,
+ int32_t limit,
+ char16_t *dst,
+ int32_t dstStart) const {
+ pinIndex(start);
+ pinIndex(limit);
+ doExtract(start, limit - start, dst, dstStart);
+}
+
+inline UnicodeString
+UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
+ return tempSubString(start, limit - start);
+}
+
+inline char16_t
+UnicodeString::doCharAt(int32_t offset) const
+{
+ if((uint32_t)offset < (uint32_t)length()) {
+ return getArrayStart()[offset];
+ } else {
+ return kInvalidUChar;
+ }
+}
+
+inline char16_t
+UnicodeString::charAt(int32_t offset) const
+{ return doCharAt(offset); }
+
+inline char16_t
+UnicodeString::operator[] (int32_t offset) const
+{ return doCharAt(offset); }
+
+inline UBool
+UnicodeString::isEmpty() const {
+ // Arithmetic or logical right shift does not matter: only testing for 0.
+ return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
+}
+
+//========================================
+// Write implementation methods
+//========================================
+inline void
+UnicodeString::setZeroLength() {
+ fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
+}
+
+inline void
+UnicodeString::setShortLength(int32_t len) {
+ // requires 0 <= len <= kMaxShortLength
+ fUnion.fFields.fLengthAndFlags =
+ (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
+}
+
+inline void
+UnicodeString::setLength(int32_t len) {
+ if(len <= kMaxShortLength) {
+ setShortLength(len);
+ } else {
+ fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
+ fUnion.fFields.fLength = len;
+ }
+}
+
+inline void
+UnicodeString::setToEmpty() {
+ fUnion.fFields.fLengthAndFlags = kShortString;
+}
+
+inline void
+UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
+ setLength(len);
+ fUnion.fFields.fArray = array;
+ fUnion.fFields.fCapacity = capacity;
+}
+
+inline UnicodeString&
+UnicodeString::operator= (char16_t ch)
+{ return doReplace(0, length(), &ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator= (UChar32 ch)
+{ return replace(0, length(), ch); }
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{
+ unBogus();
+ return doReplace(0, length(), srcText, srcStart, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText,
+ int32_t srcStart)
+{
+ unBogus();
+ srcText.pinIndex(srcStart);
+ return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const UnicodeString& srcText)
+{
+ return copyFrom(srcText);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(const char16_t *srcChars,
+ int32_t srcLength)
+{
+ unBogus();
+ return doReplace(0, length(), srcChars, 0, srcLength);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(char16_t srcChar)
+{
+ unBogus();
+ return doReplace(0, length(), &srcChar, 0, 1);
+}
+
+inline UnicodeString&
+UnicodeString::setTo(UChar32 srcChar)
+{
+ unBogus();
+ return replace(0, length(), srcChar);
+}
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doAppend(srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(const UnicodeString& srcText)
+{ return doAppend(srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::append(const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doAppend(srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(ConstChar16Ptr srcChars,
+ int32_t srcLength)
+{ return doAppend(srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::append(char16_t srcChar)
+{ return doAppend(&srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (char16_t ch)
+{ return doAppend(&ch, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::operator+= (UChar32 ch) {
+ return append(ch);
+}
+
+inline UnicodeString&
+UnicodeString::operator+= (const UnicodeString& srcText)
+{ return doAppend(srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const UnicodeString& srcText,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, 0, srcText, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const UnicodeString& srcText)
+{ return doReplace(start, 0, srcText, 0, srcText.length()); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ const char16_t *srcChars,
+ int32_t srcStart,
+ int32_t srcLength)
+{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ ConstChar16Ptr srcChars,
+ int32_t srcLength)
+{ return doReplace(start, 0, srcChars, 0, srcLength); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ char16_t srcChar)
+{ return doReplace(start, 0, &srcChar, 0, 1); }
+
+inline UnicodeString&
+UnicodeString::insert(int32_t start,
+ UChar32 srcChar)
+{ return replace(start, 0, srcChar); }
+
+
+inline UnicodeString&
+UnicodeString::remove()
+{
+ // remove() of a bogus string makes the string empty and non-bogus
+ if(isBogus()) {
+ setToEmpty();
+ } else {
+ setZeroLength();
+ }
+ return *this;
+}
+
+inline UnicodeString&
+UnicodeString::remove(int32_t start,
+ int32_t _length)
+{
+ if(start <= 0 && _length == INT32_MAX) {
+ // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
+ return remove();
+ }
+ return doReplace(start, _length, nullptr, 0, 0);
+}
+
+inline UnicodeString&
+UnicodeString::removeBetween(int32_t start,
+ int32_t limit)
+{ return doReplace(start, limit - start, nullptr, 0, 0); }
+
+inline UnicodeString &
+UnicodeString::retainBetween(int32_t start, int32_t limit) {
+ truncate(limit);
+ return doReplace(0, start, nullptr, 0, 0);
+}
+
+inline UBool
+UnicodeString::truncate(int32_t targetLength)
+{
+ if(isBogus() && targetLength == 0) {
+ // truncate(0) of a bogus string makes the string empty and non-bogus
+ unBogus();
+ return false;
+ } else if((uint32_t)targetLength < (uint32_t)length()) {
+ setLength(targetLength);
+ return true;
+ } else {
+ return false;
+ }
+}
+
+inline UnicodeString&
+UnicodeString::reverse()
+{ return doReverse(0, length()); }
+
+inline UnicodeString&
+UnicodeString::reverse(int32_t start,
+ int32_t _length)
+{ return doReverse(start, _length); }
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/unorm.h b/intl/icu/source/common/unicode/unorm.h
new file mode 100644
index 0000000000..38fb895155
--- /dev/null
+++ b/intl/icu/source/common/unicode/unorm.h
@@ -0,0 +1,476 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (c) 1996-2016, International Business Machines Corporation
+* and others. All Rights Reserved.
+*******************************************************************************
+* File unorm.h
+*
+* Created by: Vladimir Weinstein 12052000
+*
+* Modification history :
+*
+* Date Name Description
+* 02/01/01 synwee Added normalization quickcheck enum and method.
+*/
+#ifndef UNORM_H
+#define UNORM_H
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/uiter.h"
+#include "unicode/unorm2.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Normalization
+ *
+ * Old Unicode normalization API.
+ *
+ * This API has been replaced by the unorm2.h API and is only available
+ * for backward compatibility. The functions here simply delegate to the
+ * unorm2.h functions, for example unorm2_getInstance() and unorm2_normalize().
+ * There is one exception: The new API does not provide a replacement for unorm_compare().
+ * Its declaration has been moved to unorm2.h.
+ *
+ * <code>unorm_normalize</code> transforms Unicode text into an equivalent composed or
+ * decomposed form, allowing for easier sorting and searching of text.
+ * <code>unorm_normalize</code> supports the standard normalization forms described in
+ * <a href="http://www.unicode.org/unicode/reports/tr15/" target="unicode">
+ * Unicode Standard Annex #15: Unicode Normalization Forms</a>.
+ *
+ * Characters with accents or other adornments can be encoded in
+ * several different ways in Unicode. For example, take the character A-acute.
+ * In Unicode, this can be encoded as a single character (the
+ * "composed" form):
+ *
+ * \code
+ * 00C1 LATIN CAPITAL LETTER A WITH ACUTE
+ * \endcode
+ *
+ * or as two separate characters (the "decomposed" form):
+ *
+ * \code
+ * 0041 LATIN CAPITAL LETTER A
+ * 0301 COMBINING ACUTE ACCENT
+ * \endcode
+ *
+ * To a user of your program, however, both of these sequences should be
+ * treated as the same "user-level" character "A with acute accent". When you are searching or
+ * comparing text, you must ensure that these two sequences are treated
+ * equivalently. In addition, you must handle characters with more than one
+ * accent. Sometimes the order of a character's combining accents is
+ * significant, while in other cases accent sequences in different orders are
+ * really equivalent.
+ *
+ * Similarly, the string "ffi" can be encoded as three separate letters:
+ *
+ * \code
+ * 0066 LATIN SMALL LETTER F
+ * 0066 LATIN SMALL LETTER F
+ * 0069 LATIN SMALL LETTER I
+ * \endcode
+ *
+ * or as the single character
+ *
+ * \code
+ * FB03 LATIN SMALL LIGATURE FFI
+ * \endcode
+ *
+ * The ffi ligature is not a distinct semantic character, and strictly speaking
+ * it shouldn't be in Unicode at all, but it was included for compatibility
+ * with existing character sets that already provided it. The Unicode standard
+ * identifies such characters by giving them "compatibility" decompositions
+ * into the corresponding semantic characters. When sorting and searching, you
+ * will often want to use these mappings.
+ *
+ * <code>unorm_normalize</code> helps solve these problems by transforming text into the
+ * canonical composed and decomposed forms as shown in the first example above.
+ * In addition, you can have it perform compatibility decompositions so that
+ * you can treat compatibility characters the same as their equivalents.
+ * Finally, <code>unorm_normalize</code> rearranges accents into the proper canonical
+ * order, so that you do not have to worry about accent rearrangement on your
+ * own.
+ *
+ * Form FCD, "Fast C or D", is also designed for collation.
+ * It allows to work on strings that are not necessarily normalized
+ * with an algorithm (like in collation) that works under "canonical closure", i.e., it treats precomposed
+ * characters and their decomposed equivalents the same.
+ *
+ * It is not a normalization form because it does not provide for uniqueness of representation. Multiple strings
+ * may be canonically equivalent (their NFDs are identical) and may all conform to FCD without being identical
+ * themselves.
+ *
+ * The form is defined such that the "raw decomposition", the recursive canonical decomposition of each character,
+ * results in a string that is canonically ordered. This means that precomposed characters are allowed for as long
+ * as their decompositions do not need canonical reordering.
+ *
+ * Its advantage for a process like collation is that all NFD and most NFC texts - and many unnormalized texts -
+ * already conform to FCD and do not need to be normalized (NFD) for such a process. The FCD quick check will
+ * return UNORM_YES for most strings in practice.
+ *
+ * unorm_normalize(UNORM_FCD) may be implemented with UNORM_NFD.
+ *
+ * For more details on FCD see the collation design document:
+ * https://htmlpreview.github.io/?https://github.com/unicode-org/icu-docs/blob/main/design/collation/ICU_collation_design.htm
+ *
+ * ICU collation performs either NFD or FCD normalization automatically if normalization
+ * is turned on for the collator object.
+ * Beyond collation and string search, normalized strings may be useful for string equivalence comparisons,
+ * transliteration/transcription, unique representations, etc.
+ *
+ * The W3C generally recommends to exchange texts in NFC.
+ * Note also that most legacy character encodings use only precomposed forms and often do not
+ * encode any combining marks by themselves. For conversion to such character encodings the
+ * Unicode text needs to be normalized to NFC.
+ * For more usage examples, see the Unicode Standard Annex.
+ */
+
+// Do not conditionalize the following enum with #ifndef U_HIDE_DEPRECATED_API,
+// it is needed for layout of Normalizer object.
+#ifndef U_FORCE_HIDE_DEPRECATED_API
+
+/**
+ * Constants for normalization modes.
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+typedef enum {
+ /** No decomposition/composition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NONE = 1,
+ /** Canonical decomposition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFD = 2,
+ /** Compatibility decomposition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFKD = 3,
+ /** Canonical decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFC = 4,
+ /** Default normalization. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_DEFAULT = UNORM_NFC,
+ /** Compatibility decomposition followed by canonical composition. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_NFKC =5,
+ /** "Fast C or D" form. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_FCD = 6,
+
+ /** One more than the highest normalization mode constant. @deprecated ICU 56 Use unorm2.h instead. */
+ UNORM_MODE_COUNT
+} UNormalizationMode;
+
+#endif // U_FORCE_HIDE_DEPRECATED_API
+
+#ifndef U_HIDE_DEPRECATED_API
+
+/**
+ * Constants for options flags for normalization.
+ * Use 0 for default options,
+ * including normalization according to the Unicode version
+ * that is currently supported by ICU (see u_getUnicodeVersion).
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+enum {
+ /**
+ * Options bit set value to select Unicode 3.2 normalization
+ * (except NormalizationCorrections).
+ * At most one Unicode version can be selected at a time.
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+ UNORM_UNICODE_3_2=0x20
+};
+
+/**
+ * Lowest-order bit number of unorm_compare() options bits corresponding to
+ * normalization options bits.
+ *
+ * The options parameter for unorm_compare() uses most bits for
+ * itself and for various comparison and folding flags.
+ * The most significant bits, however, are shifted down and passed on
+ * to the normalization implementation.
+ * (That is, from unorm_compare(..., options, ...),
+ * options>>UNORM_COMPARE_NORM_OPTIONS_SHIFT will be passed on to the
+ * internal normalization functions.)
+ *
+ * @see unorm_compare
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+#define UNORM_COMPARE_NORM_OPTIONS_SHIFT 20
+
+/**
+ * Normalize a string.
+ * The string will be normalized according the specified normalization mode
+ * and options.
+ * The source and result buffers must not be the same, nor overlap.
+ *
+ * @param source The string to normalize.
+ * @param sourceLength The length of source, or -1 if NUL-terminated.
+ * @param mode The normalization mode; one of UNORM_NONE,
+ * UNORM_NFD, UNORM_NFC, UNORM_NFKC, UNORM_NFKD, UNORM_DEFAULT.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param result A pointer to a buffer to receive the result string.
+ * The result string is NUL-terminated if possible.
+ * @param resultLength The maximum size of result.
+ * @param status A pointer to a UErrorCode to receive any errors.
+ * @return The total buffer size needed; if greater than resultLength,
+ * the output was truncated, and the error code is set to U_BUFFER_OVERFLOW_ERROR.
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_normalize(const UChar *source, int32_t sourceLength,
+ UNormalizationMode mode, int32_t options,
+ UChar *result, int32_t resultLength,
+ UErrorCode *status);
+
+/**
+ * Performing quick check on a string, to quickly determine if the string is
+ * in a particular normalization format.
+ * Three types of result can be returned UNORM_YES, UNORM_NO or
+ * UNORM_MAYBE. Result UNORM_YES indicates that the argument
+ * string is in the desired normalized format, UNORM_NO determines that
+ * argument string is not in the desired normalized format. A
+ * UNORM_MAYBE result indicates that a more thorough check is required,
+ * the user may have to put the string in its normalized form and compare the
+ * results.
+ *
+ * @param source string for determining if it is in a normalized format
+ * @param sourcelength length of source to test, or -1 if NUL-terminated
+ * @param mode which normalization form to test for
+ * @param status a pointer to a UErrorCode to receive any errors
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_isNormalized
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UNormalizationCheckResult U_EXPORT2
+unorm_quickCheck(const UChar *source, int32_t sourcelength,
+ UNormalizationMode mode,
+ UErrorCode *status);
+
+/**
+ * Performing quick check on a string; same as unorm_quickCheck but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return UNORM_YES, UNORM_NO or UNORM_MAYBE
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UNormalizationCheckResult U_EXPORT2
+unorm_quickCheckWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form.
+ * This is semantically equivalent to source.equals(normalize(source, mode)) .
+ *
+ * Unlike unorm_quickCheck(), this function returns a definitive result,
+ * never a "maybe".
+ * For NFD, NFKD, and FCD, both functions work exactly the same.
+ * For NFC and NFKC where quickCheck may return "maybe", this function will
+ * perform further tests to arrive at a true/false result.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UBool U_EXPORT2
+unorm_isNormalized(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Test if a string is in a given normalization form; same as unorm_isNormalized but
+ * takes an extra options parameter like most normalization functions.
+ *
+ * @param src String that is to be tested if it is in a normalization format.
+ * @param srcLength Length of source to test, or -1 if NUL-terminated.
+ * @param mode Which normalization form to test for.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Boolean value indicating whether the source string is in the
+ * "mode/options" normalization form.
+ *
+ * @see unorm_quickCheck
+ * @see unorm_isNormalized
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED UBool U_EXPORT2
+unorm_isNormalizedWithOptions(const UChar *src, int32_t srcLength,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization forward.
+ * This function (together with unorm_previous) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ *
+ * Iterative normalization is useful when only a small portion of a longer
+ * string/text needs to be processed.
+ *
+ * For example, the likelihood may be high that processing the first 10% of some
+ * text will be sufficient to find certain data.
+ * Another example: When one wants to concatenate two normalized strings and get a
+ * normalized result, it is much more efficient to normalize just a small part of
+ * the result around the concatenation place instead of re-normalizing everything.
+ *
+ * The input text is an instance of the C character iteration API UCharIterator.
+ * It may wrap around a simple string, a CharacterIterator, a Replaceable, or any
+ * other kind of text object.
+ *
+ * If a buffer overflow occurs, then the caller needs to reset the iterator to the
+ * old index and call the function again with a larger buffer - if the caller cares
+ * for the actual output.
+ * Regardless of the output buffer, the iterator will always be moved to the next
+ * normalization boundary.
+ *
+ * This function (like unorm_previous) serves two purposes:
+ *
+ * 1) To find the next boundary so that the normalization of the part of the text
+ * from the current position to that boundary does not affect and is not affected
+ * by the part of the text beyond that boundary.
+ *
+ * 2) To normalize the text up to the boundary.
+ *
+ * The second step is optional, per the doNormalize parameter.
+ * It is omitted for operations like string concatenation, where the two adjacent
+ * string ends need to be normalized together.
+ * In such a case, the output buffer will just contain a copy of the text up to the
+ * boundary.
+ *
+ * pNeededToNormalize is an output-only parameter. Its output value is only defined
+ * if normalization was requested (doNormalize) and successful (especially, no
+ * buffer overflow).
+ * It is useful for operations like a normalizing transliterator, where one would
+ * not want to replace a piece of text if it is not modified.
+ *
+ * If doNormalize==true and pNeededToNormalize!=NULL then *pNeeded... is set true
+ * if the normalization was necessary.
+ *
+ * If doNormalize==false then *pNeededToNormalize will be set to false.
+ *
+ * If the buffer overflows, then *pNeededToNormalize will be undefined;
+ * essentially, whenever U_FAILURE is true (like in buffer overflows), this result
+ * will be undefined.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (true) or just copied (false).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always false if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_previous
+ * @see unorm_normalize
+ *
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_next(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Iterative normalization backward.
+ * This function (together with unorm_next) is somewhat
+ * similar to the C++ Normalizer class (see its non-static functions).
+ * For all details see unorm_next.
+ *
+ * @param src The input text in the form of a C character iterator.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param doNormalize Indicates if the source text up to the next boundary
+ * is to be normalized (true) or just copied (false).
+ * @param pNeededToNormalize Output flag indicating if the normalization resulted in
+ * different text from the input.
+ * Not defined if an error occurs including buffer overflow.
+ * Always false if !doNormalize.
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_next
+ * @see unorm_normalize
+ *
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_previous(UCharIterator *src,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UBool doNormalize, UBool *pNeededToNormalize,
+ UErrorCode *pErrorCode);
+
+/**
+ * Concatenate normalized strings, making sure that the result is normalized as well.
+ *
+ * If both the left and the right strings are in
+ * the normalization form according to "mode/options",
+ * then the result will be
+ *
+ * \code
+ * dest=normalize(left+right, mode, options)
+ * \endcode
+ *
+ * With the input strings already being normalized,
+ * this function will use unorm_next() and unorm_previous()
+ * to find the adjacent end pieces of the input strings.
+ * Only the concatenation of these end pieces will be normalized and
+ * then concatenated with the remaining parts of the input strings.
+ *
+ * It is allowed to have dest==left to avoid copying the entire left string.
+ *
+ * @param left Left source string, may be same as dest.
+ * @param leftLength Length of left source string, or -1 if NUL-terminated.
+ * @param right Right source string. Must not be the same as dest, nor overlap.
+ * @param rightLength Length of right source string, or -1 if NUL-terminated.
+ * @param dest The output buffer; can be NULL if destCapacity==0 for pure preflighting.
+ * @param destCapacity The number of UChars that fit into dest.
+ * @param mode The normalization mode.
+ * @param options The normalization options, ORed together (0 for no options).
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Length of output (number of UChars) when successful or buffer overflow.
+ *
+ * @see unorm_normalize
+ * @see unorm_next
+ * @see unorm_previous
+ *
+ * @deprecated ICU 56 Use unorm2.h instead.
+ */
+U_DEPRECATED int32_t U_EXPORT2
+unorm_concatenate(const UChar *left, int32_t leftLength,
+ const UChar *right, int32_t rightLength,
+ UChar *dest, int32_t destCapacity,
+ UNormalizationMode mode, int32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* U_HIDE_DEPRECATED_API */
+#endif /* #if !UCONFIG_NO_NORMALIZATION */
+#endif
diff --git a/intl/icu/source/common/unicode/unorm2.h b/intl/icu/source/common/unicode/unorm2.h
new file mode 100644
index 0000000000..24417b7103
--- /dev/null
+++ b/intl/icu/source/common/unicode/unorm2.h
@@ -0,0 +1,606 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2009-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: unorm2.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2009dec15
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UNORM2_H__
+#define __UNORM2_H__
+
+/**
+ * \file
+ * \brief C API: New API for Unicode Normalization.
+ *
+ * Unicode normalization functionality for standard Unicode normalization or
+ * for using custom mapping tables.
+ * All instances of UNormalizer2 are unmodifiable/immutable.
+ * Instances returned by unorm2_getInstance() are singletons that must not be deleted by the caller.
+ * For more details see the Normalizer2 C++ class.
+ */
+
+#include "unicode/utypes.h"
+#include "unicode/stringoptions.h"
+#include "unicode/uset.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * Constants for normalization modes.
+ * For details about standard Unicode normalization forms
+ * and about the algorithms which are also used with custom mapping tables
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+typedef enum {
+ /**
+ * Decomposition followed by composition.
+ * Same as standard NFC when using an "nfc" instance.
+ * Same as standard NFKC when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE,
+ /**
+ * Map, and reorder canonically.
+ * Same as standard NFD when using an "nfc" instance.
+ * Same as standard NFKD when using an "nfkc" instance.
+ * For details about standard Unicode normalization forms
+ * see http://www.unicode.org/unicode/reports/tr15/
+ * @stable ICU 4.4
+ */
+ UNORM2_DECOMPOSE,
+ /**
+ * "Fast C or D" form.
+ * If a string is in this form, then further decomposition <i>without reordering</i>
+ * would yield the same form as DECOMPOSE.
+ * Text in "Fast C or D" form can be processed efficiently with data tables
+ * that are "canonically closed", that is, that provide equivalent data for
+ * equivalent text, without having to be fully normalized.
+ * Not a standard Unicode normalization form.
+ * Not a unique form: Different FCD strings can be canonically equivalent.
+ * For details see http://www.unicode.org/notes/tn5/#FCD
+ * @stable ICU 4.4
+ */
+ UNORM2_FCD,
+ /**
+ * Compose only contiguously.
+ * Also known as "FCC" or "Fast C Contiguous".
+ * The result will often but not always be in NFC.
+ * The result will conform to FCD which is useful for processing.
+ * Not a standard Unicode normalization form.
+ * For details see http://www.unicode.org/notes/tn5/#FCC
+ * @stable ICU 4.4
+ */
+ UNORM2_COMPOSE_CONTIGUOUS
+} UNormalization2Mode;
+
+/**
+ * Result values for normalization quick check functions.
+ * For details see http://www.unicode.org/reports/tr15/#Detecting_Normalization_Forms
+ * @stable ICU 2.0
+ */
+typedef enum UNormalizationCheckResult {
+ /**
+ * The input string is not in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_NO,
+ /**
+ * The input string is in the normalization form.
+ * @stable ICU 2.0
+ */
+ UNORM_YES,
+ /**
+ * The input string may or may not be in the normalization form.
+ * This value is only returned for composition forms like NFC and FCC,
+ * when a backward-combining character is found for which the surrounding text
+ * would have to be analyzed further.
+ * @stable ICU 2.0
+ */
+ UNORM_MAYBE
+} UNormalizationCheckResult;
+
+/**
+ * Opaque C service object type for the new normalization API.
+ * @stable ICU 4.4
+ */
+struct UNormalizer2;
+typedef struct UNormalizer2 UNormalizer2; /**< C typedef for struct UNormalizer2. @stable ICU 4.4 */
+
+#if !UCONFIG_NO_NORMALIZATION
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFC normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFD normalization.
+ * Same as unorm2_getInstance(NULL, "nfc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKD normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc", UNORM2_DECOMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKDInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance for Unicode NFKC_Casefold normalization.
+ * Same as unorm2_getInstance(NULL, "nfkc_cf", UNORM2_COMPOSE, pErrorCode).
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested Normalizer2, if successful
+ * @stable ICU 49
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode);
+
+/**
+ * Returns a UNormalizer2 instance which uses the specified data file
+ * (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle)
+ * and which composes or decomposes text according to the specified mode.
+ * Returns an unmodifiable singleton instance. Do not delete it.
+ *
+ * Use packageName=NULL for data files that are part of ICU's own data.
+ * Use name="nfc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFC/NFD.
+ * Use name="nfkc" and UNORM2_COMPOSE/UNORM2_DECOMPOSE for Unicode standard NFKC/NFKD.
+ * Use name="nfkc_cf" and UNORM2_COMPOSE for Unicode standard NFKC_CF=NFKC_Casefold.
+ *
+ * @param packageName NULL for ICU built-in data, otherwise application data package name
+ * @param name "nfc" or "nfkc" or "nfkc_cf" or name of custom data file
+ * @param mode normalization mode (compose or decompose etc.)
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_CAPI const UNormalizer2 * U_EXPORT2
+unorm2_getInstance(const char *packageName,
+ const char *name,
+ UNormalization2Mode mode,
+ UErrorCode *pErrorCode);
+
+/**
+ * Constructs a filtered normalizer wrapping any UNormalizer2 instance
+ * and a filter set.
+ * Both are aliased and must not be modified or deleted while this object
+ * is used.
+ * The filter set should be frozen; otherwise the performance will suffer greatly.
+ * @param norm2 wrapped UNormalizer2 instance
+ * @param filterSet USet which determines the characters to be normalized
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the requested UNormalizer2, if successful
+ * @stable ICU 4.4
+ */
+U_CAPI UNormalizer2 * U_EXPORT2
+unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
+
+/**
+ * Closes a UNormalizer2 instance from unorm2_openFiltered().
+ * Do not close instances from unorm2_getInstance()!
+ * @param norm2 UNormalizer2 instance to be closed
+ * @stable ICU 4.4
+ */
+U_CAPI void U_EXPORT2
+unorm2_close(UNormalizer2 *norm2);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUNormalizer2Pointer
+ * "Smart pointer" class, closes a UNormalizer2 via unorm2_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUNormalizer2Pointer, UNormalizer2, unorm2_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Writes the normalized form of the source string to the destination string
+ * (replacing its contents) and returns the length of the destination string.
+ * The source and destination strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param src source string
+ * @param length length of the source string, or -1 if NUL-terminated
+ * @param dest destination string; its contents is replaced with normalized src
+ * @param capacity number of UChars that can be written to dest
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return dest
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_normalize(const UNormalizer2 *norm2,
+ const UChar *src, int32_t length,
+ UChar *dest, int32_t capacity,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the normalized form of the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if the first string was normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, will be normalized
+ * @param secondLength length of the source string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+/**
+ * Appends the second string to the first string
+ * (merging them at the boundary) and returns the length of the first string.
+ * The result is normalized if both the strings were normalized.
+ * The first and second strings must be different buffers.
+ * @param norm2 UNormalizer2 instance
+ * @param first string, should be normalized
+ * @param firstLength length of the first string, or -1 if NUL-terminated
+ * @param firstCapacity number of UChars that can be written to first
+ * @param second string, should be normalized
+ * @param secondLength length of the source string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return first
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_append(const UNormalizer2 *norm2,
+ UChar *first, int32_t firstLength, int32_t firstCapacity,
+ const UChar *second, int32_t secondLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the decomposition mapping of c.
+ * Roughly equivalent to normalizing the String form of c
+ * on a UNORM2_DECOMPOSE UNormalizer2 instance, but much faster, and except that this function
+ * returns a negative value and does not write a string
+ * if c does not have a decomposition mapping in this instance's data.
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's decomposition, if there is one; otherwise a negative value
+ * @stable ICU 4.6
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_getDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Gets the raw decomposition mapping of c.
+ *
+ * This is similar to the unorm2_getDecomposition() function but returns the
+ * raw decomposition mapping as specified in UnicodeData.txt or
+ * (for custom data) in the mapping files processed by the gennorm2 tool.
+ * By contrast, unorm2_getDecomposition() returns the processed,
+ * recursively-decomposed version of this mapping.
+ *
+ * When used on a standard NFKC Normalizer2 instance,
+ * unorm2_getRawDecomposition() returns the Unicode Decomposition_Mapping (dm) property.
+ *
+ * When used on a standard NFC Normalizer2 instance,
+ * it returns the Decomposition_Mapping only if the Decomposition_Type (dt) is Canonical (Can);
+ * in this case, the result contains either one or two code points (=1..4 UChars).
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @param decomposition String buffer which will be set to c's
+ * raw decomposition mapping, if there is one.
+ * @param capacity number of UChars that can be written to decomposition
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return the non-negative length of c's raw decomposition, if there is one; otherwise a negative value
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_getRawDecomposition(const UNormalizer2 *norm2,
+ UChar32 c, UChar *decomposition, int32_t capacity,
+ UErrorCode *pErrorCode);
+
+/**
+ * Performs pairwise composition of a & b and returns the composite if there is one.
+ *
+ * Returns a composite code point c only if c has a two-way mapping to a+b.
+ * In standard Unicode normalization, this means that
+ * c has a canonical decomposition to a+b
+ * and c does not have the Full_Composition_Exclusion property.
+ *
+ * This function is independent of the mode of the UNormalizer2.
+ * @param norm2 UNormalizer2 instance
+ * @param a A (normalization starter) code point.
+ * @param b Another code point.
+ * @return The non-negative composite code point if there is one; otherwise a negative value.
+ * @stable ICU 49
+ */
+U_CAPI UChar32 U_EXPORT2
+unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b);
+
+/**
+ * Gets the combining class of c.
+ * The default implementation returns 0
+ * but all standard implementations return the Unicode Canonical_Combining_Class value.
+ * @param norm2 UNormalizer2 instance
+ * @param c code point
+ * @return c's combining class
+ * @stable ICU 49
+ */
+U_CAPI uint8_t U_EXPORT2
+unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the string is normalized.
+ * Internally, in cases where the quickCheck() method would return "maybe"
+ * (which is only possible for the two COMPOSE modes) this method
+ * resolves to "yes" or "no" to provide a definitive result,
+ * at the cost of doing more work in those cases.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return true if s is normalized
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_isNormalized(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the string is normalized.
+ * For the two COMPOSE modes, the result could be "maybe" in cases that
+ * would take a little more work to resolve definitively.
+ * Use spanQuickCheckYes() and normalizeSecondAndAppend() for a faster
+ * combination of quick check + normalization, to avoid
+ * re-checking the "yes" prefix.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return UNormalizationCheckResult
+ * @stable ICU 4.4
+ */
+U_CAPI UNormalizationCheckResult U_EXPORT2
+unorm2_quickCheck(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Returns the end of the normalized substring of the input string.
+ * In other words, with <code>end=spanQuickCheckYes(s, ec);</code>
+ * the substring <code>UnicodeString(s, 0, end)</code>
+ * will pass the quick check with a "yes" result.
+ *
+ * The returned end index is usually one or more characters before the
+ * "no" or "maybe" character: The end index is at a normalization boundary.
+ * (See the class documentation for more about normalization boundaries.)
+ *
+ * When the goal is a normalized string and most input strings are expected
+ * to be normalized already, then call this method,
+ * and if it returns a prefix shorter than the input string,
+ * copy that prefix and use normalizeSecondAndAppend() for the remainder.
+ * @param norm2 UNormalizer2 instance
+ * @param s input string
+ * @param length length of the string, or -1 if NUL-terminated
+ * @param pErrorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return "yes" span end index
+ * @stable ICU 4.4
+ */
+U_CAPI int32_t U_EXPORT2
+unorm2_spanQuickCheckYes(const UNormalizer2 *norm2,
+ const UChar *s, int32_t length,
+ UErrorCode *pErrorCode);
+
+/**
+ * Tests if the character always has a normalization boundary before it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c has a normalization boundary before it
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character always has a normalization boundary after it,
+ * regardless of context.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c has a normalization boundary after it
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Tests if the character is normalization-inert.
+ * For details see the Normalizer2 base class documentation.
+ * @param norm2 UNormalizer2 instance
+ * @param c character to test
+ * @return true if c is normalization-inert
+ * @stable ICU 4.4
+ */
+U_CAPI UBool U_EXPORT2
+unorm2_isInert(const UNormalizer2 *norm2, UChar32 c);
+
+/**
+ * Compares two strings for canonical equivalence.
+ * Further options include case-insensitive comparison and
+ * code point order (as opposed to code unit order).
+ *
+ * Canonical equivalence between two strings is defined as their normalized
+ * forms (NFD or NFC) being identical.
+ * This function compares strings incrementally instead of normalizing
+ * (and optionally case-folding) both strings entirely,
+ * improving performance significantly.
+ *
+ * Bulk normalization is only necessary if the strings do not fulfill the FCD
+ * conditions. Only in this case, and only if the strings are relatively long,
+ * is memory allocated temporarily.
+ * For FCD strings and short non-FCD strings there is no memory allocation.
+ *
+ * Semantically, this is equivalent to
+ * strcmp[CodePointOrder](NFD(foldCase(NFD(s1))), NFD(foldCase(NFD(s2))))
+ * where code point order and foldCase are all optional.
+ *
+ * UAX 21 2.5 Caseless Matching specifies that for a canonical caseless match
+ * the case folding must be performed first, then the normalization.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Case-sensitive comparison in code unit order, and the input strings
+ * are quick-checked for FCD.
+ *
+ * - UNORM_INPUT_IS_FCD
+ * Set if the caller knows that both s1 and s2 fulfill the FCD conditions.
+ * If not set, the function will quickCheck for FCD
+ * and normalize if necessary.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_COMPARE_IGNORE_CASE
+ * Set to compare strings case-insensitively using case folding,
+ * instead of case-sensitively.
+ * If set, then the following case folding options are used.
+ *
+ * - Options as used with case-insensitive comparisons, currently:
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * (see u_strCaseCompare for details)
+ *
+ * - regular normalization options shifted left by UNORM_COMPARE_NORM_OPTIONS_SHIFT
+ *
+ * @param pErrorCode ICU error code in/out parameter.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see unorm_normalize
+ * @see UNORM_FCD
+ * @see u_strCompare
+ * @see u_strCaseCompare
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+unorm_compare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#endif /* !UCONFIG_NO_NORMALIZATION */
+#endif /* __UNORM2_H__ */
diff --git a/intl/icu/source/common/unicode/uobject.h b/intl/icu/source/common/unicode/uobject.h
new file mode 100644
index 0000000000..f53ec1d111
--- /dev/null
+++ b/intl/icu/source/common/unicode/uobject.h
@@ -0,0 +1,324 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: uobject.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002jun26
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UOBJECT_H__
+#define __UOBJECT_H__
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/platform.h"
+
+/**
+ * \file
+ * \brief C++ API: Common ICU base class UObject.
+ */
+
+/**
+ * \def U_NO_THROW
+ * Since ICU 64, use noexcept instead.
+ *
+ * Previously, define this to define the throw() specification so
+ * certain functions do not throw any exceptions
+ *
+ * UMemory operator new methods should have the throw() specification
+ * appended to them, so that the compiler adds the additional nullptr check
+ * before calling constructors. Without, if <code>operator new</code> returns nullptr the
+ * constructor is still called, and if the constructor references member
+ * data, (which it typically does), the result is a segmentation violation.
+ *
+ * @stable ICU 4.2. Since ICU 64, Use noexcept instead. See ICU-20422.
+ */
+#ifndef U_NO_THROW
+#define U_NO_THROW noexcept
+#endif
+
+/*===========================================================================*/
+/* UClassID-based RTTI */
+/*===========================================================================*/
+
+/**
+ * UClassID is used to identify classes without using the compiler's RTTI.
+ * This was used before C++ compilers consistently supported RTTI.
+ * ICU 4.6 requires compiler RTTI to be turned on.
+ *
+ * Each class hierarchy which needs
+ * to implement polymorphic clone() or operator==() defines two methods,
+ * described in detail below. UClassID values can be compared using
+ * operator==(). Nothing else should be done with them.
+ *
+ * \par
+ * In class hierarchies that implement "poor man's RTTI",
+ * each concrete subclass implements getDynamicClassID() in the same way:
+ *
+ * \code
+ * class Derived {
+ * public:
+ * virtual UClassID getDynamicClassID() const
+ * { return Derived::getStaticClassID(); }
+ * }
+ * \endcode
+ *
+ * Each concrete class implements getStaticClassID() as well, which allows
+ * clients to test for a specific type.
+ *
+ * \code
+ * class Derived {
+ * public:
+ * static UClassID U_EXPORT2 getStaticClassID();
+ * private:
+ * static char fgClassID;
+ * }
+ *
+ * // In Derived.cpp:
+ * UClassID Derived::getStaticClassID()
+ * { return (UClassID)&Derived::fgClassID; }
+ * char Derived::fgClassID = 0; // Value is irrelevant
+ * \endcode
+ * @stable ICU 2.0
+ */
+typedef void* UClassID;
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UMemory is the common ICU base class.
+ * All other ICU C++ classes are derived from UMemory (starting with ICU 2.4).
+ *
+ * This is primarily to make it possible and simple to override the
+ * C++ memory management by adding new/delete operators to this base class.
+ *
+ * To override ALL ICU memory management, including that from plain C code,
+ * replace the allocation functions declared in cmemory.h
+ *
+ * UMemory does not contain any virtual functions.
+ * Common "boilerplate" functions are defined in UObject.
+ *
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UMemory {
+public:
+
+/* test versions for debugging shaper heap memory problems */
+#ifdef SHAPER_MEMORY_DEBUG
+ static void * NewArray(int size, int count);
+ static void * GrowArray(void * array, int newSize );
+ static void FreeArray(void * array );
+#endif
+
+#if U_OVERRIDE_CXX_ALLOCATION
+ /**
+ * Override for ICU4C C++ memory management.
+ * simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new(size_t size) noexcept;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See new().
+ * @stable ICU 2.4
+ */
+ static void * U_EXPORT2 operator new[](size_t size) noexcept;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * simple, non-class types are allocated using the macros in common/cmemory.h
+ * (uprv_malloc(), uprv_free(), uprv_realloc());
+ * they or something else could be used here to implement C++ new/delete
+ * for ICU4C C++ classes
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete(void *p) noexcept;
+
+ /**
+ * Override for ICU4C C++ memory management.
+ * See delete().
+ * @stable ICU 2.4
+ */
+ static void U_EXPORT2 operator delete[](void *p) noexcept;
+
+#if U_HAVE_PLACEMENT_NEW
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See new().
+ * @stable ICU 2.6
+ */
+ static inline void * U_EXPORT2 operator new(size_t, void *ptr) noexcept { return ptr; }
+
+ /**
+ * Override for ICU4C C++ memory management for STL.
+ * See delete().
+ * @stable ICU 2.6
+ */
+ static inline void U_EXPORT2 operator delete(void *, void *) noexcept {}
+#endif /* U_HAVE_PLACEMENT_NEW */
+#if U_HAVE_DEBUG_LOCATION_NEW
+ /**
+ * This method overrides the MFC debug version of the operator new
+ *
+ * @param size The requested memory size
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void * U_EXPORT2 operator new(size_t size, const char* file, int line) noexcept;
+ /**
+ * This method provides a matching delete for the MFC debug new
+ *
+ * @param p The pointer to the allocated memory
+ * @param file The file where the allocation was requested
+ * @param line The line where the allocation was requested
+ */
+ static void U_EXPORT2 operator delete(void* p, const char* file, int line) noexcept;
+#endif /* U_HAVE_DEBUG_LOCATION_NEW */
+#endif /* U_OVERRIDE_CXX_ALLOCATION */
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UMemory &UMemory::operator=(const UMemory &);
+ */
+};
+
+/**
+ * UObject is the common ICU "boilerplate" class.
+ * UObject inherits UMemory (starting with ICU 2.4),
+ * and all other public ICU C++ classes
+ * are derived from UObject (starting with ICU 2.2).
+ *
+ * UObject contains common virtual functions, in particular a virtual destructor.
+ *
+ * The clone() function is not available in UObject because it is not
+ * implemented by all ICU classes.
+ * Many ICU services provide a clone() function for their class trees,
+ * defined on the service's C++ base class
+ * (which itself is a subclass of UObject).
+ *
+ * @stable ICU 2.2
+ */
+class U_COMMON_API UObject : public UMemory {
+public:
+ /**
+ * Destructor.
+ *
+ * @stable ICU 2.2
+ */
+ virtual ~UObject();
+
+ /**
+ * ICU4C "poor man's RTTI", returns a UClassID for the actual ICU class.
+ * The base class implementation returns a dummy value.
+ *
+ * Use compiler RTTI rather than ICU's "poor man's RTTI".
+ * Since ICU 4.6, new ICU C++ class hierarchies do not implement "poor man's RTTI".
+ *
+ * @stable ICU 2.2
+ */
+ virtual UClassID getDynamicClassID() const;
+
+protected:
+ // the following functions are protected to prevent instantiation and
+ // direct use of UObject itself
+
+ // default constructor
+ // inline UObject() {}
+
+ // copy constructor
+ // inline UObject(const UObject &other) {}
+
+#if 0
+ // TODO Sometime in the future. Implement operator==().
+ // (This comment inserted in 2.2)
+ // some or all of the following "boilerplate" functions may be made public
+ // in a future ICU4C release when all subclasses implement them
+
+ // assignment operator
+ // (not virtual, see "Taligent's Guide to Designing Programs" pp.73..74)
+ // commented out because the implementation is the same as a compiler's default
+ // UObject &operator=(const UObject &other) { return *this; }
+
+ // comparison operators
+ virtual inline bool operator==(const UObject &other) const { return this==&other; }
+ inline bool operator!=(const UObject &other) const { return !operator==(other); }
+
+ // clone() commented out from the base class:
+ // some compilers do not support co-variant return types
+ // (i.e., subclasses would have to return UObject * as well, instead of SubClass *)
+ // see also UObject class documentation.
+ // virtual UObject *clone() const;
+#endif
+
+ /*
+ * Assignment operator not declared. The compiler will provide one
+ * which does nothing since this class does not contain any data members.
+ * API/code coverage may show the assignment operator as present and
+ * untested - ignore.
+ * Subclasses need this assignment operator if they use compiler-provided
+ * assignment operators of their own. An alternative to not declaring one
+ * here would be to declare and empty-implement a protected or public one.
+ UObject &UObject::operator=(const UObject &);
+ */
+};
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * This is a simple macro to add ICU RTTI to an ICU object implementation.
+ * This does not go into the header. This should only be used in *.cpp files.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ } \
+ UClassID myClass::getDynamicClassID() const \
+ { return myClass::getStaticClassID(); }
+
+
+/**
+ * This macro adds ICU RTTI to an ICU abstract class implementation.
+ * This macro should be invoked in *.cpp files. The corresponding
+ * header should declare getStaticClassID.
+ *
+ * @param myClass The name of the class that needs RTTI defined.
+ * @internal
+ */
+#define UOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(myClass) \
+ UClassID U_EXPORT2 myClass::getStaticClassID() { \
+ static char classID = 0; \
+ return (UClassID)&classID; \
+ }
+
+#endif /* U_HIDE_INTERNAL_API */
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/urename.h b/intl/icu/source/common/unicode/urename.h
new file mode 100644
index 0000000000..b35df45380
--- /dev/null
+++ b/intl/icu/source/common/unicode/urename.h
@@ -0,0 +1,1975 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2002-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: urename.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Perl script tools/genren.pl written by Vladimir Weinstein
+*
+* Contains data for renaming ICU exports.
+* Gets included by umachine.h
+*
+* THIS FILE IS MACHINE-GENERATED, DON'T PLAY WITH IT IF YOU DON'T KNOW WHAT
+* YOU ARE DOING, OTHERWISE VERY BAD THINGS WILL HAPPEN!
+*/
+
+#ifndef URENAME_H
+#define URENAME_H
+
+/* U_DISABLE_RENAMING can be defined in the following ways:
+ * - when running configure, e.g.
+ * runConfigureICU Linux --disable-renaming
+ * - by changing the default setting of U_DISABLE_RENAMING in uconfig.h
+ */
+
+#include "unicode/uconfig.h"
+
+#if !U_DISABLE_RENAMING
+
+// Disable Renaming for Visual Studio's IntelliSense feature, so that 'Go-to-Definition' (F12) will work.
+#if !(defined(_MSC_VER) && defined(__INTELLISENSE__))
+
+/* We need the U_ICU_ENTRY_POINT_RENAME definition. There's a default one in unicode/uvernum.h we can use, but we will give
+ the platform a chance to define it first.
+ Normally (if utypes.h or umachine.h was included first) this will not be necessary as it will already be defined.
+ */
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/umachine.h"
+#endif
+
+/* If we still don't have U_ICU_ENTRY_POINT_RENAME use the default. */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#include "unicode/uvernum.h"
+#endif
+
+/* Error out before the following defines cause very strange and unexpected code breakage */
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#error U_ICU_ENTRY_POINT_RENAME is not defined - cannot continue. Consider defining U_DISABLE_RENAMING if renaming should not be used.
+#endif
+
+
+/* C exports renaming data */
+
+#define CreateLSTMBreakEngine U_ICU_ENTRY_POINT_RENAME(CreateLSTMBreakEngine)
+#define CreateLSTMData U_ICU_ENTRY_POINT_RENAME(CreateLSTMData)
+#define CreateLSTMDataForScript U_ICU_ENTRY_POINT_RENAME(CreateLSTMDataForScript)
+#define DeleteLSTMData U_ICU_ENTRY_POINT_RENAME(DeleteLSTMData)
+#define LSTMDataName U_ICU_ENTRY_POINT_RENAME(LSTMDataName)
+#define T_CString_int64ToString U_ICU_ENTRY_POINT_RENAME(T_CString_int64ToString)
+#define T_CString_integerToString U_ICU_ENTRY_POINT_RENAME(T_CString_integerToString)
+#define T_CString_stringToInteger U_ICU_ENTRY_POINT_RENAME(T_CString_stringToInteger)
+#define T_CString_toLowerCase U_ICU_ENTRY_POINT_RENAME(T_CString_toLowerCase)
+#define T_CString_toUpperCase U_ICU_ENTRY_POINT_RENAME(T_CString_toUpperCase)
+#define UCNV_FROM_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_ESCAPE)
+#define UCNV_FROM_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SKIP)
+#define UCNV_FROM_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_STOP)
+#define UCNV_FROM_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_FROM_U_CALLBACK_SUBSTITUTE)
+#define UCNV_TO_U_CALLBACK_ESCAPE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_ESCAPE)
+#define UCNV_TO_U_CALLBACK_SKIP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SKIP)
+#define UCNV_TO_U_CALLBACK_STOP U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_STOP)
+#define UCNV_TO_U_CALLBACK_SUBSTITUTE U_ICU_ENTRY_POINT_RENAME(UCNV_TO_U_CALLBACK_SUBSTITUTE)
+#define UDataMemory_createNewInstance U_ICU_ENTRY_POINT_RENAME(UDataMemory_createNewInstance)
+#define UDataMemory_init U_ICU_ENTRY_POINT_RENAME(UDataMemory_init)
+#define UDataMemory_isLoaded U_ICU_ENTRY_POINT_RENAME(UDataMemory_isLoaded)
+#define UDataMemory_normalizeDataPointer U_ICU_ENTRY_POINT_RENAME(UDataMemory_normalizeDataPointer)
+#define UDataMemory_setData U_ICU_ENTRY_POINT_RENAME(UDataMemory_setData)
+#define UDatamemory_assign U_ICU_ENTRY_POINT_RENAME(UDatamemory_assign)
+#define _ASCIIData U_ICU_ENTRY_POINT_RENAME(_ASCIIData)
+#define _Bocu1Data U_ICU_ENTRY_POINT_RENAME(_Bocu1Data)
+#define _CESU8Data U_ICU_ENTRY_POINT_RENAME(_CESU8Data)
+#define _CompoundTextData U_ICU_ENTRY_POINT_RENAME(_CompoundTextData)
+#define _HZData U_ICU_ENTRY_POINT_RENAME(_HZData)
+#define _IMAPData U_ICU_ENTRY_POINT_RENAME(_IMAPData)
+#define _ISCIIData U_ICU_ENTRY_POINT_RENAME(_ISCIIData)
+#define _ISO2022Data U_ICU_ENTRY_POINT_RENAME(_ISO2022Data)
+#define _LMBCSData1 U_ICU_ENTRY_POINT_RENAME(_LMBCSData1)
+#define _LMBCSData11 U_ICU_ENTRY_POINT_RENAME(_LMBCSData11)
+#define _LMBCSData16 U_ICU_ENTRY_POINT_RENAME(_LMBCSData16)
+#define _LMBCSData17 U_ICU_ENTRY_POINT_RENAME(_LMBCSData17)
+#define _LMBCSData18 U_ICU_ENTRY_POINT_RENAME(_LMBCSData18)
+#define _LMBCSData19 U_ICU_ENTRY_POINT_RENAME(_LMBCSData19)
+#define _LMBCSData2 U_ICU_ENTRY_POINT_RENAME(_LMBCSData2)
+#define _LMBCSData3 U_ICU_ENTRY_POINT_RENAME(_LMBCSData3)
+#define _LMBCSData4 U_ICU_ENTRY_POINT_RENAME(_LMBCSData4)
+#define _LMBCSData5 U_ICU_ENTRY_POINT_RENAME(_LMBCSData5)
+#define _LMBCSData6 U_ICU_ENTRY_POINT_RENAME(_LMBCSData6)
+#define _LMBCSData8 U_ICU_ENTRY_POINT_RENAME(_LMBCSData8)
+#define _Latin1Data U_ICU_ENTRY_POINT_RENAME(_Latin1Data)
+#define _MBCSData U_ICU_ENTRY_POINT_RENAME(_MBCSData)
+#define _SCSUData U_ICU_ENTRY_POINT_RENAME(_SCSUData)
+#define _UTF16BEData U_ICU_ENTRY_POINT_RENAME(_UTF16BEData)
+#define _UTF16Data U_ICU_ENTRY_POINT_RENAME(_UTF16Data)
+#define _UTF16LEData U_ICU_ENTRY_POINT_RENAME(_UTF16LEData)
+#define _UTF16v2Data U_ICU_ENTRY_POINT_RENAME(_UTF16v2Data)
+#define _UTF32BEData U_ICU_ENTRY_POINT_RENAME(_UTF32BEData)
+#define _UTF32Data U_ICU_ENTRY_POINT_RENAME(_UTF32Data)
+#define _UTF32LEData U_ICU_ENTRY_POINT_RENAME(_UTF32LEData)
+#define _UTF7Data U_ICU_ENTRY_POINT_RENAME(_UTF7Data)
+#define _UTF8Data U_ICU_ENTRY_POINT_RENAME(_UTF8Data)
+#define _isUnicodeLocaleTypeSubtag U_ICU_ENTRY_POINT_RENAME(_isUnicodeLocaleTypeSubtag)
+#define allowedHourFormatsCleanup U_ICU_ENTRY_POINT_RENAME(allowedHourFormatsCleanup)
+#define cmemory_cleanup U_ICU_ENTRY_POINT_RENAME(cmemory_cleanup)
+#define dayPeriodRulesCleanup U_ICU_ENTRY_POINT_RENAME(dayPeriodRulesCleanup)
+#define deleteAllowedHourFormats U_ICU_ENTRY_POINT_RENAME(deleteAllowedHourFormats)
+#define gTimeZoneFilesInitOnce U_ICU_ENTRY_POINT_RENAME(gTimeZoneFilesInitOnce)
+#define initNumsysNames U_ICU_ENTRY_POINT_RENAME(initNumsysNames)
+#define izrule_clone U_ICU_ENTRY_POINT_RENAME(izrule_clone)
+#define izrule_close U_ICU_ENTRY_POINT_RENAME(izrule_close)
+#define izrule_equals U_ICU_ENTRY_POINT_RENAME(izrule_equals)
+#define izrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(izrule_getDSTSavings)
+#define izrule_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(izrule_getDynamicClassID)
+#define izrule_getFinalStart U_ICU_ENTRY_POINT_RENAME(izrule_getFinalStart)
+#define izrule_getFirstStart U_ICU_ENTRY_POINT_RENAME(izrule_getFirstStart)
+#define izrule_getName U_ICU_ENTRY_POINT_RENAME(izrule_getName)
+#define izrule_getNextStart U_ICU_ENTRY_POINT_RENAME(izrule_getNextStart)
+#define izrule_getPreviousStart U_ICU_ENTRY_POINT_RENAME(izrule_getPreviousStart)
+#define izrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(izrule_getRawOffset)
+#define izrule_getStaticClassID U_ICU_ENTRY_POINT_RENAME(izrule_getStaticClassID)
+#define izrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(izrule_isEquivalentTo)
+#define izrule_open U_ICU_ENTRY_POINT_RENAME(izrule_open)
+#define locale_getKeywordsStart U_ICU_ENTRY_POINT_RENAME(locale_getKeywordsStart)
+#define locale_get_default U_ICU_ENTRY_POINT_RENAME(locale_get_default)
+#define locale_set_default U_ICU_ENTRY_POINT_RENAME(locale_set_default)
+#define numSysCleanup U_ICU_ENTRY_POINT_RENAME(numSysCleanup)
+#define rbbi_cleanup U_ICU_ENTRY_POINT_RENAME(rbbi_cleanup)
+#define pl_addFontRun U_ICU_ENTRY_POINT_RENAME(pl_addFontRun)
+#define pl_addLocaleRun U_ICU_ENTRY_POINT_RENAME(pl_addLocaleRun)
+#define pl_addValueRun U_ICU_ENTRY_POINT_RENAME(pl_addValueRun)
+#define pl_close U_ICU_ENTRY_POINT_RENAME(pl_close)
+#define pl_closeFontRuns U_ICU_ENTRY_POINT_RENAME(pl_closeFontRuns)
+#define pl_closeLine U_ICU_ENTRY_POINT_RENAME(pl_closeLine)
+#define pl_closeLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_closeLocaleRuns)
+#define pl_closeValueRuns U_ICU_ENTRY_POINT_RENAME(pl_closeValueRuns)
+#define pl_countLineRuns U_ICU_ENTRY_POINT_RENAME(pl_countLineRuns)
+#define pl_create U_ICU_ENTRY_POINT_RENAME(pl_create)
+#define pl_getAscent U_ICU_ENTRY_POINT_RENAME(pl_getAscent)
+#define pl_getDescent U_ICU_ENTRY_POINT_RENAME(pl_getDescent)
+#define pl_getFontRunCount U_ICU_ENTRY_POINT_RENAME(pl_getFontRunCount)
+#define pl_getFontRunFont U_ICU_ENTRY_POINT_RENAME(pl_getFontRunFont)
+#define pl_getFontRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLastLimit)
+#define pl_getFontRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getFontRunLimit)
+#define pl_getLeading U_ICU_ENTRY_POINT_RENAME(pl_getLeading)
+#define pl_getLineAscent U_ICU_ENTRY_POINT_RENAME(pl_getLineAscent)
+#define pl_getLineDescent U_ICU_ENTRY_POINT_RENAME(pl_getLineDescent)
+#define pl_getLineLeading U_ICU_ENTRY_POINT_RENAME(pl_getLineLeading)
+#define pl_getLineVisualRun U_ICU_ENTRY_POINT_RENAME(pl_getLineVisualRun)
+#define pl_getLineWidth U_ICU_ENTRY_POINT_RENAME(pl_getLineWidth)
+#define pl_getLocaleRunCount U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunCount)
+#define pl_getLocaleRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLastLimit)
+#define pl_getLocaleRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLimit)
+#define pl_getLocaleRunLocale U_ICU_ENTRY_POINT_RENAME(pl_getLocaleRunLocale)
+#define pl_getParagraphLevel U_ICU_ENTRY_POINT_RENAME(pl_getParagraphLevel)
+#define pl_getTextDirection U_ICU_ENTRY_POINT_RENAME(pl_getTextDirection)
+#define pl_getValueRunCount U_ICU_ENTRY_POINT_RENAME(pl_getValueRunCount)
+#define pl_getValueRunLastLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLastLimit)
+#define pl_getValueRunLimit U_ICU_ENTRY_POINT_RENAME(pl_getValueRunLimit)
+#define pl_getValueRunValue U_ICU_ENTRY_POINT_RENAME(pl_getValueRunValue)
+#define pl_getVisualRunAscent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunAscent)
+#define pl_getVisualRunDescent U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDescent)
+#define pl_getVisualRunDirection U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunDirection)
+#define pl_getVisualRunFont U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunFont)
+#define pl_getVisualRunGlyphCount U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphCount)
+#define pl_getVisualRunGlyphToCharMap U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphToCharMap)
+#define pl_getVisualRunGlyphs U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunGlyphs)
+#define pl_getVisualRunLeading U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunLeading)
+#define pl_getVisualRunPositions U_ICU_ENTRY_POINT_RENAME(pl_getVisualRunPositions)
+#define pl_isComplex U_ICU_ENTRY_POINT_RENAME(pl_isComplex)
+#define pl_nextLine U_ICU_ENTRY_POINT_RENAME(pl_nextLine)
+#define pl_openEmptyFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyFontRuns)
+#define pl_openEmptyLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyLocaleRuns)
+#define pl_openEmptyValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openEmptyValueRuns)
+#define pl_openFontRuns U_ICU_ENTRY_POINT_RENAME(pl_openFontRuns)
+#define pl_openLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_openLocaleRuns)
+#define pl_openValueRuns U_ICU_ENTRY_POINT_RENAME(pl_openValueRuns)
+#define pl_reflow U_ICU_ENTRY_POINT_RENAME(pl_reflow)
+#define pl_resetFontRuns U_ICU_ENTRY_POINT_RENAME(pl_resetFontRuns)
+#define pl_resetLocaleRuns U_ICU_ENTRY_POINT_RENAME(pl_resetLocaleRuns)
+#define pl_resetValueRuns U_ICU_ENTRY_POINT_RENAME(pl_resetValueRuns)
+#define res_countArrayItems U_ICU_ENTRY_POINT_RENAME(res_countArrayItems)
+#define res_findResource U_ICU_ENTRY_POINT_RENAME(res_findResource)
+#define res_getAlias U_ICU_ENTRY_POINT_RENAME(res_getAlias)
+#define res_getArrayItem U_ICU_ENTRY_POINT_RENAME(res_getArrayItem)
+#define res_getBinaryNoTrace U_ICU_ENTRY_POINT_RENAME(res_getBinaryNoTrace)
+#define res_getIntVectorNoTrace U_ICU_ENTRY_POINT_RENAME(res_getIntVectorNoTrace)
+#define res_getPublicType U_ICU_ENTRY_POINT_RENAME(res_getPublicType)
+#define res_getResource U_ICU_ENTRY_POINT_RENAME(res_getResource)
+#define res_getStringNoTrace U_ICU_ENTRY_POINT_RENAME(res_getStringNoTrace)
+#define res_getTableItemByIndex U_ICU_ENTRY_POINT_RENAME(res_getTableItemByIndex)
+#define res_getTableItemByKey U_ICU_ENTRY_POINT_RENAME(res_getTableItemByKey)
+#define res_load U_ICU_ENTRY_POINT_RENAME(res_load)
+#define res_read U_ICU_ENTRY_POINT_RENAME(res_read)
+#define res_unload U_ICU_ENTRY_POINT_RENAME(res_unload)
+#define u_UCharsToChars U_ICU_ENTRY_POINT_RENAME(u_UCharsToChars)
+#define u_asciiToUpper U_ICU_ENTRY_POINT_RENAME(u_asciiToUpper)
+#define u_austrcpy U_ICU_ENTRY_POINT_RENAME(u_austrcpy)
+#define u_austrncpy U_ICU_ENTRY_POINT_RENAME(u_austrncpy)
+#define u_caseInsensitivePrefixMatch U_ICU_ENTRY_POINT_RENAME(u_caseInsensitivePrefixMatch)
+#define u_catclose U_ICU_ENTRY_POINT_RENAME(u_catclose)
+#define u_catgets U_ICU_ENTRY_POINT_RENAME(u_catgets)
+#define u_catopen U_ICU_ENTRY_POINT_RENAME(u_catopen)
+#define u_charAge U_ICU_ENTRY_POINT_RENAME(u_charAge)
+#define u_charDigitValue U_ICU_ENTRY_POINT_RENAME(u_charDigitValue)
+#define u_charDirection U_ICU_ENTRY_POINT_RENAME(u_charDirection)
+#define u_charFromName U_ICU_ENTRY_POINT_RENAME(u_charFromName)
+#define u_charMirror U_ICU_ENTRY_POINT_RENAME(u_charMirror)
+#define u_charName U_ICU_ENTRY_POINT_RENAME(u_charName)
+#define u_charType U_ICU_ENTRY_POINT_RENAME(u_charType)
+#define u_charsToUChars U_ICU_ENTRY_POINT_RENAME(u_charsToUChars)
+#define u_cleanup U_ICU_ENTRY_POINT_RENAME(u_cleanup)
+#define u_countChar32 U_ICU_ENTRY_POINT_RENAME(u_countChar32)
+#define u_digit U_ICU_ENTRY_POINT_RENAME(u_digit)
+#define u_enumCharNames U_ICU_ENTRY_POINT_RENAME(u_enumCharNames)
+#define u_enumCharTypes U_ICU_ENTRY_POINT_RENAME(u_enumCharTypes)
+#define u_errorName U_ICU_ENTRY_POINT_RENAME(u_errorName)
+#define u_fadopt U_ICU_ENTRY_POINT_RENAME(u_fadopt)
+#define u_fclose U_ICU_ENTRY_POINT_RENAME(u_fclose)
+#define u_feof U_ICU_ENTRY_POINT_RENAME(u_feof)
+#define u_fflush U_ICU_ENTRY_POINT_RENAME(u_fflush)
+#define u_fgetConverter U_ICU_ENTRY_POINT_RENAME(u_fgetConverter)
+#define u_fgetNumberFormat U_ICU_ENTRY_POINT_RENAME(u_fgetNumberFormat)
+#define u_fgetc U_ICU_ENTRY_POINT_RENAME(u_fgetc)
+#define u_fgetcodepage U_ICU_ENTRY_POINT_RENAME(u_fgetcodepage)
+#define u_fgetcx U_ICU_ENTRY_POINT_RENAME(u_fgetcx)
+#define u_fgetfile U_ICU_ENTRY_POINT_RENAME(u_fgetfile)
+#define u_fgetlocale U_ICU_ENTRY_POINT_RENAME(u_fgetlocale)
+#define u_fgets U_ICU_ENTRY_POINT_RENAME(u_fgets)
+#define u_file_read U_ICU_ENTRY_POINT_RENAME(u_file_read)
+#define u_file_write U_ICU_ENTRY_POINT_RENAME(u_file_write)
+#define u_file_write_flush U_ICU_ENTRY_POINT_RENAME(u_file_write_flush)
+#define u_finit U_ICU_ENTRY_POINT_RENAME(u_finit)
+#define u_flushDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_flushDefaultConverter)
+#define u_foldCase U_ICU_ENTRY_POINT_RENAME(u_foldCase)
+#define u_fopen U_ICU_ENTRY_POINT_RENAME(u_fopen)
+#define u_fopen_u U_ICU_ENTRY_POINT_RENAME(u_fopen_u)
+#define u_forDigit U_ICU_ENTRY_POINT_RENAME(u_forDigit)
+#define u_formatMessage U_ICU_ENTRY_POINT_RENAME(u_formatMessage)
+#define u_formatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_formatMessageWithError)
+#define u_fprintf U_ICU_ENTRY_POINT_RENAME(u_fprintf)
+#define u_fprintf_u U_ICU_ENTRY_POINT_RENAME(u_fprintf_u)
+#define u_fputc U_ICU_ENTRY_POINT_RENAME(u_fputc)
+#define u_fputs U_ICU_ENTRY_POINT_RENAME(u_fputs)
+#define u_frewind U_ICU_ENTRY_POINT_RENAME(u_frewind)
+#define u_fscanf U_ICU_ENTRY_POINT_RENAME(u_fscanf)
+#define u_fscanf_u U_ICU_ENTRY_POINT_RENAME(u_fscanf_u)
+#define u_fsetcodepage U_ICU_ENTRY_POINT_RENAME(u_fsetcodepage)
+#define u_fsetlocale U_ICU_ENTRY_POINT_RENAME(u_fsetlocale)
+#define u_fsettransliterator U_ICU_ENTRY_POINT_RENAME(u_fsettransliterator)
+#define u_fstropen U_ICU_ENTRY_POINT_RENAME(u_fstropen)
+#define u_fungetc U_ICU_ENTRY_POINT_RENAME(u_fungetc)
+#define u_getBidiPairedBracket U_ICU_ENTRY_POINT_RENAME(u_getBidiPairedBracket)
+#define u_getBinaryPropertySet U_ICU_ENTRY_POINT_RENAME(u_getBinaryPropertySet)
+#define u_getCombiningClass U_ICU_ENTRY_POINT_RENAME(u_getCombiningClass)
+#define u_getDataDirectory U_ICU_ENTRY_POINT_RENAME(u_getDataDirectory)
+#define u_getDataVersion U_ICU_ENTRY_POINT_RENAME(u_getDataVersion)
+#define u_getDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_getDefaultConverter)
+#define u_getFC_NFKC_Closure U_ICU_ENTRY_POINT_RENAME(u_getFC_NFKC_Closure)
+#define u_getISOComment U_ICU_ENTRY_POINT_RENAME(u_getISOComment)
+#define u_getIntPropertyMap U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMap)
+#define u_getIntPropertyMaxValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMaxValue)
+#define u_getIntPropertyMinValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyMinValue)
+#define u_getIntPropertyValue U_ICU_ENTRY_POINT_RENAME(u_getIntPropertyValue)
+#define u_getMainProperties U_ICU_ENTRY_POINT_RENAME(u_getMainProperties)
+#define u_getNumericValue U_ICU_ENTRY_POINT_RENAME(u_getNumericValue)
+#define u_getPropertyEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyEnum)
+#define u_getPropertyName U_ICU_ENTRY_POINT_RENAME(u_getPropertyName)
+#define u_getPropertyValueEnum U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueEnum)
+#define u_getPropertyValueName U_ICU_ENTRY_POINT_RENAME(u_getPropertyValueName)
+#define u_getTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_getTimeZoneFilesDirectory)
+#define u_getUnicodeProperties U_ICU_ENTRY_POINT_RENAME(u_getUnicodeProperties)
+#define u_getUnicodeVersion U_ICU_ENTRY_POINT_RENAME(u_getUnicodeVersion)
+#define u_getVersion U_ICU_ENTRY_POINT_RENAME(u_getVersion)
+#define u_get_stdout U_ICU_ENTRY_POINT_RENAME(u_get_stdout)
+#define u_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_hasBinaryProperty)
+#define u_init U_ICU_ENTRY_POINT_RENAME(u_init)
+#define u_isIDIgnorable U_ICU_ENTRY_POINT_RENAME(u_isIDIgnorable)
+#define u_isIDPart U_ICU_ENTRY_POINT_RENAME(u_isIDPart)
+#define u_isIDStart U_ICU_ENTRY_POINT_RENAME(u_isIDStart)
+#define u_isISOControl U_ICU_ENTRY_POINT_RENAME(u_isISOControl)
+#define u_isJavaIDPart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDPart)
+#define u_isJavaIDStart U_ICU_ENTRY_POINT_RENAME(u_isJavaIDStart)
+#define u_isJavaSpaceChar U_ICU_ENTRY_POINT_RENAME(u_isJavaSpaceChar)
+#define u_isMirrored U_ICU_ENTRY_POINT_RENAME(u_isMirrored)
+#define u_isUAlphabetic U_ICU_ENTRY_POINT_RENAME(u_isUAlphabetic)
+#define u_isULowercase U_ICU_ENTRY_POINT_RENAME(u_isULowercase)
+#define u_isUUppercase U_ICU_ENTRY_POINT_RENAME(u_isUUppercase)
+#define u_isUWhiteSpace U_ICU_ENTRY_POINT_RENAME(u_isUWhiteSpace)
+#define u_isWhitespace U_ICU_ENTRY_POINT_RENAME(u_isWhitespace)
+#define u_isalnum U_ICU_ENTRY_POINT_RENAME(u_isalnum)
+#define u_isalnumPOSIX U_ICU_ENTRY_POINT_RENAME(u_isalnumPOSIX)
+#define u_isalpha U_ICU_ENTRY_POINT_RENAME(u_isalpha)
+#define u_isbase U_ICU_ENTRY_POINT_RENAME(u_isbase)
+#define u_isblank U_ICU_ENTRY_POINT_RENAME(u_isblank)
+#define u_iscntrl U_ICU_ENTRY_POINT_RENAME(u_iscntrl)
+#define u_isdefined U_ICU_ENTRY_POINT_RENAME(u_isdefined)
+#define u_isdigit U_ICU_ENTRY_POINT_RENAME(u_isdigit)
+#define u_isgraph U_ICU_ENTRY_POINT_RENAME(u_isgraph)
+#define u_isgraphPOSIX U_ICU_ENTRY_POINT_RENAME(u_isgraphPOSIX)
+#define u_islower U_ICU_ENTRY_POINT_RENAME(u_islower)
+#define u_isprint U_ICU_ENTRY_POINT_RENAME(u_isprint)
+#define u_isprintPOSIX U_ICU_ENTRY_POINT_RENAME(u_isprintPOSIX)
+#define u_ispunct U_ICU_ENTRY_POINT_RENAME(u_ispunct)
+#define u_isspace U_ICU_ENTRY_POINT_RENAME(u_isspace)
+#define u_istitle U_ICU_ENTRY_POINT_RENAME(u_istitle)
+#define u_isupper U_ICU_ENTRY_POINT_RENAME(u_isupper)
+#define u_isxdigit U_ICU_ENTRY_POINT_RENAME(u_isxdigit)
+#define u_locbund_close U_ICU_ENTRY_POINT_RENAME(u_locbund_close)
+#define u_locbund_getNumberFormat U_ICU_ENTRY_POINT_RENAME(u_locbund_getNumberFormat)
+#define u_locbund_init U_ICU_ENTRY_POINT_RENAME(u_locbund_init)
+#define u_memcasecmp U_ICU_ENTRY_POINT_RENAME(u_memcasecmp)
+#define u_memchr U_ICU_ENTRY_POINT_RENAME(u_memchr)
+#define u_memchr32 U_ICU_ENTRY_POINT_RENAME(u_memchr32)
+#define u_memcmp U_ICU_ENTRY_POINT_RENAME(u_memcmp)
+#define u_memcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_memcmpCodePointOrder)
+#define u_memcpy U_ICU_ENTRY_POINT_RENAME(u_memcpy)
+#define u_memmove U_ICU_ENTRY_POINT_RENAME(u_memmove)
+#define u_memrchr U_ICU_ENTRY_POINT_RENAME(u_memrchr)
+#define u_memrchr32 U_ICU_ENTRY_POINT_RENAME(u_memrchr32)
+#define u_memset U_ICU_ENTRY_POINT_RENAME(u_memset)
+#define u_parseMessage U_ICU_ENTRY_POINT_RENAME(u_parseMessage)
+#define u_parseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_parseMessageWithError)
+#define u_printf U_ICU_ENTRY_POINT_RENAME(u_printf)
+#define u_printf_parse U_ICU_ENTRY_POINT_RENAME(u_printf_parse)
+#define u_printf_u U_ICU_ENTRY_POINT_RENAME(u_printf_u)
+#define u_releaseDefaultConverter U_ICU_ENTRY_POINT_RENAME(u_releaseDefaultConverter)
+#define u_scanf_parse U_ICU_ENTRY_POINT_RENAME(u_scanf_parse)
+#define u_setAtomicIncDecFunctions U_ICU_ENTRY_POINT_RENAME(u_setAtomicIncDecFunctions)
+#define u_setDataDirectory U_ICU_ENTRY_POINT_RENAME(u_setDataDirectory)
+#define u_setMemoryFunctions U_ICU_ENTRY_POINT_RENAME(u_setMemoryFunctions)
+#define u_setMutexFunctions U_ICU_ENTRY_POINT_RENAME(u_setMutexFunctions)
+#define u_setTimeZoneFilesDirectory U_ICU_ENTRY_POINT_RENAME(u_setTimeZoneFilesDirectory)
+#define u_shapeArabic U_ICU_ENTRY_POINT_RENAME(u_shapeArabic)
+#define u_snprintf U_ICU_ENTRY_POINT_RENAME(u_snprintf)
+#define u_snprintf_u U_ICU_ENTRY_POINT_RENAME(u_snprintf_u)
+#define u_sprintf U_ICU_ENTRY_POINT_RENAME(u_sprintf)
+#define u_sprintf_u U_ICU_ENTRY_POINT_RENAME(u_sprintf_u)
+#define u_sscanf U_ICU_ENTRY_POINT_RENAME(u_sscanf)
+#define u_sscanf_u U_ICU_ENTRY_POINT_RENAME(u_sscanf_u)
+#define u_strCaseCompare U_ICU_ENTRY_POINT_RENAME(u_strCaseCompare)
+#define u_strCompare U_ICU_ENTRY_POINT_RENAME(u_strCompare)
+#define u_strCompareIter U_ICU_ENTRY_POINT_RENAME(u_strCompareIter)
+#define u_strFindFirst U_ICU_ENTRY_POINT_RENAME(u_strFindFirst)
+#define u_strFindLast U_ICU_ENTRY_POINT_RENAME(u_strFindLast)
+#define u_strFoldCase U_ICU_ENTRY_POINT_RENAME(u_strFoldCase)
+#define u_strFromJavaModifiedUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromJavaModifiedUTF8WithSub)
+#define u_strFromPunycode U_ICU_ENTRY_POINT_RENAME(u_strFromPunycode)
+#define u_strFromUTF32 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32)
+#define u_strFromUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF32WithSub)
+#define u_strFromUTF8 U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8)
+#define u_strFromUTF8Lenient U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8Lenient)
+#define u_strFromUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strFromUTF8WithSub)
+#define u_strFromWCS U_ICU_ENTRY_POINT_RENAME(u_strFromWCS)
+#define u_strHasMoreChar32Than U_ICU_ENTRY_POINT_RENAME(u_strHasMoreChar32Than)
+#define u_strToJavaModifiedUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToJavaModifiedUTF8)
+#define u_strToLower U_ICU_ENTRY_POINT_RENAME(u_strToLower)
+#define u_strToPunycode U_ICU_ENTRY_POINT_RENAME(u_strToPunycode)
+#define u_strToTitle U_ICU_ENTRY_POINT_RENAME(u_strToTitle)
+#define u_strToUTF32 U_ICU_ENTRY_POINT_RENAME(u_strToUTF32)
+#define u_strToUTF32WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF32WithSub)
+#define u_strToUTF8 U_ICU_ENTRY_POINT_RENAME(u_strToUTF8)
+#define u_strToUTF8WithSub U_ICU_ENTRY_POINT_RENAME(u_strToUTF8WithSub)
+#define u_strToUpper U_ICU_ENTRY_POINT_RENAME(u_strToUpper)
+#define u_strToWCS U_ICU_ENTRY_POINT_RENAME(u_strToWCS)
+#define u_strcasecmp U_ICU_ENTRY_POINT_RENAME(u_strcasecmp)
+#define u_strcat U_ICU_ENTRY_POINT_RENAME(u_strcat)
+#define u_strchr U_ICU_ENTRY_POINT_RENAME(u_strchr)
+#define u_strchr32 U_ICU_ENTRY_POINT_RENAME(u_strchr32)
+#define u_strcmp U_ICU_ENTRY_POINT_RENAME(u_strcmp)
+#define u_strcmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strcmpCodePointOrder)
+#define u_strcmpFold U_ICU_ENTRY_POINT_RENAME(u_strcmpFold)
+#define u_strcpy U_ICU_ENTRY_POINT_RENAME(u_strcpy)
+#define u_strcspn U_ICU_ENTRY_POINT_RENAME(u_strcspn)
+#define u_stringHasBinaryProperty U_ICU_ENTRY_POINT_RENAME(u_stringHasBinaryProperty)
+#define u_strlen U_ICU_ENTRY_POINT_RENAME(u_strlen)
+#define u_strncasecmp U_ICU_ENTRY_POINT_RENAME(u_strncasecmp)
+#define u_strncat U_ICU_ENTRY_POINT_RENAME(u_strncat)
+#define u_strncmp U_ICU_ENTRY_POINT_RENAME(u_strncmp)
+#define u_strncmpCodePointOrder U_ICU_ENTRY_POINT_RENAME(u_strncmpCodePointOrder)
+#define u_strncpy U_ICU_ENTRY_POINT_RENAME(u_strncpy)
+#define u_strpbrk U_ICU_ENTRY_POINT_RENAME(u_strpbrk)
+#define u_strrchr U_ICU_ENTRY_POINT_RENAME(u_strrchr)
+#define u_strrchr32 U_ICU_ENTRY_POINT_RENAME(u_strrchr32)
+#define u_strrstr U_ICU_ENTRY_POINT_RENAME(u_strrstr)
+#define u_strspn U_ICU_ENTRY_POINT_RENAME(u_strspn)
+#define u_strstr U_ICU_ENTRY_POINT_RENAME(u_strstr)
+#define u_strtok_r U_ICU_ENTRY_POINT_RENAME(u_strtok_r)
+#define u_terminateChars U_ICU_ENTRY_POINT_RENAME(u_terminateChars)
+#define u_terminateUChar32s U_ICU_ENTRY_POINT_RENAME(u_terminateUChar32s)
+#define u_terminateUChars U_ICU_ENTRY_POINT_RENAME(u_terminateUChars)
+#define u_terminateWChars U_ICU_ENTRY_POINT_RENAME(u_terminateWChars)
+#define u_tolower U_ICU_ENTRY_POINT_RENAME(u_tolower)
+#define u_totitle U_ICU_ENTRY_POINT_RENAME(u_totitle)
+#define u_toupper U_ICU_ENTRY_POINT_RENAME(u_toupper)
+#define u_uastrcpy U_ICU_ENTRY_POINT_RENAME(u_uastrcpy)
+#define u_uastrncpy U_ICU_ENTRY_POINT_RENAME(u_uastrncpy)
+#define u_unescape U_ICU_ENTRY_POINT_RENAME(u_unescape)
+#define u_unescapeAt U_ICU_ENTRY_POINT_RENAME(u_unescapeAt)
+#define u_versionFromString U_ICU_ENTRY_POINT_RENAME(u_versionFromString)
+#define u_versionFromUString U_ICU_ENTRY_POINT_RENAME(u_versionFromUString)
+#define u_versionToString U_ICU_ENTRY_POINT_RENAME(u_versionToString)
+#define u_vformatMessage U_ICU_ENTRY_POINT_RENAME(u_vformatMessage)
+#define u_vformatMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vformatMessageWithError)
+#define u_vfprintf U_ICU_ENTRY_POINT_RENAME(u_vfprintf)
+#define u_vfprintf_u U_ICU_ENTRY_POINT_RENAME(u_vfprintf_u)
+#define u_vfscanf U_ICU_ENTRY_POINT_RENAME(u_vfscanf)
+#define u_vfscanf_u U_ICU_ENTRY_POINT_RENAME(u_vfscanf_u)
+#define u_vparseMessage U_ICU_ENTRY_POINT_RENAME(u_vparseMessage)
+#define u_vparseMessageWithError U_ICU_ENTRY_POINT_RENAME(u_vparseMessageWithError)
+#define u_vsnprintf U_ICU_ENTRY_POINT_RENAME(u_vsnprintf)
+#define u_vsnprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsnprintf_u)
+#define u_vsprintf U_ICU_ENTRY_POINT_RENAME(u_vsprintf)
+#define u_vsprintf_u U_ICU_ENTRY_POINT_RENAME(u_vsprintf_u)
+#define u_vsscanf U_ICU_ENTRY_POINT_RENAME(u_vsscanf)
+#define u_vsscanf_u U_ICU_ENTRY_POINT_RENAME(u_vsscanf_u)
+#define u_writeIdenticalLevelRun U_ICU_ENTRY_POINT_RENAME(u_writeIdenticalLevelRun)
+#define ubidi_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ubidi_addPropertyStarts)
+#define ubidi_close U_ICU_ENTRY_POINT_RENAME(ubidi_close)
+#define ubidi_countParagraphs U_ICU_ENTRY_POINT_RENAME(ubidi_countParagraphs)
+#define ubidi_countRuns U_ICU_ENTRY_POINT_RENAME(ubidi_countRuns)
+#define ubidi_getBaseDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getBaseDirection)
+#define ubidi_getClass U_ICU_ENTRY_POINT_RENAME(ubidi_getClass)
+#define ubidi_getClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_getClassCallback)
+#define ubidi_getCustomizedClass U_ICU_ENTRY_POINT_RENAME(ubidi_getCustomizedClass)
+#define ubidi_getDirection U_ICU_ENTRY_POINT_RENAME(ubidi_getDirection)
+#define ubidi_getJoiningGroup U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningGroup)
+#define ubidi_getJoiningType U_ICU_ENTRY_POINT_RENAME(ubidi_getJoiningType)
+#define ubidi_getLength U_ICU_ENTRY_POINT_RENAME(ubidi_getLength)
+#define ubidi_getLevelAt U_ICU_ENTRY_POINT_RENAME(ubidi_getLevelAt)
+#define ubidi_getLevels U_ICU_ENTRY_POINT_RENAME(ubidi_getLevels)
+#define ubidi_getLogicalIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalIndex)
+#define ubidi_getLogicalMap U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalMap)
+#define ubidi_getLogicalRun U_ICU_ENTRY_POINT_RENAME(ubidi_getLogicalRun)
+#define ubidi_getMaxValue U_ICU_ENTRY_POINT_RENAME(ubidi_getMaxValue)
+#define ubidi_getMemory U_ICU_ENTRY_POINT_RENAME(ubidi_getMemory)
+#define ubidi_getMirror U_ICU_ENTRY_POINT_RENAME(ubidi_getMirror)
+#define ubidi_getPairedBracket U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracket)
+#define ubidi_getPairedBracketType U_ICU_ENTRY_POINT_RENAME(ubidi_getPairedBracketType)
+#define ubidi_getParaLevel U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevel)
+#define ubidi_getParaLevelAtIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParaLevelAtIndex)
+#define ubidi_getParagraph U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraph)
+#define ubidi_getParagraphByIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getParagraphByIndex)
+#define ubidi_getProcessedLength U_ICU_ENTRY_POINT_RENAME(ubidi_getProcessedLength)
+#define ubidi_getReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingMode)
+#define ubidi_getReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_getReorderingOptions)
+#define ubidi_getResultLength U_ICU_ENTRY_POINT_RENAME(ubidi_getResultLength)
+#define ubidi_getRuns U_ICU_ENTRY_POINT_RENAME(ubidi_getRuns)
+#define ubidi_getText U_ICU_ENTRY_POINT_RENAME(ubidi_getText)
+#define ubidi_getVisualIndex U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualIndex)
+#define ubidi_getVisualMap U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualMap)
+#define ubidi_getVisualRun U_ICU_ENTRY_POINT_RENAME(ubidi_getVisualRun)
+#define ubidi_invertMap U_ICU_ENTRY_POINT_RENAME(ubidi_invertMap)
+#define ubidi_isBidiControl U_ICU_ENTRY_POINT_RENAME(ubidi_isBidiControl)
+#define ubidi_isInverse U_ICU_ENTRY_POINT_RENAME(ubidi_isInverse)
+#define ubidi_isJoinControl U_ICU_ENTRY_POINT_RENAME(ubidi_isJoinControl)
+#define ubidi_isMirrored U_ICU_ENTRY_POINT_RENAME(ubidi_isMirrored)
+#define ubidi_isOrderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_isOrderParagraphsLTR)
+#define ubidi_open U_ICU_ENTRY_POINT_RENAME(ubidi_open)
+#define ubidi_openSized U_ICU_ENTRY_POINT_RENAME(ubidi_openSized)
+#define ubidi_orderParagraphsLTR U_ICU_ENTRY_POINT_RENAME(ubidi_orderParagraphsLTR)
+#define ubidi_reorderLogical U_ICU_ENTRY_POINT_RENAME(ubidi_reorderLogical)
+#define ubidi_reorderVisual U_ICU_ENTRY_POINT_RENAME(ubidi_reorderVisual)
+#define ubidi_setClassCallback U_ICU_ENTRY_POINT_RENAME(ubidi_setClassCallback)
+#define ubidi_setContext U_ICU_ENTRY_POINT_RENAME(ubidi_setContext)
+#define ubidi_setInverse U_ICU_ENTRY_POINT_RENAME(ubidi_setInverse)
+#define ubidi_setLine U_ICU_ENTRY_POINT_RENAME(ubidi_setLine)
+#define ubidi_setPara U_ICU_ENTRY_POINT_RENAME(ubidi_setPara)
+#define ubidi_setReorderingMode U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingMode)
+#define ubidi_setReorderingOptions U_ICU_ENTRY_POINT_RENAME(ubidi_setReorderingOptions)
+#define ubidi_writeReordered U_ICU_ENTRY_POINT_RENAME(ubidi_writeReordered)
+#define ubidi_writeReverse U_ICU_ENTRY_POINT_RENAME(ubidi_writeReverse)
+#define ubiditransform_close U_ICU_ENTRY_POINT_RENAME(ubiditransform_close)
+#define ubiditransform_open U_ICU_ENTRY_POINT_RENAME(ubiditransform_open)
+#define ubiditransform_transform U_ICU_ENTRY_POINT_RENAME(ubiditransform_transform)
+#define ublock_getCode U_ICU_ENTRY_POINT_RENAME(ublock_getCode)
+#define ubrk_clone U_ICU_ENTRY_POINT_RENAME(ubrk_clone)
+#define ubrk_close U_ICU_ENTRY_POINT_RENAME(ubrk_close)
+#define ubrk_countAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_countAvailable)
+#define ubrk_current U_ICU_ENTRY_POINT_RENAME(ubrk_current)
+#define ubrk_first U_ICU_ENTRY_POINT_RENAME(ubrk_first)
+#define ubrk_following U_ICU_ENTRY_POINT_RENAME(ubrk_following)
+#define ubrk_getAvailable U_ICU_ENTRY_POINT_RENAME(ubrk_getAvailable)
+#define ubrk_getBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_getBinaryRules)
+#define ubrk_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ubrk_getLocaleByType)
+#define ubrk_getRuleStatus U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatus)
+#define ubrk_getRuleStatusVec U_ICU_ENTRY_POINT_RENAME(ubrk_getRuleStatusVec)
+#define ubrk_isBoundary U_ICU_ENTRY_POINT_RENAME(ubrk_isBoundary)
+#define ubrk_last U_ICU_ENTRY_POINT_RENAME(ubrk_last)
+#define ubrk_next U_ICU_ENTRY_POINT_RENAME(ubrk_next)
+#define ubrk_open U_ICU_ENTRY_POINT_RENAME(ubrk_open)
+#define ubrk_openBinaryRules U_ICU_ENTRY_POINT_RENAME(ubrk_openBinaryRules)
+#define ubrk_openRules U_ICU_ENTRY_POINT_RENAME(ubrk_openRules)
+#define ubrk_preceding U_ICU_ENTRY_POINT_RENAME(ubrk_preceding)
+#define ubrk_previous U_ICU_ENTRY_POINT_RENAME(ubrk_previous)
+#define ubrk_refreshUText U_ICU_ENTRY_POINT_RENAME(ubrk_refreshUText)
+#define ubrk_safeClone U_ICU_ENTRY_POINT_RENAME(ubrk_safeClone)
+#define ubrk_setText U_ICU_ENTRY_POINT_RENAME(ubrk_setText)
+#define ubrk_setUText U_ICU_ENTRY_POINT_RENAME(ubrk_setUText)
+#define ubrk_swap U_ICU_ENTRY_POINT_RENAME(ubrk_swap)
+#define ucache_compareKeys U_ICU_ENTRY_POINT_RENAME(ucache_compareKeys)
+#define ucache_deleteKey U_ICU_ENTRY_POINT_RENAME(ucache_deleteKey)
+#define ucache_hashKeys U_ICU_ENTRY_POINT_RENAME(ucache_hashKeys)
+#define ucal_add U_ICU_ENTRY_POINT_RENAME(ucal_add)
+#define ucal_clear U_ICU_ENTRY_POINT_RENAME(ucal_clear)
+#define ucal_clearField U_ICU_ENTRY_POINT_RENAME(ucal_clearField)
+#define ucal_clone U_ICU_ENTRY_POINT_RENAME(ucal_clone)
+#define ucal_close U_ICU_ENTRY_POINT_RENAME(ucal_close)
+#define ucal_countAvailable U_ICU_ENTRY_POINT_RENAME(ucal_countAvailable)
+#define ucal_equivalentTo U_ICU_ENTRY_POINT_RENAME(ucal_equivalentTo)
+#define ucal_get U_ICU_ENTRY_POINT_RENAME(ucal_get)
+#define ucal_getAttribute U_ICU_ENTRY_POINT_RENAME(ucal_getAttribute)
+#define ucal_getAvailable U_ICU_ENTRY_POINT_RENAME(ucal_getAvailable)
+#define ucal_getCanonicalTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getCanonicalTimeZoneID)
+#define ucal_getDSTSavings U_ICU_ENTRY_POINT_RENAME(ucal_getDSTSavings)
+#define ucal_getDayOfWeekType U_ICU_ENTRY_POINT_RENAME(ucal_getDayOfWeekType)
+#define ucal_getDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getDefaultTimeZone)
+#define ucal_getFieldDifference U_ICU_ENTRY_POINT_RENAME(ucal_getFieldDifference)
+#define ucal_getGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_getGregorianChange)
+#define ucal_getHostTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_getHostTimeZone)
+#define ucal_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucal_getKeywordValuesForLocale)
+#define ucal_getLimit U_ICU_ENTRY_POINT_RENAME(ucal_getLimit)
+#define ucal_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucal_getLocaleByType)
+#define ucal_getMillis U_ICU_ENTRY_POINT_RENAME(ucal_getMillis)
+#define ucal_getNow U_ICU_ENTRY_POINT_RENAME(ucal_getNow)
+#define ucal_getTZDataVersion U_ICU_ENTRY_POINT_RENAME(ucal_getTZDataVersion)
+#define ucal_getTimeZoneDisplayName U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneDisplayName)
+#define ucal_getTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneID)
+#define ucal_getTimeZoneIDForWindowsID U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneIDForWindowsID)
+#define ucal_getTimeZoneOffsetFromLocal U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneOffsetFromLocal)
+#define ucal_getTimeZoneTransitionDate U_ICU_ENTRY_POINT_RENAME(ucal_getTimeZoneTransitionDate)
+#define ucal_getType U_ICU_ENTRY_POINT_RENAME(ucal_getType)
+#define ucal_getWeekendTransition U_ICU_ENTRY_POINT_RENAME(ucal_getWeekendTransition)
+#define ucal_getWindowsTimeZoneID U_ICU_ENTRY_POINT_RENAME(ucal_getWindowsTimeZoneID)
+#define ucal_inDaylightTime U_ICU_ENTRY_POINT_RENAME(ucal_inDaylightTime)
+#define ucal_isSet U_ICU_ENTRY_POINT_RENAME(ucal_isSet)
+#define ucal_isWeekend U_ICU_ENTRY_POINT_RENAME(ucal_isWeekend)
+#define ucal_open U_ICU_ENTRY_POINT_RENAME(ucal_open)
+#define ucal_openCountryTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openCountryTimeZones)
+#define ucal_openTimeZoneIDEnumeration U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZoneIDEnumeration)
+#define ucal_openTimeZones U_ICU_ENTRY_POINT_RENAME(ucal_openTimeZones)
+#define ucal_roll U_ICU_ENTRY_POINT_RENAME(ucal_roll)
+#define ucal_set U_ICU_ENTRY_POINT_RENAME(ucal_set)
+#define ucal_setAttribute U_ICU_ENTRY_POINT_RENAME(ucal_setAttribute)
+#define ucal_setDate U_ICU_ENTRY_POINT_RENAME(ucal_setDate)
+#define ucal_setDateTime U_ICU_ENTRY_POINT_RENAME(ucal_setDateTime)
+#define ucal_setDefaultTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setDefaultTimeZone)
+#define ucal_setGregorianChange U_ICU_ENTRY_POINT_RENAME(ucal_setGregorianChange)
+#define ucal_setMillis U_ICU_ENTRY_POINT_RENAME(ucal_setMillis)
+#define ucal_setTimeZone U_ICU_ENTRY_POINT_RENAME(ucal_setTimeZone)
+#define ucase_addCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addCaseClosure)
+#define ucase_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(ucase_addPropertyStarts)
+#define ucase_addSimpleCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addSimpleCaseClosure)
+#define ucase_addStringCaseClosure U_ICU_ENTRY_POINT_RENAME(ucase_addStringCaseClosure)
+#define ucase_fold U_ICU_ENTRY_POINT_RENAME(ucase_fold)
+#define ucase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ucase_getCaseLocale)
+#define ucase_getSingleton U_ICU_ENTRY_POINT_RENAME(ucase_getSingleton)
+#define ucase_getTrie U_ICU_ENTRY_POINT_RENAME(ucase_getTrie)
+#define ucase_getType U_ICU_ENTRY_POINT_RENAME(ucase_getType)
+#define ucase_getTypeOrIgnorable U_ICU_ENTRY_POINT_RENAME(ucase_getTypeOrIgnorable)
+#define ucase_hasBinaryProperty U_ICU_ENTRY_POINT_RENAME(ucase_hasBinaryProperty)
+#define ucase_isCaseSensitive U_ICU_ENTRY_POINT_RENAME(ucase_isCaseSensitive)
+#define ucase_isSoftDotted U_ICU_ENTRY_POINT_RENAME(ucase_isSoftDotted)
+#define ucase_toFullFolding U_ICU_ENTRY_POINT_RENAME(ucase_toFullFolding)
+#define ucase_toFullLower U_ICU_ENTRY_POINT_RENAME(ucase_toFullLower)
+#define ucase_toFullTitle U_ICU_ENTRY_POINT_RENAME(ucase_toFullTitle)
+#define ucase_toFullUpper U_ICU_ENTRY_POINT_RENAME(ucase_toFullUpper)
+#define ucase_tolower U_ICU_ENTRY_POINT_RENAME(ucase_tolower)
+#define ucase_totitle U_ICU_ENTRY_POINT_RENAME(ucase_totitle)
+#define ucase_toupper U_ICU_ENTRY_POINT_RENAME(ucase_toupper)
+#define ucasemap_close U_ICU_ENTRY_POINT_RENAME(ucasemap_close)
+#define ucasemap_getBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_getBreakIterator)
+#define ucasemap_getLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_getLocale)
+#define ucasemap_getOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_getOptions)
+#define ucasemap_internalUTF8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_internalUTF8ToTitle)
+#define ucasemap_open U_ICU_ENTRY_POINT_RENAME(ucasemap_open)
+#define ucasemap_setBreakIterator U_ICU_ENTRY_POINT_RENAME(ucasemap_setBreakIterator)
+#define ucasemap_setLocale U_ICU_ENTRY_POINT_RENAME(ucasemap_setLocale)
+#define ucasemap_setOptions U_ICU_ENTRY_POINT_RENAME(ucasemap_setOptions)
+#define ucasemap_toTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_toTitle)
+#define ucasemap_utf8FoldCase U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8FoldCase)
+#define ucasemap_utf8ToLower U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToLower)
+#define ucasemap_utf8ToTitle U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToTitle)
+#define ucasemap_utf8ToUpper U_ICU_ENTRY_POINT_RENAME(ucasemap_utf8ToUpper)
+#define ucfpos_close U_ICU_ENTRY_POINT_RENAME(ucfpos_close)
+#define ucfpos_constrainCategory U_ICU_ENTRY_POINT_RENAME(ucfpos_constrainCategory)
+#define ucfpos_constrainField U_ICU_ENTRY_POINT_RENAME(ucfpos_constrainField)
+#define ucfpos_getCategory U_ICU_ENTRY_POINT_RENAME(ucfpos_getCategory)
+#define ucfpos_getField U_ICU_ENTRY_POINT_RENAME(ucfpos_getField)
+#define ucfpos_getIndexes U_ICU_ENTRY_POINT_RENAME(ucfpos_getIndexes)
+#define ucfpos_getInt64IterationContext U_ICU_ENTRY_POINT_RENAME(ucfpos_getInt64IterationContext)
+#define ucfpos_matchesField U_ICU_ENTRY_POINT_RENAME(ucfpos_matchesField)
+#define ucfpos_open U_ICU_ENTRY_POINT_RENAME(ucfpos_open)
+#define ucfpos_reset U_ICU_ENTRY_POINT_RENAME(ucfpos_reset)
+#define ucfpos_setInt64IterationContext U_ICU_ENTRY_POINT_RENAME(ucfpos_setInt64IterationContext)
+#define ucfpos_setState U_ICU_ENTRY_POINT_RENAME(ucfpos_setState)
+#define uchar_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uchar_addPropertyStarts)
+#define uchar_swapNames U_ICU_ENTRY_POINT_RENAME(uchar_swapNames)
+#define ucln_cleanupOne U_ICU_ENTRY_POINT_RENAME(ucln_cleanupOne)
+#define ucln_common_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_common_registerCleanup)
+#define ucln_i18n_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_i18n_registerCleanup)
+#define ucln_io_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_io_registerCleanup)
+#define ucln_lib_cleanup U_ICU_ENTRY_POINT_RENAME(ucln_lib_cleanup)
+#define ucln_registerCleanup U_ICU_ENTRY_POINT_RENAME(ucln_registerCleanup)
+#define ucnv_MBCSFromUChar32 U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUChar32)
+#define ucnv_MBCSFromUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSFromUnicodeWithOffsets)
+#define ucnv_MBCSGetFilteredUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetFilteredUnicodeSetForUnicode)
+#define ucnv_MBCSGetType U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetType)
+#define ucnv_MBCSGetUnicodeSetForUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSGetUnicodeSetForUnicode)
+#define ucnv_MBCSIsLeadByte U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSIsLeadByte)
+#define ucnv_MBCSSimpleGetNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSSimpleGetNextUChar)
+#define ucnv_MBCSToUnicodeWithOffsets U_ICU_ENTRY_POINT_RENAME(ucnv_MBCSToUnicodeWithOffsets)
+#define ucnv_bld_countAvailableConverters U_ICU_ENTRY_POINT_RENAME(ucnv_bld_countAvailableConverters)
+#define ucnv_bld_getAvailableConverter U_ICU_ENTRY_POINT_RENAME(ucnv_bld_getAvailableConverter)
+#define ucnv_canCreateConverter U_ICU_ENTRY_POINT_RENAME(ucnv_canCreateConverter)
+#define ucnv_cbFromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteBytes)
+#define ucnv_cbFromUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteSub)
+#define ucnv_cbFromUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbFromUWriteUChars)
+#define ucnv_cbToUWriteSub U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteSub)
+#define ucnv_cbToUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_cbToUWriteUChars)
+#define ucnv_clone U_ICU_ENTRY_POINT_RENAME(ucnv_clone)
+#define ucnv_close U_ICU_ENTRY_POINT_RENAME(ucnv_close)
+#define ucnv_compareNames U_ICU_ENTRY_POINT_RENAME(ucnv_compareNames)
+#define ucnv_convert U_ICU_ENTRY_POINT_RENAME(ucnv_convert)
+#define ucnv_convertEx U_ICU_ENTRY_POINT_RENAME(ucnv_convertEx)
+#define ucnv_countAliases U_ICU_ENTRY_POINT_RENAME(ucnv_countAliases)
+#define ucnv_countAvailable U_ICU_ENTRY_POINT_RENAME(ucnv_countAvailable)
+#define ucnv_countStandards U_ICU_ENTRY_POINT_RENAME(ucnv_countStandards)
+#define ucnv_createAlgorithmicConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createAlgorithmicConverter)
+#define ucnv_createConverter U_ICU_ENTRY_POINT_RENAME(ucnv_createConverter)
+#define ucnv_createConverterFromPackage U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromPackage)
+#define ucnv_createConverterFromSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_createConverterFromSharedData)
+#define ucnv_detectUnicodeSignature U_ICU_ENTRY_POINT_RENAME(ucnv_detectUnicodeSignature)
+#define ucnv_enableCleanup U_ICU_ENTRY_POINT_RENAME(ucnv_enableCleanup)
+#define ucnv_extContinueMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchFromU)
+#define ucnv_extContinueMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extContinueMatchToU)
+#define ucnv_extGetUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_extGetUnicodeSet)
+#define ucnv_extInitialMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchFromU)
+#define ucnv_extInitialMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extInitialMatchToU)
+#define ucnv_extSimpleMatchFromU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchFromU)
+#define ucnv_extSimpleMatchToU U_ICU_ENTRY_POINT_RENAME(ucnv_extSimpleMatchToU)
+#define ucnv_fixFileSeparator U_ICU_ENTRY_POINT_RENAME(ucnv_fixFileSeparator)
+#define ucnv_flushCache U_ICU_ENTRY_POINT_RENAME(ucnv_flushCache)
+#define ucnv_fromAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_fromAlgorithmic)
+#define ucnv_fromUChars U_ICU_ENTRY_POINT_RENAME(ucnv_fromUChars)
+#define ucnv_fromUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_fromUCountPending)
+#define ucnv_fromUWriteBytes U_ICU_ENTRY_POINT_RENAME(ucnv_fromUWriteBytes)
+#define ucnv_fromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode)
+#define ucnv_fromUnicode_UTF8 U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8)
+#define ucnv_fromUnicode_UTF8_OFFSETS_LOGIC U_ICU_ENTRY_POINT_RENAME(ucnv_fromUnicode_UTF8_OFFSETS_LOGIC)
+#define ucnv_getAlias U_ICU_ENTRY_POINT_RENAME(ucnv_getAlias)
+#define ucnv_getAliases U_ICU_ENTRY_POINT_RENAME(ucnv_getAliases)
+#define ucnv_getAvailableName U_ICU_ENTRY_POINT_RENAME(ucnv_getAvailableName)
+#define ucnv_getCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_getCCSID)
+#define ucnv_getCanonicalName U_ICU_ENTRY_POINT_RENAME(ucnv_getCanonicalName)
+#define ucnv_getCompleteUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getCompleteUnicodeSet)
+#define ucnv_getDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_getDefaultName)
+#define ucnv_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucnv_getDisplayName)
+#define ucnv_getFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getFromUCallBack)
+#define ucnv_getInvalidChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidChars)
+#define ucnv_getInvalidUChars U_ICU_ENTRY_POINT_RENAME(ucnv_getInvalidUChars)
+#define ucnv_getMaxCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMaxCharSize)
+#define ucnv_getMinCharSize U_ICU_ENTRY_POINT_RENAME(ucnv_getMinCharSize)
+#define ucnv_getName U_ICU_ENTRY_POINT_RENAME(ucnv_getName)
+#define ucnv_getNextUChar U_ICU_ENTRY_POINT_RENAME(ucnv_getNextUChar)
+#define ucnv_getNonSurrogateUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getNonSurrogateUnicodeSet)
+#define ucnv_getPlatform U_ICU_ENTRY_POINT_RENAME(ucnv_getPlatform)
+#define ucnv_getStandard U_ICU_ENTRY_POINT_RENAME(ucnv_getStandard)
+#define ucnv_getStandardName U_ICU_ENTRY_POINT_RENAME(ucnv_getStandardName)
+#define ucnv_getStarters U_ICU_ENTRY_POINT_RENAME(ucnv_getStarters)
+#define ucnv_getSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_getSubstChars)
+#define ucnv_getToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_getToUCallBack)
+#define ucnv_getType U_ICU_ENTRY_POINT_RENAME(ucnv_getType)
+#define ucnv_getUnicodeSet U_ICU_ENTRY_POINT_RENAME(ucnv_getUnicodeSet)
+#define ucnv_incrementRefCount U_ICU_ENTRY_POINT_RENAME(ucnv_incrementRefCount)
+#define ucnv_io_countKnownConverters U_ICU_ENTRY_POINT_RENAME(ucnv_io_countKnownConverters)
+#define ucnv_io_getConverterName U_ICU_ENTRY_POINT_RENAME(ucnv_io_getConverterName)
+#define ucnv_io_stripASCIIForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripASCIIForCompare)
+#define ucnv_io_stripEBCDICForCompare U_ICU_ENTRY_POINT_RENAME(ucnv_io_stripEBCDICForCompare)
+#define ucnv_isAmbiguous U_ICU_ENTRY_POINT_RENAME(ucnv_isAmbiguous)
+#define ucnv_isFixedWidth U_ICU_ENTRY_POINT_RENAME(ucnv_isFixedWidth)
+#define ucnv_load U_ICU_ENTRY_POINT_RENAME(ucnv_load)
+#define ucnv_loadSharedData U_ICU_ENTRY_POINT_RENAME(ucnv_loadSharedData)
+#define ucnv_open U_ICU_ENTRY_POINT_RENAME(ucnv_open)
+#define ucnv_openAllNames U_ICU_ENTRY_POINT_RENAME(ucnv_openAllNames)
+#define ucnv_openCCSID U_ICU_ENTRY_POINT_RENAME(ucnv_openCCSID)
+#define ucnv_openPackage U_ICU_ENTRY_POINT_RENAME(ucnv_openPackage)
+#define ucnv_openStandardNames U_ICU_ENTRY_POINT_RENAME(ucnv_openStandardNames)
+#define ucnv_openU U_ICU_ENTRY_POINT_RENAME(ucnv_openU)
+#define ucnv_reset U_ICU_ENTRY_POINT_RENAME(ucnv_reset)
+#define ucnv_resetFromUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetFromUnicode)
+#define ucnv_resetToUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_resetToUnicode)
+#define ucnv_safeClone U_ICU_ENTRY_POINT_RENAME(ucnv_safeClone)
+#define ucnv_setDefaultName U_ICU_ENTRY_POINT_RENAME(ucnv_setDefaultName)
+#define ucnv_setFallback U_ICU_ENTRY_POINT_RENAME(ucnv_setFallback)
+#define ucnv_setFromUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setFromUCallBack)
+#define ucnv_setSubstChars U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstChars)
+#define ucnv_setSubstString U_ICU_ENTRY_POINT_RENAME(ucnv_setSubstString)
+#define ucnv_setToUCallBack U_ICU_ENTRY_POINT_RENAME(ucnv_setToUCallBack)
+#define ucnv_swap U_ICU_ENTRY_POINT_RENAME(ucnv_swap)
+#define ucnv_swapAliases U_ICU_ENTRY_POINT_RENAME(ucnv_swapAliases)
+#define ucnv_toAlgorithmic U_ICU_ENTRY_POINT_RENAME(ucnv_toAlgorithmic)
+#define ucnv_toUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUChars)
+#define ucnv_toUCountPending U_ICU_ENTRY_POINT_RENAME(ucnv_toUCountPending)
+#define ucnv_toUWriteCodePoint U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteCodePoint)
+#define ucnv_toUWriteUChars U_ICU_ENTRY_POINT_RENAME(ucnv_toUWriteUChars)
+#define ucnv_toUnicode U_ICU_ENTRY_POINT_RENAME(ucnv_toUnicode)
+#define ucnv_unload U_ICU_ENTRY_POINT_RENAME(ucnv_unload)
+#define ucnv_unloadSharedDataIfReady U_ICU_ENTRY_POINT_RENAME(ucnv_unloadSharedDataIfReady)
+#define ucnv_usesFallback U_ICU_ENTRY_POINT_RENAME(ucnv_usesFallback)
+#define ucnvsel_close U_ICU_ENTRY_POINT_RENAME(ucnvsel_close)
+#define ucnvsel_open U_ICU_ENTRY_POINT_RENAME(ucnvsel_open)
+#define ucnvsel_openFromSerialized U_ICU_ENTRY_POINT_RENAME(ucnvsel_openFromSerialized)
+#define ucnvsel_selectForString U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForString)
+#define ucnvsel_selectForUTF8 U_ICU_ENTRY_POINT_RENAME(ucnvsel_selectForUTF8)
+#define ucnvsel_serialize U_ICU_ENTRY_POINT_RENAME(ucnvsel_serialize)
+#define ucol_clone U_ICU_ENTRY_POINT_RENAME(ucol_clone)
+#define ucol_cloneBinary U_ICU_ENTRY_POINT_RENAME(ucol_cloneBinary)
+#define ucol_close U_ICU_ENTRY_POINT_RENAME(ucol_close)
+#define ucol_closeElements U_ICU_ENTRY_POINT_RENAME(ucol_closeElements)
+#define ucol_countAvailable U_ICU_ENTRY_POINT_RENAME(ucol_countAvailable)
+#define ucol_equal U_ICU_ENTRY_POINT_RENAME(ucol_equal)
+#define ucol_equals U_ICU_ENTRY_POINT_RENAME(ucol_equals)
+#define ucol_getAttribute U_ICU_ENTRY_POINT_RENAME(ucol_getAttribute)
+#define ucol_getAvailable U_ICU_ENTRY_POINT_RENAME(ucol_getAvailable)
+#define ucol_getBound U_ICU_ENTRY_POINT_RENAME(ucol_getBound)
+#define ucol_getContractions U_ICU_ENTRY_POINT_RENAME(ucol_getContractions)
+#define ucol_getContractionsAndExpansions U_ICU_ENTRY_POINT_RENAME(ucol_getContractionsAndExpansions)
+#define ucol_getDisplayName U_ICU_ENTRY_POINT_RENAME(ucol_getDisplayName)
+#define ucol_getEquivalentReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getEquivalentReorderCodes)
+#define ucol_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ucol_getFunctionalEquivalent)
+#define ucol_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValues)
+#define ucol_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucol_getKeywordValuesForLocale)
+#define ucol_getKeywords U_ICU_ENTRY_POINT_RENAME(ucol_getKeywords)
+#define ucol_getLocale U_ICU_ENTRY_POINT_RENAME(ucol_getLocale)
+#define ucol_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ucol_getLocaleByType)
+#define ucol_getMaxExpansion U_ICU_ENTRY_POINT_RENAME(ucol_getMaxExpansion)
+#define ucol_getMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_getMaxVariable)
+#define ucol_getOffset U_ICU_ENTRY_POINT_RENAME(ucol_getOffset)
+#define ucol_getReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_getReorderCodes)
+#define ucol_getRules U_ICU_ENTRY_POINT_RENAME(ucol_getRules)
+#define ucol_getRulesEx U_ICU_ENTRY_POINT_RENAME(ucol_getRulesEx)
+#define ucol_getShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_getShortDefinitionString)
+#define ucol_getSortKey U_ICU_ENTRY_POINT_RENAME(ucol_getSortKey)
+#define ucol_getStrength U_ICU_ENTRY_POINT_RENAME(ucol_getStrength)
+#define ucol_getTailoredSet U_ICU_ENTRY_POINT_RENAME(ucol_getTailoredSet)
+#define ucol_getUCAVersion U_ICU_ENTRY_POINT_RENAME(ucol_getUCAVersion)
+#define ucol_getUnsafeSet U_ICU_ENTRY_POINT_RENAME(ucol_getUnsafeSet)
+#define ucol_getVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_getVariableTop)
+#define ucol_getVersion U_ICU_ENTRY_POINT_RENAME(ucol_getVersion)
+#define ucol_greater U_ICU_ENTRY_POINT_RENAME(ucol_greater)
+#define ucol_greaterOrEqual U_ICU_ENTRY_POINT_RENAME(ucol_greaterOrEqual)
+#define ucol_keyHashCode U_ICU_ENTRY_POINT_RENAME(ucol_keyHashCode)
+#define ucol_looksLikeCollationBinary U_ICU_ENTRY_POINT_RENAME(ucol_looksLikeCollationBinary)
+#define ucol_mergeSortkeys U_ICU_ENTRY_POINT_RENAME(ucol_mergeSortkeys)
+#define ucol_next U_ICU_ENTRY_POINT_RENAME(ucol_next)
+#define ucol_nextSortKeyPart U_ICU_ENTRY_POINT_RENAME(ucol_nextSortKeyPart)
+#define ucol_normalizeShortDefinitionString U_ICU_ENTRY_POINT_RENAME(ucol_normalizeShortDefinitionString)
+#define ucol_open U_ICU_ENTRY_POINT_RENAME(ucol_open)
+#define ucol_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ucol_openAvailableLocales)
+#define ucol_openBinary U_ICU_ENTRY_POINT_RENAME(ucol_openBinary)
+#define ucol_openElements U_ICU_ENTRY_POINT_RENAME(ucol_openElements)
+#define ucol_openFromShortString U_ICU_ENTRY_POINT_RENAME(ucol_openFromShortString)
+#define ucol_openRules U_ICU_ENTRY_POINT_RENAME(ucol_openRules)
+#define ucol_prepareShortStringOpen U_ICU_ENTRY_POINT_RENAME(ucol_prepareShortStringOpen)
+#define ucol_previous U_ICU_ENTRY_POINT_RENAME(ucol_previous)
+#define ucol_primaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_primaryOrder)
+#define ucol_reset U_ICU_ENTRY_POINT_RENAME(ucol_reset)
+#define ucol_restoreVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_restoreVariableTop)
+#define ucol_safeClone U_ICU_ENTRY_POINT_RENAME(ucol_safeClone)
+#define ucol_secondaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_secondaryOrder)
+#define ucol_setAttribute U_ICU_ENTRY_POINT_RENAME(ucol_setAttribute)
+#define ucol_setMaxVariable U_ICU_ENTRY_POINT_RENAME(ucol_setMaxVariable)
+#define ucol_setOffset U_ICU_ENTRY_POINT_RENAME(ucol_setOffset)
+#define ucol_setReorderCodes U_ICU_ENTRY_POINT_RENAME(ucol_setReorderCodes)
+#define ucol_setStrength U_ICU_ENTRY_POINT_RENAME(ucol_setStrength)
+#define ucol_setText U_ICU_ENTRY_POINT_RENAME(ucol_setText)
+#define ucol_setVariableTop U_ICU_ENTRY_POINT_RENAME(ucol_setVariableTop)
+#define ucol_strcoll U_ICU_ENTRY_POINT_RENAME(ucol_strcoll)
+#define ucol_strcollIter U_ICU_ENTRY_POINT_RENAME(ucol_strcollIter)
+#define ucol_strcollUTF8 U_ICU_ENTRY_POINT_RENAME(ucol_strcollUTF8)
+#define ucol_swap U_ICU_ENTRY_POINT_RENAME(ucol_swap)
+#define ucol_swapInverseUCA U_ICU_ENTRY_POINT_RENAME(ucol_swapInverseUCA)
+#define ucol_tertiaryOrder U_ICU_ENTRY_POINT_RENAME(ucol_tertiaryOrder)
+#define ucpmap_get U_ICU_ENTRY_POINT_RENAME(ucpmap_get)
+#define ucpmap_getRange U_ICU_ENTRY_POINT_RENAME(ucpmap_getRange)
+#define ucptrie_close U_ICU_ENTRY_POINT_RENAME(ucptrie_close)
+#define ucptrie_get U_ICU_ENTRY_POINT_RENAME(ucptrie_get)
+#define ucptrie_getRange U_ICU_ENTRY_POINT_RENAME(ucptrie_getRange)
+#define ucptrie_getType U_ICU_ENTRY_POINT_RENAME(ucptrie_getType)
+#define ucptrie_getValueWidth U_ICU_ENTRY_POINT_RENAME(ucptrie_getValueWidth)
+#define ucptrie_internalGetRange U_ICU_ENTRY_POINT_RENAME(ucptrie_internalGetRange)
+#define ucptrie_internalSmallIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallIndex)
+#define ucptrie_internalSmallU8Index U_ICU_ENTRY_POINT_RENAME(ucptrie_internalSmallU8Index)
+#define ucptrie_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(ucptrie_internalU8PrevIndex)
+#define ucptrie_openFromBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_openFromBinary)
+#define ucptrie_swap U_ICU_ENTRY_POINT_RENAME(ucptrie_swap)
+#define ucptrie_toBinary U_ICU_ENTRY_POINT_RENAME(ucptrie_toBinary)
+#define ucsdet_close U_ICU_ENTRY_POINT_RENAME(ucsdet_close)
+#define ucsdet_detect U_ICU_ENTRY_POINT_RENAME(ucsdet_detect)
+#define ucsdet_detectAll U_ICU_ENTRY_POINT_RENAME(ucsdet_detectAll)
+#define ucsdet_enableInputFilter U_ICU_ENTRY_POINT_RENAME(ucsdet_enableInputFilter)
+#define ucsdet_getAllDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getAllDetectableCharsets)
+#define ucsdet_getConfidence U_ICU_ENTRY_POINT_RENAME(ucsdet_getConfidence)
+#define ucsdet_getDetectableCharsets U_ICU_ENTRY_POINT_RENAME(ucsdet_getDetectableCharsets)
+#define ucsdet_getLanguage U_ICU_ENTRY_POINT_RENAME(ucsdet_getLanguage)
+#define ucsdet_getName U_ICU_ENTRY_POINT_RENAME(ucsdet_getName)
+#define ucsdet_getUChars U_ICU_ENTRY_POINT_RENAME(ucsdet_getUChars)
+#define ucsdet_isInputFilterEnabled U_ICU_ENTRY_POINT_RENAME(ucsdet_isInputFilterEnabled)
+#define ucsdet_open U_ICU_ENTRY_POINT_RENAME(ucsdet_open)
+#define ucsdet_setDeclaredEncoding U_ICU_ENTRY_POINT_RENAME(ucsdet_setDeclaredEncoding)
+#define ucsdet_setDetectableCharset U_ICU_ENTRY_POINT_RENAME(ucsdet_setDetectableCharset)
+#define ucsdet_setText U_ICU_ENTRY_POINT_RENAME(ucsdet_setText)
+#define ucurr_countCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_countCurrencies)
+#define ucurr_forLocale U_ICU_ENTRY_POINT_RENAME(ucurr_forLocale)
+#define ucurr_forLocaleAndDate U_ICU_ENTRY_POINT_RENAME(ucurr_forLocaleAndDate)
+#define ucurr_getDefaultFractionDigits U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigits)
+#define ucurr_getDefaultFractionDigitsForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getDefaultFractionDigitsForUsage)
+#define ucurr_getKeywordValuesForLocale U_ICU_ENTRY_POINT_RENAME(ucurr_getKeywordValuesForLocale)
+#define ucurr_getName U_ICU_ENTRY_POINT_RENAME(ucurr_getName)
+#define ucurr_getNumericCode U_ICU_ENTRY_POINT_RENAME(ucurr_getNumericCode)
+#define ucurr_getPluralName U_ICU_ENTRY_POINT_RENAME(ucurr_getPluralName)
+#define ucurr_getRoundingIncrement U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrement)
+#define ucurr_getRoundingIncrementForUsage U_ICU_ENTRY_POINT_RENAME(ucurr_getRoundingIncrementForUsage)
+#define ucurr_isAvailable U_ICU_ENTRY_POINT_RENAME(ucurr_isAvailable)
+#define ucurr_openISOCurrencies U_ICU_ENTRY_POINT_RENAME(ucurr_openISOCurrencies)
+#define ucurr_register U_ICU_ENTRY_POINT_RENAME(ucurr_register)
+#define ucurr_unregister U_ICU_ENTRY_POINT_RENAME(ucurr_unregister)
+#define udat_adoptNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormat)
+#define udat_adoptNumberFormatForFields U_ICU_ENTRY_POINT_RENAME(udat_adoptNumberFormatForFields)
+#define udat_applyPattern U_ICU_ENTRY_POINT_RENAME(udat_applyPattern)
+#define udat_applyPatternRelative U_ICU_ENTRY_POINT_RENAME(udat_applyPatternRelative)
+#define udat_clone U_ICU_ENTRY_POINT_RENAME(udat_clone)
+#define udat_close U_ICU_ENTRY_POINT_RENAME(udat_close)
+#define udat_countAvailable U_ICU_ENTRY_POINT_RENAME(udat_countAvailable)
+#define udat_countSymbols U_ICU_ENTRY_POINT_RENAME(udat_countSymbols)
+#define udat_format U_ICU_ENTRY_POINT_RENAME(udat_format)
+#define udat_formatCalendar U_ICU_ENTRY_POINT_RENAME(udat_formatCalendar)
+#define udat_formatCalendarForFields U_ICU_ENTRY_POINT_RENAME(udat_formatCalendarForFields)
+#define udat_formatForFields U_ICU_ENTRY_POINT_RENAME(udat_formatForFields)
+#define udat_get2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_get2DigitYearStart)
+#define udat_getAvailable U_ICU_ENTRY_POINT_RENAME(udat_getAvailable)
+#define udat_getBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_getBooleanAttribute)
+#define udat_getCalendar U_ICU_ENTRY_POINT_RENAME(udat_getCalendar)
+#define udat_getContext U_ICU_ENTRY_POINT_RENAME(udat_getContext)
+#define udat_getLocaleByType U_ICU_ENTRY_POINT_RENAME(udat_getLocaleByType)
+#define udat_getNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormat)
+#define udat_getNumberFormatForField U_ICU_ENTRY_POINT_RENAME(udat_getNumberFormatForField)
+#define udat_getSymbols U_ICU_ENTRY_POINT_RENAME(udat_getSymbols)
+#define udat_isLenient U_ICU_ENTRY_POINT_RENAME(udat_isLenient)
+#define udat_open U_ICU_ENTRY_POINT_RENAME(udat_open)
+#define udat_parse U_ICU_ENTRY_POINT_RENAME(udat_parse)
+#define udat_parseCalendar U_ICU_ENTRY_POINT_RENAME(udat_parseCalendar)
+#define udat_registerOpener U_ICU_ENTRY_POINT_RENAME(udat_registerOpener)
+#define udat_set2DigitYearStart U_ICU_ENTRY_POINT_RENAME(udat_set2DigitYearStart)
+#define udat_setBooleanAttribute U_ICU_ENTRY_POINT_RENAME(udat_setBooleanAttribute)
+#define udat_setCalendar U_ICU_ENTRY_POINT_RENAME(udat_setCalendar)
+#define udat_setContext U_ICU_ENTRY_POINT_RENAME(udat_setContext)
+#define udat_setLenient U_ICU_ENTRY_POINT_RENAME(udat_setLenient)
+#define udat_setNumberFormat U_ICU_ENTRY_POINT_RENAME(udat_setNumberFormat)
+#define udat_setSymbols U_ICU_ENTRY_POINT_RENAME(udat_setSymbols)
+#define udat_toCalendarDateField U_ICU_ENTRY_POINT_RENAME(udat_toCalendarDateField)
+#define udat_toPattern U_ICU_ENTRY_POINT_RENAME(udat_toPattern)
+#define udat_toPatternRelativeDate U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeDate)
+#define udat_toPatternRelativeTime U_ICU_ENTRY_POINT_RENAME(udat_toPatternRelativeTime)
+#define udat_unregisterOpener U_ICU_ENTRY_POINT_RENAME(udat_unregisterOpener)
+#define udata_checkCommonData U_ICU_ENTRY_POINT_RENAME(udata_checkCommonData)
+#define udata_close U_ICU_ENTRY_POINT_RENAME(udata_close)
+#define udata_closeSwapper U_ICU_ENTRY_POINT_RENAME(udata_closeSwapper)
+#define udata_getHeaderSize U_ICU_ENTRY_POINT_RENAME(udata_getHeaderSize)
+#define udata_getInfo U_ICU_ENTRY_POINT_RENAME(udata_getInfo)
+#define udata_getInfoSize U_ICU_ENTRY_POINT_RENAME(udata_getInfoSize)
+#define udata_getLength U_ICU_ENTRY_POINT_RENAME(udata_getLength)
+#define udata_getMemory U_ICU_ENTRY_POINT_RENAME(udata_getMemory)
+#define udata_getRawMemory U_ICU_ENTRY_POINT_RENAME(udata_getRawMemory)
+#define udata_open U_ICU_ENTRY_POINT_RENAME(udata_open)
+#define udata_openChoice U_ICU_ENTRY_POINT_RENAME(udata_openChoice)
+#define udata_openSwapper U_ICU_ENTRY_POINT_RENAME(udata_openSwapper)
+#define udata_openSwapperForInputData U_ICU_ENTRY_POINT_RENAME(udata_openSwapperForInputData)
+#define udata_printError U_ICU_ENTRY_POINT_RENAME(udata_printError)
+#define udata_readInt16 U_ICU_ENTRY_POINT_RENAME(udata_readInt16)
+#define udata_readInt32 U_ICU_ENTRY_POINT_RENAME(udata_readInt32)
+#define udata_setAppData U_ICU_ENTRY_POINT_RENAME(udata_setAppData)
+#define udata_setCommonData U_ICU_ENTRY_POINT_RENAME(udata_setCommonData)
+#define udata_setFileAccess U_ICU_ENTRY_POINT_RENAME(udata_setFileAccess)
+#define udata_swapDataHeader U_ICU_ENTRY_POINT_RENAME(udata_swapDataHeader)
+#define udata_swapInvStringBlock U_ICU_ENTRY_POINT_RENAME(udata_swapInvStringBlock)
+#define udatpg_addPattern U_ICU_ENTRY_POINT_RENAME(udatpg_addPattern)
+#define udatpg_clone U_ICU_ENTRY_POINT_RENAME(udatpg_clone)
+#define udatpg_close U_ICU_ENTRY_POINT_RENAME(udatpg_close)
+#define udatpg_getAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemFormat)
+#define udatpg_getAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_getAppendItemName)
+#define udatpg_getBaseSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getBaseSkeleton)
+#define udatpg_getBestPattern U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPattern)
+#define udatpg_getBestPatternWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_getBestPatternWithOptions)
+#define udatpg_getDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormat)
+#define udatpg_getDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_getDateTimeFormatForStyle)
+#define udatpg_getDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_getDecimal)
+#define udatpg_getDefaultHourCycle U_ICU_ENTRY_POINT_RENAME(udatpg_getDefaultHourCycle)
+#define udatpg_getFieldDisplayName U_ICU_ENTRY_POINT_RENAME(udatpg_getFieldDisplayName)
+#define udatpg_getPatternForSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getPatternForSkeleton)
+#define udatpg_getSkeleton U_ICU_ENTRY_POINT_RENAME(udatpg_getSkeleton)
+#define udatpg_open U_ICU_ENTRY_POINT_RENAME(udatpg_open)
+#define udatpg_openBaseSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openBaseSkeletons)
+#define udatpg_openEmpty U_ICU_ENTRY_POINT_RENAME(udatpg_openEmpty)
+#define udatpg_openSkeletons U_ICU_ENTRY_POINT_RENAME(udatpg_openSkeletons)
+#define udatpg_replaceFieldTypes U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypes)
+#define udatpg_replaceFieldTypesWithOptions U_ICU_ENTRY_POINT_RENAME(udatpg_replaceFieldTypesWithOptions)
+#define udatpg_setAppendItemFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemFormat)
+#define udatpg_setAppendItemName U_ICU_ENTRY_POINT_RENAME(udatpg_setAppendItemName)
+#define udatpg_setDateTimeFormat U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormat)
+#define udatpg_setDateTimeFormatForStyle U_ICU_ENTRY_POINT_RENAME(udatpg_setDateTimeFormatForStyle)
+#define udatpg_setDecimal U_ICU_ENTRY_POINT_RENAME(udatpg_setDecimal)
+#define udict_swap U_ICU_ENTRY_POINT_RENAME(udict_swap)
+#define udispopt_fromGrammaticalCaseIdentifier U_ICU_ENTRY_POINT_RENAME(udispopt_fromGrammaticalCaseIdentifier)
+#define udispopt_fromNounClassIdentifier U_ICU_ENTRY_POINT_RENAME(udispopt_fromNounClassIdentifier)
+#define udispopt_fromPluralCategoryIdentifier U_ICU_ENTRY_POINT_RENAME(udispopt_fromPluralCategoryIdentifier)
+#define udispopt_getGrammaticalCaseIdentifier U_ICU_ENTRY_POINT_RENAME(udispopt_getGrammaticalCaseIdentifier)
+#define udispopt_getNounClassIdentifier U_ICU_ENTRY_POINT_RENAME(udispopt_getNounClassIdentifier)
+#define udispopt_getPluralCategoryIdentifier U_ICU_ENTRY_POINT_RENAME(udispopt_getPluralCategoryIdentifier)
+#define udtitvfmt_close U_ICU_ENTRY_POINT_RENAME(udtitvfmt_close)
+#define udtitvfmt_closeResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_closeResult)
+#define udtitvfmt_format U_ICU_ENTRY_POINT_RENAME(udtitvfmt_format)
+#define udtitvfmt_formatCalendarToResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_formatCalendarToResult)
+#define udtitvfmt_formatToResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_formatToResult)
+#define udtitvfmt_getContext U_ICU_ENTRY_POINT_RENAME(udtitvfmt_getContext)
+#define udtitvfmt_open U_ICU_ENTRY_POINT_RENAME(udtitvfmt_open)
+#define udtitvfmt_openResult U_ICU_ENTRY_POINT_RENAME(udtitvfmt_openResult)
+#define udtitvfmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(udtitvfmt_resultAsValue)
+#define udtitvfmt_setContext U_ICU_ENTRY_POINT_RENAME(udtitvfmt_setContext)
+#define uenum_close U_ICU_ENTRY_POINT_RENAME(uenum_close)
+#define uenum_count U_ICU_ENTRY_POINT_RENAME(uenum_count)
+#define uenum_next U_ICU_ENTRY_POINT_RENAME(uenum_next)
+#define uenum_nextDefault U_ICU_ENTRY_POINT_RENAME(uenum_nextDefault)
+#define uenum_openCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openCharStringsEnumeration)
+#define uenum_openFromStringEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openFromStringEnumeration)
+#define uenum_openUCharStringsEnumeration U_ICU_ENTRY_POINT_RENAME(uenum_openUCharStringsEnumeration)
+#define uenum_reset U_ICU_ENTRY_POINT_RENAME(uenum_reset)
+#define uenum_unext U_ICU_ENTRY_POINT_RENAME(uenum_unext)
+#define uenum_unextDefault U_ICU_ENTRY_POINT_RENAME(uenum_unextDefault)
+#define ufieldpositer_close U_ICU_ENTRY_POINT_RENAME(ufieldpositer_close)
+#define ufieldpositer_next U_ICU_ENTRY_POINT_RENAME(ufieldpositer_next)
+#define ufieldpositer_open U_ICU_ENTRY_POINT_RENAME(ufieldpositer_open)
+#define ufile_getch U_ICU_ENTRY_POINT_RENAME(ufile_getch)
+#define ufile_getch32 U_ICU_ENTRY_POINT_RENAME(ufile_getch32)
+#define ufmt_close U_ICU_ENTRY_POINT_RENAME(ufmt_close)
+#define ufmt_getArrayItemByIndex U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayItemByIndex)
+#define ufmt_getArrayLength U_ICU_ENTRY_POINT_RENAME(ufmt_getArrayLength)
+#define ufmt_getDate U_ICU_ENTRY_POINT_RENAME(ufmt_getDate)
+#define ufmt_getDecNumChars U_ICU_ENTRY_POINT_RENAME(ufmt_getDecNumChars)
+#define ufmt_getDouble U_ICU_ENTRY_POINT_RENAME(ufmt_getDouble)
+#define ufmt_getInt64 U_ICU_ENTRY_POINT_RENAME(ufmt_getInt64)
+#define ufmt_getLong U_ICU_ENTRY_POINT_RENAME(ufmt_getLong)
+#define ufmt_getObject U_ICU_ENTRY_POINT_RENAME(ufmt_getObject)
+#define ufmt_getType U_ICU_ENTRY_POINT_RENAME(ufmt_getType)
+#define ufmt_getUChars U_ICU_ENTRY_POINT_RENAME(ufmt_getUChars)
+#define ufmt_isNumeric U_ICU_ENTRY_POINT_RENAME(ufmt_isNumeric)
+#define ufmt_open U_ICU_ENTRY_POINT_RENAME(ufmt_open)
+#define ufmtval_getString U_ICU_ENTRY_POINT_RENAME(ufmtval_getString)
+#define ufmtval_nextPosition U_ICU_ENTRY_POINT_RENAME(ufmtval_nextPosition)
+#define ugender_getInstance U_ICU_ENTRY_POINT_RENAME(ugender_getInstance)
+#define ugender_getListGender U_ICU_ENTRY_POINT_RENAME(ugender_getListGender)
+#define uhash_close U_ICU_ENTRY_POINT_RENAME(uhash_close)
+#define uhash_compareCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareCaselessUnicodeString)
+#define uhash_compareChars U_ICU_ENTRY_POINT_RENAME(uhash_compareChars)
+#define uhash_compareIChars U_ICU_ENTRY_POINT_RENAME(uhash_compareIChars)
+#define uhash_compareLong U_ICU_ENTRY_POINT_RENAME(uhash_compareLong)
+#define uhash_compareScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_compareScriptSet)
+#define uhash_compareUChars U_ICU_ENTRY_POINT_RENAME(uhash_compareUChars)
+#define uhash_compareUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_compareUnicodeString)
+#define uhash_containsKey U_ICU_ENTRY_POINT_RENAME(uhash_containsKey)
+#define uhash_count U_ICU_ENTRY_POINT_RENAME(uhash_count)
+#define uhash_deleteHashtable U_ICU_ENTRY_POINT_RENAME(uhash_deleteHashtable)
+#define uhash_deleteScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_deleteScriptSet)
+#define uhash_equals U_ICU_ENTRY_POINT_RENAME(uhash_equals)
+#define uhash_equalsScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_equalsScriptSet)
+#define uhash_find U_ICU_ENTRY_POINT_RENAME(uhash_find)
+#define uhash_get U_ICU_ENTRY_POINT_RENAME(uhash_get)
+#define uhash_geti U_ICU_ENTRY_POINT_RENAME(uhash_geti)
+#define uhash_getiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_getiAndFound)
+#define uhash_hashCaselessUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashCaselessUnicodeString)
+#define uhash_hashChars U_ICU_ENTRY_POINT_RENAME(uhash_hashChars)
+#define uhash_hashIChars U_ICU_ENTRY_POINT_RENAME(uhash_hashIChars)
+#define uhash_hashLong U_ICU_ENTRY_POINT_RENAME(uhash_hashLong)
+#define uhash_hashScriptSet U_ICU_ENTRY_POINT_RENAME(uhash_hashScriptSet)
+#define uhash_hashUChars U_ICU_ENTRY_POINT_RENAME(uhash_hashUChars)
+#define uhash_hashUnicodeString U_ICU_ENTRY_POINT_RENAME(uhash_hashUnicodeString)
+#define uhash_icontainsKey U_ICU_ENTRY_POINT_RENAME(uhash_icontainsKey)
+#define uhash_iget U_ICU_ENTRY_POINT_RENAME(uhash_iget)
+#define uhash_igeti U_ICU_ENTRY_POINT_RENAME(uhash_igeti)
+#define uhash_igetiAndFound U_ICU_ENTRY_POINT_RENAME(uhash_igetiAndFound)
+#define uhash_init U_ICU_ENTRY_POINT_RENAME(uhash_init)
+#define uhash_initSize U_ICU_ENTRY_POINT_RENAME(uhash_initSize)
+#define uhash_iput U_ICU_ENTRY_POINT_RENAME(uhash_iput)
+#define uhash_iputi U_ICU_ENTRY_POINT_RENAME(uhash_iputi)
+#define uhash_iputiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_iputiAllowZero)
+#define uhash_iremove U_ICU_ENTRY_POINT_RENAME(uhash_iremove)
+#define uhash_iremovei U_ICU_ENTRY_POINT_RENAME(uhash_iremovei)
+#define uhash_nextElement U_ICU_ENTRY_POINT_RENAME(uhash_nextElement)
+#define uhash_open U_ICU_ENTRY_POINT_RENAME(uhash_open)
+#define uhash_openSize U_ICU_ENTRY_POINT_RENAME(uhash_openSize)
+#define uhash_put U_ICU_ENTRY_POINT_RENAME(uhash_put)
+#define uhash_puti U_ICU_ENTRY_POINT_RENAME(uhash_puti)
+#define uhash_putiAllowZero U_ICU_ENTRY_POINT_RENAME(uhash_putiAllowZero)
+#define uhash_remove U_ICU_ENTRY_POINT_RENAME(uhash_remove)
+#define uhash_removeAll U_ICU_ENTRY_POINT_RENAME(uhash_removeAll)
+#define uhash_removeElement U_ICU_ENTRY_POINT_RENAME(uhash_removeElement)
+#define uhash_removei U_ICU_ENTRY_POINT_RENAME(uhash_removei)
+#define uhash_setKeyComparator U_ICU_ENTRY_POINT_RENAME(uhash_setKeyComparator)
+#define uhash_setKeyDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setKeyDeleter)
+#define uhash_setKeyHasher U_ICU_ENTRY_POINT_RENAME(uhash_setKeyHasher)
+#define uhash_setResizePolicy U_ICU_ENTRY_POINT_RENAME(uhash_setResizePolicy)
+#define uhash_setValueComparator U_ICU_ENTRY_POINT_RENAME(uhash_setValueComparator)
+#define uhash_setValueDeleter U_ICU_ENTRY_POINT_RENAME(uhash_setValueDeleter)
+#define uidna_IDNToASCII U_ICU_ENTRY_POINT_RENAME(uidna_IDNToASCII)
+#define uidna_IDNToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_IDNToUnicode)
+#define uidna_close U_ICU_ENTRY_POINT_RENAME(uidna_close)
+#define uidna_compare U_ICU_ENTRY_POINT_RENAME(uidna_compare)
+#define uidna_labelToASCII U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII)
+#define uidna_labelToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToASCII_UTF8)
+#define uidna_labelToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicode)
+#define uidna_labelToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_labelToUnicodeUTF8)
+#define uidna_nameToASCII U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII)
+#define uidna_nameToASCII_UTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToASCII_UTF8)
+#define uidna_nameToUnicode U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicode)
+#define uidna_nameToUnicodeUTF8 U_ICU_ENTRY_POINT_RENAME(uidna_nameToUnicodeUTF8)
+#define uidna_openUTS46 U_ICU_ENTRY_POINT_RENAME(uidna_openUTS46)
+#define uidna_toASCII U_ICU_ENTRY_POINT_RENAME(uidna_toASCII)
+#define uidna_toUnicode U_ICU_ENTRY_POINT_RENAME(uidna_toUnicode)
+#define uiter_current32 U_ICU_ENTRY_POINT_RENAME(uiter_current32)
+#define uiter_getState U_ICU_ENTRY_POINT_RENAME(uiter_getState)
+#define uiter_next32 U_ICU_ENTRY_POINT_RENAME(uiter_next32)
+#define uiter_previous32 U_ICU_ENTRY_POINT_RENAME(uiter_previous32)
+#define uiter_setCharacterIterator U_ICU_ENTRY_POINT_RENAME(uiter_setCharacterIterator)
+#define uiter_setReplaceable U_ICU_ENTRY_POINT_RENAME(uiter_setReplaceable)
+#define uiter_setState U_ICU_ENTRY_POINT_RENAME(uiter_setState)
+#define uiter_setString U_ICU_ENTRY_POINT_RENAME(uiter_setString)
+#define uiter_setUTF16BE U_ICU_ENTRY_POINT_RENAME(uiter_setUTF16BE)
+#define uiter_setUTF8 U_ICU_ENTRY_POINT_RENAME(uiter_setUTF8)
+#define uldn_close U_ICU_ENTRY_POINT_RENAME(uldn_close)
+#define uldn_getContext U_ICU_ENTRY_POINT_RENAME(uldn_getContext)
+#define uldn_getDialectHandling U_ICU_ENTRY_POINT_RENAME(uldn_getDialectHandling)
+#define uldn_getLocale U_ICU_ENTRY_POINT_RENAME(uldn_getLocale)
+#define uldn_keyDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyDisplayName)
+#define uldn_keyValueDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_keyValueDisplayName)
+#define uldn_languageDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_languageDisplayName)
+#define uldn_localeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_localeDisplayName)
+#define uldn_open U_ICU_ENTRY_POINT_RENAME(uldn_open)
+#define uldn_openForContext U_ICU_ENTRY_POINT_RENAME(uldn_openForContext)
+#define uldn_regionDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_regionDisplayName)
+#define uldn_scriptCodeDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptCodeDisplayName)
+#define uldn_scriptDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_scriptDisplayName)
+#define uldn_variantDisplayName U_ICU_ENTRY_POINT_RENAME(uldn_variantDisplayName)
+#define ulist_addItemBeginList U_ICU_ENTRY_POINT_RENAME(ulist_addItemBeginList)
+#define ulist_addItemEndList U_ICU_ENTRY_POINT_RENAME(ulist_addItemEndList)
+#define ulist_close_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_close_keyword_values_iterator)
+#define ulist_containsString U_ICU_ENTRY_POINT_RENAME(ulist_containsString)
+#define ulist_count_keyword_values U_ICU_ENTRY_POINT_RENAME(ulist_count_keyword_values)
+#define ulist_createEmptyList U_ICU_ENTRY_POINT_RENAME(ulist_createEmptyList)
+#define ulist_deleteList U_ICU_ENTRY_POINT_RENAME(ulist_deleteList)
+#define ulist_getListFromEnum U_ICU_ENTRY_POINT_RENAME(ulist_getListFromEnum)
+#define ulist_getListSize U_ICU_ENTRY_POINT_RENAME(ulist_getListSize)
+#define ulist_getNext U_ICU_ENTRY_POINT_RENAME(ulist_getNext)
+#define ulist_next_keyword_value U_ICU_ENTRY_POINT_RENAME(ulist_next_keyword_value)
+#define ulist_removeString U_ICU_ENTRY_POINT_RENAME(ulist_removeString)
+#define ulist_resetList U_ICU_ENTRY_POINT_RENAME(ulist_resetList)
+#define ulist_reset_keyword_values_iterator U_ICU_ENTRY_POINT_RENAME(ulist_reset_keyword_values_iterator)
+#define ulistfmt_close U_ICU_ENTRY_POINT_RENAME(ulistfmt_close)
+#define ulistfmt_closeResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_closeResult)
+#define ulistfmt_format U_ICU_ENTRY_POINT_RENAME(ulistfmt_format)
+#define ulistfmt_formatStringsToResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_formatStringsToResult)
+#define ulistfmt_open U_ICU_ENTRY_POINT_RENAME(ulistfmt_open)
+#define ulistfmt_openForType U_ICU_ENTRY_POINT_RENAME(ulistfmt_openForType)
+#define ulistfmt_openResult U_ICU_ENTRY_POINT_RENAME(ulistfmt_openResult)
+#define ulistfmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(ulistfmt_resultAsValue)
+#define uloc_acceptLanguage U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguage)
+#define uloc_acceptLanguageFromHTTP U_ICU_ENTRY_POINT_RENAME(uloc_acceptLanguageFromHTTP)
+#define uloc_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(uloc_addLikelySubtags)
+#define uloc_canonicalize U_ICU_ENTRY_POINT_RENAME(uloc_canonicalize)
+#define uloc_countAvailable U_ICU_ENTRY_POINT_RENAME(uloc_countAvailable)
+#define uloc_forLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_forLanguageTag)
+#define uloc_getAvailable U_ICU_ENTRY_POINT_RENAME(uloc_getAvailable)
+#define uloc_getBaseName U_ICU_ENTRY_POINT_RENAME(uloc_getBaseName)
+#define uloc_getCharacterOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getCharacterOrientation)
+#define uloc_getCountry U_ICU_ENTRY_POINT_RENAME(uloc_getCountry)
+#define uloc_getCurrentCountryID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentCountryID)
+#define uloc_getCurrentLanguageID U_ICU_ENTRY_POINT_RENAME(uloc_getCurrentLanguageID)
+#define uloc_getDefault U_ICU_ENTRY_POINT_RENAME(uloc_getDefault)
+#define uloc_getDisplayCountry U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayCountry)
+#define uloc_getDisplayKeyword U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeyword)
+#define uloc_getDisplayKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayKeywordValue)
+#define uloc_getDisplayLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayLanguage)
+#define uloc_getDisplayName U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayName)
+#define uloc_getDisplayScript U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayScript)
+#define uloc_getDisplayVariant U_ICU_ENTRY_POINT_RENAME(uloc_getDisplayVariant)
+#define uloc_getISO3Country U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Country)
+#define uloc_getISO3Language U_ICU_ENTRY_POINT_RENAME(uloc_getISO3Language)
+#define uloc_getISOCountries U_ICU_ENTRY_POINT_RENAME(uloc_getISOCountries)
+#define uloc_getISOLanguages U_ICU_ENTRY_POINT_RENAME(uloc_getISOLanguages)
+#define uloc_getKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_getKeywordValue)
+#define uloc_getLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLCID)
+#define uloc_getLanguage U_ICU_ENTRY_POINT_RENAME(uloc_getLanguage)
+#define uloc_getLineOrientation U_ICU_ENTRY_POINT_RENAME(uloc_getLineOrientation)
+#define uloc_getLocaleForLCID U_ICU_ENTRY_POINT_RENAME(uloc_getLocaleForLCID)
+#define uloc_getName U_ICU_ENTRY_POINT_RENAME(uloc_getName)
+#define uloc_getParent U_ICU_ENTRY_POINT_RENAME(uloc_getParent)
+#define uloc_getScript U_ICU_ENTRY_POINT_RENAME(uloc_getScript)
+#define uloc_getTableStringWithFallback U_ICU_ENTRY_POINT_RENAME(uloc_getTableStringWithFallback)
+#define uloc_getVariant U_ICU_ENTRY_POINT_RENAME(uloc_getVariant)
+#define uloc_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uloc_isRightToLeft)
+#define uloc_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(uloc_minimizeSubtags)
+#define uloc_openAvailableByType U_ICU_ENTRY_POINT_RENAME(uloc_openAvailableByType)
+#define uloc_openKeywordList U_ICU_ENTRY_POINT_RENAME(uloc_openKeywordList)
+#define uloc_openKeywords U_ICU_ENTRY_POINT_RENAME(uloc_openKeywords)
+#define uloc_setDefault U_ICU_ENTRY_POINT_RENAME(uloc_setDefault)
+#define uloc_setKeywordValue U_ICU_ENTRY_POINT_RENAME(uloc_setKeywordValue)
+#define uloc_toLanguageTag U_ICU_ENTRY_POINT_RENAME(uloc_toLanguageTag)
+#define uloc_toLegacyKey U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyKey)
+#define uloc_toLegacyType U_ICU_ENTRY_POINT_RENAME(uloc_toLegacyType)
+#define uloc_toUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleKey)
+#define uloc_toUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(uloc_toUnicodeLocaleType)
+#define ulocdata_close U_ICU_ENTRY_POINT_RENAME(ulocdata_close)
+#define ulocdata_getCLDRVersion U_ICU_ENTRY_POINT_RENAME(ulocdata_getCLDRVersion)
+#define ulocdata_getDelimiter U_ICU_ENTRY_POINT_RENAME(ulocdata_getDelimiter)
+#define ulocdata_getExemplarSet U_ICU_ENTRY_POINT_RENAME(ulocdata_getExemplarSet)
+#define ulocdata_getLocaleDisplayPattern U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleDisplayPattern)
+#define ulocdata_getLocaleSeparator U_ICU_ENTRY_POINT_RENAME(ulocdata_getLocaleSeparator)
+#define ulocdata_getMeasurementSystem U_ICU_ENTRY_POINT_RENAME(ulocdata_getMeasurementSystem)
+#define ulocdata_getNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_getNoSubstitute)
+#define ulocdata_getPaperSize U_ICU_ENTRY_POINT_RENAME(ulocdata_getPaperSize)
+#define ulocdata_open U_ICU_ENTRY_POINT_RENAME(ulocdata_open)
+#define ulocdata_setNoSubstitute U_ICU_ENTRY_POINT_RENAME(ulocdata_setNoSubstitute)
+#define ulocimp_addLikelySubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_addLikelySubtags)
+#define ulocimp_canonicalize U_ICU_ENTRY_POINT_RENAME(ulocimp_canonicalize)
+#define ulocimp_forLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_forLanguageTag)
+#define ulocimp_getBaseName U_ICU_ENTRY_POINT_RENAME(ulocimp_getBaseName)
+#define ulocimp_getCountry U_ICU_ENTRY_POINT_RENAME(ulocimp_getCountry)
+#define ulocimp_getKeywordValue U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywordValue)
+#define ulocimp_getKeywords U_ICU_ENTRY_POINT_RENAME(ulocimp_getKeywords)
+#define ulocimp_getKnownCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_getKnownCanonicalizedLocaleForTest)
+#define ulocimp_getLanguage U_ICU_ENTRY_POINT_RENAME(ulocimp_getLanguage)
+#define ulocimp_getName U_ICU_ENTRY_POINT_RENAME(ulocimp_getName)
+#define ulocimp_getRegionForSupplementalData U_ICU_ENTRY_POINT_RENAME(ulocimp_getRegionForSupplementalData)
+#define ulocimp_getScript U_ICU_ENTRY_POINT_RENAME(ulocimp_getScript)
+#define ulocimp_isCanonicalizedLocaleForTest U_ICU_ENTRY_POINT_RENAME(ulocimp_isCanonicalizedLocaleForTest)
+#define ulocimp_minimizeSubtags U_ICU_ENTRY_POINT_RENAME(ulocimp_minimizeSubtags)
+#define ulocimp_toBcpKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpKey)
+#define ulocimp_toBcpType U_ICU_ENTRY_POINT_RENAME(ulocimp_toBcpType)
+#define ulocimp_toLanguageTag U_ICU_ENTRY_POINT_RENAME(ulocimp_toLanguageTag)
+#define ulocimp_toLegacyKey U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyKey)
+#define ulocimp_toLegacyType U_ICU_ENTRY_POINT_RENAME(ulocimp_toLegacyType)
+#define ultag_getTKeyStart U_ICU_ENTRY_POINT_RENAME(ultag_getTKeyStart)
+#define ultag_isExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isExtensionSubtags)
+#define ultag_isLanguageSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isLanguageSubtag)
+#define ultag_isPrivateuseValueSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isPrivateuseValueSubtags)
+#define ultag_isRegionSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isRegionSubtag)
+#define ultag_isScriptSubtag U_ICU_ENTRY_POINT_RENAME(ultag_isScriptSubtag)
+#define ultag_isTransformedExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isTransformedExtensionSubtags)
+#define ultag_isUnicodeExtensionSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeExtensionSubtags)
+#define ultag_isUnicodeLocaleAttribute U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttribute)
+#define ultag_isUnicodeLocaleAttributes U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleAttributes)
+#define ultag_isUnicodeLocaleKey U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleKey)
+#define ultag_isUnicodeLocaleType U_ICU_ENTRY_POINT_RENAME(ultag_isUnicodeLocaleType)
+#define ultag_isVariantSubtags U_ICU_ENTRY_POINT_RENAME(ultag_isVariantSubtags)
+#define umeas_getPrefixBase U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixBase)
+#define umeas_getPrefixPower U_ICU_ENTRY_POINT_RENAME(umeas_getPrefixPower)
+#define umsg_applyPattern U_ICU_ENTRY_POINT_RENAME(umsg_applyPattern)
+#define umsg_autoQuoteApostrophe U_ICU_ENTRY_POINT_RENAME(umsg_autoQuoteApostrophe)
+#define umsg_clone U_ICU_ENTRY_POINT_RENAME(umsg_clone)
+#define umsg_close U_ICU_ENTRY_POINT_RENAME(umsg_close)
+#define umsg_format U_ICU_ENTRY_POINT_RENAME(umsg_format)
+#define umsg_getLocale U_ICU_ENTRY_POINT_RENAME(umsg_getLocale)
+#define umsg_open U_ICU_ENTRY_POINT_RENAME(umsg_open)
+#define umsg_parse U_ICU_ENTRY_POINT_RENAME(umsg_parse)
+#define umsg_setLocale U_ICU_ENTRY_POINT_RENAME(umsg_setLocale)
+#define umsg_toPattern U_ICU_ENTRY_POINT_RENAME(umsg_toPattern)
+#define umsg_vformat U_ICU_ENTRY_POINT_RENAME(umsg_vformat)
+#define umsg_vparse U_ICU_ENTRY_POINT_RENAME(umsg_vparse)
+#define umtx_lock U_ICU_ENTRY_POINT_RENAME(umtx_lock)
+#define umtx_unlock U_ICU_ENTRY_POINT_RENAME(umtx_unlock)
+#define umutablecptrie_buildImmutable U_ICU_ENTRY_POINT_RENAME(umutablecptrie_buildImmutable)
+#define umutablecptrie_clone U_ICU_ENTRY_POINT_RENAME(umutablecptrie_clone)
+#define umutablecptrie_close U_ICU_ENTRY_POINT_RENAME(umutablecptrie_close)
+#define umutablecptrie_fromUCPMap U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPMap)
+#define umutablecptrie_fromUCPTrie U_ICU_ENTRY_POINT_RENAME(umutablecptrie_fromUCPTrie)
+#define umutablecptrie_get U_ICU_ENTRY_POINT_RENAME(umutablecptrie_get)
+#define umutablecptrie_getRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_getRange)
+#define umutablecptrie_open U_ICU_ENTRY_POINT_RENAME(umutablecptrie_open)
+#define umutablecptrie_set U_ICU_ENTRY_POINT_RENAME(umutablecptrie_set)
+#define umutablecptrie_setRange U_ICU_ENTRY_POINT_RENAME(umutablecptrie_setRange)
+#define uniset_getUnicode32Instance U_ICU_ENTRY_POINT_RENAME(uniset_getUnicode32Instance)
+#define unorm2_append U_ICU_ENTRY_POINT_RENAME(unorm2_append)
+#define unorm2_close U_ICU_ENTRY_POINT_RENAME(unorm2_close)
+#define unorm2_composePair U_ICU_ENTRY_POINT_RENAME(unorm2_composePair)
+#define unorm2_getCombiningClass U_ICU_ENTRY_POINT_RENAME(unorm2_getCombiningClass)
+#define unorm2_getDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getDecomposition)
+#define unorm2_getInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getInstance)
+#define unorm2_getNFCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFCInstance)
+#define unorm2_getNFDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFDInstance)
+#define unorm2_getNFKCCasefoldInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCCasefoldInstance)
+#define unorm2_getNFKCInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKCInstance)
+#define unorm2_getNFKDInstance U_ICU_ENTRY_POINT_RENAME(unorm2_getNFKDInstance)
+#define unorm2_getRawDecomposition U_ICU_ENTRY_POINT_RENAME(unorm2_getRawDecomposition)
+#define unorm2_hasBoundaryAfter U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryAfter)
+#define unorm2_hasBoundaryBefore U_ICU_ENTRY_POINT_RENAME(unorm2_hasBoundaryBefore)
+#define unorm2_isInert U_ICU_ENTRY_POINT_RENAME(unorm2_isInert)
+#define unorm2_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm2_isNormalized)
+#define unorm2_normalize U_ICU_ENTRY_POINT_RENAME(unorm2_normalize)
+#define unorm2_normalizeSecondAndAppend U_ICU_ENTRY_POINT_RENAME(unorm2_normalizeSecondAndAppend)
+#define unorm2_openFiltered U_ICU_ENTRY_POINT_RENAME(unorm2_openFiltered)
+#define unorm2_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm2_quickCheck)
+#define unorm2_spanQuickCheckYes U_ICU_ENTRY_POINT_RENAME(unorm2_spanQuickCheckYes)
+#define unorm2_swap U_ICU_ENTRY_POINT_RENAME(unorm2_swap)
+#define unorm_compare U_ICU_ENTRY_POINT_RENAME(unorm_compare)
+#define unorm_concatenate U_ICU_ENTRY_POINT_RENAME(unorm_concatenate)
+#define unorm_getFCD16 U_ICU_ENTRY_POINT_RENAME(unorm_getFCD16)
+#define unorm_getQuickCheck U_ICU_ENTRY_POINT_RENAME(unorm_getQuickCheck)
+#define unorm_isNormalized U_ICU_ENTRY_POINT_RENAME(unorm_isNormalized)
+#define unorm_isNormalizedWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_isNormalizedWithOptions)
+#define unorm_next U_ICU_ENTRY_POINT_RENAME(unorm_next)
+#define unorm_normalize U_ICU_ENTRY_POINT_RENAME(unorm_normalize)
+#define unorm_previous U_ICU_ENTRY_POINT_RENAME(unorm_previous)
+#define unorm_quickCheck U_ICU_ENTRY_POINT_RENAME(unorm_quickCheck)
+#define unorm_quickCheckWithOptions U_ICU_ENTRY_POINT_RENAME(unorm_quickCheckWithOptions)
+#define unum_applyPattern U_ICU_ENTRY_POINT_RENAME(unum_applyPattern)
+#define unum_clone U_ICU_ENTRY_POINT_RENAME(unum_clone)
+#define unum_close U_ICU_ENTRY_POINT_RENAME(unum_close)
+#define unum_countAvailable U_ICU_ENTRY_POINT_RENAME(unum_countAvailable)
+#define unum_format U_ICU_ENTRY_POINT_RENAME(unum_format)
+#define unum_formatDecimal U_ICU_ENTRY_POINT_RENAME(unum_formatDecimal)
+#define unum_formatDouble U_ICU_ENTRY_POINT_RENAME(unum_formatDouble)
+#define unum_formatDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleCurrency)
+#define unum_formatDoubleForFields U_ICU_ENTRY_POINT_RENAME(unum_formatDoubleForFields)
+#define unum_formatInt64 U_ICU_ENTRY_POINT_RENAME(unum_formatInt64)
+#define unum_formatUFormattable U_ICU_ENTRY_POINT_RENAME(unum_formatUFormattable)
+#define unum_getAttribute U_ICU_ENTRY_POINT_RENAME(unum_getAttribute)
+#define unum_getAvailable U_ICU_ENTRY_POINT_RENAME(unum_getAvailable)
+#define unum_getContext U_ICU_ENTRY_POINT_RENAME(unum_getContext)
+#define unum_getDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_getDoubleAttribute)
+#define unum_getLocaleByType U_ICU_ENTRY_POINT_RENAME(unum_getLocaleByType)
+#define unum_getSymbol U_ICU_ENTRY_POINT_RENAME(unum_getSymbol)
+#define unum_getTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_getTextAttribute)
+#define unum_hasAttribute U_ICU_ENTRY_POINT_RENAME(unum_hasAttribute)
+#define unum_open U_ICU_ENTRY_POINT_RENAME(unum_open)
+#define unum_parse U_ICU_ENTRY_POINT_RENAME(unum_parse)
+#define unum_parseDecimal U_ICU_ENTRY_POINT_RENAME(unum_parseDecimal)
+#define unum_parseDouble U_ICU_ENTRY_POINT_RENAME(unum_parseDouble)
+#define unum_parseDoubleCurrency U_ICU_ENTRY_POINT_RENAME(unum_parseDoubleCurrency)
+#define unum_parseInt64 U_ICU_ENTRY_POINT_RENAME(unum_parseInt64)
+#define unum_parseToUFormattable U_ICU_ENTRY_POINT_RENAME(unum_parseToUFormattable)
+#define unum_setAttribute U_ICU_ENTRY_POINT_RENAME(unum_setAttribute)
+#define unum_setContext U_ICU_ENTRY_POINT_RENAME(unum_setContext)
+#define unum_setDoubleAttribute U_ICU_ENTRY_POINT_RENAME(unum_setDoubleAttribute)
+#define unum_setSymbol U_ICU_ENTRY_POINT_RENAME(unum_setSymbol)
+#define unum_setTextAttribute U_ICU_ENTRY_POINT_RENAME(unum_setTextAttribute)
+#define unum_toPattern U_ICU_ENTRY_POINT_RENAME(unum_toPattern)
+#define unumf_close U_ICU_ENTRY_POINT_RENAME(unumf_close)
+#define unumf_closeResult U_ICU_ENTRY_POINT_RENAME(unumf_closeResult)
+#define unumf_formatDecimal U_ICU_ENTRY_POINT_RENAME(unumf_formatDecimal)
+#define unumf_formatDouble U_ICU_ENTRY_POINT_RENAME(unumf_formatDouble)
+#define unumf_formatInt U_ICU_ENTRY_POINT_RENAME(unumf_formatInt)
+#define unumf_openForSkeletonAndLocale U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocale)
+#define unumf_openForSkeletonAndLocaleWithError U_ICU_ENTRY_POINT_RENAME(unumf_openForSkeletonAndLocaleWithError)
+#define unumf_openResult U_ICU_ENTRY_POINT_RENAME(unumf_openResult)
+#define unumf_resultAsValue U_ICU_ENTRY_POINT_RENAME(unumf_resultAsValue)
+#define unumf_resultGetAllFieldPositions U_ICU_ENTRY_POINT_RENAME(unumf_resultGetAllFieldPositions)
+#define unumf_resultNextFieldPosition U_ICU_ENTRY_POINT_RENAME(unumf_resultNextFieldPosition)
+#define unumf_resultToDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumf_resultToDecimalNumber)
+#define unumf_resultToString U_ICU_ENTRY_POINT_RENAME(unumf_resultToString)
+#define unumrf_close U_ICU_ENTRY_POINT_RENAME(unumrf_close)
+#define unumrf_closeResult U_ICU_ENTRY_POINT_RENAME(unumrf_closeResult)
+#define unumrf_formatDecimalRange U_ICU_ENTRY_POINT_RENAME(unumrf_formatDecimalRange)
+#define unumrf_formatDoubleRange U_ICU_ENTRY_POINT_RENAME(unumrf_formatDoubleRange)
+#define unumrf_openForSkeletonWithCollapseAndIdentityFallback U_ICU_ENTRY_POINT_RENAME(unumrf_openForSkeletonWithCollapseAndIdentityFallback)
+#define unumrf_openResult U_ICU_ENTRY_POINT_RENAME(unumrf_openResult)
+#define unumrf_resultAsValue U_ICU_ENTRY_POINT_RENAME(unumrf_resultAsValue)
+#define unumrf_resultGetFirstDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetFirstDecimalNumber)
+#define unumrf_resultGetIdentityResult U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetIdentityResult)
+#define unumrf_resultGetSecondDecimalNumber U_ICU_ENTRY_POINT_RENAME(unumrf_resultGetSecondDecimalNumber)
+#define unumsys_close U_ICU_ENTRY_POINT_RENAME(unumsys_close)
+#define unumsys_getDescription U_ICU_ENTRY_POINT_RENAME(unumsys_getDescription)
+#define unumsys_getName U_ICU_ENTRY_POINT_RENAME(unumsys_getName)
+#define unumsys_getRadix U_ICU_ENTRY_POINT_RENAME(unumsys_getRadix)
+#define unumsys_isAlgorithmic U_ICU_ENTRY_POINT_RENAME(unumsys_isAlgorithmic)
+#define unumsys_open U_ICU_ENTRY_POINT_RENAME(unumsys_open)
+#define unumsys_openAvailableNames U_ICU_ENTRY_POINT_RENAME(unumsys_openAvailableNames)
+#define unumsys_openByName U_ICU_ENTRY_POINT_RENAME(unumsys_openByName)
+#define uplrules_close U_ICU_ENTRY_POINT_RENAME(uplrules_close)
+#define uplrules_getKeywords U_ICU_ENTRY_POINT_RENAME(uplrules_getKeywords)
+#define uplrules_open U_ICU_ENTRY_POINT_RENAME(uplrules_open)
+#define uplrules_openForType U_ICU_ENTRY_POINT_RENAME(uplrules_openForType)
+#define uplrules_select U_ICU_ENTRY_POINT_RENAME(uplrules_select)
+#define uplrules_selectForRange U_ICU_ENTRY_POINT_RENAME(uplrules_selectForRange)
+#define uplrules_selectFormatted U_ICU_ENTRY_POINT_RENAME(uplrules_selectFormatted)
+#define uplrules_selectWithFormat U_ICU_ENTRY_POINT_RENAME(uplrules_selectWithFormat)
+#define uplug_closeLibrary U_ICU_ENTRY_POINT_RENAME(uplug_closeLibrary)
+#define uplug_findLibrary U_ICU_ENTRY_POINT_RENAME(uplug_findLibrary)
+#define uplug_getConfiguration U_ICU_ENTRY_POINT_RENAME(uplug_getConfiguration)
+#define uplug_getContext U_ICU_ENTRY_POINT_RENAME(uplug_getContext)
+#define uplug_getCurrentLevel U_ICU_ENTRY_POINT_RENAME(uplug_getCurrentLevel)
+#define uplug_getLibrary U_ICU_ENTRY_POINT_RENAME(uplug_getLibrary)
+#define uplug_getLibraryName U_ICU_ENTRY_POINT_RENAME(uplug_getLibraryName)
+#define uplug_getPlugInternal U_ICU_ENTRY_POINT_RENAME(uplug_getPlugInternal)
+#define uplug_getPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLevel)
+#define uplug_getPlugLoadStatus U_ICU_ENTRY_POINT_RENAME(uplug_getPlugLoadStatus)
+#define uplug_getPlugName U_ICU_ENTRY_POINT_RENAME(uplug_getPlugName)
+#define uplug_getPluginFile U_ICU_ENTRY_POINT_RENAME(uplug_getPluginFile)
+#define uplug_getSymbolName U_ICU_ENTRY_POINT_RENAME(uplug_getSymbolName)
+#define uplug_init U_ICU_ENTRY_POINT_RENAME(uplug_init)
+#define uplug_loadPlugFromEntrypoint U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromEntrypoint)
+#define uplug_loadPlugFromLibrary U_ICU_ENTRY_POINT_RENAME(uplug_loadPlugFromLibrary)
+#define uplug_nextPlug U_ICU_ENTRY_POINT_RENAME(uplug_nextPlug)
+#define uplug_openLibrary U_ICU_ENTRY_POINT_RENAME(uplug_openLibrary)
+#define uplug_removePlug U_ICU_ENTRY_POINT_RENAME(uplug_removePlug)
+#define uplug_setContext U_ICU_ENTRY_POINT_RENAME(uplug_setContext)
+#define uplug_setPlugLevel U_ICU_ENTRY_POINT_RENAME(uplug_setPlugLevel)
+#define uplug_setPlugName U_ICU_ENTRY_POINT_RENAME(uplug_setPlugName)
+#define uplug_setPlugNoUnload U_ICU_ENTRY_POINT_RENAME(uplug_setPlugNoUnload)
+#define uprops_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(uprops_addPropertyStarts)
+#define uprops_getSource U_ICU_ENTRY_POINT_RENAME(uprops_getSource)
+#define upropsvec_addPropertyStarts U_ICU_ENTRY_POINT_RENAME(upropsvec_addPropertyStarts)
+#define uprv_add32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_add32_overflow)
+#define uprv_aestrncpy U_ICU_ENTRY_POINT_RENAME(uprv_aestrncpy)
+#define uprv_asciiFromEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_asciiFromEbcdic)
+#define uprv_asciitolower U_ICU_ENTRY_POINT_RENAME(uprv_asciitolower)
+#define uprv_calloc U_ICU_ENTRY_POINT_RENAME(uprv_calloc)
+#define uprv_ceil U_ICU_ENTRY_POINT_RENAME(uprv_ceil)
+#define uprv_compareASCIIPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareASCIIPropertyNames)
+#define uprv_compareEBCDICPropertyNames U_ICU_ENTRY_POINT_RENAME(uprv_compareEBCDICPropertyNames)
+#define uprv_compareInvAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvAscii)
+#define uprv_compareInvEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdic)
+#define uprv_compareInvEbcdicAsAscii U_ICU_ENTRY_POINT_RENAME(uprv_compareInvEbcdicAsAscii)
+#define uprv_convertToLCID U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCID)
+#define uprv_convertToLCIDPlatform U_ICU_ENTRY_POINT_RENAME(uprv_convertToLCIDPlatform)
+#define uprv_convertToPosix U_ICU_ENTRY_POINT_RENAME(uprv_convertToPosix)
+#define uprv_copyAscii U_ICU_ENTRY_POINT_RENAME(uprv_copyAscii)
+#define uprv_copyEbcdic U_ICU_ENTRY_POINT_RENAME(uprv_copyEbcdic)
+#define uprv_decContextClearStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextClearStatus)
+#define uprv_decContextDefault U_ICU_ENTRY_POINT_RENAME(uprv_decContextDefault)
+#define uprv_decContextGetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetRounding)
+#define uprv_decContextGetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextGetStatus)
+#define uprv_decContextRestoreStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextRestoreStatus)
+#define uprv_decContextSaveStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSaveStatus)
+#define uprv_decContextSetRounding U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetRounding)
+#define uprv_decContextSetStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatus)
+#define uprv_decContextSetStatusFromString U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromString)
+#define uprv_decContextSetStatusFromStringQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusFromStringQuiet)
+#define uprv_decContextSetStatusQuiet U_ICU_ENTRY_POINT_RENAME(uprv_decContextSetStatusQuiet)
+#define uprv_decContextStatusToString U_ICU_ENTRY_POINT_RENAME(uprv_decContextStatusToString)
+#define uprv_decContextTestSavedStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestSavedStatus)
+#define uprv_decContextTestStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextTestStatus)
+#define uprv_decContextZeroStatus U_ICU_ENTRY_POINT_RENAME(uprv_decContextZeroStatus)
+#define uprv_decNumberAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAbs)
+#define uprv_decNumberAdd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAdd)
+#define uprv_decNumberAnd U_ICU_ENTRY_POINT_RENAME(uprv_decNumberAnd)
+#define uprv_decNumberClassToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberClassToString)
+#define uprv_decNumberCompare U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompare)
+#define uprv_decNumberCompareSignal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareSignal)
+#define uprv_decNumberCompareTotal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotal)
+#define uprv_decNumberCompareTotalMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCompareTotalMag)
+#define uprv_decNumberCopy U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopy)
+#define uprv_decNumberCopyAbs U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyAbs)
+#define uprv_decNumberCopyNegate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopyNegate)
+#define uprv_decNumberCopySign U_ICU_ENTRY_POINT_RENAME(uprv_decNumberCopySign)
+#define uprv_decNumberDivide U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivide)
+#define uprv_decNumberDivideInteger U_ICU_ENTRY_POINT_RENAME(uprv_decNumberDivideInteger)
+#define uprv_decNumberExp U_ICU_ENTRY_POINT_RENAME(uprv_decNumberExp)
+#define uprv_decNumberFMA U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFMA)
+#define uprv_decNumberFromInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromInt32)
+#define uprv_decNumberFromString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromString)
+#define uprv_decNumberFromUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberFromUInt32)
+#define uprv_decNumberGetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberGetBCD)
+#define uprv_decNumberInvert U_ICU_ENTRY_POINT_RENAME(uprv_decNumberInvert)
+#define uprv_decNumberIsNormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsNormal)
+#define uprv_decNumberIsSubnormal U_ICU_ENTRY_POINT_RENAME(uprv_decNumberIsSubnormal)
+#define uprv_decNumberLn U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLn)
+#define uprv_decNumberLog10 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLog10)
+#define uprv_decNumberLogB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberLogB)
+#define uprv_decNumberMax U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMax)
+#define uprv_decNumberMaxMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMaxMag)
+#define uprv_decNumberMin U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMin)
+#define uprv_decNumberMinMag U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinMag)
+#define uprv_decNumberMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMinus)
+#define uprv_decNumberMultiply U_ICU_ENTRY_POINT_RENAME(uprv_decNumberMultiply)
+#define uprv_decNumberNextMinus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextMinus)
+#define uprv_decNumberNextPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextPlus)
+#define uprv_decNumberNextToward U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNextToward)
+#define uprv_decNumberNormalize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberNormalize)
+#define uprv_decNumberOr U_ICU_ENTRY_POINT_RENAME(uprv_decNumberOr)
+#define uprv_decNumberPlus U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPlus)
+#define uprv_decNumberPower U_ICU_ENTRY_POINT_RENAME(uprv_decNumberPower)
+#define uprv_decNumberQuantize U_ICU_ENTRY_POINT_RENAME(uprv_decNumberQuantize)
+#define uprv_decNumberReduce U_ICU_ENTRY_POINT_RENAME(uprv_decNumberReduce)
+#define uprv_decNumberRemainder U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainder)
+#define uprv_decNumberRemainderNear U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRemainderNear)
+#define uprv_decNumberRescale U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRescale)
+#define uprv_decNumberRotate U_ICU_ENTRY_POINT_RENAME(uprv_decNumberRotate)
+#define uprv_decNumberSameQuantum U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSameQuantum)
+#define uprv_decNumberScaleB U_ICU_ENTRY_POINT_RENAME(uprv_decNumberScaleB)
+#define uprv_decNumberSetBCD U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSetBCD)
+#define uprv_decNumberShift U_ICU_ENTRY_POINT_RENAME(uprv_decNumberShift)
+#define uprv_decNumberSquareRoot U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSquareRoot)
+#define uprv_decNumberSubtract U_ICU_ENTRY_POINT_RENAME(uprv_decNumberSubtract)
+#define uprv_decNumberToEngString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToEngString)
+#define uprv_decNumberToInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToInt32)
+#define uprv_decNumberToIntegralExact U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralExact)
+#define uprv_decNumberToIntegralValue U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToIntegralValue)
+#define uprv_decNumberToString U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToString)
+#define uprv_decNumberToUInt32 U_ICU_ENTRY_POINT_RENAME(uprv_decNumberToUInt32)
+#define uprv_decNumberTrim U_ICU_ENTRY_POINT_RENAME(uprv_decNumberTrim)
+#define uprv_decNumberVersion U_ICU_ENTRY_POINT_RENAME(uprv_decNumberVersion)
+#define uprv_decNumberXor U_ICU_ENTRY_POINT_RENAME(uprv_decNumberXor)
+#define uprv_decNumberZero U_ICU_ENTRY_POINT_RENAME(uprv_decNumberZero)
+#define uprv_deleteConditionalCE32 U_ICU_ENTRY_POINT_RENAME(uprv_deleteConditionalCE32)
+#define uprv_deleteUObject U_ICU_ENTRY_POINT_RENAME(uprv_deleteUObject)
+#define uprv_dl_close U_ICU_ENTRY_POINT_RENAME(uprv_dl_close)
+#define uprv_dl_open U_ICU_ENTRY_POINT_RENAME(uprv_dl_open)
+#define uprv_dlsym_func U_ICU_ENTRY_POINT_RENAME(uprv_dlsym_func)
+#define uprv_eastrncpy U_ICU_ENTRY_POINT_RENAME(uprv_eastrncpy)
+#define uprv_ebcdicFromAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicFromAscii)
+#define uprv_ebcdicToAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToAscii)
+#define uprv_ebcdicToLowercaseAscii U_ICU_ENTRY_POINT_RENAME(uprv_ebcdicToLowercaseAscii)
+#define uprv_ebcdictolower U_ICU_ENTRY_POINT_RENAME(uprv_ebcdictolower)
+#define uprv_fabs U_ICU_ENTRY_POINT_RENAME(uprv_fabs)
+#define uprv_floor U_ICU_ENTRY_POINT_RENAME(uprv_floor)
+#define uprv_fmax U_ICU_ENTRY_POINT_RENAME(uprv_fmax)
+#define uprv_fmin U_ICU_ENTRY_POINT_RENAME(uprv_fmin)
+#define uprv_fmod U_ICU_ENTRY_POINT_RENAME(uprv_fmod)
+#define uprv_free U_ICU_ENTRY_POINT_RENAME(uprv_free)
+#define uprv_getCharNameCharacters U_ICU_ENTRY_POINT_RENAME(uprv_getCharNameCharacters)
+#define uprv_getDefaultLocaleID U_ICU_ENTRY_POINT_RENAME(uprv_getDefaultLocaleID)
+#define uprv_getInfinity U_ICU_ENTRY_POINT_RENAME(uprv_getInfinity)
+#define uprv_getMaxCharNameLength U_ICU_ENTRY_POINT_RENAME(uprv_getMaxCharNameLength)
+#define uprv_getMaxValues U_ICU_ENTRY_POINT_RENAME(uprv_getMaxValues)
+#define uprv_getNaN U_ICU_ENTRY_POINT_RENAME(uprv_getNaN)
+#define uprv_getRawUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getRawUTCtime)
+#define uprv_getStaticCurrencyName U_ICU_ENTRY_POINT_RENAME(uprv_getStaticCurrencyName)
+#define uprv_getUTCtime U_ICU_ENTRY_POINT_RENAME(uprv_getUTCtime)
+#define uprv_int32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_int32Comparator)
+#define uprv_isASCIILetter U_ICU_ENTRY_POINT_RENAME(uprv_isASCIILetter)
+#define uprv_isEbcdicAtSign U_ICU_ENTRY_POINT_RENAME(uprv_isEbcdicAtSign)
+#define uprv_isInfinite U_ICU_ENTRY_POINT_RENAME(uprv_isInfinite)
+#define uprv_isInvariantString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantString)
+#define uprv_isInvariantUString U_ICU_ENTRY_POINT_RENAME(uprv_isInvariantUString)
+#define uprv_isNaN U_ICU_ENTRY_POINT_RENAME(uprv_isNaN)
+#define uprv_isNegativeInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isNegativeInfinity)
+#define uprv_isPositiveInfinity U_ICU_ENTRY_POINT_RENAME(uprv_isPositiveInfinity)
+#define uprv_itou U_ICU_ENTRY_POINT_RENAME(uprv_itou)
+#define uprv_log U_ICU_ENTRY_POINT_RENAME(uprv_log)
+#define uprv_malloc U_ICU_ENTRY_POINT_RENAME(uprv_malloc)
+#define uprv_mapFile U_ICU_ENTRY_POINT_RENAME(uprv_mapFile)
+#define uprv_max U_ICU_ENTRY_POINT_RENAME(uprv_max)
+#define uprv_maxMantissa U_ICU_ENTRY_POINT_RENAME(uprv_maxMantissa)
+#define uprv_maximumPtr U_ICU_ENTRY_POINT_RENAME(uprv_maximumPtr)
+#define uprv_min U_ICU_ENTRY_POINT_RENAME(uprv_min)
+#define uprv_modf U_ICU_ENTRY_POINT_RENAME(uprv_modf)
+#define uprv_mul32_overflow U_ICU_ENTRY_POINT_RENAME(uprv_mul32_overflow)
+#define uprv_parseCurrency U_ICU_ENTRY_POINT_RENAME(uprv_parseCurrency)
+#define uprv_pathIsAbsolute U_ICU_ENTRY_POINT_RENAME(uprv_pathIsAbsolute)
+#define uprv_pow U_ICU_ENTRY_POINT_RENAME(uprv_pow)
+#define uprv_pow10 U_ICU_ENTRY_POINT_RENAME(uprv_pow10)
+#define uprv_realloc U_ICU_ENTRY_POINT_RENAME(uprv_realloc)
+#define uprv_round U_ICU_ENTRY_POINT_RENAME(uprv_round)
+#define uprv_sortArray U_ICU_ENTRY_POINT_RENAME(uprv_sortArray)
+#define uprv_stableBinarySearch U_ICU_ENTRY_POINT_RENAME(uprv_stableBinarySearch)
+#define uprv_strCompare U_ICU_ENTRY_POINT_RENAME(uprv_strCompare)
+#define uprv_strdup U_ICU_ENTRY_POINT_RENAME(uprv_strdup)
+#define uprv_stricmp U_ICU_ENTRY_POINT_RENAME(uprv_stricmp)
+#define uprv_strndup U_ICU_ENTRY_POINT_RENAME(uprv_strndup)
+#define uprv_strnicmp U_ICU_ENTRY_POINT_RENAME(uprv_strnicmp)
+#define uprv_syntaxError U_ICU_ENTRY_POINT_RENAME(uprv_syntaxError)
+#define uprv_timezone U_ICU_ENTRY_POINT_RENAME(uprv_timezone)
+#define uprv_toupper U_ICU_ENTRY_POINT_RENAME(uprv_toupper)
+#define uprv_trunc U_ICU_ENTRY_POINT_RENAME(uprv_trunc)
+#define uprv_tzname U_ICU_ENTRY_POINT_RENAME(uprv_tzname)
+#define uprv_tzname_clear_cache U_ICU_ENTRY_POINT_RENAME(uprv_tzname_clear_cache)
+#define uprv_tzset U_ICU_ENTRY_POINT_RENAME(uprv_tzset)
+#define uprv_uint16Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint16Comparator)
+#define uprv_uint32Comparator U_ICU_ENTRY_POINT_RENAME(uprv_uint32Comparator)
+#define uprv_unmapFile U_ICU_ENTRY_POINT_RENAME(uprv_unmapFile)
+#define upvec_cloneArray U_ICU_ENTRY_POINT_RENAME(upvec_cloneArray)
+#define upvec_close U_ICU_ENTRY_POINT_RENAME(upvec_close)
+#define upvec_compact U_ICU_ENTRY_POINT_RENAME(upvec_compact)
+#define upvec_compactToUTrie2Handler U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2Handler)
+#define upvec_compactToUTrie2WithRowIndexes U_ICU_ENTRY_POINT_RENAME(upvec_compactToUTrie2WithRowIndexes)
+#define upvec_getArray U_ICU_ENTRY_POINT_RENAME(upvec_getArray)
+#define upvec_getRow U_ICU_ENTRY_POINT_RENAME(upvec_getRow)
+#define upvec_getValue U_ICU_ENTRY_POINT_RENAME(upvec_getValue)
+#define upvec_open U_ICU_ENTRY_POINT_RENAME(upvec_open)
+#define upvec_setValue U_ICU_ENTRY_POINT_RENAME(upvec_setValue)
+#define uregex_appendReplacement U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacement)
+#define uregex_appendReplacementUText U_ICU_ENTRY_POINT_RENAME(uregex_appendReplacementUText)
+#define uregex_appendTail U_ICU_ENTRY_POINT_RENAME(uregex_appendTail)
+#define uregex_appendTailUText U_ICU_ENTRY_POINT_RENAME(uregex_appendTailUText)
+#define uregex_clone U_ICU_ENTRY_POINT_RENAME(uregex_clone)
+#define uregex_close U_ICU_ENTRY_POINT_RENAME(uregex_close)
+#define uregex_end U_ICU_ENTRY_POINT_RENAME(uregex_end)
+#define uregex_end64 U_ICU_ENTRY_POINT_RENAME(uregex_end64)
+#define uregex_find U_ICU_ENTRY_POINT_RENAME(uregex_find)
+#define uregex_find64 U_ICU_ENTRY_POINT_RENAME(uregex_find64)
+#define uregex_findNext U_ICU_ENTRY_POINT_RENAME(uregex_findNext)
+#define uregex_flags U_ICU_ENTRY_POINT_RENAME(uregex_flags)
+#define uregex_getFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_getFindProgressCallback)
+#define uregex_getMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_getMatchCallback)
+#define uregex_getStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_getStackLimit)
+#define uregex_getText U_ICU_ENTRY_POINT_RENAME(uregex_getText)
+#define uregex_getTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_getTimeLimit)
+#define uregex_getUText U_ICU_ENTRY_POINT_RENAME(uregex_getUText)
+#define uregex_group U_ICU_ENTRY_POINT_RENAME(uregex_group)
+#define uregex_groupCount U_ICU_ENTRY_POINT_RENAME(uregex_groupCount)
+#define uregex_groupNumberFromCName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromCName)
+#define uregex_groupNumberFromName U_ICU_ENTRY_POINT_RENAME(uregex_groupNumberFromName)
+#define uregex_groupUText U_ICU_ENTRY_POINT_RENAME(uregex_groupUText)
+#define uregex_hasAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasAnchoringBounds)
+#define uregex_hasTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_hasTransparentBounds)
+#define uregex_hitEnd U_ICU_ENTRY_POINT_RENAME(uregex_hitEnd)
+#define uregex_lookingAt U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt)
+#define uregex_lookingAt64 U_ICU_ENTRY_POINT_RENAME(uregex_lookingAt64)
+#define uregex_matches U_ICU_ENTRY_POINT_RENAME(uregex_matches)
+#define uregex_matches64 U_ICU_ENTRY_POINT_RENAME(uregex_matches64)
+#define uregex_open U_ICU_ENTRY_POINT_RENAME(uregex_open)
+#define uregex_openC U_ICU_ENTRY_POINT_RENAME(uregex_openC)
+#define uregex_openUText U_ICU_ENTRY_POINT_RENAME(uregex_openUText)
+#define uregex_pattern U_ICU_ENTRY_POINT_RENAME(uregex_pattern)
+#define uregex_patternUText U_ICU_ENTRY_POINT_RENAME(uregex_patternUText)
+#define uregex_refreshUText U_ICU_ENTRY_POINT_RENAME(uregex_refreshUText)
+#define uregex_regionEnd U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd)
+#define uregex_regionEnd64 U_ICU_ENTRY_POINT_RENAME(uregex_regionEnd64)
+#define uregex_regionStart U_ICU_ENTRY_POINT_RENAME(uregex_regionStart)
+#define uregex_regionStart64 U_ICU_ENTRY_POINT_RENAME(uregex_regionStart64)
+#define uregex_replaceAll U_ICU_ENTRY_POINT_RENAME(uregex_replaceAll)
+#define uregex_replaceAllUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceAllUText)
+#define uregex_replaceFirst U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirst)
+#define uregex_replaceFirstUText U_ICU_ENTRY_POINT_RENAME(uregex_replaceFirstUText)
+#define uregex_requireEnd U_ICU_ENTRY_POINT_RENAME(uregex_requireEnd)
+#define uregex_reset U_ICU_ENTRY_POINT_RENAME(uregex_reset)
+#define uregex_reset64 U_ICU_ENTRY_POINT_RENAME(uregex_reset64)
+#define uregex_setFindProgressCallback U_ICU_ENTRY_POINT_RENAME(uregex_setFindProgressCallback)
+#define uregex_setMatchCallback U_ICU_ENTRY_POINT_RENAME(uregex_setMatchCallback)
+#define uregex_setRegion U_ICU_ENTRY_POINT_RENAME(uregex_setRegion)
+#define uregex_setRegion64 U_ICU_ENTRY_POINT_RENAME(uregex_setRegion64)
+#define uregex_setRegionAndStart U_ICU_ENTRY_POINT_RENAME(uregex_setRegionAndStart)
+#define uregex_setStackLimit U_ICU_ENTRY_POINT_RENAME(uregex_setStackLimit)
+#define uregex_setText U_ICU_ENTRY_POINT_RENAME(uregex_setText)
+#define uregex_setTimeLimit U_ICU_ENTRY_POINT_RENAME(uregex_setTimeLimit)
+#define uregex_setUText U_ICU_ENTRY_POINT_RENAME(uregex_setUText)
+#define uregex_split U_ICU_ENTRY_POINT_RENAME(uregex_split)
+#define uregex_splitUText U_ICU_ENTRY_POINT_RENAME(uregex_splitUText)
+#define uregex_start U_ICU_ENTRY_POINT_RENAME(uregex_start)
+#define uregex_start64 U_ICU_ENTRY_POINT_RENAME(uregex_start64)
+#define uregex_ucstr_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_ucstr_unescape_charAt)
+#define uregex_useAnchoringBounds U_ICU_ENTRY_POINT_RENAME(uregex_useAnchoringBounds)
+#define uregex_useTransparentBounds U_ICU_ENTRY_POINT_RENAME(uregex_useTransparentBounds)
+#define uregex_utext_unescape_charAt U_ICU_ENTRY_POINT_RENAME(uregex_utext_unescape_charAt)
+#define uregion_areEqual U_ICU_ENTRY_POINT_RENAME(uregion_areEqual)
+#define uregion_contains U_ICU_ENTRY_POINT_RENAME(uregion_contains)
+#define uregion_getAvailable U_ICU_ENTRY_POINT_RENAME(uregion_getAvailable)
+#define uregion_getContainedRegions U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegions)
+#define uregion_getContainedRegionsOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainedRegionsOfType)
+#define uregion_getContainingRegion U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegion)
+#define uregion_getContainingRegionOfType U_ICU_ENTRY_POINT_RENAME(uregion_getContainingRegionOfType)
+#define uregion_getNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getNumericCode)
+#define uregion_getPreferredValues U_ICU_ENTRY_POINT_RENAME(uregion_getPreferredValues)
+#define uregion_getRegionCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionCode)
+#define uregion_getRegionFromCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromCode)
+#define uregion_getRegionFromNumericCode U_ICU_ENTRY_POINT_RENAME(uregion_getRegionFromNumericCode)
+#define uregion_getType U_ICU_ENTRY_POINT_RENAME(uregion_getType)
+#define ureldatefmt_close U_ICU_ENTRY_POINT_RENAME(ureldatefmt_close)
+#define ureldatefmt_closeResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_closeResult)
+#define ureldatefmt_combineDateAndTime U_ICU_ENTRY_POINT_RENAME(ureldatefmt_combineDateAndTime)
+#define ureldatefmt_format U_ICU_ENTRY_POINT_RENAME(ureldatefmt_format)
+#define ureldatefmt_formatNumeric U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumeric)
+#define ureldatefmt_formatNumericToResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatNumericToResult)
+#define ureldatefmt_formatToResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_formatToResult)
+#define ureldatefmt_open U_ICU_ENTRY_POINT_RENAME(ureldatefmt_open)
+#define ureldatefmt_openResult U_ICU_ENTRY_POINT_RENAME(ureldatefmt_openResult)
+#define ureldatefmt_resultAsValue U_ICU_ENTRY_POINT_RENAME(ureldatefmt_resultAsValue)
+#define ures_close U_ICU_ENTRY_POINT_RENAME(ures_close)
+#define ures_copyResb U_ICU_ENTRY_POINT_RENAME(ures_copyResb)
+#define ures_countArrayItems U_ICU_ENTRY_POINT_RENAME(ures_countArrayItems)
+#define ures_findResource U_ICU_ENTRY_POINT_RENAME(ures_findResource)
+#define ures_findSubResource U_ICU_ENTRY_POINT_RENAME(ures_findSubResource)
+#define ures_getAllChildrenWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllChildrenWithFallback)
+#define ures_getAllItemsWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getAllItemsWithFallback)
+#define ures_getBinary U_ICU_ENTRY_POINT_RENAME(ures_getBinary)
+#define ures_getByIndex U_ICU_ENTRY_POINT_RENAME(ures_getByIndex)
+#define ures_getByKey U_ICU_ENTRY_POINT_RENAME(ures_getByKey)
+#define ures_getByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getByKeyWithFallback)
+#define ures_getFunctionalEquivalent U_ICU_ENTRY_POINT_RENAME(ures_getFunctionalEquivalent)
+#define ures_getInt U_ICU_ENTRY_POINT_RENAME(ures_getInt)
+#define ures_getIntVector U_ICU_ENTRY_POINT_RENAME(ures_getIntVector)
+#define ures_getKey U_ICU_ENTRY_POINT_RENAME(ures_getKey)
+#define ures_getKeywordValues U_ICU_ENTRY_POINT_RENAME(ures_getKeywordValues)
+#define ures_getLocale U_ICU_ENTRY_POINT_RENAME(ures_getLocale)
+#define ures_getLocaleByType U_ICU_ENTRY_POINT_RENAME(ures_getLocaleByType)
+#define ures_getLocaleInternal U_ICU_ENTRY_POINT_RENAME(ures_getLocaleInternal)
+#define ures_getName U_ICU_ENTRY_POINT_RENAME(ures_getName)
+#define ures_getNextResource U_ICU_ENTRY_POINT_RENAME(ures_getNextResource)
+#define ures_getNextString U_ICU_ENTRY_POINT_RENAME(ures_getNextString)
+#define ures_getSize U_ICU_ENTRY_POINT_RENAME(ures_getSize)
+#define ures_getString U_ICU_ENTRY_POINT_RENAME(ures_getString)
+#define ures_getStringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getStringByIndex)
+#define ures_getStringByKey U_ICU_ENTRY_POINT_RENAME(ures_getStringByKey)
+#define ures_getStringByKeyWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getStringByKeyWithFallback)
+#define ures_getType U_ICU_ENTRY_POINT_RENAME(ures_getType)
+#define ures_getUInt U_ICU_ENTRY_POINT_RENAME(ures_getUInt)
+#define ures_getUTF8String U_ICU_ENTRY_POINT_RENAME(ures_getUTF8String)
+#define ures_getUTF8StringByIndex U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByIndex)
+#define ures_getUTF8StringByKey U_ICU_ENTRY_POINT_RENAME(ures_getUTF8StringByKey)
+#define ures_getValueWithFallback U_ICU_ENTRY_POINT_RENAME(ures_getValueWithFallback)
+#define ures_getVersion U_ICU_ENTRY_POINT_RENAME(ures_getVersion)
+#define ures_getVersionByKey U_ICU_ENTRY_POINT_RENAME(ures_getVersionByKey)
+#define ures_getVersionNumber U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumber)
+#define ures_getVersionNumberInternal U_ICU_ENTRY_POINT_RENAME(ures_getVersionNumberInternal)
+#define ures_hasNext U_ICU_ENTRY_POINT_RENAME(ures_hasNext)
+#define ures_initStackObject U_ICU_ENTRY_POINT_RENAME(ures_initStackObject)
+#define ures_open U_ICU_ENTRY_POINT_RENAME(ures_open)
+#define ures_openAvailableLocales U_ICU_ENTRY_POINT_RENAME(ures_openAvailableLocales)
+#define ures_openDirect U_ICU_ENTRY_POINT_RENAME(ures_openDirect)
+#define ures_openDirectFillIn U_ICU_ENTRY_POINT_RENAME(ures_openDirectFillIn)
+#define ures_openFillIn U_ICU_ENTRY_POINT_RENAME(ures_openFillIn)
+#define ures_openNoDefault U_ICU_ENTRY_POINT_RENAME(ures_openNoDefault)
+#define ures_openU U_ICU_ENTRY_POINT_RENAME(ures_openU)
+#define ures_resetIterator U_ICU_ENTRY_POINT_RENAME(ures_resetIterator)
+#define ures_swap U_ICU_ENTRY_POINT_RENAME(ures_swap)
+#define uscript_breaksBetweenLetters U_ICU_ENTRY_POINT_RENAME(uscript_breaksBetweenLetters)
+#define uscript_closeRun U_ICU_ENTRY_POINT_RENAME(uscript_closeRun)
+#define uscript_getCode U_ICU_ENTRY_POINT_RENAME(uscript_getCode)
+#define uscript_getName U_ICU_ENTRY_POINT_RENAME(uscript_getName)
+#define uscript_getSampleString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleString)
+#define uscript_getSampleUnicodeString U_ICU_ENTRY_POINT_RENAME(uscript_getSampleUnicodeString)
+#define uscript_getScript U_ICU_ENTRY_POINT_RENAME(uscript_getScript)
+#define uscript_getScriptExtensions U_ICU_ENTRY_POINT_RENAME(uscript_getScriptExtensions)
+#define uscript_getShortName U_ICU_ENTRY_POINT_RENAME(uscript_getShortName)
+#define uscript_getUsage U_ICU_ENTRY_POINT_RENAME(uscript_getUsage)
+#define uscript_hasScript U_ICU_ENTRY_POINT_RENAME(uscript_hasScript)
+#define uscript_isCased U_ICU_ENTRY_POINT_RENAME(uscript_isCased)
+#define uscript_isRightToLeft U_ICU_ENTRY_POINT_RENAME(uscript_isRightToLeft)
+#define uscript_nextRun U_ICU_ENTRY_POINT_RENAME(uscript_nextRun)
+#define uscript_openRun U_ICU_ENTRY_POINT_RENAME(uscript_openRun)
+#define uscript_resetRun U_ICU_ENTRY_POINT_RENAME(uscript_resetRun)
+#define uscript_setRunText U_ICU_ENTRY_POINT_RENAME(uscript_setRunText)
+#define usearch_close U_ICU_ENTRY_POINT_RENAME(usearch_close)
+#define usearch_first U_ICU_ENTRY_POINT_RENAME(usearch_first)
+#define usearch_following U_ICU_ENTRY_POINT_RENAME(usearch_following)
+#define usearch_getAttribute U_ICU_ENTRY_POINT_RENAME(usearch_getAttribute)
+#define usearch_getBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_getBreakIterator)
+#define usearch_getCollator U_ICU_ENTRY_POINT_RENAME(usearch_getCollator)
+#define usearch_getMatchedLength U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedLength)
+#define usearch_getMatchedStart U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedStart)
+#define usearch_getMatchedText U_ICU_ENTRY_POINT_RENAME(usearch_getMatchedText)
+#define usearch_getOffset U_ICU_ENTRY_POINT_RENAME(usearch_getOffset)
+#define usearch_getPattern U_ICU_ENTRY_POINT_RENAME(usearch_getPattern)
+#define usearch_getText U_ICU_ENTRY_POINT_RENAME(usearch_getText)
+#define usearch_handleNextCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handleNextCanonical)
+#define usearch_handleNextExact U_ICU_ENTRY_POINT_RENAME(usearch_handleNextExact)
+#define usearch_handlePreviousCanonical U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousCanonical)
+#define usearch_handlePreviousExact U_ICU_ENTRY_POINT_RENAME(usearch_handlePreviousExact)
+#define usearch_last U_ICU_ENTRY_POINT_RENAME(usearch_last)
+#define usearch_next U_ICU_ENTRY_POINT_RENAME(usearch_next)
+#define usearch_open U_ICU_ENTRY_POINT_RENAME(usearch_open)
+#define usearch_openFromCollator U_ICU_ENTRY_POINT_RENAME(usearch_openFromCollator)
+#define usearch_preceding U_ICU_ENTRY_POINT_RENAME(usearch_preceding)
+#define usearch_previous U_ICU_ENTRY_POINT_RENAME(usearch_previous)
+#define usearch_reset U_ICU_ENTRY_POINT_RENAME(usearch_reset)
+#define usearch_search U_ICU_ENTRY_POINT_RENAME(usearch_search)
+#define usearch_searchBackwards U_ICU_ENTRY_POINT_RENAME(usearch_searchBackwards)
+#define usearch_setAttribute U_ICU_ENTRY_POINT_RENAME(usearch_setAttribute)
+#define usearch_setBreakIterator U_ICU_ENTRY_POINT_RENAME(usearch_setBreakIterator)
+#define usearch_setCollator U_ICU_ENTRY_POINT_RENAME(usearch_setCollator)
+#define usearch_setOffset U_ICU_ENTRY_POINT_RENAME(usearch_setOffset)
+#define usearch_setPattern U_ICU_ENTRY_POINT_RENAME(usearch_setPattern)
+#define usearch_setText U_ICU_ENTRY_POINT_RENAME(usearch_setText)
+#define uset_add U_ICU_ENTRY_POINT_RENAME(uset_add)
+#define uset_addAll U_ICU_ENTRY_POINT_RENAME(uset_addAll)
+#define uset_addAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_addAllCodePoints)
+#define uset_addRange U_ICU_ENTRY_POINT_RENAME(uset_addRange)
+#define uset_addString U_ICU_ENTRY_POINT_RENAME(uset_addString)
+#define uset_applyIntPropertyValue U_ICU_ENTRY_POINT_RENAME(uset_applyIntPropertyValue)
+#define uset_applyPattern U_ICU_ENTRY_POINT_RENAME(uset_applyPattern)
+#define uset_applyPropertyAlias U_ICU_ENTRY_POINT_RENAME(uset_applyPropertyAlias)
+#define uset_charAt U_ICU_ENTRY_POINT_RENAME(uset_charAt)
+#define uset_clear U_ICU_ENTRY_POINT_RENAME(uset_clear)
+#define uset_clone U_ICU_ENTRY_POINT_RENAME(uset_clone)
+#define uset_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(uset_cloneAsThawed)
+#define uset_close U_ICU_ENTRY_POINT_RENAME(uset_close)
+#define uset_closeOver U_ICU_ENTRY_POINT_RENAME(uset_closeOver)
+#define uset_compact U_ICU_ENTRY_POINT_RENAME(uset_compact)
+#define uset_complement U_ICU_ENTRY_POINT_RENAME(uset_complement)
+#define uset_complementAll U_ICU_ENTRY_POINT_RENAME(uset_complementAll)
+#define uset_complementAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_complementAllCodePoints)
+#define uset_complementRange U_ICU_ENTRY_POINT_RENAME(uset_complementRange)
+#define uset_complementString U_ICU_ENTRY_POINT_RENAME(uset_complementString)
+#define uset_contains U_ICU_ENTRY_POINT_RENAME(uset_contains)
+#define uset_containsAll U_ICU_ENTRY_POINT_RENAME(uset_containsAll)
+#define uset_containsAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_containsAllCodePoints)
+#define uset_containsNone U_ICU_ENTRY_POINT_RENAME(uset_containsNone)
+#define uset_containsRange U_ICU_ENTRY_POINT_RENAME(uset_containsRange)
+#define uset_containsSome U_ICU_ENTRY_POINT_RENAME(uset_containsSome)
+#define uset_containsString U_ICU_ENTRY_POINT_RENAME(uset_containsString)
+#define uset_equals U_ICU_ENTRY_POINT_RENAME(uset_equals)
+#define uset_freeze U_ICU_ENTRY_POINT_RENAME(uset_freeze)
+#define uset_getItem U_ICU_ENTRY_POINT_RENAME(uset_getItem)
+#define uset_getItemCount U_ICU_ENTRY_POINT_RENAME(uset_getItemCount)
+#define uset_getRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getRangeCount)
+#define uset_getSerializedRange U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRange)
+#define uset_getSerializedRangeCount U_ICU_ENTRY_POINT_RENAME(uset_getSerializedRangeCount)
+#define uset_getSerializedSet U_ICU_ENTRY_POINT_RENAME(uset_getSerializedSet)
+#define uset_hasStrings U_ICU_ENTRY_POINT_RENAME(uset_hasStrings)
+#define uset_indexOf U_ICU_ENTRY_POINT_RENAME(uset_indexOf)
+#define uset_isEmpty U_ICU_ENTRY_POINT_RENAME(uset_isEmpty)
+#define uset_isFrozen U_ICU_ENTRY_POINT_RENAME(uset_isFrozen)
+#define uset_open U_ICU_ENTRY_POINT_RENAME(uset_open)
+#define uset_openEmpty U_ICU_ENTRY_POINT_RENAME(uset_openEmpty)
+#define uset_openPattern U_ICU_ENTRY_POINT_RENAME(uset_openPattern)
+#define uset_openPatternOptions U_ICU_ENTRY_POINT_RENAME(uset_openPatternOptions)
+#define uset_remove U_ICU_ENTRY_POINT_RENAME(uset_remove)
+#define uset_removeAll U_ICU_ENTRY_POINT_RENAME(uset_removeAll)
+#define uset_removeAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_removeAllCodePoints)
+#define uset_removeAllStrings U_ICU_ENTRY_POINT_RENAME(uset_removeAllStrings)
+#define uset_removeRange U_ICU_ENTRY_POINT_RENAME(uset_removeRange)
+#define uset_removeString U_ICU_ENTRY_POINT_RENAME(uset_removeString)
+#define uset_resemblesPattern U_ICU_ENTRY_POINT_RENAME(uset_resemblesPattern)
+#define uset_retain U_ICU_ENTRY_POINT_RENAME(uset_retain)
+#define uset_retainAll U_ICU_ENTRY_POINT_RENAME(uset_retainAll)
+#define uset_retainAllCodePoints U_ICU_ENTRY_POINT_RENAME(uset_retainAllCodePoints)
+#define uset_retainString U_ICU_ENTRY_POINT_RENAME(uset_retainString)
+#define uset_serialize U_ICU_ENTRY_POINT_RENAME(uset_serialize)
+#define uset_serializedContains U_ICU_ENTRY_POINT_RENAME(uset_serializedContains)
+#define uset_set U_ICU_ENTRY_POINT_RENAME(uset_set)
+#define uset_setSerializedToOne U_ICU_ENTRY_POINT_RENAME(uset_setSerializedToOne)
+#define uset_size U_ICU_ENTRY_POINT_RENAME(uset_size)
+#define uset_span U_ICU_ENTRY_POINT_RENAME(uset_span)
+#define uset_spanBack U_ICU_ENTRY_POINT_RENAME(uset_spanBack)
+#define uset_spanBackUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanBackUTF8)
+#define uset_spanUTF8 U_ICU_ENTRY_POINT_RENAME(uset_spanUTF8)
+#define uset_toPattern U_ICU_ENTRY_POINT_RENAME(uset_toPattern)
+#define usnum_close U_ICU_ENTRY_POINT_RENAME(usnum_close)
+#define usnum_multiplyByPowerOfTen U_ICU_ENTRY_POINT_RENAME(usnum_multiplyByPowerOfTen)
+#define usnum_openForInt64 U_ICU_ENTRY_POINT_RENAME(usnum_openForInt64)
+#define usnum_roundTo U_ICU_ENTRY_POINT_RENAME(usnum_roundTo)
+#define usnum_setMinimumFractionDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumFractionDigits)
+#define usnum_setMinimumIntegerDigits U_ICU_ENTRY_POINT_RENAME(usnum_setMinimumIntegerDigits)
+#define usnum_setSign U_ICU_ENTRY_POINT_RENAME(usnum_setSign)
+#define usnum_setToInt64 U_ICU_ENTRY_POINT_RENAME(usnum_setToInt64)
+#define usnum_truncateStart U_ICU_ENTRY_POINT_RENAME(usnum_truncateStart)
+#define usnumf_close U_ICU_ENTRY_POINT_RENAME(usnumf_close)
+#define usnumf_format U_ICU_ENTRY_POINT_RENAME(usnumf_format)
+#define usnumf_formatInt64 U_ICU_ENTRY_POINT_RENAME(usnumf_formatInt64)
+#define usnumf_openForLocale U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocale)
+#define usnumf_openForLocaleAndGroupingStrategy U_ICU_ENTRY_POINT_RENAME(usnumf_openForLocaleAndGroupingStrategy)
+#define uspoof_areConfusable U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusable)
+#define uspoof_areConfusableUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUTF8)
+#define uspoof_areConfusableUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_areConfusableUnicodeString)
+#define uspoof_check U_ICU_ENTRY_POINT_RENAME(uspoof_check)
+#define uspoof_check2 U_ICU_ENTRY_POINT_RENAME(uspoof_check2)
+#define uspoof_check2UTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_check2UTF8)
+#define uspoof_check2UnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_check2UnicodeString)
+#define uspoof_checkUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_checkUTF8)
+#define uspoof_checkUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_checkUnicodeString)
+#define uspoof_clone U_ICU_ENTRY_POINT_RENAME(uspoof_clone)
+#define uspoof_close U_ICU_ENTRY_POINT_RENAME(uspoof_close)
+#define uspoof_closeCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_closeCheckResult)
+#define uspoof_getAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedChars)
+#define uspoof_getAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedLocales)
+#define uspoof_getAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getAllowedUnicodeSet)
+#define uspoof_getCheckResultChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultChecks)
+#define uspoof_getCheckResultNumerics U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultNumerics)
+#define uspoof_getCheckResultRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getCheckResultRestrictionLevel)
+#define uspoof_getChecks U_ICU_ENTRY_POINT_RENAME(uspoof_getChecks)
+#define uspoof_getInclusionSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionSet)
+#define uspoof_getInclusionUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getInclusionUnicodeSet)
+#define uspoof_getRecommendedSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedSet)
+#define uspoof_getRecommendedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_getRecommendedUnicodeSet)
+#define uspoof_getRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_getRestrictionLevel)
+#define uspoof_getSkeleton U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeleton)
+#define uspoof_getSkeletonUTF8 U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUTF8)
+#define uspoof_getSkeletonUnicodeString U_ICU_ENTRY_POINT_RENAME(uspoof_getSkeletonUnicodeString)
+#define uspoof_internalInitStatics U_ICU_ENTRY_POINT_RENAME(uspoof_internalInitStatics)
+#define uspoof_open U_ICU_ENTRY_POINT_RENAME(uspoof_open)
+#define uspoof_openCheckResult U_ICU_ENTRY_POINT_RENAME(uspoof_openCheckResult)
+#define uspoof_openFromSerialized U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSerialized)
+#define uspoof_openFromSource U_ICU_ENTRY_POINT_RENAME(uspoof_openFromSource)
+#define uspoof_serialize U_ICU_ENTRY_POINT_RENAME(uspoof_serialize)
+#define uspoof_setAllowedChars U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedChars)
+#define uspoof_setAllowedLocales U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedLocales)
+#define uspoof_setAllowedUnicodeSet U_ICU_ENTRY_POINT_RENAME(uspoof_setAllowedUnicodeSet)
+#define uspoof_setChecks U_ICU_ENTRY_POINT_RENAME(uspoof_setChecks)
+#define uspoof_setRestrictionLevel U_ICU_ENTRY_POINT_RENAME(uspoof_setRestrictionLevel)
+#define uspoof_swap U_ICU_ENTRY_POINT_RENAME(uspoof_swap)
+#define usprep_close U_ICU_ENTRY_POINT_RENAME(usprep_close)
+#define usprep_open U_ICU_ENTRY_POINT_RENAME(usprep_open)
+#define usprep_openByType U_ICU_ENTRY_POINT_RENAME(usprep_openByType)
+#define usprep_prepare U_ICU_ENTRY_POINT_RENAME(usprep_prepare)
+#define usprep_swap U_ICU_ENTRY_POINT_RENAME(usprep_swap)
+#define ustr_hashCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashCharsN)
+#define ustr_hashICharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashICharsN)
+#define ustr_hashUCharsN U_ICU_ENTRY_POINT_RENAME(ustr_hashUCharsN)
+#define ustrcase_getCaseLocale U_ICU_ENTRY_POINT_RENAME(ustrcase_getCaseLocale)
+#define ustrcase_getTitleBreakIterator U_ICU_ENTRY_POINT_RENAME(ustrcase_getTitleBreakIterator)
+#define ustrcase_internalFold U_ICU_ENTRY_POINT_RENAME(ustrcase_internalFold)
+#define ustrcase_internalToLower U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToLower)
+#define ustrcase_internalToTitle U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToTitle)
+#define ustrcase_internalToUpper U_ICU_ENTRY_POINT_RENAME(ustrcase_internalToUpper)
+#define ustrcase_map U_ICU_ENTRY_POINT_RENAME(ustrcase_map)
+#define ustrcase_mapWithOverlap U_ICU_ENTRY_POINT_RENAME(ustrcase_mapWithOverlap)
+#define utext_char32At U_ICU_ENTRY_POINT_RENAME(utext_char32At)
+#define utext_clone U_ICU_ENTRY_POINT_RENAME(utext_clone)
+#define utext_close U_ICU_ENTRY_POINT_RENAME(utext_close)
+#define utext_copy U_ICU_ENTRY_POINT_RENAME(utext_copy)
+#define utext_current32 U_ICU_ENTRY_POINT_RENAME(utext_current32)
+#define utext_equals U_ICU_ENTRY_POINT_RENAME(utext_equals)
+#define utext_extract U_ICU_ENTRY_POINT_RENAME(utext_extract)
+#define utext_freeze U_ICU_ENTRY_POINT_RENAME(utext_freeze)
+#define utext_getNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getNativeIndex)
+#define utext_getPreviousNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_getPreviousNativeIndex)
+#define utext_hasMetaData U_ICU_ENTRY_POINT_RENAME(utext_hasMetaData)
+#define utext_isLengthExpensive U_ICU_ENTRY_POINT_RENAME(utext_isLengthExpensive)
+#define utext_isWritable U_ICU_ENTRY_POINT_RENAME(utext_isWritable)
+#define utext_moveIndex32 U_ICU_ENTRY_POINT_RENAME(utext_moveIndex32)
+#define utext_nativeLength U_ICU_ENTRY_POINT_RENAME(utext_nativeLength)
+#define utext_next32 U_ICU_ENTRY_POINT_RENAME(utext_next32)
+#define utext_next32From U_ICU_ENTRY_POINT_RENAME(utext_next32From)
+#define utext_openCharacterIterator U_ICU_ENTRY_POINT_RENAME(utext_openCharacterIterator)
+#define utext_openConstUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openConstUnicodeString)
+#define utext_openReplaceable U_ICU_ENTRY_POINT_RENAME(utext_openReplaceable)
+#define utext_openUChars U_ICU_ENTRY_POINT_RENAME(utext_openUChars)
+#define utext_openUTF8 U_ICU_ENTRY_POINT_RENAME(utext_openUTF8)
+#define utext_openUnicodeString U_ICU_ENTRY_POINT_RENAME(utext_openUnicodeString)
+#define utext_previous32 U_ICU_ENTRY_POINT_RENAME(utext_previous32)
+#define utext_previous32From U_ICU_ENTRY_POINT_RENAME(utext_previous32From)
+#define utext_replace U_ICU_ENTRY_POINT_RENAME(utext_replace)
+#define utext_setNativeIndex U_ICU_ENTRY_POINT_RENAME(utext_setNativeIndex)
+#define utext_setup U_ICU_ENTRY_POINT_RENAME(utext_setup)
+#define utf8_appendCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_appendCharSafeBody)
+#define utf8_back1SafeBody U_ICU_ENTRY_POINT_RENAME(utf8_back1SafeBody)
+#define utf8_countTrailBytes U_ICU_ENTRY_POINT_RENAME(utf8_countTrailBytes)
+#define utf8_nextCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_nextCharSafeBody)
+#define utf8_prevCharSafeBody U_ICU_ENTRY_POINT_RENAME(utf8_prevCharSafeBody)
+#define utmscale_fromInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_fromInt64)
+#define utmscale_getTimeScaleValue U_ICU_ENTRY_POINT_RENAME(utmscale_getTimeScaleValue)
+#define utmscale_toInt64 U_ICU_ENTRY_POINT_RENAME(utmscale_toInt64)
+#define utrace_cleanup U_ICU_ENTRY_POINT_RENAME(utrace_cleanup)
+#define utrace_data U_ICU_ENTRY_POINT_RENAME(utrace_data)
+#define utrace_entry U_ICU_ENTRY_POINT_RENAME(utrace_entry)
+#define utrace_exit U_ICU_ENTRY_POINT_RENAME(utrace_exit)
+#define utrace_format U_ICU_ENTRY_POINT_RENAME(utrace_format)
+#define utrace_functionName U_ICU_ENTRY_POINT_RENAME(utrace_functionName)
+#define utrace_getFunctions U_ICU_ENTRY_POINT_RENAME(utrace_getFunctions)
+#define utrace_getLevel U_ICU_ENTRY_POINT_RENAME(utrace_getLevel)
+#define utrace_setFunctions U_ICU_ENTRY_POINT_RENAME(utrace_setFunctions)
+#define utrace_setLevel U_ICU_ENTRY_POINT_RENAME(utrace_setLevel)
+#define utrace_vformat U_ICU_ENTRY_POINT_RENAME(utrace_vformat)
+#define utrans_clone U_ICU_ENTRY_POINT_RENAME(utrans_clone)
+#define utrans_close U_ICU_ENTRY_POINT_RENAME(utrans_close)
+#define utrans_countAvailableIDs U_ICU_ENTRY_POINT_RENAME(utrans_countAvailableIDs)
+#define utrans_getAvailableID U_ICU_ENTRY_POINT_RENAME(utrans_getAvailableID)
+#define utrans_getID U_ICU_ENTRY_POINT_RENAME(utrans_getID)
+#define utrans_getSourceSet U_ICU_ENTRY_POINT_RENAME(utrans_getSourceSet)
+#define utrans_getUnicodeID U_ICU_ENTRY_POINT_RENAME(utrans_getUnicodeID)
+#define utrans_open U_ICU_ENTRY_POINT_RENAME(utrans_open)
+#define utrans_openIDs U_ICU_ENTRY_POINT_RENAME(utrans_openIDs)
+#define utrans_openInverse U_ICU_ENTRY_POINT_RENAME(utrans_openInverse)
+#define utrans_openU U_ICU_ENTRY_POINT_RENAME(utrans_openU)
+#define utrans_register U_ICU_ENTRY_POINT_RENAME(utrans_register)
+#define utrans_rep_caseContextIterator U_ICU_ENTRY_POINT_RENAME(utrans_rep_caseContextIterator)
+#define utrans_setFilter U_ICU_ENTRY_POINT_RENAME(utrans_setFilter)
+#define utrans_stripRules U_ICU_ENTRY_POINT_RENAME(utrans_stripRules)
+#define utrans_toRules U_ICU_ENTRY_POINT_RENAME(utrans_toRules)
+#define utrans_trans U_ICU_ENTRY_POINT_RENAME(utrans_trans)
+#define utrans_transIncremental U_ICU_ENTRY_POINT_RENAME(utrans_transIncremental)
+#define utrans_transIncrementalUChars U_ICU_ENTRY_POINT_RENAME(utrans_transIncrementalUChars)
+#define utrans_transUChars U_ICU_ENTRY_POINT_RENAME(utrans_transUChars)
+#define utrans_transliterator_cleanup U_ICU_ENTRY_POINT_RENAME(utrans_transliterator_cleanup)
+#define utrans_unregister U_ICU_ENTRY_POINT_RENAME(utrans_unregister)
+#define utrans_unregisterID U_ICU_ENTRY_POINT_RENAME(utrans_unregisterID)
+#define utrie2_clone U_ICU_ENTRY_POINT_RENAME(utrie2_clone)
+#define utrie2_cloneAsThawed U_ICU_ENTRY_POINT_RENAME(utrie2_cloneAsThawed)
+#define utrie2_close U_ICU_ENTRY_POINT_RENAME(utrie2_close)
+#define utrie2_enum U_ICU_ENTRY_POINT_RENAME(utrie2_enum)
+#define utrie2_enumForLeadSurrogate U_ICU_ENTRY_POINT_RENAME(utrie2_enumForLeadSurrogate)
+#define utrie2_freeze U_ICU_ENTRY_POINT_RENAME(utrie2_freeze)
+#define utrie2_fromUTrie U_ICU_ENTRY_POINT_RENAME(utrie2_fromUTrie)
+#define utrie2_get32 U_ICU_ENTRY_POINT_RENAME(utrie2_get32)
+#define utrie2_get32FromLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_get32FromLeadSurrogateCodeUnit)
+#define utrie2_internalU8NextIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8NextIndex)
+#define utrie2_internalU8PrevIndex U_ICU_ENTRY_POINT_RENAME(utrie2_internalU8PrevIndex)
+#define utrie2_isFrozen U_ICU_ENTRY_POINT_RENAME(utrie2_isFrozen)
+#define utrie2_open U_ICU_ENTRY_POINT_RENAME(utrie2_open)
+#define utrie2_openDummy U_ICU_ENTRY_POINT_RENAME(utrie2_openDummy)
+#define utrie2_openFromSerialized U_ICU_ENTRY_POINT_RENAME(utrie2_openFromSerialized)
+#define utrie2_serialize U_ICU_ENTRY_POINT_RENAME(utrie2_serialize)
+#define utrie2_set32 U_ICU_ENTRY_POINT_RENAME(utrie2_set32)
+#define utrie2_set32ForLeadSurrogateCodeUnit U_ICU_ENTRY_POINT_RENAME(utrie2_set32ForLeadSurrogateCodeUnit)
+#define utrie2_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie2_setRange32)
+#define utrie2_swap U_ICU_ENTRY_POINT_RENAME(utrie2_swap)
+#define utrie_clone U_ICU_ENTRY_POINT_RENAME(utrie_clone)
+#define utrie_close U_ICU_ENTRY_POINT_RENAME(utrie_close)
+#define utrie_defaultGetFoldingOffset U_ICU_ENTRY_POINT_RENAME(utrie_defaultGetFoldingOffset)
+#define utrie_enum U_ICU_ENTRY_POINT_RENAME(utrie_enum)
+#define utrie_get32 U_ICU_ENTRY_POINT_RENAME(utrie_get32)
+#define utrie_getData U_ICU_ENTRY_POINT_RENAME(utrie_getData)
+#define utrie_open U_ICU_ENTRY_POINT_RENAME(utrie_open)
+#define utrie_serialize U_ICU_ENTRY_POINT_RENAME(utrie_serialize)
+#define utrie_set32 U_ICU_ENTRY_POINT_RENAME(utrie_set32)
+#define utrie_setRange32 U_ICU_ENTRY_POINT_RENAME(utrie_setRange32)
+#define utrie_swap U_ICU_ENTRY_POINT_RENAME(utrie_swap)
+#define utrie_swapAnyVersion U_ICU_ENTRY_POINT_RENAME(utrie_swapAnyVersion)
+#define utrie_unserialize U_ICU_ENTRY_POINT_RENAME(utrie_unserialize)
+#define utrie_unserializeDummy U_ICU_ENTRY_POINT_RENAME(utrie_unserializeDummy)
+#define vzone_clone U_ICU_ENTRY_POINT_RENAME(vzone_clone)
+#define vzone_close U_ICU_ENTRY_POINT_RENAME(vzone_close)
+#define vzone_countTransitionRules U_ICU_ENTRY_POINT_RENAME(vzone_countTransitionRules)
+#define vzone_equals U_ICU_ENTRY_POINT_RENAME(vzone_equals)
+#define vzone_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(vzone_getDynamicClassID)
+#define vzone_getLastModified U_ICU_ENTRY_POINT_RENAME(vzone_getLastModified)
+#define vzone_getNextTransition U_ICU_ENTRY_POINT_RENAME(vzone_getNextTransition)
+#define vzone_getOffset U_ICU_ENTRY_POINT_RENAME(vzone_getOffset)
+#define vzone_getOffset2 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset2)
+#define vzone_getOffset3 U_ICU_ENTRY_POINT_RENAME(vzone_getOffset3)
+#define vzone_getPreviousTransition U_ICU_ENTRY_POINT_RENAME(vzone_getPreviousTransition)
+#define vzone_getRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_getRawOffset)
+#define vzone_getStaticClassID U_ICU_ENTRY_POINT_RENAME(vzone_getStaticClassID)
+#define vzone_getTZURL U_ICU_ENTRY_POINT_RENAME(vzone_getTZURL)
+#define vzone_hasSameRules U_ICU_ENTRY_POINT_RENAME(vzone_hasSameRules)
+#define vzone_inDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_inDaylightTime)
+#define vzone_openData U_ICU_ENTRY_POINT_RENAME(vzone_openData)
+#define vzone_openID U_ICU_ENTRY_POINT_RENAME(vzone_openID)
+#define vzone_setLastModified U_ICU_ENTRY_POINT_RENAME(vzone_setLastModified)
+#define vzone_setRawOffset U_ICU_ENTRY_POINT_RENAME(vzone_setRawOffset)
+#define vzone_setTZURL U_ICU_ENTRY_POINT_RENAME(vzone_setTZURL)
+#define vzone_useDaylightTime U_ICU_ENTRY_POINT_RENAME(vzone_useDaylightTime)
+#define vzone_write U_ICU_ENTRY_POINT_RENAME(vzone_write)
+#define vzone_writeFromStart U_ICU_ENTRY_POINT_RENAME(vzone_writeFromStart)
+#define vzone_writeSimple U_ICU_ENTRY_POINT_RENAME(vzone_writeSimple)
+#define zrule_close U_ICU_ENTRY_POINT_RENAME(zrule_close)
+#define zrule_equals U_ICU_ENTRY_POINT_RENAME(zrule_equals)
+#define zrule_getDSTSavings U_ICU_ENTRY_POINT_RENAME(zrule_getDSTSavings)
+#define zrule_getName U_ICU_ENTRY_POINT_RENAME(zrule_getName)
+#define zrule_getRawOffset U_ICU_ENTRY_POINT_RENAME(zrule_getRawOffset)
+#define zrule_isEquivalentTo U_ICU_ENTRY_POINT_RENAME(zrule_isEquivalentTo)
+#define ztrans_adoptFrom U_ICU_ENTRY_POINT_RENAME(ztrans_adoptFrom)
+#define ztrans_adoptTo U_ICU_ENTRY_POINT_RENAME(ztrans_adoptTo)
+#define ztrans_clone U_ICU_ENTRY_POINT_RENAME(ztrans_clone)
+#define ztrans_close U_ICU_ENTRY_POINT_RENAME(ztrans_close)
+#define ztrans_equals U_ICU_ENTRY_POINT_RENAME(ztrans_equals)
+#define ztrans_getDynamicClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getDynamicClassID)
+#define ztrans_getFrom U_ICU_ENTRY_POINT_RENAME(ztrans_getFrom)
+#define ztrans_getStaticClassID U_ICU_ENTRY_POINT_RENAME(ztrans_getStaticClassID)
+#define ztrans_getTime U_ICU_ENTRY_POINT_RENAME(ztrans_getTime)
+#define ztrans_getTo U_ICU_ENTRY_POINT_RENAME(ztrans_getTo)
+#define ztrans_open U_ICU_ENTRY_POINT_RENAME(ztrans_open)
+#define ztrans_openEmpty U_ICU_ENTRY_POINT_RENAME(ztrans_openEmpty)
+#define ztrans_setFrom U_ICU_ENTRY_POINT_RENAME(ztrans_setFrom)
+#define ztrans_setTime U_ICU_ENTRY_POINT_RENAME(ztrans_setTime)
+#define ztrans_setTo U_ICU_ENTRY_POINT_RENAME(ztrans_setTo)
+
+#endif /* !(defined(_MSC_VER) && defined(__INTELLISENSE__)) */
+#endif /* U_DISABLE_RENAMING */
+#endif /* URENAME_H */
+
diff --git a/intl/icu/source/common/unicode/urep.h b/intl/icu/source/common/unicode/urep.h
new file mode 100644
index 0000000000..932202ddb0
--- /dev/null
+++ b/intl/icu/source/common/unicode/urep.h
@@ -0,0 +1,157 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+* Copyright (C) 1997-2010, International Business Machines
+* Corporation and others. All Rights Reserved.
+******************************************************************************
+* Date Name Description
+* 06/23/00 aliu Creation.
+******************************************************************************
+*/
+
+#ifndef __UREP_H
+#define __UREP_H
+
+#include "unicode/utypes.h"
+
+U_CDECL_BEGIN
+
+/********************************************************************
+ * General Notes
+ ********************************************************************
+ * TODO
+ * Add usage scenario
+ * Add test code
+ * Talk about pinning
+ * Talk about "can truncate result if out of memory"
+ */
+
+/********************************************************************
+ * Data Structures
+ ********************************************************************/
+/**
+ * \file
+ * \brief C API: Callbacks for UReplaceable
+ */
+/**
+ * An opaque replaceable text object. This will be manipulated only
+ * through the caller-supplied UReplaceableFunctor struct. Related
+ * to the C++ class Replaceable.
+ * This is currently only used in the Transliterator C API, see utrans.h .
+ * @stable ICU 2.0
+ */
+typedef void* UReplaceable;
+
+/**
+ * A set of function pointers that transliterators use to manipulate a
+ * UReplaceable. The caller should supply the required functions to
+ * manipulate their text appropriately. Related to the C++ class
+ * Replaceable.
+ * @stable ICU 2.0
+ */
+typedef struct UReplaceableCallbacks {
+
+ /**
+ * Function pointer that returns the number of UChar code units in
+ * this text.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @return The length of the text.
+ * @stable ICU 2.0
+ */
+ int32_t (*length)(const UReplaceable* rep);
+
+ /**
+ * Function pointer that returns a UChar code units at the given
+ * offset into this text; 0 <= offset < n, where n is the value
+ * returned by (*length)(rep). See unistr.h for a description of
+ * charAt() vs. char32At().
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param offset The index at which to fetch the UChar (code unit).
+ * @return The UChar (code unit) at offset, or U+FFFF if the offset is out of bounds.
+ * @stable ICU 2.0
+ */
+ UChar (*charAt)(const UReplaceable* rep,
+ int32_t offset);
+
+ /**
+ * Function pointer that returns a UChar32 code point at the given
+ * offset into this text. See unistr.h for a description of
+ * charAt() vs. char32At().
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param offset The index at which to fetch the UChar32 (code point).
+ * @return The UChar32 (code point) at offset, or U+FFFF if the offset is out of bounds.
+ * @stable ICU 2.0
+ */
+ UChar32 (*char32At)(const UReplaceable* rep,
+ int32_t offset);
+
+ /**
+ * Function pointer that replaces text between start and limit in
+ * this text with the given text. Attributes (out of band info)
+ * should be retained.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start the starting index of the text to be replaced,
+ * inclusive.
+ * @param limit the ending index of the text to be replaced,
+ * exclusive.
+ * @param text the new text to replace the UChars from
+ * start..limit-1.
+ * @param textLength the number of UChars at text, or -1 if text
+ * is null-terminated.
+ * @stable ICU 2.0
+ */
+ void (*replace)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ const UChar* text,
+ int32_t textLength);
+
+ /**
+ * Function pointer that copies the characters in the range
+ * [<tt>start</tt>, <tt>limit</tt>) into the array <tt>dst</tt>.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start offset of first character which will be copied
+ * into the array
+ * @param limit offset immediately following the last character to
+ * be copied
+ * @param dst array in which to copy characters. The length of
+ * <tt>dst</tt> must be at least <tt>(limit - start)</tt>.
+ * @stable ICU 2.1
+ */
+ void (*extract)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ UChar* dst);
+
+ /**
+ * Function pointer that copies text between start and limit in
+ * this text to another index in the text. Attributes (out of
+ * band info) should be retained. After this call, there will be
+ * (at least) two copies of the characters originally located at
+ * start..limit-1.
+ *
+ * @param rep A pointer to "this" UReplaceable object.
+ * @param start the starting index of the text to be copied,
+ * inclusive.
+ * @param limit the ending index of the text to be copied,
+ * exclusive.
+ * @param dest the index at which the copy of the UChars should be
+ * inserted.
+ * @stable ICU 2.0
+ */
+ void (*copy)(UReplaceable* rep,
+ int32_t start,
+ int32_t limit,
+ int32_t dest);
+
+} UReplaceableCallbacks;
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/ures.h b/intl/icu/source/common/unicode/ures.h
new file mode 100644
index 0000000000..cc25b6e49c
--- /dev/null
+++ b/intl/icu/source/common/unicode/ures.h
@@ -0,0 +1,911 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1997-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File URES.H (formerly CRESBUND.H)
+*
+* Modification History:
+*
+* Date Name Description
+* 04/01/97 aliu Creation.
+* 02/22/99 damiba overhaul.
+* 04/04/99 helena Fixed internal header inclusion.
+* 04/15/99 Madhu Updated Javadoc
+* 06/14/99 stephen Removed functions taking a filename suffix.
+* 07/20/99 stephen Language-independent typedef to void*
+* 11/09/99 weiv Added ures_getLocale()
+* 06/24/02 weiv Added support for resource sharing
+******************************************************************************
+*/
+
+#ifndef URES_H
+#define URES_H
+
+#include "unicode/utypes.h"
+#include "unicode/uloc.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ * \file
+ * \brief C API: Resource Bundle
+ *
+ * <h2>C API: Resource Bundle</h2>
+ *
+ * C API representing a collection of resource information pertaining to a given
+ * locale. A resource bundle provides a way of accessing locale- specific information in
+ * a data file. You create a resource bundle that manages the resources for a given
+ * locale and then ask it for individual resources.
+ * <P>
+ * Resource bundles in ICU4C are currently defined using text files which conform to the following
+ * <a href="https://github.com/unicode-org/icu-docs/blob/main/design/bnf_rb.txt">BNF definition</a>.
+ * More on resource bundle concepts and syntax can be found in the
+ * <a href="https://unicode-org.github.io/icu/userguide/locale/resources">Users Guide</a>.
+ * <P>
+ */
+
+/**
+ * UResourceBundle is an opaque type for handles for resource bundles in C APIs.
+ * @stable ICU 2.0
+ */
+struct UResourceBundle;
+
+/**
+ * @stable ICU 2.0
+ */
+typedef struct UResourceBundle UResourceBundle;
+
+/**
+ * Numeric constants for types of resource items.
+ * @see ures_getType
+ * @stable ICU 2.0
+ */
+typedef enum {
+ /** Resource type constant for "no resource". @stable ICU 2.6 */
+ URES_NONE=-1,
+
+ /** Resource type constant for 16-bit Unicode strings. @stable ICU 2.6 */
+ URES_STRING=0,
+
+ /** Resource type constant for binary data. @stable ICU 2.6 */
+ URES_BINARY=1,
+
+ /** Resource type constant for tables of key-value pairs. @stable ICU 2.6 */
+ URES_TABLE=2,
+
+ /**
+ * Resource type constant for aliases;
+ * internally stores a string which identifies the actual resource
+ * storing the data (can be in a different resource bundle).
+ * Resolved internally before delivering the actual resource through the API.
+ * @stable ICU 2.6
+ */
+ URES_ALIAS=3,
+
+ /**
+ * Resource type constant for a single 28-bit integer, interpreted as
+ * signed or unsigned by the ures_getInt() or ures_getUInt() function.
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.6
+ */
+ URES_INT=7,
+
+ /** Resource type constant for arrays of resources. @stable ICU 2.6 */
+ URES_ARRAY=8,
+
+ /**
+ * Resource type constant for vectors of 32-bit integers.
+ * @see ures_getIntVector
+ * @stable ICU 2.6
+ */
+ URES_INT_VECTOR = 14,
+#ifndef U_HIDE_DEPRECATED_API
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_NONE=URES_NONE,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_STRING=URES_STRING,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_BINARY=URES_BINARY,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_TABLE=URES_TABLE,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_ALIAS=URES_ALIAS,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_INT=URES_INT,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_ARRAY=URES_ARRAY,
+ /** @deprecated ICU 2.6 Use the URES_ constant instead. */
+ RES_INT_VECTOR=URES_INT_VECTOR,
+ /** @deprecated ICU 2.6 Not used. */
+ RES_RESERVED=15,
+
+ /**
+ * One more than the highest normal UResType value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ URES_LIMIT = 16
+#endif // U_HIDE_DEPRECATED_API
+} UResType;
+
+/*
+ * Functions to create and destroy resource bundles.
+ */
+
+/**
+ * Opens a UResourceBundle, from which users can extract strings by using
+ * their corresponding keys.
+ * Note that the caller is responsible of calling <TT>ures_close</TT> on each successfully
+ * opened resource bundle.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ *
+ * @param status fills in the outgoing error code.
+ * The UErrorCode err parameter is used to return status information to the user. To
+ * check whether the construction succeeded or not, you should check the value of
+ * U_SUCCESS(err). If you wish more detailed information, you can check for
+ * informational status results which still indicate success. U_USING_FALLBACK_WARNING
+ * indicates that a fall back locale was used. For example, 'de_CH' was requested,
+ * but nothing was found there, so 'de' was used. U_USING_DEFAULT_WARNING indicates that
+ * the default locale data or root locale data was used; neither the requested locale
+ * nor any of its fall back locales could be found. Please see the users guide for more
+ * information on this topic.
+ * @return a newly allocated resource bundle.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_open(const char* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+
+/** This function does not care what kind of localeID is passed in. It simply opens a bundle with
+ * that name. Fallback mechanism is disabled for the new bundle. If the requested bundle contains
+ * an %%ALIAS directive, the results are undefined.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ *
+ * @param status fills in the outgoing error code. Either U_ZERO_ERROR or U_MISSING_RESOURCE_ERROR
+ * @return a newly allocated resource bundle or NULL if it doesn't exist.
+ * @see ures_close
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openDirect(const char* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+/**
+ * Same as ures_open() but takes a const UChar *path.
+ * This path will be converted to char * using the default converter,
+ * then ures_open() is called.
+ *
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param locale specifies the locale for which we want to open the resource
+ * if NULL, the default locale will be used. If strlen(locale) == 0
+ * root locale will be used.
+ * @param status fills in the outgoing error code.
+ * @return a newly allocated resource bundle.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_openU(const UChar* packageName,
+ const char* locale,
+ UErrorCode* status);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Returns the number of strings/arrays in resource bundles.
+ * Better to use ures_getSize, as this function will be deprecated.
+ *
+ *@param resourceBundle resource bundle containing the desired strings
+ *@param resourceKey key tagging the resource
+ *@param err fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_FALLBACK_WARNING </TT>
+ *@return: for <STRONG>Arrays</STRONG>: returns the number of resources in the array
+ * <STRONG>Tables</STRONG>: returns the number of resources in the table
+ * <STRONG>single string</STRONG>: returns 1
+ *@see ures_getSize
+ * @deprecated ICU 2.8 User ures_getSize instead
+ */
+U_DEPRECATED int32_t U_EXPORT2
+ures_countArrayItems(const UResourceBundle* resourceBundle,
+ const char* resourceKey,
+ UErrorCode* err);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Close a resource bundle, all pointers returned from the various ures_getXXX calls
+ * on this particular bundle should be considered invalid henceforth.
+ *
+ * @param resourceBundle a pointer to a resourceBundle struct. Can be NULL.
+ * @see ures_open
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ures_close(UResourceBundle* resourceBundle);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUResourceBundlePointer
+ * "Smart pointer" class, closes a UResourceBundle via ures_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUResourceBundlePointer, UResourceBundle, ures_close);
+
+U_NAMESPACE_END
+
+#endif
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Return the version number associated with this ResourceBundle as a string. Please
+ * use ures_getVersion as this function is going to be deprecated.
+ *
+ * @param resourceBundle The resource bundle for which the version is checked.
+ * @return A version number string as specified in the resource bundle or its parent.
+ * The caller does not own this string.
+ * @see ures_getVersion
+ * @deprecated ICU 2.8 Use ures_getVersion instead.
+ */
+U_DEPRECATED const char* U_EXPORT2
+ures_getVersionNumber(const UResourceBundle* resourceBundle);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Return the version number associated with this ResourceBundle as an
+ * UVersionInfo array.
+ *
+ * @param resB The resource bundle for which the version is checked.
+ * @param versionInfo A UVersionInfo array that is filled with the version number
+ * as specified in the resource bundle or its parent.
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ures_getVersion(const UResourceBundle* resB,
+ UVersionInfo versionInfo);
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Return the name of the Locale associated with this ResourceBundle. This API allows
+ * you to query for the real locale of the resource. For example, if you requested
+ * "en_US_CALIFORNIA" and only "en_US" bundle exists, "en_US" will be returned.
+ * For subresources, the locale where this resource comes from will be returned.
+ * If fallback has occurred, getLocale will reflect this.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @deprecated ICU 2.8 Use ures_getLocaleByType instead.
+ */
+U_DEPRECATED const char* U_EXPORT2
+ures_getLocale(const UResourceBundle* resourceBundle,
+ UErrorCode* status);
+#endif /* U_HIDE_DEPRECATED_API */
+
+/**
+ * Return the name of the Locale associated with this ResourceBundle.
+ * You can choose between requested, valid and real locale.
+ *
+ * @param resourceBundle resource bundle in question
+ * @param type You can choose between requested, valid and actual
+ * locale. For description see the definition of
+ * ULocDataLocaleType in uloc.h
+ * @param status just for catching illegal arguments
+ * @return A Locale name
+ * @stable ICU 2.8
+ */
+U_CAPI const char* U_EXPORT2
+ures_getLocaleByType(const UResourceBundle* resourceBundle,
+ ULocDataLocaleType type,
+ UErrorCode* status);
+
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Same as ures_open() but uses the fill-in parameter instead of allocating a new bundle.
+ *
+ * TODO need to revisit usefulness of this function
+ * and usage model for fillIn parameters without knowing sizeof(UResourceBundle)
+ * @param r The existing UResourceBundle to fill in. If NULL then status will be
+ * set to U_ILLEGAL_ARGUMENT_ERROR.
+ * @param packageName The packageName and locale together point to an ICU udata object,
+ * as defined by <code> udata_open( packageName, "res", locale, err) </code>
+ * or equivalent. Typically, packageName will refer to a (.dat) file, or to
+ * a package registered with udata_setAppData(). Using a full file or directory
+ * pathname for packageName is deprecated. If NULL, ICU data will be used.
+ * @param localeID specifies the locale for which we want to open the resource
+ * @param status The error code.
+ * @internal
+ */
+U_CAPI void U_EXPORT2
+ures_openFillIn(UResourceBundle *r,
+ const char* packageName,
+ const char* localeID,
+ UErrorCode* status);
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * Returns a string from a string resource type
+ *
+ * @param resourceBundle a string resource
+ * @param len fills in the length of resulting string
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getString(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns a UTF-8 string from a string resource.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==true, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==false, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param length Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If true, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If false, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getString
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8String(const UResourceBundle *resB,
+ char *dest, int32_t *length,
+ UBool forceCopy,
+ UErrorCode *status);
+
+/**
+ * Returns a binary data from a binary resource.
+ *
+ * @param resourceBundle a string resource
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of unsigned bytes which live in a memory mapped/DLL file.
+ * @see ures_getString
+ * @see ures_getIntVector
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_CAPI const uint8_t* U_EXPORT2
+ures_getBinary(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns a 32 bit integer array from a resource.
+ *
+ * @param resourceBundle an int vector resource
+ * @param len fills in the length of resulting byte chunk
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * Always check the value of status. Don't count on returning NULL.
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return a pointer to a chunk of integers which live in a memory mapped/DLL file.
+ * @see ures_getBinary
+ * @see ures_getString
+ * @see ures_getInt
+ * @see ures_getUInt
+ * @stable ICU 2.0
+ */
+U_CAPI const int32_t* U_EXPORT2
+ures_getIntVector(const UResourceBundle* resourceBundle,
+ int32_t* len,
+ UErrorCode* status);
+
+/**
+ * Returns an unsigned integer from a resource.
+ * This integer is originally 28 bits.
+ *
+ * @param resourceBundle a string resource
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an integer value
+ * @see ures_getInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_CAPI uint32_t U_EXPORT2
+ures_getUInt(const UResourceBundle* resourceBundle,
+ UErrorCode *status);
+
+/**
+ * Returns a signed integer from a resource.
+ * This integer is originally 28 bit and the sign gets propagated.
+ *
+ * @param resourceBundle a string resource
+ * @param status fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return an integer value
+ * @see ures_getUInt
+ * @see ures_getIntVector
+ * @see ures_getBinary
+ * @see ures_getString
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ures_getInt(const UResourceBundle* resourceBundle,
+ UErrorCode *status);
+
+/**
+ * Returns the size of a resource. Size for scalar types is always 1,
+ * and for vector/table types is the number of child resources.
+ * @warning Integer array is treated as a scalar type. There are no
+ * APIs to access individual members of an integer array. It
+ * is always returned as a whole.
+ * @param resourceBundle a resource
+ * @return number of resources in a given resource.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+ures_getSize(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the type of a resource. Available types are defined in enum UResType
+ *
+ * @param resourceBundle a resource
+ * @return type of the given resource.
+ * @see UResType
+ * @stable ICU 2.0
+ */
+U_CAPI UResType U_EXPORT2
+ures_getType(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the key associated with a given resource. Not all the resources have a key - only
+ * those that are members of a table.
+ *
+ * @param resourceBundle a resource
+ * @return a key associated to this resource, or NULL if it doesn't have a key
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+ures_getKey(const UResourceBundle *resourceBundle);
+
+/* ITERATION API
+ This API provides means for iterating through a resource
+*/
+
+/**
+ * Resets the internal context of a resource so that iteration starts from the first element.
+ *
+ * @param resourceBundle a resource
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+ures_resetIterator(UResourceBundle *resourceBundle);
+
+/**
+ * Checks whether the given resource has another element to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @return true if there are more elements, false if there is no more elements
+ * @stable ICU 2.0
+ */
+U_CAPI UBool U_EXPORT2
+ures_hasNext(const UResourceBundle *resourceBundle);
+
+/**
+ * Returns the next resource in a given resource or NULL if there are no more resources
+ * to iterate over. Features a fill-in parameter.
+ *
+ * @param resourceBundle a resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code. You may still get a non NULL result even if an
+ * error occurred. Check status instead.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getNextResource(UResourceBundle *resourceBundle,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns the next string in a given resource or NULL if there are no more resources
+ * to iterate over.
+ *
+ * @param resourceBundle a resource
+ * @param len fill in length of the string
+ * @param key fill in for key associated with this string. NULL if no key
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getNextString(UResourceBundle *resourceBundle,
+ int32_t* len,
+ const char ** key,
+ UErrorCode *status);
+
+/**
+ * Returns the resource in a given resource at the specified index. Features a fill-in parameter.
+ *
+ * @param resourceBundle the resource bundle from which to get a sub-resource
+ * @param indexR an index to the wanted resource.
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code. Don't count on NULL being returned if an error has
+ * occurred. Check status instead.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getByIndex(const UResourceBundle *resourceBundle,
+ int32_t indexR,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns the string in a given resource at the specified index.
+ *
+ * @param resourceBundle a resource
+ * @param indexS an index to the wanted string.
+ * @param len fill in length of the string
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getStringByIndex(const UResourceBundle *resourceBundle,
+ int32_t indexS,
+ int32_t* len,
+ UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource at the specified index.
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==true, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==false, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param stringIndex An index to the wanted string.
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If true, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If false, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByIndex
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8StringByIndex(const UResourceBundle *resB,
+ int32_t stringIndex,
+ char *dest, int32_t *pLength,
+ UBool forceCopy,
+ UErrorCode *status);
+
+/**
+ * Returns a resource in a given resource that has a given key. This procedure works only with table
+ * resources. Features a fill-in parameter.
+ *
+ * @param resourceBundle a resource
+ * @param key a key associated with the wanted resource
+ * @param fillIn if NULL a new UResourceBundle struct is allocated and must be closed by the caller.
+ * Alternatively, you can supply a struct to be filled by this function.
+ * @param status fills in the outgoing error code.
+ * @return a pointer to a UResourceBundle struct. If fill in param was NULL, caller must close it
+ * @stable ICU 2.0
+ */
+U_CAPI UResourceBundle* U_EXPORT2
+ures_getByKey(const UResourceBundle *resourceBundle,
+ const char* key,
+ UResourceBundle *fillIn,
+ UErrorCode *status);
+
+/**
+ * Returns a string in a given resource that has a given key. This procedure works only with table
+ * resources.
+ *
+ * @param resB a resource
+ * @param key a key associated with the wanted string
+ * @param len fill in length of the string
+ * @param status fills in the outgoing error code. If an error occurred, we may return NULL, but don't
+ * count on it. Check status instead!
+ * @return a pointer to a zero-terminated UChar array which lives in a memory mapped/DLL file.
+ * @stable ICU 2.0
+ */
+U_CAPI const UChar* U_EXPORT2
+ures_getStringByKey(const UResourceBundle *resB,
+ const char* key,
+ int32_t* len,
+ UErrorCode *status);
+
+/**
+ * Returns a UTF-8 string from a resource and a key.
+ * This function works only with table resources.
+ *
+ * The UTF-8 string may be returnable directly as a pointer, or
+ * it may need to be copied, or transformed from UTF-16 using u_strToUTF8()
+ * or equivalent.
+ *
+ * If forceCopy==true, then the string is always written to the dest buffer
+ * and dest is returned.
+ *
+ * If forceCopy==false, then the string is returned as a pointer if possible,
+ * without needing a dest buffer (it can be NULL). If the string needs to be
+ * copied or transformed, then it may be placed into dest at an arbitrary offset.
+ *
+ * If the string is to be written to dest, then U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING are set if appropriate, as usual.
+ *
+ * If the string is transformed from UTF-16, then a conversion error may occur
+ * if an unpaired surrogate is encountered. If the function is successful, then
+ * the output UTF-8 string is always well-formed.
+ *
+ * @param resB Resource bundle.
+ * @param key A key associated with the wanted resource
+ * @param dest Destination buffer. Can be NULL only if capacity=*length==0.
+ * @param pLength Input: Capacity of destination buffer.
+ * Output: Actual length of the UTF-8 string, not counting the
+ * terminating NUL, even in case of U_BUFFER_OVERFLOW_ERROR.
+ * Can be NULL, meaning capacity=0 and the string length is not
+ * returned to the caller.
+ * @param forceCopy If true, then the output string will always be written to
+ * dest, with U_BUFFER_OVERFLOW_ERROR and
+ * U_STRING_NOT_TERMINATED_WARNING set if appropriate.
+ * If false, then the dest buffer may or may not contain a
+ * copy of the string. dest may or may not be modified.
+ * If a copy needs to be written, then the UErrorCode parameter
+ * indicates overflow etc. as usual.
+ * @param status Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to the UTF-8 string. It may be dest, or at some offset
+ * from dest (only if !forceCopy), or in unrelated memory.
+ * Always NUL-terminated unless the string was written to dest and
+ * length==capacity (in which case U_STRING_NOT_TERMINATED_WARNING is set).
+ *
+ * @see ures_getStringByKey
+ * @see u_strToUTF8
+ * @stable ICU 3.6
+ */
+U_CAPI const char * U_EXPORT2
+ures_getUTF8StringByKey(const UResourceBundle *resB,
+ const char *key,
+ char *dest, int32_t *pLength,
+ UBool forceCopy,
+ UErrorCode *status);
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/unistr.h"
+
+U_NAMESPACE_BEGIN
+/**
+ * Returns the string value from a string resource bundle.
+ *
+ * @param resB a resource, should have type URES_STRING
+ * @param status: fills in the outgoing error code
+ * could be <TT>U_MISSING_RESOURCE_ERROR</TT> if the key is not found
+ * could be a non-failing error
+ * e.g.: <TT>U_USING_FALLBACK_WARNING</TT>,<TT>U_USING_DEFAULT_WARNING </TT>
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeString(const UResourceBundle *resB, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const char16_t *r = ures_getString(resB, &len, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len);
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+/**
+ * Returns the next string in a resource, or an empty string if there are no more resources
+ * to iterate over.
+ * Use ures_getNextString() instead to distinguish between
+ * the end of the iteration and a real empty string value.
+ *
+ * @param resB a resource
+ * @param key fill in for key associated with this string
+ * @param status fills in the outgoing error code
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getNextUnicodeString(UResourceBundle *resB, const char ** key, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const char16_t* r = ures_getNextString(resB, &len, key, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len);
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+/**
+ * Returns the string in a given resource array or table at the specified index.
+ *
+ * @param resB a resource
+ * @param indexS an index to the wanted string.
+ * @param status fills in the outgoing error code
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeStringByIndex(const UResourceBundle *resB, int32_t indexS, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const char16_t* r = ures_getStringByIndex(resB, indexS, &len, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len);
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+/**
+ * Returns a string in a resource that has a given key.
+ * This procedure works only with table resources.
+ *
+ * @param resB a resource
+ * @param key a key associated with the wanted string
+ * @param status fills in the outgoing error code
+ * @return The string value, or a bogus string if there is a failure UErrorCode.
+ * @stable ICU 2.0
+ */
+inline UnicodeString
+ures_getUnicodeStringByKey(const UResourceBundle *resB, const char* key, UErrorCode* status) {
+ UnicodeString result;
+ int32_t len = 0;
+ const char16_t* r = ures_getStringByKey(resB, key, &len, status);
+ if(U_SUCCESS(*status)) {
+ result.setTo(true, r, len);
+ } else {
+ result.setToBogus();
+ }
+ return result;
+}
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Create a string enumerator, owned by the caller, of all locales located within
+ * the specified resource tree.
+ * @param packageName name of the tree, such as (NULL) or U_ICUDATA_ALIAS or or "ICUDATA-coll"
+ * This call is similar to uloc_getAvailable().
+ * @param status error code
+ * @stable ICU 3.2
+ */
+U_CAPI UEnumeration* U_EXPORT2
+ures_openAvailableLocales(const char *packageName, UErrorCode *status);
+
+
+#endif /*_URES*/
+/*eof*/
diff --git a/intl/icu/source/common/unicode/uscript.h b/intl/icu/source/common/unicode/uscript.h
new file mode 100644
index 0000000000..dc97ab2ba5
--- /dev/null
+++ b/intl/icu/source/common/unicode/uscript.h
@@ -0,0 +1,724 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ **********************************************************************
+ * Copyright (C) 1997-2016, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ **********************************************************************
+ *
+ * File USCRIPT.H
+ *
+ * Modification History:
+ *
+ * Date Name Description
+ * 07/06/2001 Ram Creation.
+ ******************************************************************************
+ */
+
+#ifndef USCRIPT_H
+#define USCRIPT_H
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Unicode Script Information
+ */
+
+/**
+ * Constants for ISO 15924 script codes.
+ *
+ * The current set of script code constants supports at least all scripts
+ * that are encoded in the version of Unicode which ICU currently supports.
+ * The names of the constants are usually derived from the
+ * Unicode script property value aliases.
+ * See UAX #24 Unicode Script Property (http://www.unicode.org/reports/tr24/)
+ * and http://www.unicode.org/Public/UCD/latest/ucd/PropertyValueAliases.txt .
+ *
+ * In addition, constants for many ISO 15924 script codes
+ * are included, for use with language tags, CLDR data, and similar.
+ * Some of those codes are not used in the Unicode Character Database (UCD).
+ * For example, there are no characters that have a UCD script property value of
+ * Hans or Hant. All Han ideographs have the Hani script property value in Unicode.
+ *
+ * Private-use codes Qaaa..Qabx are not included, except as used in the UCD or in CLDR.
+ *
+ * Starting with ICU 55, script codes are only added when their scripts
+ * have been or will certainly be encoded in Unicode,
+ * and have been assigned Unicode script property value aliases,
+ * to ensure that their script names are stable and match the names of the constants.
+ * Script codes like Latf and Aran that are not subject to separate encoding
+ * may be added at any time.
+ *
+ * @stable ICU 2.2
+ */
+typedef enum UScriptCode {
+ /*
+ * Note: UScriptCode constants and their ISO script code comments
+ * are parsed by preparseucd.py.
+ * It matches lines like
+ * USCRIPT_<Unicode Script value name> = <integer>, / * <ISO script code> * /
+ */
+
+ /** @stable ICU 2.2 */
+ USCRIPT_INVALID_CODE = -1,
+ /** @stable ICU 2.2 */
+ USCRIPT_COMMON = 0, /* Zyyy */
+ /** @stable ICU 2.2 */
+ USCRIPT_INHERITED = 1, /* Zinh */ /* "Code for inherited script", for non-spacing combining marks; also Qaai */
+ /** @stable ICU 2.2 */
+ USCRIPT_ARABIC = 2, /* Arab */
+ /** @stable ICU 2.2 */
+ USCRIPT_ARMENIAN = 3, /* Armn */
+ /** @stable ICU 2.2 */
+ USCRIPT_BENGALI = 4, /* Beng */
+ /** @stable ICU 2.2 */
+ USCRIPT_BOPOMOFO = 5, /* Bopo */
+ /** @stable ICU 2.2 */
+ USCRIPT_CHEROKEE = 6, /* Cher */
+ /** @stable ICU 2.2 */
+ USCRIPT_COPTIC = 7, /* Copt */
+ /** @stable ICU 2.2 */
+ USCRIPT_CYRILLIC = 8, /* Cyrl */
+ /** @stable ICU 2.2 */
+ USCRIPT_DESERET = 9, /* Dsrt */
+ /** @stable ICU 2.2 */
+ USCRIPT_DEVANAGARI = 10, /* Deva */
+ /** @stable ICU 2.2 */
+ USCRIPT_ETHIOPIC = 11, /* Ethi */
+ /** @stable ICU 2.2 */
+ USCRIPT_GEORGIAN = 12, /* Geor */
+ /** @stable ICU 2.2 */
+ USCRIPT_GOTHIC = 13, /* Goth */
+ /** @stable ICU 2.2 */
+ USCRIPT_GREEK = 14, /* Grek */
+ /** @stable ICU 2.2 */
+ USCRIPT_GUJARATI = 15, /* Gujr */
+ /** @stable ICU 2.2 */
+ USCRIPT_GURMUKHI = 16, /* Guru */
+ /** @stable ICU 2.2 */
+ USCRIPT_HAN = 17, /* Hani */
+ /** @stable ICU 2.2 */
+ USCRIPT_HANGUL = 18, /* Hang */
+ /** @stable ICU 2.2 */
+ USCRIPT_HEBREW = 19, /* Hebr */
+ /** @stable ICU 2.2 */
+ USCRIPT_HIRAGANA = 20, /* Hira */
+ /** @stable ICU 2.2 */
+ USCRIPT_KANNADA = 21, /* Knda */
+ /** @stable ICU 2.2 */
+ USCRIPT_KATAKANA = 22, /* Kana */
+ /** @stable ICU 2.2 */
+ USCRIPT_KHMER = 23, /* Khmr */
+ /** @stable ICU 2.2 */
+ USCRIPT_LAO = 24, /* Laoo */
+ /** @stable ICU 2.2 */
+ USCRIPT_LATIN = 25, /* Latn */
+ /** @stable ICU 2.2 */
+ USCRIPT_MALAYALAM = 26, /* Mlym */
+ /** @stable ICU 2.2 */
+ USCRIPT_MONGOLIAN = 27, /* Mong */
+ /** @stable ICU 2.2 */
+ USCRIPT_MYANMAR = 28, /* Mymr */
+ /** @stable ICU 2.2 */
+ USCRIPT_OGHAM = 29, /* Ogam */
+ /** @stable ICU 2.2 */
+ USCRIPT_OLD_ITALIC = 30, /* Ital */
+ /** @stable ICU 2.2 */
+ USCRIPT_ORIYA = 31, /* Orya */
+ /** @stable ICU 2.2 */
+ USCRIPT_RUNIC = 32, /* Runr */
+ /** @stable ICU 2.2 */
+ USCRIPT_SINHALA = 33, /* Sinh */
+ /** @stable ICU 2.2 */
+ USCRIPT_SYRIAC = 34, /* Syrc */
+ /** @stable ICU 2.2 */
+ USCRIPT_TAMIL = 35, /* Taml */
+ /** @stable ICU 2.2 */
+ USCRIPT_TELUGU = 36, /* Telu */
+ /** @stable ICU 2.2 */
+ USCRIPT_THAANA = 37, /* Thaa */
+ /** @stable ICU 2.2 */
+ USCRIPT_THAI = 38, /* Thai */
+ /** @stable ICU 2.2 */
+ USCRIPT_TIBETAN = 39, /* Tibt */
+ /** Canadian_Aboriginal script. @stable ICU 2.6 */
+ USCRIPT_CANADIAN_ABORIGINAL = 40, /* Cans */
+ /** Canadian_Aboriginal script (alias). @stable ICU 2.2 */
+ USCRIPT_UCAS = USCRIPT_CANADIAN_ABORIGINAL,
+ /** @stable ICU 2.2 */
+ USCRIPT_YI = 41, /* Yiii */
+ /* New scripts in Unicode 3.2 */
+ /** @stable ICU 2.2 */
+ USCRIPT_TAGALOG = 42, /* Tglg */
+ /** @stable ICU 2.2 */
+ USCRIPT_HANUNOO = 43, /* Hano */
+ /** @stable ICU 2.2 */
+ USCRIPT_BUHID = 44, /* Buhd */
+ /** @stable ICU 2.2 */
+ USCRIPT_TAGBANWA = 45, /* Tagb */
+
+ /* New scripts in Unicode 4 */
+ /** @stable ICU 2.6 */
+ USCRIPT_BRAILLE = 46, /* Brai */
+ /** @stable ICU 2.6 */
+ USCRIPT_CYPRIOT = 47, /* Cprt */
+ /** @stable ICU 2.6 */
+ USCRIPT_LIMBU = 48, /* Limb */
+ /** @stable ICU 2.6 */
+ USCRIPT_LINEAR_B = 49, /* Linb */
+ /** @stable ICU 2.6 */
+ USCRIPT_OSMANYA = 50, /* Osma */
+ /** @stable ICU 2.6 */
+ USCRIPT_SHAVIAN = 51, /* Shaw */
+ /** @stable ICU 2.6 */
+ USCRIPT_TAI_LE = 52, /* Tale */
+ /** @stable ICU 2.6 */
+ USCRIPT_UGARITIC = 53, /* Ugar */
+
+ /** New script code in Unicode 4.0.1 @stable ICU 3.0 */
+ USCRIPT_KATAKANA_OR_HIRAGANA = 54,/*Hrkt */
+
+ /* New scripts in Unicode 4.1 */
+ /** @stable ICU 3.4 */
+ USCRIPT_BUGINESE = 55, /* Bugi */
+ /** @stable ICU 3.4 */
+ USCRIPT_GLAGOLITIC = 56, /* Glag */
+ /** @stable ICU 3.4 */
+ USCRIPT_KHAROSHTHI = 57, /* Khar */
+ /** @stable ICU 3.4 */
+ USCRIPT_SYLOTI_NAGRI = 58, /* Sylo */
+ /** @stable ICU 3.4 */
+ USCRIPT_NEW_TAI_LUE = 59, /* Talu */
+ /** @stable ICU 3.4 */
+ USCRIPT_TIFINAGH = 60, /* Tfng */
+ /** @stable ICU 3.4 */
+ USCRIPT_OLD_PERSIAN = 61, /* Xpeo */
+
+ /* New script codes from Unicode and ISO 15924 */
+ /** @stable ICU 3.6 */
+ USCRIPT_BALINESE = 62, /* Bali */
+ /** @stable ICU 3.6 */
+ USCRIPT_BATAK = 63, /* Batk */
+ /** @stable ICU 3.6 */
+ USCRIPT_BLISSYMBOLS = 64, /* Blis */
+ /** @stable ICU 3.6 */
+ USCRIPT_BRAHMI = 65, /* Brah */
+ /** @stable ICU 3.6 */
+ USCRIPT_CHAM = 66, /* Cham */
+ /** @stable ICU 3.6 */
+ USCRIPT_CIRTH = 67, /* Cirt */
+ /** @stable ICU 3.6 */
+ USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC = 68, /* Cyrs */
+ /** @stable ICU 3.6 */
+ USCRIPT_DEMOTIC_EGYPTIAN = 69, /* Egyd */
+ /** @stable ICU 3.6 */
+ USCRIPT_HIERATIC_EGYPTIAN = 70, /* Egyh */
+ /** @stable ICU 3.6 */
+ USCRIPT_EGYPTIAN_HIEROGLYPHS = 71, /* Egyp */
+ /** @stable ICU 3.6 */
+ USCRIPT_KHUTSURI = 72, /* Geok */
+ /** @stable ICU 3.6 */
+ USCRIPT_SIMPLIFIED_HAN = 73, /* Hans */
+ /** @stable ICU 3.6 */
+ USCRIPT_TRADITIONAL_HAN = 74, /* Hant */
+ /** @stable ICU 3.6 */
+ USCRIPT_PAHAWH_HMONG = 75, /* Hmng */
+ /** @stable ICU 3.6 */
+ USCRIPT_OLD_HUNGARIAN = 76, /* Hung */
+ /** @stable ICU 3.6 */
+ USCRIPT_HARAPPAN_INDUS = 77, /* Inds */
+ /** @stable ICU 3.6 */
+ USCRIPT_JAVANESE = 78, /* Java */
+ /** @stable ICU 3.6 */
+ USCRIPT_KAYAH_LI = 79, /* Kali */
+ /** @stable ICU 3.6 */
+ USCRIPT_LATIN_FRAKTUR = 80, /* Latf */
+ /** @stable ICU 3.6 */
+ USCRIPT_LATIN_GAELIC = 81, /* Latg */
+ /** @stable ICU 3.6 */
+ USCRIPT_LEPCHA = 82, /* Lepc */
+ /** @stable ICU 3.6 */
+ USCRIPT_LINEAR_A = 83, /* Lina */
+ /** @stable ICU 4.6 */
+ USCRIPT_MANDAIC = 84, /* Mand */
+ /** @stable ICU 3.6 */
+ USCRIPT_MANDAEAN = USCRIPT_MANDAIC,
+ /** @stable ICU 3.6 */
+ USCRIPT_MAYAN_HIEROGLYPHS = 85, /* Maya */
+ /** @stable ICU 4.6 */
+ USCRIPT_MEROITIC_HIEROGLYPHS = 86, /* Mero */
+ /** @stable ICU 3.6 */
+ USCRIPT_MEROITIC = USCRIPT_MEROITIC_HIEROGLYPHS,
+ /** @stable ICU 3.6 */
+ USCRIPT_NKO = 87, /* Nkoo */
+ /** @stable ICU 3.6 */
+ USCRIPT_ORKHON = 88, /* Orkh */
+ /** @stable ICU 3.6 */
+ USCRIPT_OLD_PERMIC = 89, /* Perm */
+ /** @stable ICU 3.6 */
+ USCRIPT_PHAGS_PA = 90, /* Phag */
+ /** @stable ICU 3.6 */
+ USCRIPT_PHOENICIAN = 91, /* Phnx */
+ /** @stable ICU 52 */
+ USCRIPT_MIAO = 92, /* Plrd */
+ /** @stable ICU 3.6 */
+ USCRIPT_PHONETIC_POLLARD = USCRIPT_MIAO,
+ /** @stable ICU 3.6 */
+ USCRIPT_RONGORONGO = 93, /* Roro */
+ /** @stable ICU 3.6 */
+ USCRIPT_SARATI = 94, /* Sara */
+ /** @stable ICU 3.6 */
+ USCRIPT_ESTRANGELO_SYRIAC = 95, /* Syre */
+ /** @stable ICU 3.6 */
+ USCRIPT_WESTERN_SYRIAC = 96, /* Syrj */
+ /** @stable ICU 3.6 */
+ USCRIPT_EASTERN_SYRIAC = 97, /* Syrn */
+ /** @stable ICU 3.6 */
+ USCRIPT_TENGWAR = 98, /* Teng */
+ /** @stable ICU 3.6 */
+ USCRIPT_VAI = 99, /* Vaii */
+ /** @stable ICU 3.6 */
+ USCRIPT_VISIBLE_SPEECH = 100,/* Visp */
+ /** @stable ICU 3.6 */
+ USCRIPT_CUNEIFORM = 101,/* Xsux */
+ /** @stable ICU 3.6 */
+ USCRIPT_UNWRITTEN_LANGUAGES = 102,/* Zxxx */
+ /** @stable ICU 3.6 */
+ USCRIPT_UNKNOWN = 103,/* Zzzz */ /* Unknown="Code for uncoded script", for unassigned code points */
+
+ /** @stable ICU 3.8 */
+ USCRIPT_CARIAN = 104,/* Cari */
+ /** @stable ICU 3.8 */
+ USCRIPT_JAPANESE = 105,/* Jpan */
+ /** @stable ICU 3.8 */
+ USCRIPT_LANNA = 106,/* Lana */
+ /** @stable ICU 3.8 */
+ USCRIPT_LYCIAN = 107,/* Lyci */
+ /** @stable ICU 3.8 */
+ USCRIPT_LYDIAN = 108,/* Lydi */
+ /** @stable ICU 3.8 */
+ USCRIPT_OL_CHIKI = 109,/* Olck */
+ /** @stable ICU 3.8 */
+ USCRIPT_REJANG = 110,/* Rjng */
+ /** @stable ICU 3.8 */
+ USCRIPT_SAURASHTRA = 111,/* Saur */
+ /** Sutton SignWriting @stable ICU 3.8 */
+ USCRIPT_SIGN_WRITING = 112,/* Sgnw */
+ /** @stable ICU 3.8 */
+ USCRIPT_SUNDANESE = 113,/* Sund */
+ /** @stable ICU 3.8 */
+ USCRIPT_MOON = 114,/* Moon */
+ /** @stable ICU 3.8 */
+ USCRIPT_MEITEI_MAYEK = 115,/* Mtei */
+
+ /** @stable ICU 4.0 */
+ USCRIPT_IMPERIAL_ARAMAIC = 116,/* Armi */
+ /** @stable ICU 4.0 */
+ USCRIPT_AVESTAN = 117,/* Avst */
+ /** @stable ICU 4.0 */
+ USCRIPT_CHAKMA = 118,/* Cakm */
+ /** @stable ICU 4.0 */
+ USCRIPT_KOREAN = 119,/* Kore */
+ /** @stable ICU 4.0 */
+ USCRIPT_KAITHI = 120,/* Kthi */
+ /** @stable ICU 4.0 */
+ USCRIPT_MANICHAEAN = 121,/* Mani */
+ /** @stable ICU 4.0 */
+ USCRIPT_INSCRIPTIONAL_PAHLAVI = 122,/* Phli */
+ /** @stable ICU 4.0 */
+ USCRIPT_PSALTER_PAHLAVI = 123,/* Phlp */
+ /** @stable ICU 4.0 */
+ USCRIPT_BOOK_PAHLAVI = 124,/* Phlv */
+ /** @stable ICU 4.0 */
+ USCRIPT_INSCRIPTIONAL_PARTHIAN = 125,/* Prti */
+ /** @stable ICU 4.0 */
+ USCRIPT_SAMARITAN = 126,/* Samr */
+ /** @stable ICU 4.0 */
+ USCRIPT_TAI_VIET = 127,/* Tavt */
+ /** @stable ICU 4.0 */
+ USCRIPT_MATHEMATICAL_NOTATION = 128,/* Zmth */
+ /** @stable ICU 4.0 */
+ USCRIPT_SYMBOLS = 129,/* Zsym */
+
+ /** @stable ICU 4.4 */
+ USCRIPT_BAMUM = 130,/* Bamu */
+ /** @stable ICU 4.4 */
+ USCRIPT_LISU = 131,/* Lisu */
+ /** @stable ICU 4.4 */
+ USCRIPT_NAKHI_GEBA = 132,/* Nkgb */
+ /** @stable ICU 4.4 */
+ USCRIPT_OLD_SOUTH_ARABIAN = 133,/* Sarb */
+
+ /** @stable ICU 4.6 */
+ USCRIPT_BASSA_VAH = 134,/* Bass */
+ /** @stable ICU 54 */
+ USCRIPT_DUPLOYAN = 135,/* Dupl */
+#ifndef U_HIDE_DEPRECATED_API
+ /** @deprecated ICU 54 Typo, use USCRIPT_DUPLOYAN */
+ USCRIPT_DUPLOYAN_SHORTAND = USCRIPT_DUPLOYAN,
+#endif /* U_HIDE_DEPRECATED_API */
+ /** @stable ICU 4.6 */
+ USCRIPT_ELBASAN = 136,/* Elba */
+ /** @stable ICU 4.6 */
+ USCRIPT_GRANTHA = 137,/* Gran */
+ /** @stable ICU 4.6 */
+ USCRIPT_KPELLE = 138,/* Kpel */
+ /** @stable ICU 4.6 */
+ USCRIPT_LOMA = 139,/* Loma */
+ /** Mende Kikakui @stable ICU 4.6 */
+ USCRIPT_MENDE = 140,/* Mend */
+ /** @stable ICU 4.6 */
+ USCRIPT_MEROITIC_CURSIVE = 141,/* Merc */
+ /** @stable ICU 4.6 */
+ USCRIPT_OLD_NORTH_ARABIAN = 142,/* Narb */
+ /** @stable ICU 4.6 */
+ USCRIPT_NABATAEAN = 143,/* Nbat */
+ /** @stable ICU 4.6 */
+ USCRIPT_PALMYRENE = 144,/* Palm */
+ /** @stable ICU 54 */
+ USCRIPT_KHUDAWADI = 145,/* Sind */
+ /** @stable ICU 4.6 */
+ USCRIPT_SINDHI = USCRIPT_KHUDAWADI,
+ /** @stable ICU 4.6 */
+ USCRIPT_WARANG_CITI = 146,/* Wara */
+
+ /** @stable ICU 4.8 */
+ USCRIPT_AFAKA = 147,/* Afak */
+ /** @stable ICU 4.8 */
+ USCRIPT_JURCHEN = 148,/* Jurc */
+ /** @stable ICU 4.8 */
+ USCRIPT_MRO = 149,/* Mroo */
+ /** @stable ICU 4.8 */
+ USCRIPT_NUSHU = 150,/* Nshu */
+ /** @stable ICU 4.8 */
+ USCRIPT_SHARADA = 151,/* Shrd */
+ /** @stable ICU 4.8 */
+ USCRIPT_SORA_SOMPENG = 152,/* Sora */
+ /** @stable ICU 4.8 */
+ USCRIPT_TAKRI = 153,/* Takr */
+ /** @stable ICU 4.8 */
+ USCRIPT_TANGUT = 154,/* Tang */
+ /** @stable ICU 4.8 */
+ USCRIPT_WOLEAI = 155,/* Wole */
+
+ /** @stable ICU 49 */
+ USCRIPT_ANATOLIAN_HIEROGLYPHS = 156,/* Hluw */
+ /** @stable ICU 49 */
+ USCRIPT_KHOJKI = 157,/* Khoj */
+ /** @stable ICU 49 */
+ USCRIPT_TIRHUTA = 158,/* Tirh */
+
+ /** @stable ICU 52 */
+ USCRIPT_CAUCASIAN_ALBANIAN = 159,/* Aghb */
+ /** @stable ICU 52 */
+ USCRIPT_MAHAJANI = 160,/* Mahj */
+
+ /** @stable ICU 54 */
+ USCRIPT_AHOM = 161,/* Ahom */
+ /** @stable ICU 54 */
+ USCRIPT_HATRAN = 162,/* Hatr */
+ /** @stable ICU 54 */
+ USCRIPT_MODI = 163,/* Modi */
+ /** @stable ICU 54 */
+ USCRIPT_MULTANI = 164,/* Mult */
+ /** @stable ICU 54 */
+ USCRIPT_PAU_CIN_HAU = 165,/* Pauc */
+ /** @stable ICU 54 */
+ USCRIPT_SIDDHAM = 166,/* Sidd */
+
+ /** @stable ICU 58 */
+ USCRIPT_ADLAM = 167,/* Adlm */
+ /** @stable ICU 58 */
+ USCRIPT_BHAIKSUKI = 168,/* Bhks */
+ /** @stable ICU 58 */
+ USCRIPT_MARCHEN = 169,/* Marc */
+ /** @stable ICU 58 */
+ USCRIPT_NEWA = 170,/* Newa */
+ /** @stable ICU 58 */
+ USCRIPT_OSAGE = 171,/* Osge */
+
+ /** @stable ICU 58 */
+ USCRIPT_HAN_WITH_BOPOMOFO = 172,/* Hanb */
+ /** @stable ICU 58 */
+ USCRIPT_JAMO = 173,/* Jamo */
+ /** @stable ICU 58 */
+ USCRIPT_SYMBOLS_EMOJI = 174,/* Zsye */
+
+ /** @stable ICU 60 */
+ USCRIPT_MASARAM_GONDI = 175,/* Gonm */
+ /** @stable ICU 60 */
+ USCRIPT_SOYOMBO = 176,/* Soyo */
+ /** @stable ICU 60 */
+ USCRIPT_ZANABAZAR_SQUARE = 177,/* Zanb */
+
+ /** @stable ICU 62 */
+ USCRIPT_DOGRA = 178,/* Dogr */
+ /** @stable ICU 62 */
+ USCRIPT_GUNJALA_GONDI = 179,/* Gong */
+ /** @stable ICU 62 */
+ USCRIPT_MAKASAR = 180,/* Maka */
+ /** @stable ICU 62 */
+ USCRIPT_MEDEFAIDRIN = 181,/* Medf */
+ /** @stable ICU 62 */
+ USCRIPT_HANIFI_ROHINGYA = 182,/* Rohg */
+ /** @stable ICU 62 */
+ USCRIPT_SOGDIAN = 183,/* Sogd */
+ /** @stable ICU 62 */
+ USCRIPT_OLD_SOGDIAN = 184,/* Sogo */
+
+ /** @stable ICU 64 */
+ USCRIPT_ELYMAIC = 185,/* Elym */
+ /** @stable ICU 64 */
+ USCRIPT_NYIAKENG_PUACHUE_HMONG = 186,/* Hmnp */
+ /** @stable ICU 64 */
+ USCRIPT_NANDINAGARI = 187,/* Nand */
+ /** @stable ICU 64 */
+ USCRIPT_WANCHO = 188,/* Wcho */
+
+ /** @stable ICU 66 */
+ USCRIPT_CHORASMIAN = 189,/* Chrs */
+ /** @stable ICU 66 */
+ USCRIPT_DIVES_AKURU = 190,/* Diak */
+ /** @stable ICU 66 */
+ USCRIPT_KHITAN_SMALL_SCRIPT = 191,/* Kits */
+ /** @stable ICU 66 */
+ USCRIPT_YEZIDI = 192,/* Yezi */
+
+ /** @stable ICU 70 */
+ USCRIPT_CYPRO_MINOAN = 193,/* Cpmn */
+ /** @stable ICU 70 */
+ USCRIPT_OLD_UYGHUR = 194,/* Ougr */
+ /** @stable ICU 70 */
+ USCRIPT_TANGSA = 195,/* Tnsa */
+ /** @stable ICU 70 */
+ USCRIPT_TOTO = 196,/* Toto */
+ /** @stable ICU 70 */
+ USCRIPT_VITHKUQI = 197,/* Vith */
+
+ /** @stable ICU 72 */
+ USCRIPT_KAWI = 198,/* Kawi */
+ /** @stable ICU 72 */
+ USCRIPT_NAG_MUNDARI = 199,/* Nagm */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UScriptCode value.
+ * The highest value is available via u_getIntPropertyMaxValue(UCHAR_SCRIPT).
+ *
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ USCRIPT_CODE_LIMIT = 200
+#endif // U_HIDE_DEPRECATED_API
+} UScriptCode;
+
+/**
+ * Gets the script codes associated with the given locale or ISO 15924 abbreviation or name.
+ * Fills in USCRIPT_MALAYALAM given "Malayam" OR "Mlym".
+ * Fills in USCRIPT_LATIN given "en" OR "en_US"
+ * If the required capacity is greater than the capacity of the destination buffer,
+ * then the error code is set to U_BUFFER_OVERFLOW_ERROR and the required capacity is returned.
+ *
+ * <p>Note: To search by short or long script alias only, use
+ * u_getPropertyValueEnum(UCHAR_SCRIPT, alias) instead. That does
+ * a fast lookup with no access of the locale data.
+ *
+ * @param nameOrAbbrOrLocale name of the script, as given in
+ * PropertyValueAliases.txt, or ISO 15924 code or locale
+ * @param fillIn the UScriptCode buffer to fill in the script code
+ * @param capacity the capacity (size) of UScriptCode buffer passed in.
+ * @param err the error status code.
+ * @return The number of script codes filled in the buffer passed in
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getCode(const char* nameOrAbbrOrLocale,UScriptCode* fillIn,int32_t capacity,UErrorCode *err);
+
+/**
+ * Returns the long Unicode script name, if there is one.
+ * Otherwise returns the 4-letter ISO 15924 script code.
+ * Returns "Malayam" given USCRIPT_MALAYALAM.
+ *
+ * @param scriptCode UScriptCode enum
+ * @return long script name as given in PropertyValueAliases.txt, or the 4-letter code,
+ * or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getName(UScriptCode scriptCode);
+
+/**
+ * Returns the 4-letter ISO 15924 script code,
+ * which is the same as the short Unicode script name if Unicode has names for the script.
+ * Returns "Mlym" given USCRIPT_MALAYALAM.
+ *
+ * @param scriptCode UScriptCode enum
+ * @return short script name (4-letter code), or NULL if scriptCode is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI const char* U_EXPORT2
+uscript_getShortName(UScriptCode scriptCode);
+
+/**
+ * Gets the script code associated with the given codepoint.
+ * Returns USCRIPT_MALAYALAM given 0x0D02
+ * @param codepoint UChar32 codepoint
+ * @param err the error status code.
+ * @return The UScriptCode, or 0 if codepoint is invalid
+ * @stable ICU 2.4
+ */
+U_CAPI UScriptCode U_EXPORT2
+uscript_getScript(UChar32 codepoint, UErrorCode *err);
+
+/**
+ * Do the Script_Extensions of code point c contain script sc?
+ * If c does not have explicit Script_Extensions, then this tests whether
+ * c has the Script property value sc.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ * @param c code point
+ * @param sc script code
+ * @return true if sc is in Script_Extensions(c)
+ * @stable ICU 49
+ */
+U_CAPI UBool U_EXPORT2
+uscript_hasScript(UChar32 c, UScriptCode sc);
+
+/**
+ * Writes code point c's Script_Extensions as a list of UScriptCode values
+ * to the output scripts array and returns the number of script codes.
+ * - If c does have Script_Extensions, then the Script property value
+ * (normally Common or Inherited) is not included.
+ * - If c does not have Script_Extensions, then the one Script code is written to the output array.
+ * - If c is not a valid code point, then the one USCRIPT_UNKNOWN code is written.
+ * In other words, if the return value is 1,
+ * then the output array contains exactly c's single Script code.
+ * If the return value is n>=2, then the output array contains c's n Script_Extensions script codes.
+ *
+ * Some characters are commonly used in multiple scripts.
+ * For more information, see UAX #24: http://www.unicode.org/reports/tr24/.
+ *
+ * If there are more than capacity script codes to be written, then
+ * U_BUFFER_OVERFLOW_ERROR is set and the number of Script_Extensions is returned.
+ * (Usual ICU buffer handling behavior.)
+ *
+ * @param c code point
+ * @param scripts output script code array
+ * @param capacity capacity of the scripts array
+ * @param errorCode Standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return number of script codes in c's Script_Extensions, or 1 for the single Script value,
+ * written to scripts unless U_BUFFER_OVERFLOW_ERROR indicates insufficient capacity
+ * @stable ICU 49
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getScriptExtensions(UChar32 c,
+ UScriptCode *scripts, int32_t capacity,
+ UErrorCode *errorCode);
+
+/**
+ * Script usage constants.
+ * See UAX #31 Unicode Identifier and Pattern Syntax.
+ * http://www.unicode.org/reports/tr31/#Table_Candidate_Characters_for_Exclusion_from_Identifiers
+ *
+ * @stable ICU 51
+ */
+typedef enum UScriptUsage {
+ /** Not encoded in Unicode. @stable ICU 51 */
+ USCRIPT_USAGE_NOT_ENCODED,
+ /** Unknown script usage. @stable ICU 51 */
+ USCRIPT_USAGE_UNKNOWN,
+ /** Candidate for Exclusion from Identifiers. @stable ICU 51 */
+ USCRIPT_USAGE_EXCLUDED,
+ /** Limited Use script. @stable ICU 51 */
+ USCRIPT_USAGE_LIMITED_USE,
+ /** Aspirational Use script. @stable ICU 51 */
+ USCRIPT_USAGE_ASPIRATIONAL,
+ /** Recommended script. @stable ICU 51 */
+ USCRIPT_USAGE_RECOMMENDED
+} UScriptUsage;
+
+/**
+ * Writes the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @param dest output string array
+ * @param capacity number of UChars in the dest array
+ * @param pErrorCode standard ICU in/out error code, must pass U_SUCCESS() on input
+ * @return the string length, even if U_BUFFER_OVERFLOW_ERROR
+ * @stable ICU 51
+ */
+U_CAPI int32_t U_EXPORT2
+uscript_getSampleString(UScriptCode script, UChar *dest, int32_t capacity, UErrorCode *pErrorCode);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+class UnicodeString;
+U_NAMESPACE_END
+
+/**
+ * Returns the script sample character string.
+ * This string normally consists of one code point but might be longer.
+ * The string is empty if the script is not encoded.
+ *
+ * @param script script code
+ * @return the sample character string
+ * @stable ICU 51
+ */
+U_COMMON_API icu::UnicodeString U_EXPORT2
+uscript_getSampleUnicodeString(UScriptCode script);
+
+#endif
+
+/**
+ * Returns the script usage according to UAX #31 Unicode Identifier and Pattern Syntax.
+ * Returns USCRIPT_USAGE_NOT_ENCODED if the script is not encoded in Unicode.
+ *
+ * @param script script code
+ * @return script usage
+ * @see UScriptUsage
+ * @stable ICU 51
+ */
+U_CAPI UScriptUsage U_EXPORT2
+uscript_getUsage(UScriptCode script);
+
+/**
+ * Returns true if the script is written right-to-left.
+ * For example, Arab and Hebr.
+ *
+ * @param script script code
+ * @return true if the script is right-to-left
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isRightToLeft(UScriptCode script);
+
+/**
+ * Returns true if the script allows line breaks between letters (excluding hyphenation).
+ * Such a script typically requires dictionary-based line breaking.
+ * For example, Hani and Thai.
+ *
+ * @param script script code
+ * @return true if the script allows line breaks between letters
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_breaksBetweenLetters(UScriptCode script);
+
+/**
+ * Returns true if in modern (or most recent) usage of the script case distinctions are customary.
+ * For example, Latn and Cyrl.
+ *
+ * @param script script code
+ * @return true if the script is cased
+ * @stable ICU 51
+ */
+U_CAPI UBool U_EXPORT2
+uscript_isCased(UScriptCode script);
+
+#endif
diff --git a/intl/icu/source/common/unicode/uset.h b/intl/icu/source/common/unicode/uset.h
new file mode 100644
index 0000000000..ee4e0036d2
--- /dev/null
+++ b/intl/icu/source/common/unicode/uset.h
@@ -0,0 +1,1290 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: uset.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002mar07
+* created by: Markus W. Scherer
+*
+* C version of UnicodeSet.
+*/
+
+
+/**
+ * \file
+ * \brief C API: Unicode Set
+ *
+ * <p>This is a C wrapper around the C++ UnicodeSet class.</p>
+ */
+
+#ifndef __USET_H__
+#define __USET_H__
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+#ifndef USET_DEFINED
+
+#ifndef U_IN_DOXYGEN
+#define USET_DEFINED
+#endif
+/**
+ * USet is the C API type corresponding to C++ class UnicodeSet.
+ * Use the uset_* API to manipulate. Create with
+ * uset_open*, and destroy with uset_close.
+ * @stable ICU 2.4
+ */
+typedef struct USet USet;
+#endif
+
+/**
+ * Bitmask values to be passed to uset_openPatternOptions() or
+ * uset_applyPattern() taking an option parameter.
+ *
+ * Use at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ *
+ * Undefined options bits are ignored, and reserved for future use.
+ *
+ * @stable ICU 2.4
+ */
+enum {
+ /**
+ * Ignore white space within patterns unless quoted or escaped.
+ * @stable ICU 2.4
+ */
+ USET_IGNORE_SPACE = 1,
+
+ /**
+ * Enable case insensitive matching. E.g., "[ab]" with this flag
+ * will match 'a', 'A', 'b', and 'B'. "[^ab]" with this flag will
+ * match all except 'a', 'A', 'b', and 'B'. This performs a full
+ * closure over case mappings, e.g. 'ſ' (U+017F long s) for 's'.
+ *
+ * The resulting set is a superset of the input for the code points but
+ * not for the strings.
+ * It performs a case mapping closure of the code points and adds
+ * full case folding strings for the code points, and reduces strings of
+ * the original set to their full case folding equivalents.
+ *
+ * This is designed for case-insensitive matches, for example
+ * in regular expressions. The full code point case closure allows checking of
+ * an input character directly against the closure set.
+ * Strings are matched by comparing the case-folded form from the closure
+ * set with an incremental case folding of the string in question.
+ *
+ * The closure set will also contain single code points if the original
+ * set contained case-equivalent strings (like U+00DF for "ss" or "Ss" etc.).
+ * This is not necessary (that is, redundant) for the above matching method
+ * but results in the same closure sets regardless of whether the original
+ * set contained the code point or a string.
+ *
+ * @stable ICU 2.4
+ */
+ USET_CASE_INSENSITIVE = 2,
+
+ /**
+ * Adds all case mappings for each element in the set.
+ * This adds the full lower-, title-, and uppercase mappings as well as the full case folding
+ * of each existing element in the set.
+ *
+ * Unlike the “case insensitive” options, this does not perform a closure.
+ * For example, it does not add 'ſ' (U+017F long s) for 's',
+ * 'K' (U+212A Kelvin sign) for 'k', or replace set strings by their case-folded versions.
+ *
+ * @stable ICU 3.2
+ */
+ USET_ADD_CASE_MAPPINGS = 4,
+
+#ifndef U_HIDE_DRAFT_API
+ /**
+ * Enable case insensitive matching.
+ * Same as USET_CASE_INSENSITIVE but using only Simple_Case_Folding (scf) mappings,
+ * which map each code point to one code point,
+ * not full Case_Folding (cf) mappings, which map some code points to multiple code points.
+ *
+ * This is designed for case-insensitive matches, for example in certain
+ * regular expression implementations where only Simple_Case_Folding mappings are used,
+ * such as in ECMAScript (JavaScript) regular expressions.
+ *
+ * @draft ICU 73
+ */
+ USET_SIMPLE_CASE_INSENSITIVE = 6
+#endif // U_HIDE_DRAFT_API
+};
+
+/**
+ * Argument values for whether span() and similar functions continue while
+ * the current character is contained vs. not contained in the set.
+ *
+ * The functionality is straightforward for sets with only single code points,
+ * without strings (which is the common case):
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE work the same.
+ * - USET_SPAN_CONTAINED and USET_SPAN_SIMPLE are inverses of USET_SPAN_NOT_CONTAINED.
+ * - span() and spanBack() partition any string the same way when
+ * alternating between span(USET_SPAN_NOT_CONTAINED) and
+ * span(either "contained" condition).
+ * - Using a complemented (inverted) set and the opposite span conditions
+ * yields the same results.
+ *
+ * When a set contains multi-code point strings, then these statements may not
+ * be true, depending on the strings in the set (for example, whether they
+ * overlap with each other) and the string that is processed.
+ * For a set with strings:
+ * - The complement of the set contains the opposite set of code points,
+ * but the same set of strings.
+ * Therefore, complementing both the set and the span conditions
+ * may yield different results.
+ * - When starting spans at different positions in a string
+ * (span(s, ...) vs. span(s+1, ...)) the ends of the spans may be different
+ * because a set string may start before the later position.
+ * - span(USET_SPAN_SIMPLE) may be shorter than
+ * span(USET_SPAN_CONTAINED) because it will not recursively try
+ * all possible paths.
+ * For example, with a set which contains the three strings "xy", "xya" and "ax",
+ * span("xyax", USET_SPAN_CONTAINED) will return 4 but
+ * span("xyax", USET_SPAN_SIMPLE) will return 3.
+ * span(USET_SPAN_SIMPLE) will never be longer than
+ * span(USET_SPAN_CONTAINED).
+ * - With either "contained" condition, span() and spanBack() may partition
+ * a string in different ways.
+ * For example, with a set which contains the two strings "ab" and "ba",
+ * and when processing the string "aba",
+ * span() will yield contained/not-contained boundaries of { 0, 2, 3 }
+ * while spanBack() will yield boundaries of { 0, 1, 3 }.
+ *
+ * Note: If it is important to get the same boundaries whether iterating forward
+ * or backward through a string, then either only span() should be used and
+ * the boundaries cached for backward operation, or an ICU BreakIterator
+ * could be used.
+ *
+ * Note: Unpaired surrogates are treated like surrogate code points.
+ * Similarly, set strings match only on code point boundaries,
+ * never in the middle of a surrogate pair.
+ * Illegal UTF-8 sequences are treated like U+FFFD.
+ * When processing UTF-8 strings, malformed set strings
+ * (strings with unpaired surrogates which cannot be converted to UTF-8)
+ * are ignored.
+ *
+ * @stable ICU 3.8
+ */
+typedef enum USetSpanCondition {
+ /**
+ * Continues a span() while there is no set element at the current position.
+ * Increments by one code point at a time.
+ * Stops before the first set element (character or string).
+ * (For code points only, this is like while contains(current)==false).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of characters that are not in the set,
+ * and none of its strings overlap with the span.
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_NOT_CONTAINED = 0,
+ /**
+ * Spans the longest substring that is a concatenation of set elements (characters or strings).
+ * (For characters only, this is like while contains(current)==true).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set contains strings, then the span will be the longest substring for which there
+ * exists at least one non-overlapping concatenation of set elements (characters or strings).
+ * This is equivalent to a POSIX regular expression for <code>(OR of each set element)*</code>.
+ * (Java/ICU/Perl regex stops at the first match of an OR.)
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_CONTAINED = 1,
+ /**
+ * Continues a span() while there is a set element at the current position.
+ * Increments by the longest matching element at each position.
+ * (For characters only, this is like while contains(current)==true).
+ *
+ * When span() returns, the substring between where it started and the position
+ * it returned consists only of set elements (characters or strings) that are in the set.
+ *
+ * If a set only contains single characters, then this is the same
+ * as USET_SPAN_CONTAINED.
+ *
+ * If a set contains strings, then the span will be the longest substring
+ * with a match at each position with the longest single set element (character or string).
+ *
+ * Use this span condition together with other longest-match algorithms,
+ * such as ICU converters (ucnv_getUnicodeSet()).
+ *
+ * @stable ICU 3.8
+ */
+ USET_SPAN_SIMPLE = 2,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the last span condition.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ USET_SPAN_CONDITION_COUNT
+#endif // U_HIDE_DEPRECATED_API
+} USetSpanCondition;
+
+enum {
+ /**
+ * Capacity of USerializedSet::staticArray.
+ * Enough for any single-code point set.
+ * Also provides padding for nice sizeof(USerializedSet).
+ * @stable ICU 2.4
+ */
+ USET_SERIALIZED_STATIC_ARRAY_CAPACITY=8
+};
+
+/**
+ * A serialized form of a Unicode set. Limited manipulations are
+ * possible directly on a serialized set. See below.
+ * @stable ICU 2.4
+ */
+typedef struct USerializedSet {
+ /**
+ * The serialized Unicode Set.
+ * @stable ICU 2.4
+ */
+ const uint16_t *array;
+ /**
+ * The length of the array that contains BMP characters.
+ * @stable ICU 2.4
+ */
+ int32_t bmpLength;
+ /**
+ * The total length of the array.
+ * @stable ICU 2.4
+ */
+ int32_t length;
+ /**
+ * A small buffer for the array to reduce memory allocations.
+ * @stable ICU 2.4
+ */
+ uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY];
+} USerializedSet;
+
+/*********************************************************************
+ * USet API
+ *********************************************************************/
+
+/**
+ * Create an empty USet object.
+ * Equivalent to uset_open(1, 0).
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 4.2
+ */
+U_CAPI USet* U_EXPORT2
+uset_openEmpty(void);
+
+/**
+ * Creates a USet object that contains the range of characters
+ * start..end, inclusive. If <code>start > end</code>
+ * then an empty set is created (same as using uset_openEmpty()).
+ * @param start first character of the range, inclusive
+ * @param end last character of the range, inclusive
+ * @return a newly created USet. The caller must call uset_close() on
+ * it when done.
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_open(UChar32 start, UChar32 end);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPattern(const UChar* pattern, int32_t patternLength,
+ UErrorCode* ec);
+
+/**
+ * Creates a set from the given pattern. See the UnicodeSet class
+ * description for the syntax of the pattern language.
+ * @param pattern a string specifying what characters are in the set
+ * @param patternLength the length of the pattern, or -1 if null
+ * terminated
+ * @param options bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * @param ec the error code
+ * @stable ICU 2.4
+ */
+U_CAPI USet* U_EXPORT2
+uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode* ec);
+
+/**
+ * Disposes of the storage used by a USet object. This function should
+ * be called exactly once for objects returned by uset_open().
+ * @param set the object to dispose of
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_close(USet* set);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUSetPointer
+ * "Smart pointer" class, closes a USet via uset_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUSetPointer, USet, uset_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Returns a copy of this object.
+ * If this set is frozen, then the clone will be frozen as well.
+ * Use uset_cloneAsThawed() for a mutable clone of a frozen set.
+ * @param set the original set
+ * @return the newly allocated copy of the set
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_CAPI USet * U_EXPORT2
+uset_clone(const USet *set);
+
+/**
+ * Determines whether the set has been frozen (made immutable) or not.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return true/false for whether the set has been frozen
+ * @see uset_freeze
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_CAPI UBool U_EXPORT2
+uset_isFrozen(const USet *set);
+
+/**
+ * Freeze the set (make it immutable).
+ * Once frozen, it cannot be unfrozen and is therefore thread-safe
+ * until it is deleted.
+ * See the ICU4J Freezable interface for details.
+ * Freezing the set may also make some operations faster, for example
+ * uset_contains() and uset_span().
+ * A frozen set will not be modified. (It remains frozen.)
+ * @param set the set
+ * @return the same set, now frozen
+ * @see uset_isFrozen
+ * @see uset_cloneAsThawed
+ * @stable ICU 3.8
+ */
+U_CAPI void U_EXPORT2
+uset_freeze(USet *set);
+
+/**
+ * Clone the set and make the clone mutable.
+ * See the ICU4J Freezable interface for details.
+ * @param set the set
+ * @return the mutable clone
+ * @see uset_freeze
+ * @see uset_isFrozen
+ * @see uset_clone
+ * @stable ICU 3.8
+ */
+U_CAPI USet * U_EXPORT2
+uset_cloneAsThawed(const USet *set);
+
+/**
+ * Causes the USet object to represent the range <code>start - end</code>.
+ * If <code>start > end</code> then this USet is set to an empty range.
+ * A frozen set will not be modified.
+ * @param set the object to set to the given range
+ * @param start first character in the set, inclusive
+ * @param end last character in the set, inclusive
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_set(USet* set,
+ UChar32 start, UChar32 end);
+
+/**
+ * Modifies the set to represent the set specified by the given
+ * pattern. See the UnicodeSet class description for the syntax of
+ * the pattern language. See also the User Guide chapter about UnicodeSet.
+ * <em>Empties the set passed before applying the pattern.</em>
+ * A frozen set will not be modified.
+ * @param set The set to which the pattern is to be applied.
+ * @param pattern A pointer to UChar string specifying what characters are in the set.
+ * The character at pattern[0] must be a '['.
+ * @param patternLength The length of the UChar string. -1 if NUL terminated.
+ * @param options A bitmask for options to apply to the pattern.
+ * Valid options are USET_IGNORE_SPACE and
+ * at most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS,
+ * USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * @param status Returns an error if the pattern cannot be parsed.
+ * @return Upon successful parse, the value is either
+ * the index of the character after the closing ']'
+ * of the parsed pattern.
+ * If the status code indicates failure, then the return value
+ * is the index of the error in the source.
+ *
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+uset_applyPattern(USet *set,
+ const UChar *pattern, int32_t patternLength,
+ uint32_t options,
+ UErrorCode *status);
+
+/**
+ * Modifies the set to contain those code points which have the given value
+ * for the given binary or enumerated property, as returned by
+ * u_getIntPropertyValue. Prior contents of this set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the property
+ *
+ * @param prop a property in the range UCHAR_BIN_START..UCHAR_BIN_LIMIT-1
+ * or UCHAR_INT_START..UCHAR_INT_LIMIT-1
+ * or UCHAR_MASK_START..UCHAR_MASK_LIMIT-1.
+ *
+ * @param value a value in the range u_getIntPropertyMinValue(prop)..
+ * u_getIntPropertyMaxValue(prop), with one exception. If prop is
+ * UCHAR_GENERAL_CATEGORY_MASK, then value should not be a UCharCategory, but
+ * rather a mask value produced by U_GET_GC_MASK(). This allows grouped
+ * categories such as [:L:] to be represented.
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_applyIntPropertyValue(USet* set,
+ UProperty prop, int32_t value, UErrorCode* ec);
+
+/**
+ * Modifies the set to contain those code points which have the
+ * given value for the given property. Prior contents of this
+ * set are lost.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to contain the code points defined by the given
+ * property and value alias
+ *
+ * @param prop a string specifying a property alias, either short or long.
+ * The name is matched loosely. See PropertyAliases.txt for names and a
+ * description of loose matching. If the value string is empty, then this
+ * string is interpreted as either a General_Category value alias, a Script
+ * value alias, a binary property alias, or a special ID. Special IDs are
+ * matched loosely and correspond to the following sets:
+ *
+ * "ANY" = [\\u0000-\\U0010FFFF],
+ * "ASCII" = [\\u0000-\\u007F],
+ * "Assigned" = [:^Cn:].
+ *
+ * @param propLength the length of the prop, or -1 if NULL
+ *
+ * @param value a string specifying a value alias, either short or long.
+ * The name is matched loosely. See PropertyValueAliases.txt for names
+ * and a description of loose matching. In addition to aliases listed,
+ * numeric values and canonical combining classes may be expressed
+ * numerically, e.g., ("nv", "0.5") or ("ccc", "220"). The value string
+ * may also be empty.
+ *
+ * @param valueLength the length of the value, or -1 if NULL
+ *
+ * @param ec error code input/output parameter
+ *
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_applyPropertyAlias(USet* set,
+ const UChar *prop, int32_t propLength,
+ const UChar *value, int32_t valueLength,
+ UErrorCode* ec);
+
+/**
+ * Return true if the given position, in the given pattern, appears
+ * to be the start of a UnicodeSet pattern.
+ *
+ * @param pattern a string specifying the pattern
+ * @param patternLength the length of the pattern, or -1 if NULL
+ * @param pos the given position
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
+ int32_t pos);
+
+/**
+ * Returns a string representation of this set. If the result of
+ * calling this function is passed to a uset_openPattern(), it
+ * will produce another set that is equal to this one.
+ * @param set the set
+ * @param result the string to receive the rules, may be NULL
+ * @param resultCapacity the capacity of result, may be 0 if result is NULL
+ * @param escapeUnprintable if true then convert unprintable
+ * character to their hex escape representations, \\uxxxx or
+ * \\Uxxxxxxxx. Unprintable characters are those other than
+ * U+000A, U+0020..U+007E.
+ * @param ec error code.
+ * @return length of string, possibly larger than resultCapacity
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_toPattern(const USet* set,
+ UChar* result, int32_t resultCapacity,
+ UBool escapeUnprintable,
+ UErrorCode* ec);
+
+/**
+ * Adds the given character to the given USet. After this call,
+ * uset_contains(set, c) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param c the character to add
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_add(USet* set, UChar32 c);
+
+/**
+ * Adds all of the elements in the specified set to this set if
+ * they're not already present. This operation effectively
+ * modifies this set so that its value is the <i>union</i> of the two
+ * sets. The behavior of this operation is unspecified if the specified
+ * collection is modified while the operation is in progress.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to which to add the set
+ * @param additionalSet the source set whose elements are to be added to this set.
+ * @stable ICU 2.6
+ */
+U_CAPI void U_EXPORT2
+uset_addAll(USet* set, const USet *additionalSet);
+
+/**
+ * Adds the given range of characters to the given USet. After this call,
+ * uset_contains(set, start, end) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to add, inclusive
+ * @param end the last character of the range to add, inclusive
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uset_addRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Adds the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return true.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to add
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_addString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Adds each of the characters in this string to the set. Note: "ch" => {"c", "h"}
+ * If this set already contains any particular character, it has no effect on that character.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the source string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Removes the given character from the given USet. After this call,
+ * uset_contains(set, c) will return false.
+ * A frozen set will not be modified.
+ * @param set the object from which to remove the character
+ * @param c the character to remove
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_remove(USet* set, UChar32 c);
+
+/**
+ * Removes the given range of characters from the given USet. After this call,
+ * uset_contains(set, start, end) will return false.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param start the first character of the range to remove, inclusive
+ * @param end the last character of the range to remove, inclusive
+ * @stable ICU 2.2
+ */
+U_CAPI void U_EXPORT2
+uset_removeRange(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Removes the given string to the given USet. After this call,
+ * uset_containsString(set, str, strLen) will return false.
+ * A frozen set will not be modified.
+ * @param set the object to which to add the character
+ * @param str the string to remove
+ * @param strLen the length of the string or -1 if null terminated.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_removeString(USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Removes EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @stable ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Removes from this set all of its elements that are contained in the
+ * specified set. This operation effectively modifies this
+ * set so that its value is the <i>asymmetric set difference</i> of
+ * the two sets.
+ * A frozen set will not be modified.
+ * @param set the object from which the elements are to be removed
+ * @param removeSet the object that defines which elements will be
+ * removed from this set
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_removeAll(USet* set, const USet* removeSet);
+
+/**
+ * Retain only the elements in this set that are contained in the
+ * specified range. If <code>start > end</code> then an empty range is
+ * retained, leaving the set empty. This is equivalent to
+ * a boolean logic AND, or a set INTERSECTION.
+ * A frozen set will not be modified.
+ *
+ * @param set the object for which to retain only the specified range
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_retain(USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Retains only the specified string from this set if it is present.
+ * Upon return this set will be empty if it did not contain s, or
+ * will only contain s if it did contain s.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @stable ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_retainString(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Retains EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @stable ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Retains only the elements in this set that are contained in the
+ * specified set. In other words, removes from this set all of
+ * its elements that are not contained in the specified set. This
+ * operation effectively modifies this set so that its value is
+ * the <i>intersection</i> of the two sets.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the retain
+ * @param retain set that defines which elements this set will retain
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_retainAll(USet* set, const USet* retain);
+
+/**
+ * Reallocate this objects internal structures to take up the least
+ * possible space, without changing this object's value.
+ * A frozen set will not be modified.
+ *
+ * @param set the object on which to perform the compact
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_compact(USet* set);
+
+/**
+ * This is equivalent to
+ * <code>uset_complementRange(set, 0, 0x10FFFF)</code>.
+ *
+ * <strong>Note:</strong> This performs a symmetric difference with all code points
+ * <em>and thus retains all multicharacter strings</em>.
+ * In order to achieve a “code point complement” (all code points minus this set),
+ * the easiest is to <code>uset_complement(set); uset_removeAllStrings(set);</code>.
+ *
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_complement(USet* set);
+
+/**
+ * Complements the specified range in this set. Any character in
+ * the range will be removed if it is in this set, or will be
+ * added if it is not in this set. If <code>start > end</code>
+ * then an empty range is complemented, leaving the set unchanged.
+ * This is equivalent to a boolean logic XOR.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param start first character, inclusive, of range
+ * @param end last character, inclusive, of range
+ * @stable ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_complementRange(USet *set, UChar32 start, UChar32 end);
+
+/**
+ * Complements the specified string in this set.
+ * The string will be removed if it is in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @stable ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_complementString(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Complements EACH of the characters in this string. Note: "ch" == {"c", "h"}
+ * A frozen set will not be modified.
+ *
+ * @param set the object to be modified
+ * @param str the string
+ * @param length the length of the string, or -1 if NUL-terminated
+ * @stable ICU 69
+ */
+U_CAPI void U_EXPORT2
+uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
+
+/**
+ * Complements in this set all elements contained in the specified
+ * set. Any character in the other set will be removed if it is
+ * in this set, or will be added if it is not in this set.
+ * A frozen set will not be modified.
+ *
+ * @param set the set with which to complement
+ * @param complement set that defines which elements will be xor'ed
+ * from this set.
+ * @stable ICU 3.2
+ */
+U_CAPI void U_EXPORT2
+uset_complementAll(USet* set, const USet* complement);
+
+/**
+ * Removes all of the elements from this set. This set will be
+ * empty after this call returns.
+ * A frozen set will not be modified.
+ * @param set the set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_clear(USet* set);
+
+/**
+ * Close this set over the given attribute. For the attribute
+ * USET_CASE_INSENSITIVE, the result is to modify this set so that:
+ *
+ * 1. For each character or string 'a' in this set, all strings or
+ * characters 'b' such that foldCase(a) == foldCase(b) are added
+ * to this set.
+ *
+ * 2. For each string 'e' in the resulting set, if e !=
+ * foldCase(e), 'e' will be removed.
+ *
+ * Example: [aq\\u00DF{Bc}{bC}{Fi}] => [aAqQ\\u00DF\\uFB01{ss}{bc}{fi}]
+ *
+ * (Here foldCase(x) refers to the operation u_strFoldCase, and a
+ * == b denotes that the contents are the same, not pointer
+ * comparison.)
+ *
+ * A frozen set will not be modified.
+ *
+ * @param set the set
+ *
+ * @param attributes bitmask for attributes to close over.
+ * Valid options:
+ * At most one of USET_CASE_INSENSITIVE, USET_ADD_CASE_MAPPINGS, USET_SIMPLE_CASE_INSENSITIVE.
+ * These case options are mutually exclusive.
+ * Unrelated options bits are ignored.
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+uset_closeOver(USet* set, int32_t attributes);
+
+/**
+ * Remove all strings from this set.
+ *
+ * @param set the set
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+uset_removeAllStrings(USet* set);
+
+/**
+ * Returns true if the given USet contains no characters and no
+ * strings.
+ * @param set the set
+ * @return true if set is empty
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_isEmpty(const USet* set);
+
+/**
+ * @param set the set
+ * @return true if this set contains multi-character strings or the empty string.
+ * @stable ICU 70
+ */
+U_CAPI UBool U_EXPORT2
+uset_hasStrings(const USet *set);
+
+/**
+ * Returns true if the given USet contains the given character.
+ * This function works faster with a frozen set.
+ * @param set the set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_contains(const USet* set, UChar32 c);
+
+/**
+ * Returns true if the given USet contains all characters c
+ * where start <= c && c <= end.
+ * @param set the set
+ * @param start the first character of the range to test, inclusive
+ * @param end the last character of the range to test, inclusive
+ * @return true if set contains the range
+ * @stable ICU 2.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsRange(const USet* set, UChar32 start, UChar32 end);
+
+/**
+ * Returns true if the given USet contains the given string.
+ * @param set the set
+ * @param str the string
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if set contains str
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsString(const USet* set, const UChar* str, int32_t strLen);
+
+/**
+ * Returns the index of the given character within this set, where
+ * the set is ordered by ascending code point. If the character
+ * is not in this set, return -1. The inverse of this method is
+ * <code>charAt()</code>.
+ * @param set the set
+ * @param c the character to obtain the index for
+ * @return an index from 0..size()-1, or -1
+ * @stable ICU 3.2
+ */
+U_CAPI int32_t U_EXPORT2
+uset_indexOf(const USet* set, UChar32 c);
+
+/**
+ * Returns the character at the given index within this set, where
+ * the set is ordered by ascending code point. If the index is
+ * out of range for characters, returns (UChar32)-1.
+ * The inverse of this method is <code>indexOf()</code>.
+ *
+ * For iteration, this is slower than uset_getRangeCount()/uset_getItemCount()
+ * with uset_getItem(), because for each call it skips linearly over <code>index</code>
+ * characters in the ranges.
+ *
+ * @param set the set
+ * @param charIndex an index from 0..size()-1 to obtain the char for
+ * @return the character at the given index, or (UChar32)-1.
+ * @stable ICU 3.2
+ */
+U_CAPI UChar32 U_EXPORT2
+uset_charAt(const USet* set, int32_t charIndex);
+
+/**
+ * Returns the number of characters and strings contained in this set.
+ * The last (uset_getItemCount() - uset_getRangeCount()) items are strings.
+ *
+ * This is slower than uset_getRangeCount() and uset_getItemCount() because
+ * it counts the code points of all ranges.
+ *
+ * @param set the set
+ * @return a non-negative integer counting the characters and strings
+ * contained in set
+ * @stable ICU 2.4
+ * @see uset_getRangeCount
+ */
+U_CAPI int32_t U_EXPORT2
+uset_size(const USet* set);
+
+/**
+ * @param set the set
+ * @return the number of ranges in this set.
+ * @stable ICU 70
+ * @see uset_getItemCount
+ * @see uset_getItem
+ * @see uset_size
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getRangeCount(const USet *set);
+
+/**
+ * Returns the number of items in this set. An item is either a range
+ * of characters or a single multicharacter string.
+ * @param set the set
+ * @return a non-negative integer counting the character ranges
+ * and/or strings contained in set
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItemCount(const USet* set);
+
+/**
+ * Returns an item of this set. An item is either a range of
+ * characters or a single multicharacter string (which can be the empty string).
+ *
+ * If <code>itemIndex</code> is less than uset_getRangeCount(), then this function returns 0,
+ * and the range is <code>*start</code>..<code>*end</code>.
+ *
+ * If <code>itemIndex</code> is at least uset_getRangeCount() and less than uset_getItemCount(), then
+ * this function copies the string into <code>str[strCapacity]</code> and
+ * returns the length of the string (0 for the empty string).
+ *
+ * If <code>itemIndex</code> is out of range, then this function returns -1.
+ *
+ * Note that 0 is returned for each range as well as for the empty string.
+ *
+ * @param set the set
+ * @param itemIndex a non-negative integer in the range 0..uset_getItemCount(set)-1
+ * @param start pointer to variable to receive first character in range, inclusive;
+ * can be NULL for a string item
+ * @param end pointer to variable to receive last character in range, inclusive;
+ * can be NULL for a string item
+ * @param str buffer to receive the string, may be NULL
+ * @param strCapacity capacity of str, or 0 if str is NULL
+ * @param ec error code; U_INDEX_OUTOFBOUNDS_ERROR if the itemIndex is out of range
+ * @return the length of the string (0 or >= 2), or 0 if the item is a range,
+ * or -1 if the itemIndex is out of range
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getItem(const USet* set, int32_t itemIndex,
+ UChar32* start, UChar32* end,
+ UChar* str, int32_t strCapacity,
+ UErrorCode* ec);
+
+/**
+ * Returns true if set1 contains all the characters and strings
+ * of set2. It answers the question, 'Is set1 a superset of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsAll(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if this set contains all the characters
+ * of the given string. This is does not check containment of grapheme
+ * clusters, like uset_containsString.
+ * @param set set of characters to be checked for containment
+ * @param str string containing codepoints to be checked for containment
+ * @param strLen the length of the string or -1 if null terminated.
+ * @return true if the test condition is met
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
+
+/**
+ * Returns true if set1 contains none of the characters and strings
+ * of set2. It answers the question, 'Is set1 a disjoint set of set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsNone(const USet* set1, const USet* set2);
+
+/**
+ * Returns true if set1 contains some of the characters and strings
+ * of set2. It answers the question, 'Does set1 and set2 have an intersection?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_containsSome(const USet* set1, const USet* set2);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Unpaired surrogates are treated according to contains() of their surrogate code points.
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the length of the initial substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Similar to the strspn() C library function.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the length of the initial substring according to the spanCondition;
+ * 0 if the start of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns the start of the trailing substring of the input string which
+ * consists only of characters and strings that are contained in this set
+ * (USET_SPAN_CONTAINED, USET_SPAN_SIMPLE),
+ * or only of characters and strings that are not contained
+ * in this set (USET_SPAN_NOT_CONTAINED).
+ * See USetSpanCondition for details.
+ * Malformed byte sequences are treated according to contains(0xfffd).
+ * This function works faster with a frozen set and with a non-negative string length argument.
+ * @param set the set
+ * @param s start of the string (UTF-8)
+ * @param length of the string; can be -1 for NUL-terminated
+ * @param spanCondition specifies the containment condition
+ * @return the start of the trailing substring according to the spanCondition;
+ * the string length if the end of the string does not fit the spanCondition
+ * @stable ICU 3.8
+ * @see USetSpanCondition
+ */
+U_CAPI int32_t U_EXPORT2
+uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
+
+/**
+ * Returns true if set1 contains all of the characters and strings
+ * of set2, and vis versa. It answers the question, 'Is set1 equal to set2?'
+ * @param set1 set to be checked for containment
+ * @param set2 set to be checked for containment
+ * @return true if the test condition is met
+ * @stable ICU 3.2
+ */
+U_CAPI UBool U_EXPORT2
+uset_equals(const USet* set1, const USet* set2);
+
+/*********************************************************************
+ * Serialized set API
+ *********************************************************************/
+
+/**
+ * Serializes this set into an array of 16-bit integers. Serialization
+ * (currently) only records the characters in the set; multicharacter
+ * strings are ignored.
+ *
+ * The array
+ * has following format (each line is one 16-bit integer):
+ *
+ * length = (n+2*m) | (m!=0?0x8000:0)
+ * bmpLength = n; present if m!=0
+ * bmp[0]
+ * bmp[1]
+ * ...
+ * bmp[n-1]
+ * supp-high[0]
+ * supp-low[0]
+ * supp-high[1]
+ * supp-low[1]
+ * ...
+ * supp-high[m-1]
+ * supp-low[m-1]
+ *
+ * The array starts with a header. After the header are n bmp
+ * code points, then m supplementary code points. Either n or m
+ * or both may be zero. n+2*m is always <= 0x7FFF.
+ *
+ * If there are no supplementary characters (if m==0) then the
+ * header is one 16-bit integer, 'length', with value n.
+ *
+ * If there are supplementary characters (if m!=0) then the header
+ * is two 16-bit integers. The first, 'length', has value
+ * (n+2*m)|0x8000. The second, 'bmpLength', has value n.
+ *
+ * After the header the code points are stored in ascending order.
+ * Supplementary code points are stored as most significant 16
+ * bits followed by least significant 16 bits.
+ *
+ * @param set the set
+ * @param dest pointer to buffer of destCapacity 16-bit integers.
+ * May be NULL only if destCapacity is zero.
+ * @param destCapacity size of dest, or zero. Must not be negative.
+ * @param pErrorCode pointer to the error code. Will be set to
+ * U_INDEX_OUTOFBOUNDS_ERROR if n+2*m > 0x7FFF. Will be set to
+ * U_BUFFER_OVERFLOW_ERROR if n+2*m+(m!=0?2:1) > destCapacity.
+ * @return the total length of the serialized format, including
+ * the header, that is, n+2*m+(m!=0?2:1), or 0 on error other
+ * than U_BUFFER_OVERFLOW_ERROR.
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
+
+/**
+ * Given a serialized array, fill in the given serialized set object.
+ * @param fillSet pointer to result
+ * @param src pointer to start of array
+ * @param srcLength length of array
+ * @return true if the given array is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
+
+/**
+ * Set the USerializedSet to contain the given character (and nothing
+ * else).
+ * @param fillSet pointer to result
+ * @param c The codepoint to set
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+uset_setSerializedToOne(USerializedSet* fillSet, UChar32 c);
+
+/**
+ * Returns true if the given USerializedSet contains the given
+ * character.
+ * @param set the serialized set
+ * @param c The codepoint to check for within the set
+ * @return true if set contains c
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_serializedContains(const USerializedSet* set, UChar32 c);
+
+/**
+ * Returns the number of disjoint ranges of characters contained in
+ * the given serialized set. Ignores any strings contained in the
+ * set.
+ * @param set the serialized set
+ * @return a non-negative integer counting the character ranges
+ * contained in set
+ * @stable ICU 2.4
+ */
+U_CAPI int32_t U_EXPORT2
+uset_getSerializedRangeCount(const USerializedSet* set);
+
+/**
+ * Returns a range of characters contained in the given serialized
+ * set.
+ * @param set the serialized set
+ * @param rangeIndex a non-negative integer in the range 0..
+ * uset_getSerializedRangeCount(set)-1
+ * @param pStart pointer to variable to receive first character
+ * in range, inclusive
+ * @param pEnd pointer to variable to receive last character in range,
+ * inclusive
+ * @return true if rangeIndex is valid, otherwise false
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
+ UChar32* pStart, UChar32* pEnd);
+
+#endif
diff --git a/intl/icu/source/common/unicode/usetiter.h b/intl/icu/source/common/unicode/usetiter.h
new file mode 100644
index 0000000000..3168d3b0f6
--- /dev/null
+++ b/intl/icu/source/common/unicode/usetiter.h
@@ -0,0 +1,323 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (c) 2002-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*/
+#ifndef USETITER_H
+#define USETITER_H
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+
+#include "unicode/uobject.h"
+#include "unicode/unistr.h"
+
+/**
+ * \file
+ * \brief C++ API: UnicodeSetIterator iterates over the contents of a UnicodeSet.
+ */
+
+U_NAMESPACE_BEGIN
+
+class UnicodeSet;
+class UnicodeString;
+
+/**
+ *
+ * UnicodeSetIterator iterates over the contents of a UnicodeSet. It
+ * iterates over either code points or code point ranges. After all
+ * code points or ranges have been returned, it returns the
+ * multicharacter strings of the UnicodeSet, if any.
+ *
+ * This class is not intended for public subclassing.
+ *
+ * <p>To iterate over code points and strings, use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it(set);
+ * while (it.next()) {
+ * processItem(it.getString());
+ * }
+ * </pre>
+ * <p>Each item in the set is accessed as a string. Set elements
+ * consisting of single code points are returned as strings containing
+ * just the one code point.
+ *
+ * <p>To iterate over code point ranges, instead of individual code points,
+ * use a loop like this:
+ * <pre>
+ * UnicodeSetIterator it(set);
+ * while (it.nextRange()) {
+ * if (it.isString()) {
+ * processString(it.getString());
+ * } else {
+ * processCodepointRange(it.getCodepoint(), it.getCodepointEnd());
+ * }
+ * }
+ * </pre>
+ *
+ * To iterate over only the strings, start with <code>skipToStrings()</code>.
+ *
+ * @author M. Davis
+ * @stable ICU 2.4
+ */
+class U_COMMON_API UnicodeSetIterator final : public UObject {
+ /**
+ * Value of <tt>codepoint</tt> if the iterator points to a string.
+ * If <tt>codepoint == IS_STRING</tt>, then examine
+ * <tt>string</tt> for the current iteration result.
+ */
+ enum { IS_STRING = -1 };
+
+ /**
+ * Current code point, or the special value <tt>IS_STRING</tt>, if
+ * the iterator points to a string.
+ */
+ UChar32 codepoint;
+
+ /**
+ * When iterating over ranges using <tt>nextRange()</tt>,
+ * <tt>codepointEnd</tt> contains the inclusive end of the
+ * iteration range, if <tt>codepoint != IS_STRING</tt>. If
+ * iterating over code points using <tt>next()</tt>, or if
+ * <tt>codepoint == IS_STRING</tt>, then the value of
+ * <tt>codepointEnd</tt> is undefined.
+ */
+ UChar32 codepointEnd;
+
+ /**
+ * If <tt>codepoint == IS_STRING</tt>, then <tt>string</tt> points
+ * to the current string. If <tt>codepoint != IS_STRING</tt>, the
+ * value of <tt>string</tt> is undefined.
+ */
+ const UnicodeString* string;
+
+ public:
+
+ /**
+ * Create an iterator over the given set. The iterator is valid
+ * only so long as <tt>set</tt> is valid.
+ * @param set set to iterate over
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator(const UnicodeSet& set);
+
+ /**
+ * Create an iterator over nothing. <tt>next()</tt> and
+ * <tt>nextRange()</tt> return false. This is a convenience
+ * constructor allowing the target to be set later.
+ * @stable ICU 2.4
+ */
+ UnicodeSetIterator();
+
+ /**
+ * Destructor.
+ * @stable ICU 2.4
+ */
+ virtual ~UnicodeSetIterator();
+
+ /**
+ * Returns true if the current element is a string. If so, the
+ * caller can retrieve it with <tt>getString()</tt>. If this
+ * method returns false, the current element is a code point or
+ * code point range, depending on whether <tt>next()</tt> or
+ * <tt>nextRange()</tt> was called.
+ * Elements of types string and codepoint can both be retrieved
+ * with the function <tt>getString()</tt>.
+ * Elements of type codepoint can also be retrieved with
+ * <tt>getCodepoint()</tt>.
+ * For ranges, <tt>getCodepoint()</tt> returns the starting codepoint
+ * of the range, and <tt>getCodepointEnd()</tt> returns the end
+ * of the range.
+ * @stable ICU 2.4
+ */
+ inline UBool isString() const;
+
+ /**
+ * Returns the current code point, if <tt>isString()</tt> returned
+ * false. Otherwise returns an undefined result.
+ * @stable ICU 2.4
+ */
+ inline UChar32 getCodepoint() const;
+
+ /**
+ * Returns the end of the current code point range, if
+ * <tt>isString()</tt> returned false and <tt>nextRange()</tt> was
+ * called. Otherwise returns an undefined result.
+ * @stable ICU 2.4
+ */
+ inline UChar32 getCodepointEnd() const;
+
+ /**
+ * Returns the current string, if <tt>isString()</tt> returned
+ * true. If the current iteration item is a code point, a UnicodeString
+ * containing that single code point is returned.
+ *
+ * Ownership of the returned string remains with the iterator.
+ * The string is guaranteed to remain valid only until the iterator is
+ * advanced to the next item, or until the iterator is deleted.
+ *
+ * @stable ICU 2.4
+ */
+ const UnicodeString& getString();
+
+ /**
+ * Skips over the remaining code points/ranges, if any.
+ * A following call to next() or nextRange() will yield a string, if there is one.
+ * No-op if next() would return false, or if it would yield a string anyway.
+ *
+ * @return *this
+ * @stable ICU 70
+ * @see UnicodeSet#strings()
+ */
+ inline UnicodeSetIterator &skipToStrings() {
+ // Finish code point/range iteration.
+ range = endRange;
+ endElement = -1;
+ nextElement = 0;
+ return *this;
+ }
+
+ /**
+ * Advances the iteration position to the next element in the set,
+ * which can be either a single code point or a string.
+ * If there are no more elements in the set, return false.
+ *
+ * <p>
+ * If <tt>isString() == true</tt>, the value is a
+ * string, otherwise the value is a
+ * single code point. Elements of either type can be retrieved
+ * with the function <tt>getString()</tt>, while elements of
+ * consisting of a single code point can be retrieved with
+ * <tt>getCodepoint()</tt>
+ *
+ * <p>The order of iteration is all code points in sorted order,
+ * followed by all strings sorted order. Do not mix
+ * calls to <tt>next()</tt> and <tt>nextRange()</tt> without
+ * calling <tt>reset()</tt> between them. The results of doing so
+ * are undefined.
+ *
+ * @return true if there was another element in the set.
+ * @stable ICU 2.4
+ */
+ UBool next();
+
+ /**
+ * Returns the next element in the set, either a code point range
+ * or a string. If there are no more elements in the set, return
+ * false. If <tt>isString() == true</tt>, the value is a
+ * string and can be accessed with <tt>getString()</tt>. Otherwise the value is a
+ * range of one or more code points from <tt>getCodepoint()</tt> to
+ * <tt>getCodepointeEnd()</tt> inclusive.
+ *
+ * <p>The order of iteration is all code points ranges in sorted
+ * order, followed by all strings sorted order. Ranges are
+ * disjoint and non-contiguous. The value returned from <tt>getString()</tt>
+ * is undefined unless <tt>isString() == true</tt>. Do not mix calls to
+ * <tt>next()</tt> and <tt>nextRange()</tt> without calling
+ * <tt>reset()</tt> between them. The results of doing so are
+ * undefined.
+ *
+ * @return true if there was another element in the set.
+ * @stable ICU 2.4
+ */
+ UBool nextRange();
+
+ /**
+ * Sets this iterator to visit the elements of the given set and
+ * resets it to the start of that set. The iterator is valid only
+ * so long as <tt>set</tt> is valid.
+ * @param set the set to iterate over.
+ * @stable ICU 2.4
+ */
+ void reset(const UnicodeSet& set);
+
+ /**
+ * Resets this iterator to the start of the set.
+ * @stable ICU 2.4
+ */
+ void reset();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for this class.
+ *
+ * @stable ICU 2.4
+ */
+ static UClassID U_EXPORT2 getStaticClassID();
+
+ /**
+ * ICU "poor man's RTTI", returns a UClassID for the actual class.
+ *
+ * @stable ICU 2.4
+ */
+ virtual UClassID getDynamicClassID() const override;
+
+ // ======================= PRIVATES ===========================
+
+private:
+
+ // endElement and nextElements are really UChar32's, but we keep
+ // them as signed int32_t's so we can do comparisons with
+ // endElement set to -1. Leave them as int32_t's.
+ /** The set
+ */
+ const UnicodeSet* set;
+ /** End range
+ */
+ int32_t endRange;
+ /** Range
+ */
+ int32_t range;
+ /** End element
+ */
+ int32_t endElement;
+ /** Next element
+ */
+ int32_t nextElement;
+ /** Next string
+ */
+ int32_t nextString;
+ /** String count
+ */
+ int32_t stringCount;
+
+ /**
+ * Points to the string to use when the caller asks for a
+ * string and the current iteration item is a code point, not a string.
+ */
+ UnicodeString *cpString;
+
+ /** Copy constructor. Disallowed.
+ */
+ UnicodeSetIterator(const UnicodeSetIterator&) = delete;
+
+ /** Assignment operator. Disallowed.
+ */
+ UnicodeSetIterator& operator=(const UnicodeSetIterator&) = delete;
+
+ /** Load range
+ */
+ void loadRange(int32_t range);
+};
+
+inline UBool UnicodeSetIterator::isString() const {
+ return codepoint < 0;
+}
+
+inline UChar32 UnicodeSetIterator::getCodepoint() const {
+ return codepoint;
+}
+
+inline UChar32 UnicodeSetIterator::getCodepointEnd() const {
+ return codepointEnd;
+}
+
+
+U_NAMESPACE_END
+
+#endif /* U_SHOW_CPLUSPLUS_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/ushape.h b/intl/icu/source/common/unicode/ushape.h
new file mode 100644
index 0000000000..14371edc8f
--- /dev/null
+++ b/intl/icu/source/common/unicode/ushape.h
@@ -0,0 +1,476 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+******************************************************************************
+*
+* Copyright (C) 2000-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+******************************************************************************
+* file name: ushape.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2000jun29
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USHAPE_H__
+#define __USHAPE_H__
+
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Arabic shaping
+ *
+ */
+
+/**
+ * Shape Arabic text on a character basis.
+ *
+ * <p>This function performs basic operations for "shaping" Arabic text. It is most
+ * useful for use with legacy data formats and legacy display technology
+ * (simple terminals). All operations are performed on Unicode characters.</p>
+ *
+ * <p>Text-based shaping means that some character code points in the text are
+ * replaced by others depending on the context. It transforms one kind of text
+ * into another. In comparison, modern displays for Arabic text select
+ * appropriate, context-dependent font glyphs for each text element, which means
+ * that they transform text into a glyph vector.</p>
+ *
+ * <p>Text transformations are necessary when modern display technology is not
+ * available or when text needs to be transformed to or from legacy formats that
+ * use "shaped" characters. Since the Arabic script is cursive, connecting
+ * adjacent letters to each other, computers select images for each letter based
+ * on the surrounding letters. This usually results in four images per Arabic
+ * letter: initial, middle, final, and isolated forms. In Unicode, on the other
+ * hand, letters are normally stored abstract, and a display system is expected
+ * to select the necessary glyphs. (This makes searching and other text
+ * processing easier because the same letter has only one code.) It is possible
+ * to mimic this with text transformations because there are characters in
+ * Unicode that are rendered as letters with a specific shape
+ * (or cursive connectivity). They were included for interoperability with
+ * legacy systems and codepages, and for unsophisticated display systems.</p>
+ *
+ * <p>A second kind of text transformations is supported for Arabic digits:
+ * For compatibility with legacy codepages that only include European digits,
+ * it is possible to replace one set of digits by another, changing the
+ * character code points. These operations can be performed for either
+ * Arabic-Indic Digits (U+0660...U+0669) or Eastern (Extended) Arabic-Indic
+ * digits (U+06f0...U+06f9).</p>
+ *
+ * <p>Some replacements may result in more or fewer characters (code points).
+ * By default, this means that the destination buffer may receive text with a
+ * length different from the source length. Some legacy systems rely on the
+ * length of the text to be constant. They expect extra spaces to be added
+ * or consumed either next to the affected character or at the end of the
+ * text.</p>
+ *
+ * <p>For details about the available operations, see the description of the
+ * <code>U_SHAPE_...</code> options.</p>
+ *
+ * @param source The input text.
+ *
+ * @param sourceLength The number of UChars in <code>source</code>.
+ *
+ * @param dest The destination buffer that will receive the results of the
+ * requested operations. It may be <code>NULL</code> only if
+ * <code>destSize</code> is 0. The source and destination must not
+ * overlap.
+ *
+ * @param destSize The size (capacity) of the destination buffer in UChars.
+ * If <code>destSize</code> is 0, then no output is produced,
+ * but the necessary buffer size is returned ("preflighting").
+ *
+ * @param options This is a 32-bit set of flags that specify the operations
+ * that are performed on the input text. If no error occurs,
+ * then the result will always be written to the destination
+ * buffer.
+ *
+ * @param pErrorCode must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return The number of UChars written to the destination buffer.
+ * If an error occurred, then no output was written, or it may be
+ * incomplete. If <code>U_BUFFER_OVERFLOW_ERROR</code> is set, then
+ * the return value indicates the necessary destination buffer size.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_shapeArabic(const UChar *source, int32_t sourceLength,
+ UChar *dest, int32_t destSize,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * Affects: LamAlef options
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_GROW_SHRINK 0
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_GROW_SHRINK
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_RESIZE 0
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_NEAR 1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces next to modified characters.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_NEAR
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_NEAR 1
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_END 2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the end of the text.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_END
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_END 2
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING 3
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * If more room is necessary, then try to consume spaces at the beginning of the text.
+ * Affects: LamAlef options
+ * This option is an alias to U_SHAPE_LENGTH_FIXED_SPACES_AT_BEGINNING
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_BEGIN 3
+
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping Mode: For each LAMALEF character found, expand LAMALEF using space at end.
+ * If there is no space at end, use spaces at beginning of the buffer. If there
+ * is no space at beginning of the buffer, use spaces at the near (i.e. the space
+ * after the LAMALEF character).
+ * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
+ * will be set in pErrorCode
+ *
+ * Deshaping Mode: Perform the same function as the flag equals U_SHAPE_LAMALEF_END.
+ * Affects: LamAlef options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_AUTO 0x10000
+
+/** Bit mask for memory options. @stable ICU 2.0 */
+#define U_SHAPE_LENGTH_MASK 0x10003 /* Changed old value 3 */
+
+
+/**
+ * Bit mask for LamAlef memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_LAMALEF_MASK 0x10003 /* updated */
+
+/** Direction indicator: the source is in logical (keyboard) order. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_LOGICAL 0
+
+/**
+ * Direction indicator:
+ * the source is in visual RTL order,
+ * the rightmost displayed character stored first.
+ * This option is an alias to U_SHAPE_TEXT_DIRECTION_LOGICAL
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_RTL 0
+
+/**
+ * Direction indicator:
+ * the source is in visual LTR order,
+ * the leftmost displayed character stored first.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_TEXT_DIRECTION_VISUAL_LTR 4
+
+/** Bit mask for direction indicators. @stable ICU 2.0 */
+#define U_SHAPE_TEXT_DIRECTION_MASK 4
+
+
+/** Letter shaping option: do not perform letter shaping. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_NOOP 0
+
+/** Letter shaping option: replace abstract letter characters by "shaped" ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_SHAPE 8
+
+/** Letter shaping option: replace "shaped" letter characters by abstract ones. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_UNSHAPE 0x10
+
+/**
+ * Letter shaping option: replace abstract letter characters by "shaped" ones.
+ * The only difference with U_SHAPE_LETTERS_SHAPE is that Tashkeel letters
+ * are always "shaped" into the isolated form instead of the medial form
+ * (selecting code points from the Arabic Presentation Forms-B block).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_LETTERS_SHAPE_TASHKEEL_ISOLATED 0x18
+
+
+/** Bit mask for letter shaping options. @stable ICU 2.0 */
+#define U_SHAPE_LETTERS_MASK 0x18
+
+
+/** Digit shaping option: do not perform digit shaping. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_NOOP 0
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_EN2AN 0x20
+
+/**
+ * Digit shaping option:
+ * Replace Arabic-Indic digits by European digits (U+0030...).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_AN2EN 0x40
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be not an Arabic letter
+ * (it is <code>U_LEFT_TO_RIGHT</code> [L] or <code>U_RIGHT_TO_LEFT</code> [R]).
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_LR 0x60
+
+/**
+ * Digit shaping option:
+ * Replace European digits (U+0030...) by Arabic-Indic digits if the most recent
+ * strongly directional character is an Arabic letter
+ * (<code>u_charDirection()</code> result <code>U_RIGHT_TO_LEFT_ARABIC</code> [AL]).<br>
+ * The direction of "preceding" depends on the direction indicator option.
+ * For the first characters, the preceding strongly directional character
+ * (initial state) is assumed to be an Arabic letter.
+ * @stable ICU 2.0
+ */
+#define U_SHAPE_DIGITS_ALEN2AN_INIT_AL 0x80
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_RESERVED 0xa0
+
+/** Bit mask for digit shaping options. @stable ICU 2.0 */
+#define U_SHAPE_DIGITS_MASK 0xe0
+
+
+/** Digit type option: Use Arabic-Indic digits (U+0660...U+0669). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN 0
+
+/** Digit type option: Use Eastern (Extended) Arabic-Indic digits (U+06f0...U+06f9). @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_AN_EXTENDED 0x100
+
+/** Not a valid option value. May be replaced by a new option. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_RESERVED 0x200
+
+/** Bit mask for digit type options. @stable ICU 2.0 */
+#define U_SHAPE_DIGIT_TYPE_MASK 0x300 /* I need to change this from 0x3f00 to 0x300 */
+
+/**
+ * Tashkeel aggregation option:
+ * Replaces any combination of U+0651 with one of
+ * U+064C, U+064D, U+064E, U+064F, U+0650 with
+ * U+FC5E, U+FC5F, U+FC60, U+FC61, U+FC62 consecutively.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_AGGREGATE_TASHKEEL 0x4000
+/** Tashkeel aggregation option: do not aggregate tashkeels. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_NOOP 0
+/** Bit mask for tashkeel aggregation. @stable ICU 3.6 */
+#define U_SHAPE_AGGREGATE_TASHKEEL_MASK 0x4000
+
+/**
+ * Presentation form option:
+ * Don't replace Arabic Presentation Forms-A and Arabic Presentation Forms-B
+ * characters with 0+06xx characters, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION 0x8000
+/** Presentation form option:
+ * Replace Arabic Presentation Forms-A and Arabic Presentationo Forms-B with
+ * their unshaped correspondents in range 0+06xx, before shaping.
+ * @stable ICU 3.6
+ */
+#define U_SHAPE_PRESERVE_PRESENTATION_NOOP 0
+/** Bit mask for preserve presentation form. @stable ICU 3.6 */
+#define U_SHAPE_PRESERVE_PRESENTATION_MASK 0x8000
+
+/* Seen Tail option */
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: The SEEN family character will expand into two characters using space near
+ * the SEEN family character(i.e. the space after the character).
+ * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
+ * will be set in pErrorCode
+ *
+ * De-shaping mode: Any Seen character followed by Tail character will be
+ * replaced by one cell Seen and a space will replace the Tail.
+ * Affects: Seen options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SEEN_TWOCELL_NEAR 0x200000
+
+/**
+ * Bit mask for Seen memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SEEN_MASK 0x700000
+
+/* YehHamza option */
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: The YEHHAMZA character will expand into two characters using space near it
+ * (i.e. the space after the character
+ * If there are no spaces found, an error U_NO_SPACE_AVAILABLE (as defined in utypes.h)
+ * will be set in pErrorCode
+ *
+ * De-shaping mode: Any Yeh (final or isolated) character followed by Hamza character will be
+ * replaced by one cell YehHamza and space will replace the Hamza.
+ * Affects: YehHamza options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_YEHHAMZA_TWOCELL_NEAR 0x1000000
+
+
+/**
+ * Bit mask for YehHamza memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_YEHHAMZA_MASK 0x3800000
+
+/* New Tashkeel options */
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: Tashkeel characters will be replaced by spaces.
+ * Spaces will be placed at beginning of the buffer
+ *
+ * De-shaping mode: N/A
+ * Affects: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_BEGIN 0x40000
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: Tashkeel characters will be replaced by spaces.
+ * Spaces will be placed at end of the buffer
+ *
+ * De-shaping mode: N/A
+ * Affects: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_END 0x60000
+
+/**
+ * Memory option: allow the result to have a different length than the source.
+ * Shaping mode: Tashkeel characters will be removed, buffer length will shrink.
+ * De-shaping mode: N/A
+ *
+ * Affect: Tashkeel options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_RESIZE 0x80000
+
+/**
+ * Memory option: the result must have the same length as the source.
+ * Shaping mode: Tashkeel characters will be replaced by Tatweel if it is connected to adjacent
+ * characters (i.e. shaped on Tatweel) or replaced by space if it is not connected.
+ *
+ * De-shaping mode: N/A
+ * Affects: YehHamza options
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_REPLACE_BY_TATWEEL 0xC0000
+
+/**
+ * Bit mask for Tashkeel replacement with Space or Tatweel memory options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_TASHKEEL_MASK 0xE0000
+
+
+/* Space location Control options */
+/**
+ * This option affect the meaning of BEGIN and END options. if this option is not used the default
+ * for BEGIN and END will be as following:
+ * The Default (for both Visual LTR, Visual RTL and Logical Text)
+ * 1. BEGIN always refers to the start address of physical memory.
+ * 2. END always refers to the end address of physical memory.
+ *
+ * If this option is used it will swap the meaning of BEGIN and END only for Visual LTR text.
+ *
+ * The effect on BEGIN and END Memory Options will be as following:
+ * A. BEGIN For Visual LTR text: This will be the beginning (right side) of the visual text(
+ * corresponding to the physical memory address end for Visual LTR text, Same as END in
+ * default behavior)
+ * B. BEGIN For Logical text: Same as BEGIN in default behavior.
+ * C. END For Visual LTR text: This will be the end (left side) of the visual text (corresponding
+ * to the physical memory address beginning for Visual LTR text, Same as BEGIN in default behavior.
+ * D. END For Logical text: Same as END in default behavior).
+ * Affects: All LamAlef BEGIN, END and AUTO options.
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_BEGIN_END 0x4000000
+
+/**
+ * Bit mask for swapping BEGIN and END for Visual LTR text
+ * @stable ICU 4.2
+ */
+#define U_SHAPE_SPACES_RELATIVE_TO_TEXT_MASK 0x4000000
+
+/**
+ * If this option is used, shaping will use the new Unicode code point for TAIL (i.e. 0xFE73).
+ * If this option is not specified (Default), old unofficial Unicode TAIL code point is used (i.e. 0x200B)
+ * De-shaping will not use this option as it will always search for both the new Unicode code point for the
+ * TAIL (i.e. 0xFE73) or the old unofficial Unicode TAIL code point (i.e. 0x200B) and de-shape the
+ * Seen-Family letter accordingly.
+ *
+ * Shaping Mode: Only shaping.
+ * De-shaping Mode: N/A.
+ * Affects: All Seen options
+ * @stable ICU 4.8
+ */
+#define U_SHAPE_TAIL_NEW_UNICODE 0x8000000
+
+/**
+ * Bit mask for new Unicode Tail option
+ * @stable ICU 4.8
+ */
+#define U_SHAPE_TAIL_TYPE_MASK 0x8000000
+
+#endif
diff --git a/intl/icu/source/common/unicode/usprep.h b/intl/icu/source/common/unicode/usprep.h
new file mode 100644
index 0000000000..f8a0f58e0d
--- /dev/null
+++ b/intl/icu/source/common/unicode/usprep.h
@@ -0,0 +1,274 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+ *******************************************************************************
+ *
+ * Copyright (C) 2003-2014, International Business Machines
+ * Corporation and others. All Rights Reserved.
+ *
+ *******************************************************************************
+ * file name: usprep.h
+ * encoding: UTF-8
+ * tab size: 8 (not used)
+ * indentation:4
+ *
+ * created on: 2003jul2
+ * created by: Ram Viswanadha
+ */
+
+#ifndef __USPREP_H__
+#define __USPREP_H__
+
+/**
+ * \file
+ * \brief C API: Implements the StringPrep algorithm.
+ */
+
+#include "unicode/utypes.h"
+
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#endif // U_SHOW_CPLUSPLUS_API
+
+/**
+ *
+ * StringPrep API implements the StingPrep framework as described by RFC 3454.
+ * StringPrep prepares Unicode strings for use in network protocols.
+ * Profiles of StingPrep are set of rules and data according to with the
+ * Unicode Strings are prepared. Each profiles contains tables which describe
+ * how a code point should be treated. The tables are broadly classified into
+ * <ul>
+ * <li> Unassigned Table: Contains code points that are unassigned
+ * in the Unicode Version supported by StringPrep. Currently
+ * RFC 3454 supports Unicode 3.2. </li>
+ * <li> Prohibited Table: Contains code points that are prohibited from
+ * the output of the StringPrep processing function. </li>
+ * <li> Mapping Table: Contains code points that are deleted from the output or case mapped. </li>
+ * </ul>
+ *
+ * The procedure for preparing Unicode strings:
+ * <ol>
+ * <li> Map: For each character in the input, check if it has a mapping
+ * and, if so, replace it with its mapping. </li>
+ * <li> Normalize: Possibly normalize the result of step 1 using Unicode
+ * normalization. </li>
+ * <li> Prohibit: Check for any characters that are not allowed in the
+ * output. If any are found, return an error.</li>
+ * <li> Check bidi: Possibly check for right-to-left characters, and if
+ * any are found, make sure that the whole string satisfies the
+ * requirements for bidirectional strings. If the string does not
+ * satisfy the requirements for bidirectional strings, return an
+ * error. </li>
+ * </ol>
+ * @author Ram Viswanadha
+ */
+#if !UCONFIG_NO_IDNA
+
+#include "unicode/parseerr.h"
+
+/**
+ * The StringPrep profile
+ * @stable ICU 2.8
+ */
+typedef struct UStringPrepProfile UStringPrepProfile;
+
+
+/**
+ * Option to prohibit processing of unassigned code points in the input
+ *
+ * @see usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_DEFAULT 0x0000
+
+/**
+ * Option to allow processing of unassigned code points in the input
+ *
+ * @see usprep_prepare
+ * @stable ICU 2.8
+ */
+#define USPREP_ALLOW_UNASSIGNED 0x0001
+
+/**
+ * enums for the standard stringprep profile types
+ * supported by usprep_openByType.
+ * @see usprep_openByType
+ * @stable ICU 4.2
+ */
+typedef enum UStringPrepProfileType {
+ /**
+ * RFC3491 Nameprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3491_NAMEPREP,
+ /**
+ * RFC3530 nfs4_cs_prep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_CS_PREP,
+ /**
+ * RFC3530 nfs4_cs_prep with case insensitive option
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_CS_PREP_CI,
+ /**
+ * RFC3530 nfs4_cis_prep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_CIS_PREP,
+ /**
+ * RFC3530 nfs4_mixed_prep for prefix
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_MIXED_PREP_PREFIX,
+ /**
+ * RFC3530 nfs4_mixed_prep for suffix
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3530_NFS4_MIXED_PREP_SUFFIX,
+ /**
+ * RFC3722 iSCSI
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3722_ISCSI,
+ /**
+ * RFC3920 XMPP Nodeprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3920_NODEPREP,
+ /**
+ * RFC3920 XMPP Resourceprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC3920_RESOURCEPREP,
+ /**
+ * RFC4011 Policy MIB Stringprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4011_MIB,
+ /**
+ * RFC4013 SASLprep
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4013_SASLPREP,
+ /**
+ * RFC4505 trace
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4505_TRACE,
+ /**
+ * RFC4518 LDAP
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4518_LDAP,
+ /**
+ * RFC4518 LDAP for case ignore, numeric and stored prefix
+ * matching rules
+ * @stable ICU 4.2
+ */
+ USPREP_RFC4518_LDAP_CI
+} UStringPrepProfileType;
+
+/**
+ * Creates a StringPrep profile from the data file.
+ *
+ * @param path string containing the full path pointing to the directory
+ * where the profile reside followed by the package name
+ * e.g. "/usr/resource/my_app/profiles/mydata" on a Unix system.
+ * if NULL, ICU default data files will be used.
+ * @param fileName name of the profile file to be opened
+ * @param status ICU error code in/out parameter. Must not be NULL.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Pointer to UStringPrepProfile that is opened. Should be closed by
+ * calling usprep_close()
+ * @see usprep_close()
+ * @stable ICU 2.8
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_open(const char* path,
+ const char* fileName,
+ UErrorCode* status);
+
+/**
+ * Creates a StringPrep profile for the specified profile type.
+ *
+ * @param type The profile type
+ * @param status ICU error code in/out parameter. Must not be NULL.
+ * Must fulfill U_SUCCESS before the function call.
+ * @return Pointer to UStringPrepProfile that is opened. Should be closed by
+ * calling usprep_close()
+ * @see usprep_close()
+ * @stable ICU 4.2
+ */
+U_CAPI UStringPrepProfile* U_EXPORT2
+usprep_openByType(UStringPrepProfileType type,
+ UErrorCode* status);
+
+/**
+ * Closes the profile
+ * @param profile The profile to close
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+usprep_close(UStringPrepProfile* profile);
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUStringPrepProfilePointer
+ * "Smart pointer" class, closes a UStringPrepProfile via usprep_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUStringPrepProfilePointer, UStringPrepProfile, usprep_close);
+
+U_NAMESPACE_END
+
+#endif
+
+/**
+ * Prepare the input buffer for use in applications with the given profile. This operation maps, normalizes(NFKC),
+ * checks for prohibited and BiDi characters in the order defined by RFC 3454
+ * depending on the options specified in the profile.
+ *
+ * @param prep The profile to use
+ * @param src Pointer to UChar buffer containing the string to prepare
+ * @param srcLength Number of characters in the source string
+ * @param dest Pointer to the destination buffer to receive the output
+ * @param destCapacity The capacity of destination array
+ * @param options A bit set of options:
+ *
+ * - USPREP_DEFAULT Prohibit processing of unassigned code points in the input
+ *
+ * - USPREP_ALLOW_UNASSIGNED Treat the unassigned code points are in the input
+ * as normal Unicode code points.
+ *
+ * @param parseError Pointer to UParseError struct to receive information on position
+ * of error if an error is encountered. Can be NULL.
+ * @param status ICU in/out error code parameter.
+ * U_INVALID_CHAR_FOUND if src contains
+ * unmatched single surrogates.
+ * U_INDEX_OUTOFBOUNDS_ERROR if src contains
+ * too many code points.
+ * U_BUFFER_OVERFLOW_ERROR if destCapacity is not enough
+ * @return The number of UChars in the destination buffer
+ * @stable ICU 2.8
+ */
+
+U_CAPI int32_t U_EXPORT2
+usprep_prepare( const UStringPrepProfile* prep,
+ const UChar* src, int32_t srcLength,
+ UChar* dest, int32_t destCapacity,
+ int32_t options,
+ UParseError* parseError,
+ UErrorCode* status );
+
+
+#endif /* #if !UCONFIG_NO_IDNA */
+
+#endif
diff --git a/intl/icu/source/common/unicode/ustring.h b/intl/icu/source/common/unicode/ustring.h
new file mode 100644
index 0000000000..03c697c722
--- /dev/null
+++ b/intl/icu/source/common/unicode/ustring.h
@@ -0,0 +1,1685 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1998-2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* File ustring.h
+*
+* Modification History:
+*
+* Date Name Description
+* 12/07/98 bertrand Creation.
+******************************************************************************
+*/
+
+#ifndef USTRING_H
+#define USTRING_H
+
+#include "unicode/utypes.h"
+#include "unicode/putil.h"
+#include "unicode/uiter.h"
+
+/**
+ * \def UBRK_TYPEDEF_UBREAK_ITERATOR
+ * @internal
+ */
+
+#ifndef UBRK_TYPEDEF_UBREAK_ITERATOR
+# define UBRK_TYPEDEF_UBREAK_ITERATOR
+/** Simple declaration for u_strToTitle() to avoid including unicode/ubrk.h. @stable ICU 2.1*/
+ typedef struct UBreakIterator UBreakIterator;
+#endif
+
+/**
+ * \file
+ * \brief C API: Unicode string handling functions
+ *
+ * These C API functions provide general Unicode string handling.
+ *
+ * Some functions are equivalent in name, signature, and behavior to the ANSI C <string.h>
+ * functions. (For example, they do not check for bad arguments like NULL string pointers.)
+ * In some cases, only the thread-safe variant of such a function is implemented here
+ * (see u_strtok_r()).
+ *
+ * Other functions provide more Unicode-specific functionality like locale-specific
+ * upper/lower-casing and string comparison in code point order.
+ *
+ * ICU uses 16-bit Unicode (UTF-16) in the form of arrays of UChar code units.
+ * UTF-16 encodes each Unicode code point with either one or two UChar code units.
+ * (This is the default form of Unicode, and a forward-compatible extension of the original,
+ * fixed-width form that was known as UCS-2. UTF-16 superseded UCS-2 with Unicode 2.0
+ * in 1996.)
+ *
+ * Some APIs accept a 32-bit UChar32 value for a single code point.
+ *
+ * ICU also handles 16-bit Unicode text with unpaired surrogates.
+ * Such text is not well-formed UTF-16.
+ * Code-point-related functions treat unpaired surrogates as surrogate code points,
+ * i.e., as separate units.
+ *
+ * Although UTF-16 is a variable-width encoding form (like some legacy multi-byte encodings),
+ * it is much more efficient even for random access because the code unit values
+ * for single-unit characters vs. lead units vs. trail units are completely disjoint.
+ * This means that it is easy to determine character (code point) boundaries from
+ * random offsets in the string.
+ *
+ * Unicode (UTF-16) string processing is optimized for the single-unit case.
+ * Although it is important to support supplementary characters
+ * (which use pairs of lead/trail code units called "surrogates"),
+ * their occurrence is rare. Almost all characters in modern use require only
+ * a single UChar code unit (i.e., their code point values are <=0xffff).
+ *
+ * For more details see the User Guide Strings chapter (https://unicode-org.github.io/icu/userguide/strings/).
+ * For a discussion of the handling of unpaired surrogates see also
+ * Jitterbug 2145 and its icu mailing list proposal on 2002-sep-18.
+ */
+
+/**
+ * \defgroup ustring_ustrlen String Length
+ * \ingroup ustring_strlen
+ */
+/*@{*/
+/**
+ * Determine the length of an array of UChar.
+ *
+ * @param s The array of UChars, NULL (U+0000) terminated.
+ * @return The number of UChars in <code>chars</code>, minus the terminator.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strlen(const UChar *s);
+/*@}*/
+
+/**
+ * Count Unicode code points in the length UChar code units of the string.
+ * A code point may occupy either one or two UChar code units.
+ * Counting code points involves reading all code units.
+ *
+ * This functions is basically the inverse of the U16_FWD_N() macro (see utf.h).
+ *
+ * @param s The input string.
+ * @param length The number of UChar code units to be checked, or -1 to count all
+ * code points before the first NUL (U+0000).
+ * @return The number of code points in the specified code units.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_countChar32(const UChar *s, int32_t length);
+
+/**
+ * Check if the string contains more Unicode code points than a certain number.
+ * This is more efficient than counting all code points in the entire string
+ * and comparing that number with a threshold.
+ * This function may not need to scan the string at all if the length is known
+ * (not -1 for NUL-termination) and falls within a certain range, and
+ * never needs to count more than 'number+1' code points.
+ * Logically equivalent to (u_countChar32(s, length)>number).
+ * A Unicode code point may occupy either one or two UChar code units.
+ *
+ * @param s The input string.
+ * @param length The length of the string, or -1 if it is NUL-terminated.
+ * @param number The number of code points in the string is compared against
+ * the 'number' parameter.
+ * @return Boolean value for whether the string contains more Unicode code points
+ * than 'number'. Same as (u_countChar32(s, length)>number).
+ * @stable ICU 2.4
+ */
+U_CAPI UBool U_EXPORT2
+u_strHasMoreChar32Than(const UChar *s, int32_t length, int32_t number);
+
+/**
+ * Concatenate two ustrings. Appends a copy of <code>src</code>,
+ * including the null terminator, to <code>dst</code>. The initial copied
+ * character from <code>src</code> overwrites the null terminator in <code>dst</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strcat(UChar *dst,
+ const UChar *src);
+
+/**
+ * Concatenate two ustrings.
+ * Appends at most <code>n</code> characters from <code>src</code> to <code>dst</code>.
+ * Adds a terminating NUL.
+ * If src is too long, then only <code>n-1</code> characters will be copied
+ * before the terminating NUL.
+ * If <code>n&lt;=0</code> then dst is not modified.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to append; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strncat(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strrstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the first occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the first occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindFirst(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr(const UChar *s, UChar c);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strstr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar * U_EXPORT2
+u_strchr32(const UChar *s, UChar32 c);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param substring The substring to find (NUL-terminated).
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindFirst
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrstr(const UChar *s, const UChar *substring);
+
+/**
+ * Find the last occurrence of a substring in a string.
+ * The substring is found at code point boundaries.
+ * That means that if the substring begins with
+ * a trail surrogate or ends with a lead surrogate,
+ * then it is found only if these surrogates stand alone in the text.
+ * Otherwise, the substring edge units would be matched against
+ * halves of surrogate pairs.
+ *
+ * @param s The string to search.
+ * @param length The length of s (number of UChars), or -1 if it is NUL-terminated.
+ * @param substring The substring to find (NUL-terminated).
+ * @param subLength The length of substring (number of UChars), or -1 if it is NUL-terminated.
+ * @return A pointer to the last occurrence of <code>substring</code> in <code>s</code>,
+ * or <code>s</code> itself if the <code>substring</code> is empty,
+ * or <code>NULL</code> if <code>substring</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFindLast(const UChar *s, int32_t length, const UChar *substring, int32_t subLength);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The BMP code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr(const UChar *s, UChar c);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (NUL-terminated).
+ * @param c The code point to find.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memchr32
+ * @see u_strrstr
+ * @see u_strFindLast
+ */
+U_CAPI UChar * U_EXPORT2
+u_strrchr32(const UChar *s, UChar32 c);
+
+/**
+ * Locates the first occurrence in the string <code>string</code> of any of the characters
+ * in the string <code>matchSet</code>.
+ * Works just like C's strpbrk but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return A pointer to the character in <code>string</code> that matches one of the
+ * characters in <code>matchSet</code>, or NULL if no such character is found.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar * U_EXPORT2
+u_strpbrk(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that do not occur somewhere in <code>matchSet</code>.
+ * Works just like C's strcspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do not
+ * occur in <code>matchSet</code>.
+ * @see u_strspn
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * Returns the number of consecutive characters in <code>string</code>,
+ * beginning with the first, that occur somewhere in <code>matchSet</code>.
+ * Works just like C's strspn but with Unicode.
+ *
+ * @param string The string in which to search, NUL-terminated.
+ * @param matchSet A NUL-terminated string defining a set of code points
+ * for which to search in the text string.
+ * @return The number of initial characters in <code>string</code> that do
+ * occur in <code>matchSet</code>.
+ * @see u_strcspn
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strspn(const UChar *string, const UChar *matchSet);
+
+/**
+ * The string tokenizer API allows an application to break a string into
+ * tokens. Unlike strtok(), the saveState (the current pointer within the
+ * original string) is maintained in saveState. In the first call, the
+ * argument src is a pointer to the string. In subsequent calls to
+ * return successive tokens of that string, src must be specified as
+ * NULL. The value saveState is set by this function to maintain the
+ * function's position within the string, and on each subsequent call
+ * you must give this argument the same variable. This function does
+ * handle surrogate pairs. This function is similar to the strtok_r()
+ * the POSIX Threads Extension (1003.1c-1995) version.
+ *
+ * @param src String containing token(s). This string will be modified.
+ * After the first call to u_strtok_r(), this argument must
+ * be NULL to get to the next token.
+ * @param delim Set of delimiter characters (Unicode code points).
+ * @param saveState The current pointer within the original string,
+ * which is set by this function. The saveState
+ * parameter should the address of a local variable of type
+ * UChar *. (i.e. defined "UChar *myLocalSaveState" and use
+ * &myLocalSaveState for this parameter).
+ * @return A pointer to the next token found in src, or NULL
+ * when there are no more tokens.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar * U_EXPORT2
+u_strtok_r(UChar *src,
+ const UChar *delim,
+ UChar **saveState);
+
+/**
+ * Compare two Unicode strings for bitwise equality (code unit order).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2,</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcmp(const UChar *s1,
+ const UChar *s2);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * See u_strCompare for details.
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcmpCodePointOrder(const UChar *s1, const UChar *s2);
+
+/**
+ * Compare two Unicode strings (binary order).
+ *
+ * The comparison can be done in code unit order or in code point order.
+ * They differ only in UTF-16 when
+ * comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param codePointOrder Choose between code unit order (false)
+ * and code point order (true).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ UBool codePointOrder);
+
+/**
+ * Compare two Unicode strings (binary order)
+ * as presented by UCharIterator objects.
+ * Works otherwise just like u_strCompare().
+ *
+ * Both iterators are reset to their start positions.
+ * When the function returns, it is undefined where the iterators
+ * have stopped.
+ *
+ * @param iter1 First source string iterator.
+ * @param iter2 Second source string iterator.
+ * @param codePointOrder Choose between code unit order (false)
+ * and code point order (true).
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @see u_strCompare
+ *
+ * @stable ICU 2.6
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCompareIter(UCharIterator *iter1, UCharIterator *iter2, UBool codePointOrder);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to
+ * u_strCompare(u_strFoldCase(s1, options),
+ * u_strFoldCase(s2, options),
+ * (options&U_COMPARE_CODE_POINT_ORDER)!=0).
+ *
+ * The comparison can be done in UTF-16 code unit order or in code point order.
+ * They differ only when comparing supplementary code points (U+10000..U+10ffff)
+ * to BMP code points near the end of the BMP (i.e., U+e000..U+ffff).
+ * In code unit order, high BMP code points sort after supplementary code points
+ * because they are stored as pairs of surrogates which are at U+d800..U+dfff.
+ *
+ * This functions works with strings of different explicitly specified lengths
+ * unlike the ANSI C-like u_strcmp() and u_memcmp() etc.
+ * NUL-terminated strings are possible with length arguments of -1.
+ *
+ * @param s1 First source string.
+ * @param length1 Length of first source string, or -1 if NUL-terminated.
+ *
+ * @param s2 Second source string.
+ * @param length2 Length of second source string, or -1 if NUL-terminated.
+ *
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ *
+ * @return <0 or 0 or >0 as usual for string comparisons
+ *
+ * @stable ICU 2.2
+ */
+U_CAPI int32_t U_EXPORT2
+u_strCaseCompare(const UChar *s1, int32_t length1,
+ const UChar *s2, int32_t length2,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+/**
+ * Compare two ustrings for bitwise equality.
+ * Compares at most <code>n</code> characters.
+ *
+ * @param ucs1 A string to compare (can be NULL/invalid if n<=0).
+ * @param ucs2 A string to compare (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to compare; always returns 0 if n<=0.
+ * @return 0 if <code>s1</code> and <code>s2</code> are bitwise equal; a negative
+ * value if <code>s1</code> is bitwise less than <code>s2</code>; a positive
+ * value if <code>s1</code> is bitwise greater than <code>s2</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncmp(const UChar *ucs1,
+ const UChar *ucs2,
+ int32_t n);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_strncmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t n);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, options), u_strFoldCase(s2, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strcasecmp(const UChar *s1, const UChar *s2, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, at most n, options),
+ * u_strFoldCase(s2, at most n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param n The maximum number of characters each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strncasecmp(const UChar *s1, const UChar *s2, int32_t n, uint32_t options);
+
+/**
+ * Compare two strings case-insensitively using full case folding.
+ * This is equivalent to u_strcmp(u_strFoldCase(s1, n, options),
+ * u_strFoldCase(s2, n, options)).
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param length The number of characters in each string to case-fold and then compare.
+ * @param options A bit set of options:
+ * - U_FOLD_CASE_DEFAULT or 0 is used for default options:
+ * Comparison in code unit order with default case folding.
+ *
+ * - U_COMPARE_CODE_POINT_ORDER
+ * Set to choose code point order instead of code unit order
+ * (see u_strCompare for details).
+ *
+ * - U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ *
+ * @return A negative, zero, or positive integer indicating the comparison result.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcasecmp(const UChar *s1, const UChar *s2, int32_t length, uint32_t options);
+
+/**
+ * Copy a ustring. Adds a null terminator.
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strcpy(UChar *dst,
+ const UChar *src);
+
+/**
+ * Copy a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ *
+ * @param dst The destination string.
+ * @param src The source string (can be NULL/invalid if n<=0).
+ * @param n The maximum number of characters to copy; no-op if <=0.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strncpy(UChar *dst,
+ const UChar *src,
+ int32_t n);
+
+#if !UCONFIG_NO_CONVERSION
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Adds a null terminator.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2 u_uastrcpy(UChar *dst,
+ const char *src );
+
+/**
+ * Copy a byte string encoded in the default codepage to a ustring.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a host byte to UChar conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2 u_uastrncpy(UChar *dst,
+ const char *src,
+ int32_t n);
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Adds a null terminator.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI char* U_EXPORT2 u_austrcpy(char *dst,
+ const UChar *src );
+
+/**
+ * Copy ustring to a byte string encoded in the default codepage.
+ * Copies at most <code>n</code> characters. The result will be null terminated
+ * if the length of <code>src</code> is less than <code>n</code>.
+ * Performs a UChar to host byte conversion
+ *
+ * @param dst The destination string.
+ * @param src The source string.
+ * @param n The maximum number of characters to copy.
+ * @return A pointer to <code>dst</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI char* U_EXPORT2 u_austrncpy(char *dst,
+ const UChar *src,
+ int32_t n );
+
+#endif
+
+/**
+ * Synonym for memcpy(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to copy; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_memcpy(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Synonym for memmove(), but with UChars only.
+ * @param dest The destination string
+ * @param src The source string (can be NULL/invalid if count<=0)
+ * @param count The number of characters to move; no-op if <=0
+ * @return A pointer to <code>dest</code>
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_memmove(UChar *dest, const UChar *src, int32_t count);
+
+/**
+ * Initialize <code>count</code> characters of <code>dest</code> to <code>c</code>.
+ *
+ * @param dest The destination string.
+ * @param c The character to initialize the string.
+ * @param count The maximum number of characters to set.
+ * @return A pointer to <code>dest</code>.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_memset(UChar *dest, UChar c, int32_t count);
+
+/**
+ * Compare the first <code>count</code> UChars of each buffer.
+ *
+ * @param buf1 The first string to compare.
+ * @param buf2 The second string to compare.
+ * @param count The maximum number of UChars to compare.
+ * @return When buf1 < buf2, a negative number is returned.
+ * When buf1 == buf2, 0 is returned.
+ * When buf1 > buf2, a positive number is returned.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcmp(const UChar *buf1, const UChar *buf2, int32_t count);
+
+/**
+ * Compare two Unicode strings in code point order.
+ * This is different in UTF-16 from u_memcmp() if supplementary characters are present.
+ * For details, see u_strCompare().
+ *
+ * @param s1 A string to compare.
+ * @param s2 A string to compare.
+ * @param count The maximum number of characters to compare.
+ * @return a negative/zero/positive integer corresponding to whether
+ * the first string is less than/equal to/greater than the second one
+ * in code point order
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_memcmpCodePointOrder(const UChar *s1, const UChar *s2, int32_t count);
+
+/**
+ * Find the first occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr
+ * @see u_memchr32
+ * @see u_strFindFirst
+ */
+U_CAPI UChar* U_EXPORT2
+u_memchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the first occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the first occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.0
+ *
+ * @see u_strchr32
+ * @see u_memchr
+ * @see u_strFindFirst
+ */
+U_CAPI UChar* U_EXPORT2
+u_memchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Find the last occurrence of a BMP code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The BMP code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr
+ * @see u_memrchr32
+ * @see u_strFindLast
+ */
+U_CAPI UChar* U_EXPORT2
+u_memrchr(const UChar *s, UChar c, int32_t count);
+
+/**
+ * Find the last occurrence of a code point in a string.
+ * A surrogate code point is found only if its match in the text is not
+ * part of a surrogate pair.
+ * A NUL character is found at the string terminator.
+ *
+ * @param s The string to search (contains <code>count</code> UChars).
+ * @param c The code point to find.
+ * @param count The length of the string.
+ * @return A pointer to the last occurrence of <code>c</code> in <code>s</code>
+ * or <code>NULL</code> if <code>c</code> is not in <code>s</code>.
+ * @stable ICU 2.4
+ *
+ * @see u_strrchr32
+ * @see u_memrchr
+ * @see u_strFindLast
+ */
+U_CAPI UChar* U_EXPORT2
+u_memrchr32(const UChar *s, UChar32 c, int32_t count);
+
+/**
+ * Unicode String literals in C.
+ * We need one macro to declare a variable for the string
+ * and to statically preinitialize it if possible,
+ * and a second macro to dynamically initialize such a string variable if necessary.
+ *
+ * The macros are defined for maximum performance.
+ * They work only for strings that contain "invariant characters", i.e.,
+ * only latin letters, digits, and some punctuation.
+ * See utypes.h for details.
+ *
+ * A pair of macros for a single string must be used with the same
+ * parameters.
+ * The string parameter must be a C string literal.
+ * The length of the string, not including the terminating
+ * `NUL`, must be specified as a constant.
+ * The U_STRING_DECL macro should be invoked exactly once for one
+ * such string variable before it is used.
+ *
+ * Usage:
+ *
+ * U_STRING_DECL(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_DECL(ustringVar2, "jumps 5%", 8);
+ * static UBool didInit=false;
+ *
+ * int32_t function() {
+ * if(!didInit) {
+ * U_STRING_INIT(ustringVar1, "Quick-Fox 2", 11);
+ * U_STRING_INIT(ustringVar2, "jumps 5%", 8);
+ * didInit=true;
+ * }
+ * return u_strcmp(ustringVar1, ustringVar2);
+ * }
+ *
+ * Note that the macros will NOT consistently work if their argument is another #`define`.
+ * The following will not work on all platforms, don't use it.
+ *
+ * #define GLUCK "Mr. Gluck"
+ * U_STRING_DECL(var, GLUCK, 9)
+ * U_STRING_INIT(var, GLUCK, 9)
+ *
+ * Instead, use the string literal "Mr. Gluck" as the argument to both macro
+ * calls.
+ *
+ *
+ * @stable ICU 2.0
+ */
+#if defined(U_DECLARE_UTF16)
+# define U_STRING_DECL(var, cs, length) static const UChar *var=(const UChar *)U_DECLARE_UTF16(cs)
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#elif U_SIZEOF_WCHAR_T==U_SIZEOF_UCHAR && (U_CHARSET_FAMILY==U_ASCII_FAMILY || defined(U_WCHAR_IS_UTF16))
+# define U_STRING_DECL(var, cs, length) static const UChar var[(length)+1]=L ## cs
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length)
+#else
+# define U_STRING_DECL(var, cs, length) static UChar var[(length)+1]
+ /**@stable ICU 2.0 */
+# define U_STRING_INIT(var, cs, length) u_charsToUChars(cs, var, length+1)
+#endif
+
+/**
+ * Unescape a string of characters and write the resulting
+ * Unicode characters to the destination buffer. The following escape
+ * sequences are recognized:
+ *
+ * \\uhhhh 4 hex digits; h in [0-9A-Fa-f]
+ * \\Uhhhhhhhh 8 hex digits
+ * \\xhh 1-2 hex digits
+ * \\x{h...} 1-8 hex digits
+ * \\ooo 1-3 octal digits; o in [0-7]
+ * \\cX control-X; X is masked with 0x1F
+ *
+ * as well as the standard ANSI C escapes:
+ *
+ * \\a => U+0007, \\b => U+0008, \\t => U+0009, \\n => U+000A,
+ * \\v => U+000B, \\f => U+000C, \\r => U+000D, \\e => U+001B,
+ * \\&quot; => U+0022, \\' => U+0027, \\? => U+003F, \\\\ => U+005C
+ *
+ * Anything else following a backslash is generically escaped. For
+ * example, "[a\\-z]" returns "[a-z]".
+ *
+ * If an escape sequence is ill-formed, this method returns an empty
+ * string. An example of an ill-formed sequence is "\\u" followed by
+ * fewer than 4 hex digits.
+ *
+ * The above characters are recognized in the compiler's codepage,
+ * that is, they are coded as 'u', '\\', etc. Characters that are
+ * not parts of escape sequences are converted using u_charsToUChars().
+ *
+ * This function is similar to UnicodeString::unescape() but not
+ * identical to it. The latter takes a source UnicodeString, so it
+ * does escape recognition but no conversion.
+ *
+ * @param src a zero-terminated string of invariant characters
+ * @param dest pointer to buffer to receive converted and unescaped
+ * text and, if there is room, a zero terminator. May be NULL for
+ * preflighting, in which case no UChars will be written, but the
+ * return value will still be valid. On error, an empty string is
+ * stored here (if possible).
+ * @param destCapacity the number of UChars that may be written at
+ * dest. Ignored if dest == NULL.
+ * @return the length of unescaped string.
+ * @see u_unescapeAt
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_unescape(const char *src,
+ UChar *dest, int32_t destCapacity);
+
+U_CDECL_BEGIN
+/**
+ * Callback function for u_unescapeAt() that returns a character of
+ * the source text given an offset and a context pointer. The context
+ * pointer will be whatever is passed into u_unescapeAt().
+ *
+ * @param offset pointer to the offset that will be passed to u_unescapeAt().
+ * @param context an opaque pointer passed directly into u_unescapeAt()
+ * @return the character represented by the escape sequence at
+ * offset
+ * @see u_unescapeAt
+ * @stable ICU 2.0
+ */
+typedef UChar (U_CALLCONV *UNESCAPE_CHAR_AT)(int32_t offset, void *context);
+U_CDECL_END
+
+/**
+ * Unescape a single sequence. The character at offset-1 is assumed
+ * (without checking) to be a backslash. This method takes a callback
+ * pointer to a function that returns the UChar at a given offset. By
+ * varying this callback, ICU functions are able to unescape char*
+ * strings, UnicodeString objects, and UFILE pointers.
+ *
+ * If offset is out of range, or if the escape sequence is ill-formed,
+ * (UChar32)0xFFFFFFFF is returned. See documentation of u_unescape()
+ * for a list of recognized sequences.
+ *
+ * @param charAt callback function that returns a UChar of the source
+ * text given an offset and a context pointer.
+ * @param offset pointer to the offset that will be passed to charAt.
+ * The offset value will be updated upon return to point after the
+ * last parsed character of the escape sequence. On error the offset
+ * is unchanged.
+ * @param length the number of characters in the source text. The
+ * last character of the source text is considered to be at offset
+ * length-1.
+ * @param context an opaque pointer passed directly into charAt.
+ * @return the character represented by the escape sequence at
+ * offset, or (UChar32)0xFFFFFFFF on error.
+ * @see u_unescape()
+ * @see UnicodeString#unescape()
+ * @see UnicodeString#unescapeAt()
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32 U_EXPORT2
+u_unescapeAt(UNESCAPE_CHAR_AT charAt,
+ int32_t *offset,
+ int32_t length,
+ void *context);
+
+/**
+ * Uppercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strToUpper(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+/**
+ * Lowercase the characters in a string.
+ * Casing is locale-dependent and context-sensitive.
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strToLower(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+/**
+ * Titlecase a string.
+ * Casing is locale-dependent and context-sensitive.
+ * Titlecasing uses a break iterator to find the first characters of words
+ * that are to be titlecased. It titlecases those characters and lowercases
+ * all others.
+ *
+ * The titlecase break iterator can be provided to customize for arbitrary
+ * styles, using rules and dictionaries beyond the standard iterators.
+ * It may be more efficient to always provide an iterator to avoid
+ * opening and closing one for each string.
+ * The standard titlecase iterator for the root locale implements the
+ * algorithm of Unicode TR 21.
+ *
+ * This function uses only the setText(), first() and next() methods of the
+ * provided break iterator.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param titleIter A break iterator to find the first characters of words
+ * that are to be titlecased.
+ * If none is provided (NULL), then a standard titlecase
+ * break iterator is opened.
+ * @param locale The locale to consider, or "" for the root locale or NULL for the default locale.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.1
+ */
+U_CAPI int32_t U_EXPORT2
+u_strToTitle(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ UBreakIterator *titleIter,
+ const char *locale,
+ UErrorCode *pErrorCode);
+
+#endif
+
+/**
+ * Case-folds the characters in a string.
+ *
+ * Case-folding is locale-independent and not context-sensitive,
+ * but there is an option for whether to include or exclude mappings for dotted I
+ * and dotless i that are marked with 'T' in CaseFolding.txt.
+ *
+ * The result may be longer or shorter than the original.
+ * The source string and the destination buffer are allowed to overlap.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the result
+ * without writing any of the result string.
+ * @param src The original string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The length of the result string. It may be greater than destCapacity. In that case,
+ * only some of the result was written to the destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI int32_t U_EXPORT2
+u_strFoldCase(UChar *dest, int32_t destCapacity,
+ const UChar *src, int32_t srcLength,
+ uint32_t options,
+ UErrorCode *pErrorCode);
+
+#if defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION
+/**
+ * Convert a UTF-16 string to a wchar_t string.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions UTF-16 -> default charset -> wchar_t* are performed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of wchar_t's). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI wchar_t* U_EXPORT2
+u_strToWCS(wchar_t *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+/**
+ * Convert a wchar_t string to UTF-16.
+ * If it is known at compile time that wchar_t strings are in UTF-16 or UTF-32, then
+ * this function simply calls the fast, dedicated function for that.
+ * Otherwise, two conversions wchar_t* -> default charset -> UTF-16 are performed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromWCS(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const wchar_t *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+#endif /* defined(U_WCHAR_IS_UTF16) || defined(U_WCHAR_IS_UTF32) || !UCONFIG_NO_CONVERSION */
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strToUTF8WithSub
+ * @see u_strFromUTF8
+ */
+U_CAPI char* U_EXPORT2
+u_strToUTF8(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @stable ICU 2.0
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF8(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-8.
+ *
+ * Same as u_strToUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF8
+ * @see u_strFromUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_CAPI char* U_EXPORT2
+u_strToUTF8WithSub(char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF8().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8Lenient
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF8WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-8 string to UTF-16.
+ *
+ * Same as u_strFromUTF8() except that this function is designed to be very fast,
+ * which it achieves by being lenient about malformed UTF-8 sequences.
+ * This function is intended for use in environments where UTF-8 text is
+ * expected to be well-formed.
+ *
+ * Its semantics are:
+ * - Well-formed UTF-8 text is correctly converted to well-formed UTF-16 text.
+ * - The function will not read beyond the input string, nor write beyond
+ * the destCapacity.
+ * - Malformed UTF-8 results in "garbage" 16-bit Unicode strings which may not
+ * be well-formed UTF-16.
+ * The function will resynchronize to valid code point boundaries
+ * within a small number of code points after an illegal sequence.
+ * - Non-shortest forms are not detected and will result in "spoofing" output.
+ *
+ * For further performance improvement, if srcLength is given (>=0),
+ * then it must be destCapacity>=srcLength.
+ *
+ * There is no inverse u_strToUTF8Lenient() function because there is practically
+ * no performance gain from not checking that a UTF-16 string is well-formed.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * Unlike for other ICU functions, if srcLength>=0 then it
+ * must be destCapacity>=srcLength.
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * Unlike for other ICU functions, if srcLength>=0 but
+ * destCapacity<srcLength, then *pDestLength will be set to srcLength
+ * (and U_BUFFER_OVERFLOW_ERROR will be set)
+ * regardless of the actual result length.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8
+ * @see u_strFromUTF8WithSub
+ * @see u_strToUTF8WithSub
+ * @stable ICU 3.6
+ */
+U_CAPI UChar * U_EXPORT2
+u_strFromUTF8Lenient(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32WithSub
+ * @see u_strFromUTF32
+ * @stable ICU 2.0
+ */
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ * If the input string is not well-formed, then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Must be a valid pointer to an error code value,
+ * which must not indicate a failure before the function call.
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32WithSub
+ * @see u_strToUTF32
+ * @stable ICU 2.0
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-16 string to UTF-32.
+ *
+ * Same as u_strToUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strToUTF32().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChar32s). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strToUTF32
+ * @see u_strFromUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_CAPI UChar32* U_EXPORT2
+u_strToUTF32WithSub(UChar32 *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a UTF-32 string to UTF-16.
+ *
+ * Same as u_strFromUTF32() except for the additional subchar which is output for
+ * illegal input sequences, instead of stopping with the U_INVALID_CHAR_FOUND error code.
+ * With subchar==U_SENTINEL, this function behaves exactly like u_strFromUTF32().
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF32
+ * @see u_strToUTF32WithSub
+ * @stable ICU 4.2
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromUTF32WithSub(UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar32 *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a 16-bit Unicode string to Java Modified UTF-8.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#modified-utf-8
+ *
+ * This function behaves according to the documentation for Java DataOutput.writeUTF()
+ * except that it does not encode the output length in the destination buffer
+ * and does not have an output length restriction.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataOutput.html#writeUTF(java.lang.String)
+ *
+ * The input string need not be well-formed UTF-16.
+ * (Therefore there is no subchar parameter.)
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of chars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @stable ICU 4.4
+ * @see u_strToUTF8WithSub
+ * @see u_strFromJavaModifiedUTF8WithSub
+ */
+U_CAPI char* U_EXPORT2
+u_strToJavaModifiedUTF8(
+ char *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const UChar *src,
+ int32_t srcLength,
+ UErrorCode *pErrorCode);
+
+/**
+ * Convert a Java Modified UTF-8 string to a 16-bit Unicode string.
+ * If the input string is not well-formed and no substitution char is specified,
+ * then the U_INVALID_CHAR_FOUND error code is set.
+ *
+ * This function behaves according to the documentation for Java DataInput.readUTF()
+ * except that it takes a length parameter rather than
+ * interpreting the first two input bytes as the length.
+ * See http://java.sun.com/javase/6/docs/api/java/io/DataInput.html#readUTF()
+ *
+ * The output string may not be well-formed UTF-16.
+ *
+ * @param dest A buffer for the result string. The result will be zero-terminated if
+ * the buffer is large enough.
+ * @param destCapacity The size of the buffer (number of UChars). If it is 0, then
+ * dest may be NULL and the function will only return the length of the
+ * result without writing any of the result string (pre-flighting).
+ * @param pDestLength A pointer to receive the number of units written to the destination. If
+ * pDestLength!=NULL then *pDestLength is always set to the
+ * number of output units corresponding to the transformation of
+ * all the input units, even in case of a buffer overflow.
+ * @param src The original source string
+ * @param srcLength The length of the original string. If -1, then src must be zero-terminated.
+ * @param subchar The substitution character to use in place of an illegal input sequence,
+ * or U_SENTINEL if the function is to return with U_INVALID_CHAR_FOUND instead.
+ * A substitution character can be any valid Unicode code point (up to U+10FFFF)
+ * except for surrogate code points (U+D800..U+DFFF).
+ * The recommended value is U+FFFD "REPLACEMENT CHARACTER".
+ * @param pNumSubstitutions Output parameter receiving the number of substitutions if subchar>=0.
+ * Set to 0 if no substitutions occur or subchar<0.
+ * pNumSubstitutions can be NULL.
+ * @param pErrorCode Pointer to a standard ICU error code. Its input value must
+ * pass the U_SUCCESS() test, or else the function returns
+ * immediately. Check for U_FAILURE() on output or use with
+ * function chaining. (See User Guide for details.)
+ * @return The pointer to destination buffer.
+ * @see u_strFromUTF8WithSub
+ * @see u_strFromUTF8Lenient
+ * @see u_strToJavaModifiedUTF8
+ * @stable ICU 4.4
+ */
+U_CAPI UChar* U_EXPORT2
+u_strFromJavaModifiedUTF8WithSub(
+ UChar *dest,
+ int32_t destCapacity,
+ int32_t *pDestLength,
+ const char *src,
+ int32_t srcLength,
+ UChar32 subchar, int32_t *pNumSubstitutions,
+ UErrorCode *pErrorCode);
+
+#endif
diff --git a/intl/icu/source/common/unicode/ustringtrie.h b/intl/icu/source/common/unicode/ustringtrie.h
new file mode 100644
index 0000000000..fd85648225
--- /dev/null
+++ b/intl/icu/source/common/unicode/ustringtrie.h
@@ -0,0 +1,97 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: udicttrie.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010dec17
+* created by: Markus W. Scherer
+*/
+
+#ifndef __USTRINGTRIE_H__
+#define __USTRINGTRIE_H__
+
+/**
+ * \file
+ * \brief C API: Helper definitions for dictionary trie APIs.
+ */
+
+#include "unicode/utypes.h"
+
+
+/**
+ * Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
+ * @see USTRINGTRIE_MATCHES
+ * @see USTRINGTRIE_HAS_VALUE
+ * @see USTRINGTRIE_HAS_NEXT
+ * @stable ICU 4.8
+ */
+enum UStringTrieResult {
+ /**
+ * The input unit(s) did not continue a matching string.
+ * Once current()/next() return USTRINGTRIE_NO_MATCH,
+ * all further calls to current()/next() will also return USTRINGTRIE_NO_MATCH,
+ * until the trie is reset to its original state or to a saved state.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_NO_MATCH,
+ /**
+ * The input unit(s) continued a matching string
+ * but there is no value for the string so far.
+ * (It is a prefix of a longer string.)
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_NO_VALUE,
+ /**
+ * The input unit(s) continued a matching string
+ * and there is a value for the string so far.
+ * This value will be returned by getValue().
+ * No further input byte/unit can continue a matching string.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_FINAL_VALUE,
+ /**
+ * The input unit(s) continued a matching string
+ * and there is a value for the string so far.
+ * This value will be returned by getValue().
+ * Another input byte/unit can continue a matching string.
+ * @stable ICU 4.8
+ */
+ USTRINGTRIE_INTERMEDIATE_VALUE
+};
+
+/**
+ * Same as (result!=USTRINGTRIE_NO_MATCH).
+ * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
+ * @return true if the input bytes/units so far are part of a matching string/byte sequence.
+ * @stable ICU 4.8
+ */
+#define USTRINGTRIE_MATCHES(result) ((result)!=USTRINGTRIE_NO_MATCH)
+
+/**
+ * Equivalent to (result==USTRINGTRIE_INTERMEDIATE_VALUE || result==USTRINGTRIE_FINAL_VALUE) but
+ * this macro evaluates result exactly once.
+ * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
+ * @return true if there is a value for the input bytes/units so far.
+ * @see BytesTrie::getValue
+ * @see UCharsTrie::getValue
+ * @stable ICU 4.8
+ */
+#define USTRINGTRIE_HAS_VALUE(result) ((result)>=USTRINGTRIE_FINAL_VALUE)
+
+/**
+ * Equivalent to (result==USTRINGTRIE_NO_VALUE || result==USTRINGTRIE_INTERMEDIATE_VALUE) but
+ * this macro evaluates result exactly once.
+ * @param result A result from BytesTrie::first(), UCharsTrie::next() etc.
+ * @return true if another input byte/unit can continue a matching string.
+ * @stable ICU 4.8
+ */
+#define USTRINGTRIE_HAS_NEXT(result) ((result)&1)
+
+#endif /* __USTRINGTRIE_H__ */
diff --git a/intl/icu/source/common/unicode/utext.h b/intl/icu/source/common/unicode/utext.h
new file mode 100644
index 0000000000..423b281631
--- /dev/null
+++ b/intl/icu/source/common/unicode/utext.h
@@ -0,0 +1,1603 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2004-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utext.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2004oct06
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UTEXT_H__
+#define __UTEXT_H__
+
+/**
+ * \file
+ * \brief C API: Abstract Unicode Text API
+ *
+ * The Text Access API provides a means to allow text that is stored in alternative
+ * formats to work with ICU services. ICU normally operates on text that is
+ * stored in UTF-16 format, in (UChar *) arrays for the C APIs or as type
+ * UnicodeString for C++ APIs.
+ *
+ * ICU Text Access allows other formats, such as UTF-8 or non-contiguous
+ * UTF-16 strings, to be placed in a UText wrapper and then passed to ICU services.
+ *
+ * There are three general classes of usage for UText:
+ *
+ * Application Level Use. This is the simplest usage - applications would
+ * use one of the utext_open() functions on their input text, and pass
+ * the resulting UText to the desired ICU service.
+ *
+ * Second is usage in ICU Services, such as break iteration, that will need to
+ * operate on input presented to them as a UText. These implementations
+ * will need to use the iteration and related UText functions to gain
+ * access to the actual text.
+ *
+ * The third class of UText users are "text providers." These are the
+ * UText implementations for the various text storage formats. An application
+ * or system with a unique text storage format can implement a set of
+ * UText provider functions for that format, which will then allow
+ * ICU services to operate on that format.
+ *
+ *
+ * <em>Iterating over text</em>
+ *
+ * Here is sample code for a forward iteration over the contents of a UText
+ *
+ * \code
+ * UChar32 c;
+ * UText *ut = whatever();
+ *
+ * for (c=utext_next32From(ut, 0); c>=0; c=utext_next32(ut)) {
+ * // do whatever with the codepoint c here.
+ * }
+ * \endcode
+ *
+ * And here is similar code to iterate in the reverse direction, from the end
+ * of the text towards the beginning.
+ *
+ * \code
+ * UChar32 c;
+ * UText *ut = whatever();
+ * int textLength = utext_nativeLength(ut);
+ * for (c=utext_previous32From(ut, textLength); c>=0; c=utext_previous32(ut)) {
+ * // do whatever with the codepoint c here.
+ * }
+ * \endcode
+ *
+ * <em>Characters and Indexing</em>
+ *
+ * Indexing into text by UText functions is nearly always in terms of the native
+ * indexing of the underlying text storage. The storage format could be UTF-8
+ * or UTF-32, for example. When coding to the UText access API, no assumptions
+ * can be made regarding the size of characters, or how far an index
+ * may move when iterating between characters.
+ *
+ * All indices supplied to UText functions are pinned to the length of the
+ * text. An out-of-bounds index is not considered to be an error, but is
+ * adjusted to be in the range 0 <= index <= length of input text.
+ *
+ *
+ * When an index position is returned from a UText function, it will be
+ * a native index to the underlying text. In the case of multi-unit characters,
+ * it will always refer to the first position of the character,
+ * never to the interior. This is essentially the same thing as saying that
+ * a returned index will always point to a boundary between characters.
+ *
+ * When a native index is supplied to a UText function, all indices that
+ * refer to any part of a multi-unit character representation are considered
+ * to be equivalent. In the case of multi-unit characters, an incoming index
+ * will be logically normalized to refer to the start of the character.
+ *
+ * It is possible to test whether a native index is on a code point boundary
+ * by doing a utext_setNativeIndex() followed by a utext_getNativeIndex().
+ * If the index is returned unchanged, it was on a code point boundary. If
+ * an adjusted index is returned, the original index referred to the
+ * interior of a character.
+ *
+ * <em>Conventions for calling UText functions</em>
+ *
+ * Most UText access functions have as their first parameter a (UText *) pointer,
+ * which specifies the UText to be used. Unless otherwise noted, the
+ * pointer must refer to a valid, open UText. Attempting to
+ * use a closed UText or passing a NULL pointer is a programming error and
+ * will produce undefined results or NULL pointer exceptions.
+ *
+ * The UText_Open family of functions can either open an existing (closed)
+ * UText, or heap allocate a new UText. Here is sample code for creating
+ * a stack-allocated UText.
+ *
+ * \code
+ * char *s = whatever(); // A utf-8 string
+ * U_ErrorCode status = U_ZERO_ERROR;
+ * UText ut = UTEXT_INITIALIZER;
+ * utext_openUTF8(ut, s, -1, &status);
+ * if (U_FAILURE(status)) {
+ * // error handling
+ * } else {
+ * // work with the UText
+ * }
+ * \endcode
+ *
+ * Any existing UText passed to an open function _must_ have been initialized,
+ * either by the UTEXT_INITIALIZER, or by having been originally heap-allocated
+ * by an open function. Passing NULL will cause the open function to
+ * heap-allocate and fully initialize a new UText.
+ *
+ */
+
+
+
+#include "unicode/utypes.h"
+#include "unicode/uchar.h"
+#if U_SHOW_CPLUSPLUS_API
+#include "unicode/localpointer.h"
+#include "unicode/rep.h"
+#include "unicode/unistr.h"
+#include "unicode/chariter.h"
+#endif
+
+
+U_CDECL_BEGIN
+
+struct UText;
+typedef struct UText UText; /**< C typedef for struct UText. @stable ICU 3.6 */
+
+
+/***************************************************************************************
+ *
+ * C Functions for creating UText wrappers around various kinds of text strings.
+ *
+ ****************************************************************************************/
+
+
+/**
+ * Close function for UText instances.
+ * Cleans up, releases any resources being held by an open UText.
+ * <p>
+ * If the UText was originally allocated by one of the utext_open functions,
+ * the storage associated with the utext will also be freed.
+ * If the UText storage originated with the application, as it would with
+ * a local or static instance, the storage will not be deleted.
+ *
+ * An open UText can be reset to refer to new string by using one of the utext_open()
+ * functions without first closing the UText.
+ *
+ * @param ut The UText to be closed.
+ * @return NULL if the UText struct was deleted by the close. If the UText struct
+ * was originally provided by the caller to the open function, it is
+ * returned by this function, and may be safely used again in
+ * a subsequent utext_open.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_close(UText *ut);
+
+/**
+ * Open a read-only UText implementation for UTF-8 strings.
+ *
+ * \htmlonly
+ * Any invalid UTF-8 in the input will be handled in this way:
+ * a sequence of bytes that has the form of a truncated, but otherwise valid,
+ * UTF-8 sequence will be replaced by a single unicode replacement character, \uFFFD.
+ * Any other illegal bytes will each be replaced by a \uFFFD.
+ * \endhtmlonly
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UTF-8 string.
+ * @param s A UTF-8 string. Must not be NULL.
+ * @param length The length of the UTF-8 string in bytes, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
+
+
+/**
+ * Open a read-only UText for UChar * string.
+ *
+ * @param ut Pointer to a UText struct. If NULL, a new UText will be created.
+ * If non-NULL, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified UChar string.
+ * @param s A UChar (UTF-16) string
+ * @param length The number of UChars in the input string, or -1 if the string is
+ * zero terminated.
+ * @param status Errors are returned here.
+ * @return A pointer to the UText. If a pre-allocated UText was provided, it
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
+
+
+#if U_SHOW_CPLUSPLUS_API
+/**
+ * Open a writable UText for a non-const UnicodeString.
+ *
+ * @param ut Pointer to a UText struct. If nullptr, a new UText will be created.
+ * If non-nullptr, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A UnicodeString.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a UText for a const UnicodeString. The resulting UText will not be writable.
+ *
+ * @param ut Pointer to a UText struct. If nullptr, a new UText will be created.
+ * If non-nullptr, must refer to an initialized UText struct, which will then
+ * be reset to reference the specified input string.
+ * @param s A const UnicodeString to be wrapped.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status);
+
+
+/**
+ * Open a writable UText implementation for an ICU Replaceable object.
+ * @param ut Pointer to a UText struct. If nullptr, a new UText will be created.
+ * If non-nullptr, must refer to an already existing UText, which will then
+ * be reset to reference the specified replaceable text.
+ * @param rep A Replaceable text object.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status);
+
+/**
+ * Open a UText implementation over an ICU CharacterIterator.
+ * @param ut Pointer to a UText struct. If nullptr, a new UText will be created.
+ * If non-nullptr, must refer to an already existing UText, which will then
+ * be reset to reference the specified replaceable text.
+ * @param ci A Character Iterator.
+ * @param status Errors are returned here.
+ * @return Pointer to the UText. If a UText was supplied as input, this
+ * will always be used and returned.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status);
+
+#endif
+
+
+/**
+ * Clone a UText. This is much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not affect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * The standard UText implementations for UTF8, UChar *, UnicodeString and
+ * Replaceable all support deep cloning.
+ *
+ * The UText returned from a deep clone will be writable, assuming that the text
+ * provider is able to support writing, even if the source UText had been made
+ * non-writable by means of UText_freeze().
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation will not fail, barring truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * Shallow UText clones should be avoided if the UText functions that modify the
+ * text are expected to be used, either on the original or the cloned UText.
+ * Any such modifications can cause unpredictable behavior. Read Only
+ * shallow clones provide some protection against errors of this type by
+ * disabling text modification via the cloned UText.
+ *
+ * A shallow clone made with the readOnly parameter == false will preserve the
+ * utext_isWritable() state of the source object. Note, however, that
+ * write operations must be avoided while more than one UText exists that refer
+ * to the same underlying text.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for read access only with shallow clones, and for both read and
+ * write access with deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * If non-NULL, must refer to an already existing UText, which will then
+ * be reset to become the clone.
+ * @param src The UText to be cloned.
+ * @param deep true to request a deep clone, false for a shallow clone.
+ * @param readOnly true to request that the cloned UText have read only access to the
+ * underlying text.
+
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * will be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
+
+
+/**
+ * Compare two UText objects for equality.
+ * UTexts are equal if they are iterating over the same text, and
+ * have the same iteration position within the text.
+ * If either or both of the parameters are NULL, the comparison is false.
+ *
+ * @param a The first of the two UTexts to compare.
+ * @param b The other UText to be compared.
+ * @return true if the two UTexts are equal.
+ * @stable ICU 3.6
+ */
+U_CAPI UBool U_EXPORT2
+utext_equals(const UText *a, const UText *b);
+
+
+/*****************************************************************************
+ *
+ * Functions to work with the text represented by a UText wrapper
+ *
+ *****************************************************************************/
+
+/**
+ * Get the length of the text. Depending on the characteristics
+ * of the underlying text representation, this may be expensive.
+ * @see utext_isLengthExpensive()
+ *
+ *
+ * @param ut the text to be accessed.
+ * @return the length of the text, expressed in native units.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI int64_t U_EXPORT2
+utext_nativeLength(UText *ut);
+
+/**
+ * Return true if calculating the length of the text could be expensive.
+ * Finding the length of NUL terminated strings is considered to be expensive.
+ *
+ * Note that the value of this function may change
+ * as the result of other operations on a UText.
+ * Once the length of a string has been discovered, it will no longer
+ * be expensive to report it.
+ *
+ * @param ut the text to be accessed.
+ * @return true if determining the length of the text could be time consuming.
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+utext_isLengthExpensive(const UText *ut);
+
+/**
+ * Returns the code point at the requested index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * If the specified index points to the interior of a multi-unit
+ * character - one of the trail bytes of a UTF-8 sequence, for example -
+ * the complete code point will be returned.
+ *
+ * The iteration position will be set to the start of the returned code point.
+ *
+ * This function is roughly equivalent to the sequence
+ * utext_setNativeIndex(index);
+ * utext_current32();
+ * (There is a subtle difference if the index is out of bounds by being less than zero -
+ * utext_setNativeIndex(negative value) sets the index to zero, after which utext_current()
+ * will return the char at zero. utext_char32At(negative index), on the other hand, will
+ * return the U_SENTINEL value of -1.)
+ *
+ * @param ut the text to be accessed
+ * @param nativeIndex the native index of the character to be accessed. If the index points
+ * to other than the first unit of a multi-unit character, it will be adjusted
+ * to the start of the character.
+ * @return the code point at the specified index.
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_char32At(UText *ut, int64_t nativeIndex);
+
+
+/**
+ *
+ * Get the code point at the current iteration position,
+ * or U_SENTINEL (-1) if the iteration has reached the end of
+ * the input text.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the current iterator position.
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_current32(UText *ut);
+
+
+/**
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ *
+ * If the position is at the end of the text (the index following
+ * the last character, which is also the length of the text),
+ * return U_SENTINEL (-1) and do not advance the index.
+ *
+ * This is a post-increment operation.
+ *
+ * An inline macro version of this function, UTEXT_NEXT32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the Unicode code point at the iteration position.
+ * @see UTEXT_NEXT32
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_next32(UText *ut);
+
+
+/**
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ *
+ * If the initial position is at the start of the text (index of 0)
+ * return U_SENTINEL (-1), and leave the position unchanged.
+ *
+ * An inline macro version of this function, UTEXT_PREVIOUS32(),
+ * is available for performance critical use.
+ *
+ * @param ut the text to be accessed.
+ * @return the previous UChar32 code point, or U_SENTINEL (-1)
+ * if the iteration has reached the start of the text.
+ * @see UTEXT_PREVIOUS32
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_previous32(UText *ut);
+
+
+/**
+ * Set the iteration index and return the code point at that index.
+ * Leave the iteration index at the start of the following code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a forward iteration. The results are identical to the those
+ * from the sequence
+ * \code
+ * utext_setIndex();
+ * utext_next32();
+ * \endcode
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index, in the native units of the text provider.
+ * @return Code point which starts at or before index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_next32From(UText *ut, int64_t nativeIndex);
+
+
+
+/**
+ * Set the iteration index, and return the code point preceding the
+ * one specified by the initial index. Leave the iteration position
+ * at the start of the returned code point.
+ *
+ * This function is the most efficient and convenient way to
+ * begin a backwards iteration.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex Iteration index in the native units of the text provider.
+ * @return Code point preceding the one at the initial index,
+ * or U_SENTINEL (-1) if it is out of bounds.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI UChar32 U_EXPORT2
+utext_previous32From(UText *ut, int64_t nativeIndex);
+
+/**
+ * Get the current iterator position, which can range from 0 to
+ * the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ *
+ * @param ut the text to be accessed.
+ * @return the current index position, in the native units of the text provider.
+ * @stable ICU 3.4
+ */
+U_CAPI int64_t U_EXPORT2
+utext_getNativeIndex(const UText *ut);
+
+/**
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ * <p>
+ * It will usually be more efficient to begin an iteration
+ * using the functions utext_next32From() or utext_previous32From()
+ * rather than setIndex().
+ * <p>
+ * Moving the index position to an adjacent character is best done
+ * with utext_next32(), utext_previous32() or utext_moveIndex32().
+ * Attempting to do direct arithmetic on the index position is
+ * complicated by the fact that the size (in native units) of a
+ * character depends on the underlying representation of the character
+ * (UTF-8, UTF-16, UTF-32, arbitrary codepage), and is not
+ * easily knowable.
+ *
+ * @param ut the text to be accessed.
+ * @param nativeIndex the native unit index of the new iteration position.
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+utext_setNativeIndex(UText *ut, int64_t nativeIndex);
+
+/**
+ * Move the iterator position by delta code points. The number of code points
+ * is a signed number; a negative delta will move the iterator backwards,
+ * towards the start of the text.
+ * <p>
+ * The index is moved by <code>delta</code> code points
+ * forward or backward, but no further backward than to 0 and
+ * no further forward than to utext_nativeLength().
+ * The resulting index value will be in between 0 and length, inclusive.
+ *
+ * @param ut the text to be accessed.
+ * @param delta the signed number of code points to move the iteration position.
+ * @return true if the position could be moved the requested number of positions while
+ * staying within the range [0 - text length].
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+utext_moveIndex32(UText *ut, int32_t delta);
+
+/**
+ * Get the native index of the character preceding the current position.
+ * If the iteration position is already at the start of the text, zero
+ * is returned.
+ * The value returned is the same as that obtained from the following sequence,
+ * but without the side effect of changing the iteration position.
+ *
+ * \code
+ * UText *ut = whatever;
+ * ...
+ * utext_previous(ut)
+ * utext_getNativeIndex(ut);
+ * \endcode
+ *
+ * This function is most useful during forwards iteration, where it will get the
+ * native index of the character most recently returned from utext_next().
+ *
+ * @param ut the text to be accessed
+ * @return the native index of the character preceding the current index position,
+ * or zero if the current position is at the start of the text.
+ * @stable ICU 3.6
+ */
+U_CAPI int64_t U_EXPORT2
+utext_getPreviousNativeIndex(UText *ut);
+
+
+/**
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) of the data to be extracted is returned. The
+ * full number of UChars is returned, even when the extracted text is truncated
+ * because the specified buffer size is too small.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer. This
+ * terminating NUL is not included in the returned length.
+ * <p>
+ * The iteration index is left at the position following the last extracted character.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.\
+ * If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract. If the specified index is out of range,
+ * it will be pinned to be within 0 <= index <= textLength.
+ * nativeLimit must be >= nativeStart.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * U_BUFFER_OVERFLOW_ERROR: the extracted text was truncated because the
+ * buffer was too small. Returns number of UChars for preflighting.
+ * @return Number of UChars in the data to be extracted. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+utext_extract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+
+
+/************************************************************************************
+ *
+ * #define inline versions of selected performance-critical text access functions
+ * Caution: do not use auto increment++ or decrement-- expressions
+ * as parameters to these macros.
+ *
+ * For most use, where there is no extreme performance constraint, the
+ * normal, non-inline functions are a better choice. The resulting code
+ * will be smaller, and, if the need ever arises, easier to debug.
+ *
+ * These are implemented as #defines rather than real functions
+ * because there is no fully portable way to do inline functions in plain C.
+ *
+ ************************************************************************************/
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * inline version of utext_current32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText.
+ * Returns U_SENTINEL (-1) if the position is at the end of the
+ * text.
+ *
+ * @internal ICU 4.4 technology preview
+ */
+#define UTEXT_CURRENT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * inline version of utext_next32(), for performance-critical situations.
+ *
+ * Get the code point at the current iteration position of the UText, and
+ * advance the position to the first index following the character.
+ * This is a post-increment operation.
+ * Returns U_SENTINEL (-1) if the position is at the end of the
+ * text.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_NEXT32(ut) \
+ ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
+ ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
+
+/**
+ * inline version of utext_previous32(), for performance-critical situations.
+ *
+ * Move the iterator position to the character (code point) whose
+ * index precedes the current position, and return that character.
+ * This is a pre-decrement operation.
+ * Returns U_SENTINEL (-1) if the position is at the start of the text.
+ *
+ * @stable ICU 3.4
+ */
+#define UTEXT_PREVIOUS32(ut) \
+ ((ut)->chunkOffset > 0 && \
+ (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
+ (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
+
+/**
+ * inline version of utext_getNativeIndex(), for performance-critical situations.
+ *
+ * Get the current iterator position, which can range from 0 to
+ * the length of the text.
+ * The position is a native index into the input text, in whatever format it
+ * may have (possibly UTF-8 for example), and may not always be the same as
+ * the corresponding UChar (UTF-16) index.
+ * The returned position will always be aligned to a code point boundary.
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_GETNATIVEINDEX(ut) \
+ ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
+ (ut)->chunkNativeStart+(ut)->chunkOffset : \
+ (ut)->pFuncs->mapOffsetToNative(ut))
+
+/**
+ * inline version of utext_setNativeIndex(), for performance-critical situations.
+ *
+ * Set the current iteration position to the nearest code point
+ * boundary at or preceding the specified index.
+ * The index is in the native units of the original input text.
+ * If the index is out of range, it will be pinned to be within
+ * the range of the input text.
+ *
+ * @stable ICU 3.8
+ */
+#define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
+ int64_t __offset = (ix) - (ut)->chunkNativeStart; \
+ if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
+ (ut)->chunkOffset=(int32_t)__offset; \
+ } else { \
+ utext_setNativeIndex((ut), (ix)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+
+
+/************************************************************************************
+ *
+ * Functions related to writing or modifying the text.
+ * These will work only with modifiable UTexts. Attempting to
+ * modify a read-only UText will return an error status.
+ *
+ ************************************************************************************/
+
+
+/**
+ * Return true if the text can be written (modified) with utext_replace() or
+ * utext_copy(). For the text to be writable, the text provider must
+ * be of a type that supports writing and the UText must not be frozen.
+ *
+ * Attempting to modify text when utext_isWriteable() is false will fail -
+ * the text will not be modified, and an error will be returned from the function
+ * that attempted the modification.
+ *
+ * @param ut the UText to be tested.
+ * @return true if the text is modifiable.
+ *
+ * @see utext_freeze()
+ * @see utext_replace()
+ * @see utext_copy()
+ * @stable ICU 3.4
+ *
+ */
+U_CAPI UBool U_EXPORT2
+utext_isWritable(const UText *ut);
+
+
+/**
+ * Test whether there is meta data associated with the text.
+ * @see Replaceable::hasMetaData()
+ *
+ * @param ut The UText to be tested
+ * @return true if the underlying text includes meta data.
+ * @stable ICU 3.4
+ */
+U_CAPI UBool U_EXPORT2
+utext_hasMetaData(const UText *ut);
+
+
+/**
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns true.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a replace operation
+ * on a UText is undefined for any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the native index of the start of the region to be replaced
+ * @param nativeLimit the native index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacementLength length of the replacement text, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI int32_t U_EXPORT2
+utext_replace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacementLength,
+ UErrorCode *status);
+
+
+
+/**
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * The iteration position is left following the newly inserted text
+ * at the destination position.
+ *
+ * This function is only available on UText types that support writing,
+ * that is, ones where utext_isWritable() returns true.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. Behavior after a copy operation
+ * on a UText is undefined in any other additional UTexts that refer to the
+ * modified string.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The native index of the start of the region to be copied or moved
+ * @param nativeLimit The native index of the character position following the region
+ * to be copied.
+ * @param destIndex The native destination index to which the source substring is
+ * copied or moved.
+ * @param move If true, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+U_CAPI void U_EXPORT2
+utext_copy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t destIndex,
+ UBool move,
+ UErrorCode *status);
+
+
+/**
+ * <p>
+ * Freeze a UText. This prevents any modification to the underlying text itself
+ * by means of functions operating on this UText.
+ * </p>
+ * <p>
+ * Once frozen, a UText can not be unfrozen. The intent is to ensure
+ * that a the text underlying a frozen UText wrapper cannot be modified via that UText.
+ * </p>
+ * <p>
+ * Caution: freezing a UText will disable changes made via the specific
+ * frozen UText wrapper only; it will not have any effect on the ability to
+ * directly modify the text by bypassing the UText. Any such backdoor modifications
+ * are always an error while UText access is occurring because the underlying
+ * text can get out of sync with UText's buffering.
+ * </p>
+ *
+ * @param ut The UText to be frozen.
+ * @see utext_isWritable()
+ * @stable ICU 3.6
+ */
+U_CAPI void U_EXPORT2
+utext_freeze(UText *ut);
+
+
+/**
+ * UText provider properties (bit field indexes).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+enum {
+ /**
+ * It is potentially time consuming for the provider to determine the length of the text.
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE = 1,
+ /**
+ * Text chunks remain valid and usable until the text object is modified or
+ * deleted, not just until the next time the access() function is called
+ * (which is the default).
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_STABLE_CHUNKS = 2,
+ /**
+ * The provider supports modifying the text via the replace() and copy()
+ * functions.
+ * @see Replaceable
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_WRITABLE = 3,
+ /**
+ * There is meta data associated with the text.
+ * @see Replaceable::hasMetaData()
+ * @stable ICU 3.4
+ */
+ UTEXT_PROVIDER_HAS_META_DATA = 4,
+ /**
+ * Text provider owns the text storage.
+ * Generally occurs as the result of a deep clone of the UText.
+ * When closing the UText, the associated text must
+ * also be closed/deleted/freed/ whatever is appropriate.
+ * @stable ICU 3.6
+ */
+ UTEXT_PROVIDER_OWNS_TEXT = 5
+};
+
+/**
+ * Function type declaration for UText.clone().
+ *
+ * clone a UText. Much like opening a UText where the source text is itself
+ * another UText.
+ *
+ * A deep clone will copy both the UText data structures and the underlying text.
+ * The original and cloned UText will operate completely independently; modifications
+ * made to the text in one will not effect the other. Text providers are not
+ * required to support deep clones. The user of clone() must check the status return
+ * and be prepared to handle failures.
+ *
+ * A shallow clone replicates only the UText data structures; it does not make
+ * a copy of the underlying text. Shallow clones can be used as an efficient way to
+ * have multiple iterators active in a single text string that is not being
+ * modified.
+ *
+ * A shallow clone operation must not fail except for truly exceptional conditions such
+ * as memory allocation failures.
+ *
+ * A UText and its clone may be safely concurrently accessed by separate threads.
+ * This is true for both shallow and deep clones.
+ * It is the responsibility of the Text Provider to ensure that this thread safety
+ * constraint is met.
+
+ *
+ * @param dest A UText struct to be filled in with the result of the clone operation,
+ * or NULL if the clone function should heap-allocate a new UText struct.
+ * @param src The UText to be cloned.
+ * @param deep true to request a deep clone, false for a shallow clone.
+ * @param status Errors are returned here. For deep clones, U_UNSUPPORTED_ERROR
+ * should be returned if the text provider is unable to clone the
+ * original text.
+ * @return The newly created clone, or NULL if the clone operation failed.
+ *
+ * @stable ICU 3.4
+ */
+typedef UText * U_CALLCONV
+UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
+
+
+/**
+ * Function type declaration for UText.nativeLength().
+ *
+ * @param ut the UText to get the length of.
+ * @return the length, in the native units of the original text string.
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextNativeLength(UText *ut);
+
+/**
+ * Function type declaration for UText.access(). Get the description of the text chunk
+ * containing the text at a requested native index. The UText's iteration
+ * position will be left at the requested index. If the index is out
+ * of bounds, the iteration position will be left at the start or end
+ * of the string, as appropriate.
+ *
+ * Chunks must begin and end on code point boundaries. A single code point
+ * comprised of multiple storage units must never span a chunk boundary.
+ *
+ *
+ * @param ut the UText being accessed.
+ * @param nativeIndex Requested index of the text to be accessed.
+ * @param forward If true, then the returned chunk must contain text
+ * starting from the index, so that start<=index<limit.
+ * If false, then the returned chunk must contain text
+ * before the index, so that start<index<=limit.
+ * @return True if the requested index could be accessed. The chunk
+ * will contain the requested text.
+ * False value if a chunk cannot be accessed
+ * (the requested index is out of bounds).
+ *
+ * @see UText
+ * @stable ICU 3.4
+ */
+typedef UBool U_CALLCONV
+UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
+
+/**
+ * Function type declaration for UText.extract().
+ *
+ * Extract text from a UText into a UChar buffer. The range of text to be extracted
+ * is specified in the native indices of the UText provider. These may not necessarily
+ * be UTF-16 indices.
+ * <p>
+ * The size (number of 16 bit UChars) in the data to be extracted is returned. The
+ * full amount is returned, even when the specified buffer size is smaller.
+ * <p>
+ * The extracted string will (if you are a user) / must (if you are a text provider)
+ * be NUL-terminated if there is sufficient space in the destination buffer.
+ *
+ * @param ut the UText from which to extract data.
+ * @param nativeStart the native index of the first character to extract.
+ * @param nativeLimit the native string index of the position following the last
+ * character to extract.
+ * @param dest the UChar (UTF-16) buffer into which the extracted text is placed
+ * @param destCapacity The size, in UChars, of the destination buffer. May be zero
+ * for precomputing the required size.
+ * @param status receives any error status.
+ * If U_BUFFER_OVERFLOW_ERROR: Returns number of UChars for
+ * preflighting.
+ * @return Number of UChars in the data. Does not include a trailing NUL.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextExtract(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ UChar *dest, int32_t destCapacity,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.replace().
+ *
+ * Replace a range of the original text with a replacement text.
+ *
+ * Leaves the current iteration position at the position following the
+ * newly inserted replacement text.
+ *
+ * This function need only be implemented on UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut the UText representing the text to be operated on.
+ * @param nativeStart the index of the start of the region to be replaced
+ * @param nativeLimit the index of the character following the region to be replaced.
+ * @param replacementText pointer to the replacement text
+ * @param replacmentLength length of the replacement text in UChars, or -1 if the text is NUL terminated.
+ * @param status receives any error status. Possible errors include
+ * U_NO_WRITE_PERMISSION
+ *
+ * @return The signed number of (native) storage units by which
+ * the length of the text expanded or contracted.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextReplace(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ const UChar *replacementText, int32_t replacmentLength,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.copy().
+ *
+ * Copy or move a substring from one position to another within the text,
+ * while retaining any metadata associated with the text.
+ * This function is used to duplicate or reorder substrings.
+ * The destination index must not overlap the source range.
+ *
+ * The text to be copied or moved is inserted at destIndex;
+ * it does not replace or overwrite any existing text.
+ *
+ * This function need only be implemented for UText types that support writing.
+ *
+ * When using this function, there should be only a single UText opened onto the
+ * underlying native text string. The function is responsible for updating the
+ * text chunk within the UText to reflect the updated iteration position,
+ * taking into account any changes to the underlying string's structure caused
+ * by the replace operation.
+ *
+ * @param ut The UText representing the text to be operated on.
+ * @param nativeStart The index of the start of the region to be copied or moved
+ * @param nativeLimit The index of the character following the region to be replaced.
+ * @param nativeDest The destination index to which the source substring is copied or moved.
+ * @param move If true, then the substring is moved, not copied/duplicated.
+ * @param status receives any error status. Possible errors include U_NO_WRITE_PERMISSION
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextCopy(UText *ut,
+ int64_t nativeStart, int64_t nativeLimit,
+ int64_t nativeDest,
+ UBool move,
+ UErrorCode *status);
+
+/**
+ * Function type declaration for UText.mapOffsetToNative().
+ * Map from the current UChar offset within the current text chunk to
+ * the corresponding native index in the original source text.
+ *
+ * This is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut the UText.
+ * @return Absolute (native) index corresponding to chunkOffset in the current chunk.
+ * The returned native index should always be to a code point boundary.
+ *
+ * @stable ICU 3.4
+ */
+typedef int64_t U_CALLCONV
+UTextMapOffsetToNative(const UText *ut);
+
+/**
+ * Function type declaration for UText.mapIndexToUTF16().
+ * Map from a native index to a UChar offset within a text chunk.
+ * Behavior is undefined if the native index does not fall within the
+ * current chunk.
+ *
+ * This function is required only for text providers that do not use native UTF-16 indexes.
+ *
+ * @param ut The UText containing the text chunk.
+ * @param nativeIndex Absolute (native) text index, chunk->start<=index<=chunk->limit.
+ * @return Chunk-relative UTF-16 offset corresponding to the specified native
+ * index.
+ *
+ * @stable ICU 3.4
+ */
+typedef int32_t U_CALLCONV
+UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
+
+
+/**
+ * Function type declaration for UText.utextClose().
+ *
+ * A Text Provider close function is only required for provider types that make
+ * allocations in their open function (or other functions) that must be
+ * cleaned when the UText is closed.
+ *
+ * The allocation of the UText struct itself and any "extra" storage
+ * associated with the UText is handled by the common UText implementation
+ * and does not require provider specific cleanup in a close function.
+ *
+ * Most UText provider implementations do not need to implement this function.
+ *
+ * @param ut A UText object to be closed.
+ *
+ * @stable ICU 3.4
+ */
+typedef void U_CALLCONV
+UTextClose(UText *ut);
+
+
+/**
+ * (public) Function dispatch table for UText.
+ * Conceptually very much like a C++ Virtual Function Table.
+ * This struct defines the organization of the table.
+ * Each text provider implementation must provide an
+ * actual table that is initialized with the appropriate functions
+ * for the type of text being handled.
+ * @stable ICU 3.6
+ */
+struct UTextFuncs {
+ /**
+ * (public) Function table size, sizeof(UTextFuncs)
+ * Intended for use should the table grow to accommodate added
+ * functions in the future, to allow tests for older format
+ * function tables that do not contain the extensions.
+ *
+ * Fields are placed for optimal alignment on
+ * 32/64/128-bit-pointer machines, by normally grouping together
+ * 4 32-bit fields,
+ * 4 pointers,
+ * 2 64-bit fields
+ * in sequence.
+ * @stable ICU 3.6
+ */
+ int32_t tableSize;
+
+ /**
+ * (private) Alignment padding.
+ * Do not use, reserved for use by the UText framework only.
+ * @internal
+ */
+ int32_t reserved1, /** @internal */ reserved2, /** @internal */ reserved3;
+
+
+ /**
+ * (public) Function pointer for UTextClone
+ *
+ * @see UTextClone
+ * @stable ICU 3.6
+ */
+ UTextClone *clone;
+
+ /**
+ * (public) function pointer for UTextLength
+ * May be expensive to compute!
+ *
+ * @see UTextLength
+ * @stable ICU 3.6
+ */
+ UTextNativeLength *nativeLength;
+
+ /**
+ * (public) Function pointer for UTextAccess.
+ *
+ * @see UTextAccess
+ * @stable ICU 3.6
+ */
+ UTextAccess *access;
+
+ /**
+ * (public) Function pointer for UTextExtract.
+ *
+ * @see UTextExtract
+ * @stable ICU 3.6
+ */
+ UTextExtract *extract;
+
+ /**
+ * (public) Function pointer for UTextReplace.
+ *
+ * @see UTextReplace
+ * @stable ICU 3.6
+ */
+ UTextReplace *replace;
+
+ /**
+ * (public) Function pointer for UTextCopy.
+ *
+ * @see UTextCopy
+ * @stable ICU 3.6
+ */
+ UTextCopy *copy;
+
+ /**
+ * (public) Function pointer for UTextMapOffsetToNative.
+ *
+ * @see UTextMapOffsetToNative
+ * @stable ICU 3.6
+ */
+ UTextMapOffsetToNative *mapOffsetToNative;
+
+ /**
+ * (public) Function pointer for UTextMapNativeIndexToUTF16.
+ *
+ * @see UTextMapNativeIndexToUTF16
+ * @stable ICU 3.6
+ */
+ UTextMapNativeIndexToUTF16 *mapNativeIndexToUTF16;
+
+ /**
+ * (public) Function pointer for UTextClose.
+ *
+ * @see UTextClose
+ * @stable ICU 3.6
+ */
+ UTextClose *close;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare1;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare2;
+
+ /**
+ * (private) Spare function pointer
+ * @internal
+ */
+ UTextClose *spare3;
+
+};
+/**
+ * Function dispatch table for UText
+ * @see UTextFuncs
+ */
+typedef struct UTextFuncs UTextFuncs;
+
+ /**
+ * UText struct. Provides the interface between the generic UText access code
+ * and the UText provider code that works on specific kinds of
+ * text (UTF-8, noncontiguous UTF-16, whatever.)
+ *
+ * Applications that are using predefined types of text providers
+ * to pass text data to ICU services will have no need to view the
+ * internals of the UText structs that they open.
+ *
+ * @stable ICU 3.6
+ */
+struct UText {
+ /**
+ * (private) Magic. Used to help detect when UText functions are handed
+ * invalid or uninitialized UText structs.
+ * utext_openXYZ() functions take an initialized,
+ * but not necessarily open, UText struct as an
+ * optional fill-in parameter. This magic field
+ * is used to check for that initialization.
+ * Text provider close functions must NOT clear
+ * the magic field because that would prevent
+ * reuse of the UText struct.
+ * @internal
+ */
+ uint32_t magic;
+
+
+ /**
+ * (private) Flags for managing the allocation and freeing of
+ * memory associated with this UText.
+ * @internal
+ */
+ int32_t flags;
+
+
+ /**
+ * Text provider properties. This set of flags is maintained by the
+ * text provider implementation.
+ * @stable ICU 3.4
+ */
+ int32_t providerProperties;
+
+ /**
+ * (public) sizeOfStruct=sizeof(UText)
+ * Allows possible backward compatible extension.
+ *
+ * @stable ICU 3.4
+ */
+ int32_t sizeOfStruct;
+
+ /* ------ 16 byte alignment boundary ----------- */
+
+
+ /**
+ * (protected) Native index of the first character position following
+ * the current chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeLimit;
+
+ /**
+ * (protected) Size in bytes of the extra space (pExtra).
+ * @stable ICU 3.4
+ */
+ int32_t extraSize;
+
+ /**
+ * (protected) The highest chunk offset where native indexing and
+ * chunk (UTF-16) indexing correspond. For UTF-16 sources, value
+ * will be equal to chunkLength.
+ *
+ * @stable ICU 3.6
+ */
+ int32_t nativeIndexingLimit;
+
+ /* ---- 16 byte alignment boundary------ */
+
+ /**
+ * (protected) Native index of the first character in the text chunk.
+ * @stable ICU 3.6
+ */
+ int64_t chunkNativeStart;
+
+ /**
+ * (protected) Current iteration position within the text chunk (UTF-16 buffer).
+ * This is the index to the character that will be returned by utext_next32().
+ * @stable ICU 3.6
+ */
+ int32_t chunkOffset;
+
+ /**
+ * (protected) Length the text chunk (UTF-16 buffer), in UChars.
+ * @stable ICU 3.6
+ */
+ int32_t chunkLength;
+
+ /* ---- 16 byte alignment boundary-- */
+
+
+ /**
+ * (protected) pointer to a chunk of text in UTF-16 format.
+ * May refer either to original storage of the source of the text, or
+ * if conversion was required, to a buffer owned by the UText.
+ * @stable ICU 3.6
+ */
+ const UChar *chunkContents;
+
+ /**
+ * (public) Pointer to Dispatch table for accessing functions for this UText.
+ * @stable ICU 3.6
+ */
+ const UTextFuncs *pFuncs;
+
+ /**
+ * (protected) Pointer to additional space requested by the
+ * text provider during the utext_open operation.
+ * @stable ICU 3.4
+ */
+ void *pExtra;
+
+ /**
+ * (protected) Pointer to string or text-containing object or similar.
+ * This is the source of the text that this UText is wrapping, in a format
+ * that is known to the text provider functions.
+ * @stable ICU 3.4
+ */
+ const void *context;
+
+ /* --- 16 byte alignment boundary--- */
+
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *p;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *q;
+ /**
+ * (protected) Pointer fields available for use by the text provider.
+ * Not used by UText common code.
+ * @stable ICU 3.6
+ */
+ const void *r;
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ void *privP;
+
+
+ /* --- 16 byte alignment boundary--- */
+
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int64_t a;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t b;
+
+ /**
+ * (protected) Integer field reserved for use by the text provider.
+ * Not used by the UText framework, or by the client (user) of the UText.
+ * @stable ICU 3.4
+ */
+ int32_t c;
+
+ /* ---- 16 byte alignment boundary---- */
+
+
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int64_t privA;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privB;
+ /**
+ * Private field reserved for future use by the UText framework
+ * itself. This is not to be touched by the text providers.
+ * @internal ICU 3.4
+ */
+ int32_t privC;
+};
+
+
+/**
+ * Common function for use by Text Provider implementations to allocate and/or initialize
+ * a new UText struct. To be called in the implementation of utext_open() functions.
+ * If the supplied UText parameter is null, a new UText struct will be allocated on the heap.
+ * If the supplied UText is already open, the provider's close function will be called
+ * so that the struct can be reused by the open that is in progress.
+ *
+ * @param ut pointer to a UText struct to be re-used, or null if a new UText
+ * should be allocated.
+ * @param extraSpace The amount of additional space to be allocated as part
+ * of this UText, for use by types of providers that require
+ * additional storage.
+ * @param status Errors are returned here.
+ * @return pointer to the UText, allocated if necessary, with extra space set up if requested.
+ * @stable ICU 3.4
+ */
+U_CAPI UText * U_EXPORT2
+utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
+
+// do not use #ifndef U_HIDE_INTERNAL_API around the following!
+/**
+ * @internal
+ * Value used to help identify correctly initialized UText structs.
+ * Note: must be publicly visible so that UTEXT_INITIALIZER can access it.
+ */
+enum {
+ UTEXT_MAGIC = 0x345ad82c
+};
+
+/**
+ * initializer to be used with local (stack) instances of a UText
+ * struct. UText structs must be initialized before passing
+ * them to one of the utext_open functions.
+ *
+ * @stable ICU 3.6
+ */
+#define UTEXT_INITIALIZER { \
+ UTEXT_MAGIC, /* magic */ \
+ 0, /* flags */ \
+ 0, /* providerProps */ \
+ sizeof(UText), /* sizeOfStruct */ \
+ 0, /* chunkNativeLimit */ \
+ 0, /* extraSize */ \
+ 0, /* nativeIndexingLimit */ \
+ 0, /* chunkNativeStart */ \
+ 0, /* chunkOffset */ \
+ 0, /* chunkLength */ \
+ NULL, /* chunkContents */ \
+ NULL, /* pFuncs */ \
+ NULL, /* pExtra */ \
+ NULL, /* context */ \
+ NULL, NULL, NULL, /* p, q, r */ \
+ NULL, /* privP */ \
+ 0, 0, 0, /* a, b, c */ \
+ 0, 0, 0 /* privA,B,C, */ \
+ }
+
+
+U_CDECL_END
+
+
+#if U_SHOW_CPLUSPLUS_API
+
+U_NAMESPACE_BEGIN
+
+/**
+ * \class LocalUTextPointer
+ * "Smart pointer" class, closes a UText via utext_close().
+ * For most methods see the LocalPointerBase base class.
+ *
+ * @see LocalPointerBase
+ * @see LocalPointer
+ * @stable ICU 4.4
+ */
+U_DEFINE_LOCAL_OPEN_POINTER(LocalUTextPointer, UText, utext_close);
+
+U_NAMESPACE_END
+
+#endif
+
+
+#endif
diff --git a/intl/icu/source/common/unicode/utf.h b/intl/icu/source/common/unicode/utf.h
new file mode 100644
index 0000000000..c9d5f5785c
--- /dev/null
+++ b/intl/icu/source/common/unicode/utf.h
@@ -0,0 +1,225 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Code point macros
+ *
+ * This file defines macros for checking whether a code point is
+ * a surrogate or a non-character etc.
+ *
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 0 then utf.h is included by utypes.h
+ * and itself includes utf8.h and utf16.h after some
+ * common definitions.
+ * If U_NO_DEFAULT_INCLUDE_UTF_HEADERS is 1 then each of these headers must be
+ * included explicitly if their definitions are used.
+ *
+ * utf8.h and utf16.h define macros for efficiently getting code points
+ * in and out of UTF-8/16 strings.
+ * utf16.h macros have "U16_" prefixes.
+ * utf8.h defines similar macros with "U8_" prefixes for UTF-8 string handling.
+ *
+ * ICU mostly processes 16-bit Unicode strings.
+ * Most of the time, such strings are well-formed UTF-16.
+ * Single, unpaired surrogates must be handled as well, and are treated in ICU
+ * like regular code points where possible.
+ * (Pairs of surrogate code points are indistinguishable from supplementary
+ * code points encoded as pairs of supplementary code units.)
+ *
+ * In fact, almost all Unicode code points in normal text (>99%)
+ * are on the BMP (<=U+ffff) and even <=U+d7ff.
+ * ICU functions handle supplementary code points (U+10000..U+10ffff)
+ * but are optimized for the much more frequently occurring BMP code points.
+ *
+ * umachine.h defines UChar to be an unsigned 16-bit integer.
+ * Since ICU 59, ICU uses char16_t in C++, UChar only in C,
+ * and defines UChar=char16_t by default. See the UChar API docs for details.
+ *
+ * UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff) and U_SENTINEL (-1).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.
+ *
+ * utf.h defines a small number of C macros for single Unicode code points.
+ * These are simple checks for surrogates and non-characters.
+ * For actual Unicode character properties see uchar.h.
+ *
+ * By default, string operations must be done with error checking in case
+ * a string is not well-formed UTF-16 or UTF-8.
+ *
+ * The U16_ macros detect if a surrogate code unit is unpaired
+ * (lead unit without trail unit or vice versa) and just return the unit itself
+ * as the code point.
+ *
+ * The U8_ macros detect illegal byte sequences and return a negative value.
+ * Starting with ICU 60, the observable length of a single illegal byte sequence
+ * skipped by one of these macros follows the Unicode 6+ recommendation
+ * which is consistent with the W3C Encoding Standard.
+ *
+ * There are ..._OR_FFFD versions of both U16_ and U8_ macros
+ * that return U+FFFD for illegal code unit sequences.
+ *
+ * The regular "safe" macros require that the initial, passed-in string index
+ * is within bounds. They only check the index when they read more than one
+ * code unit. This is usually done with code similar to the following loop:
+ * <pre>while(i<length) {
+ * U16_NEXT(s, i, length, c);
+ * // use c
+ * }</pre>
+ *
+ * When it is safe to assume that text is well-formed UTF-16
+ * (does not contain single, unpaired surrogates), then one can use
+ * U16_..._UNSAFE macros.
+ * These do not check for proper code unit sequences or truncated text and may
+ * yield wrong results or even cause a crash if they are used with "malformed"
+ * text.
+ * In practice, U16_..._UNSAFE macros will produce slightly less code but
+ * should not be faster because the processing is only different when a
+ * surrogate code unit is detected, which will be rare.
+ *
+ * Similarly for UTF-8, there are "safe" macros without a suffix,
+ * and U8_..._UNSAFE versions.
+ * The performance differences are much larger here because UTF-8 provides so
+ * many opportunities for malformed sequences.
+ * The unsafe UTF-8 macros are entirely implemented inside the macro definitions
+ * and are fast, while the safe UTF-8 macros call functions for some complicated cases.
+ *
+ * Unlike with UTF-16, malformed sequences cannot be expressed with distinct
+ * code point values (0..U+10ffff). They are indicated with negative values instead.
+ *
+ * For more information see the ICU User Guide Strings chapter
+ * (https://unicode-org.github.io/icu/userguide/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ *
+ * @stable ICU 2.4
+ */
+
+#ifndef __UTF_H__
+#define __UTF_H__
+
+#include "unicode/umachine.h"
+/* include the utfXX.h after the following definitions */
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Is this code point a Unicode noncharacter?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((c)<=0xfdef || ((c)&0xfffe)==0xfffe) && (c)<=0x10ffff)
+
+/**
+ * Is c a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ (0xdfff<(c) && (c)<=0x10ffff && !U_IS_UNICODE_NONCHAR(c)))
+
+/**
+ * Is this code point a BMP code point (U+0000..U+ffff)?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.8
+ */
+#define U_IS_BMP(c) ((uint32_t)(c)<=0xffff)
+
+/**
+ * Is this code point a supplementary code point (U+10000..U+10ffff)?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.8
+ */
+#define U_IS_SUPPLEMENTARY(c) ((uint32_t)((c)-0x10000)<=0xfffff)
+
+/**
+ * Is this code point a lead surrogate (U+d800..U+dbff)?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code point a trail surrogate (U+dc00..U+dfff)?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code point a surrogate (U+d800..U+dfff)?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE(c) (((c)&0xfffff800)==0xd800)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 32-bit code point
+ * @return true or false
+ * @stable ICU 4.2
+ */
+#define U_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/* include the utfXX.h ------------------------------------------------------ */
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/* utf_old.h contains deprecated, pre-ICU 2.4 definitions */
+#include "unicode/utf_old.h"
+
+#endif /* !U_NO_DEFAULT_INCLUDE_UTF_HEADERS */
+
+#endif /* __UTF_H__ */
diff --git a/intl/icu/source/common/unicode/utf16.h b/intl/icu/source/common/unicode/utf16.h
new file mode 100644
index 0000000000..3902c60e95
--- /dev/null
+++ b/intl/icu/source/common/unicode/utf16.h
@@ -0,0 +1,734 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf16.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep09
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 16-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 16-bit Unicode (UTF-16) code units and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (https://unicode-org.github.io/icu/userguide/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF16_H__
+#define __UTF16_H__
+
+#include <stdbool.h>
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * @param c 16-bit code unit
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_SINGLE(c) !U_IS_SURROGATE(c)
+
+/**
+ * Is this code unit a lead surrogate (U+d800..U+dbff)?
+ * @param c 16-bit code unit
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_LEAD(c) (((c)&0xfffffc00)==0xd800)
+
+/**
+ * Is this code unit a trail surrogate (U+dc00..U+dfff)?
+ * @param c 16-bit code unit
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_TRAIL(c) (((c)&0xfffffc00)==0xdc00)
+
+/**
+ * Is this code unit a surrogate (U+d800..U+dfff)?
+ * @param c 16-bit code unit
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE(c) U_IS_SURROGATE(c)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a lead surrogate?
+ * @param c 16-bit code unit
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U16_IS_SURROGATE_LEAD(c) (((c)&0x400)==0)
+
+/**
+ * Assuming c is a surrogate code point (U16_IS_SURROGATE(c)),
+ * is it a trail surrogate?
+ * @param c 16-bit code unit
+ * @return true or false
+ * @stable ICU 4.2
+ */
+#define U16_IS_SURROGATE_TRAIL(c) (((c)&0x400)!=0)
+
+/**
+ * Helper constant for U16_GET_SUPPLEMENTARY.
+ * @internal
+ */
+#define U16_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/**
+ * Get a supplementary code point value (U+10000..U+10ffff)
+ * from its lead and trail surrogates.
+ * The result is undefined if the input values are not
+ * lead and trail surrogates.
+ *
+ * @param lead lead surrogate (U+d800..U+dbff)
+ * @param trail trail surrogate (U+dc00..U+dfff)
+ * @return supplementary code point (U+10000..U+10ffff)
+ * @stable ICU 2.4
+ */
+#define U16_GET_SUPPLEMENTARY(lead, trail) \
+ (((UChar32)(lead)<<10UL)+(UChar32)(trail)-U16_SURROGATE_OFFSET)
+
+
+/**
+ * Get the lead surrogate (0xd800..0xdbff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return lead surrogate (U+d800..U+dbff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_LEAD(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/**
+ * Get the trail surrogate (0xdc00..0xdfff) for a
+ * supplementary code point (0x10000..0x10ffff).
+ * @param supplementary 32-bit code point (U+10000..U+10ffff)
+ * @return trail surrogate (U+dc00..U+dfff) for supplementary
+ * @stable ICU 2.4
+ */
+#define U16_TRAIL(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/**
+ * How many 16-bit code units are used to encode this Unicode code point? (1 or 2)
+ * The result is not defined if c is not a Unicode code point (U+0000..U+10ffff).
+ * @param c 32-bit code point
+ * @return 1 or 2
+ * @stable ICU 2.4
+ */
+#define U16_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/**
+ * The maximum number of 16-bit code units per Unicode code point (U+0000..U+10ffff).
+ * @return 2
+ * @stable ICU 2.4
+ */
+#define U16_MAX_LENGTH 2
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ * The result is undefined if the offset points to a single, unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_GET
+ * @stable ICU 2.4
+ */
+#define U16_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)+1]); \
+ } else { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[(i)-1], (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to that unpaired surrogate.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } else { \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The offset may point to either the lead or trail surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the adjacent matching surrogate as well.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to a single, unpaired surrogate, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U16_NEXT_UNSAFE or U16_NEXT_OR_FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_GET_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c)) { \
+ if((i)+1!=(length) && U16_IS_TRAIL(__c2=(s)[(i)+1])) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } else { \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset points to a single, unpaired lead surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_NEXT
+ * @stable ICU 2.4
+ */
+#define U16_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((c), (s)[(i)++]); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to that unpaired surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_NEXT(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_LEAD(c)) { \
+ uint16_t __c2; \
+ if((i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead surrogate unit
+ * for a supplementary code point, in which case the macro will read
+ * the following trail surrogate as well.
+ * If the offset points to a trail surrogate or
+ * to a single, unpaired lead surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @param c output UChar32 variable
+ * @see U16_NEXT_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_NEXT_OR_FFFD(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_LEAD(c) && (i)!=(length) && U16_IS_TRAIL(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=U16_GET_SUPPLEMENTARY((c), __c2); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U16_APPEND
+ * @stable ICU 2.4
+ */
+#define U16_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 or 2 code units.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a surrogate pair is written, checks for sufficient space in the string.
+ * If the code point is not valid or a trail surrogate does not fit,
+ * then isError is set to true.
+ *
+ * @param s const UChar * string buffer
+ * @param i string offset, must be i<capacity
+ * @param capacity size of the string buffer
+ * @param c code point to append
+ * @param isError output UBool set to true if an error occurs, otherwise not modified
+ * @see U16_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* c>0x10ffff or not enough space */ { \
+ (isError)=true; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_FWD_1
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_LEAD((s)[(i)++])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i string offset, must be i<length
+ * @param length string length
+ * @see U16_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_LEAD((s)[(i)++]) && (i)!=(length) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_FWD_N
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U16_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+ U16_FWD_1(s, i, length); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[i])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to the trail surrogate of a surrogate pair,
+ * then the offset is decremented.
+ * Otherwise, it is not modified.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<=i
+ * @see U16_SET_CP_START_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[i]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind a single, unpaired trail surrogate.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U16_PREV
+ * @stable ICU 2.4
+ */
+#define U16_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ (c)=U16_GET_SUPPLEMENTARY((s)[--(i)], (c)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to that unpaired surrogate.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_TRAIL(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a trail surrogate unit
+ * for a supplementary code point, then the macro will read
+ * the preceding lead surrogate as well.
+ * If the offset is behind a lead surrogate or behind a single, unpaired
+ * trail surrogate, then c is set to U+FFFD.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @param c output UChar32 variable
+ * @see U16_PREV_UNSAFE
+ * @stable ICU 60
+ */
+#define U16_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(U16_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(U16_IS_SURROGATE_TRAIL(c) && (i)>(start) && U16_IS_LEAD(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=U16_GET_SUPPLEMENTARY(__c2, (c)); \
+ } else { \
+ (c)=0xfffd; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_BACK_1
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[--(i)])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start starting string offset (usually 0)
+ * @param i string offset, must be start<i
+ * @see U16_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_TRAIL((s)[--(i)]) && (i)>(start) && U16_IS_LEAD((s)[(i)-1])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U16_BACK_N
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * @param s const UChar * string
+ * @param start start of string
+ * @param i string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U16_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)>(start)) { \
+ U16_BACK_1(s, start, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-16.
+ *
+ * @param s const UChar * string
+ * @param i string offset
+ * @see U16_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U16_IS_LEAD((s)[(i)-1])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind the lead surrogate of a surrogate pair,
+ * then the offset is incremented.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, handles unpaired surrogates and checks for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const UChar * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, start<=i<=length
+ * @param length int32_t string length
+ * @see U16_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U16_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if((start)<(i) && ((i)<(length) || (length)<0) && U16_IS_LEAD((s)[(i)-1]) && U16_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/utf32.h b/intl/icu/source/common/unicode/utf32.h
new file mode 100644
index 0000000000..8822c4dd09
--- /dev/null
+++ b/intl/icu/source/common/unicode/utf32.h
@@ -0,0 +1,25 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2001, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf32.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep20
+* created by: Markus W. Scherer
+*/
+/**
+ * \file
+ * \brief C API: UTF-32 macros
+ *
+ * This file is obsolete and its contents moved to utf_old.h.
+ * See utf_old.h and Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ */
diff --git a/intl/icu/source/common/unicode/utf8.h b/intl/icu/source/common/unicode/utf8.h
new file mode 100644
index 0000000000..5a07435fcf
--- /dev/null
+++ b/intl/icu/source/common/unicode/utf8.h
@@ -0,0 +1,882 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 1999-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf8.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 1999sep13
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: 8-bit Unicode handling macros
+ *
+ * This file defines macros to deal with 8-bit Unicode (UTF-8) code units (bytes) and strings.
+ *
+ * For more information see utf.h and the ICU User Guide Strings chapter
+ * (https://unicode-org.github.io/icu/userguide/strings).
+ *
+ * <em>Usage:</em>
+ * ICU coding guidelines for if() statements should be followed when using these macros.
+ * Compound statements (curly braces {}) must be used for if-else-while...
+ * bodies and all macro statements should be terminated with semicolon.
+ */
+
+#ifndef __UTF8_H__
+#define __UTF8_H__
+
+#include <stdbool.h>
+#include "unicode/umachine.h"
+#ifndef __UTF_H__
+# include "unicode/utf.h"
+#endif
+
+/* internal definitions ----------------------------------------------------- */
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte.
+ * Returns 0 for 0..0xc1 as well as for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES(leadByte) \
+ (U8_IS_LEAD(leadByte) ? \
+ ((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0)+1 : 0)
+
+/**
+ * Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
+ * Returns 0 for 0..0xc1. Undefined for 0xf5..0xff.
+ * leadByte might be evaluated multiple times.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ *
+ * @param leadByte The first byte of a UTF-8 sequence. Must be 0..0xff.
+ * @internal
+ */
+#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte) \
+ (((uint8_t)(leadByte)>=0xc2)+((uint8_t)(leadByte)>=0xe0)+((uint8_t)(leadByte)>=0xf0))
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this file and thus must remain stable.
+ * @internal
+ */
+#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/**
+ * Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * Lead byte E0..EF bits 3..0 are used as byte index,
+ * first trail byte bits 7..5 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD3_AND_T1
+ * @internal
+ */
+#define U8_LEAD3_T1_BITS "\x20\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x30\x10\x30\x30"
+
+/**
+ * Internal 3-byte UTF-8 validity check.
+ * Non-zero if lead byte E0..EF and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD3_AND_T1(lead, t1) (U8_LEAD3_T1_BITS[(lead)&0xf]&(1<<((uint8_t)(t1)>>5)))
+
+/**
+ * Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
+ * Each bit indicates whether one lead byte + first trail byte pair starts a valid sequence.
+ * First trail byte bits 7..4 are used as byte index,
+ * lead byte F0..F4 bits 2..0 are used as bit index into that byte.
+ * @see U8_IS_VALID_LEAD4_AND_T1
+ * @internal
+ */
+#define U8_LEAD4_T1_BITS "\x00\x00\x00\x00\x00\x00\x00\x00\x1E\x0F\x0F\x0F\x00\x00\x00\x00"
+
+/**
+ * Internal 4-byte UTF-8 validity check.
+ * Non-zero if lead byte F0..F4 and first trail byte 00..FF start a valid sequence.
+ * @internal
+ */
+#define U8_IS_VALID_LEAD4_AND_T1(lead, t1) (U8_LEAD4_T1_BITS[(uint8_t)(t1)>>4]&(1<<((lead)&7)))
+
+/**
+ * Function for handling "next code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI UChar32 U_EXPORT2
+utf8_nextCharSafeBody(const uint8_t *s, int32_t *pi, int32_t length, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "append code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utf8_appendCharSafeBody(uint8_t *s, int32_t i, int32_t length, UChar32 c, UBool *pIsError);
+
+/**
+ * Function for handling "previous code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI UChar32 U_EXPORT2
+utf8_prevCharSafeBody(const uint8_t *s, int32_t start, int32_t *pi, UChar32 c, UBool strict);
+
+/**
+ * Function for handling "skip backward one code point" with error-checking.
+ *
+ * This is internal since it is not meant to be called directly by external clients;
+ * however it is called by public macros in this
+ * file and thus must remain stable, and should not be hidden when other internal
+ * functions are hidden (otherwise public macros would fail to compile).
+ * @internal
+ */
+U_CAPI int32_t U_EXPORT2
+utf8_back1SafeBody(const uint8_t *s, int32_t start, int32_t i);
+
+/* single-code point definitions -------------------------------------------- */
+
+/**
+ * Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
+ * @param c 8-bit code unit (byte)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U8_IS_SINGLE(c) (((c)&0x80)==0)
+
+/**
+ * Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
+ * @param c 8-bit code unit (byte)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U8_IS_LEAD(c) ((uint8_t)((c)-0xc2)<=0x32)
+// 0x32=0xf4-0xc2
+
+/**
+ * Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
+ * @param c 8-bit code unit (byte)
+ * @return true or false
+ * @stable ICU 2.4
+ */
+#define U8_IS_TRAIL(c) ((int8_t)(c)<-0x40)
+
+/**
+ * How many code units (bytes) are used for the UTF-8 encoding
+ * of this Unicode code point?
+ * @param c 32-bit code point
+ * @return 1..4, or 0 if c is a surrogate or not a Unicode code point
+ * @stable ICU 2.4
+ */
+#define U8_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)(c)<=0xd7ff ? 3 : \
+ ((uint32_t)(c)<=0xdfff || (uint32_t)(c)>0x10ffff ? 0 : \
+ ((uint32_t)(c)<=0xffff ? 3 : 4)\
+ ) \
+ ) \
+ ) \
+ )
+
+/**
+ * The maximum number of UTF-8 code units (bytes) per Unicode code point (U+0000..U+10ffff).
+ * @return 4
+ * @stable ICU 2.4
+ */
+#define U8_MAX_LENGTH 4
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ * The result is undefined if the offset points to an illegal UTF-8
+ * byte sequence.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_GET
+ * @stable ICU 2.4
+ */
+#define U8_GET_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _u8_get_unsafe_index=(int32_t)(i); \
+ U8_SET_CP_START_UNSAFE(s, _u8_get_unsafe_index); \
+ U8_NEXT_UNSAFE(s, _u8_get_unsafe_index, c); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to a negative value.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_GET_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_GET(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _u8_get_index=(i); \
+ U8_SET_CP_START(s, start, _u8_get_index); \
+ U8_NEXT(s, _u8_get_index, length, c); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a random-access offset,
+ * without changing the offset.
+ * The offset may point to either the lead byte or one of the trail bytes
+ * for a code point, in which case the macro will read all of the bytes
+ * for the code point.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * If the offset points to an illegal UTF-8 byte sequence, then
+ * c is set to U+FFFD.
+ * Iteration through a string is more efficient with U8_NEXT_UNSAFE or U8_NEXT_OR_FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_GET() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset
+ * @param i int32_t string offset, must be start<=i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_GET
+ * @stable ICU 51
+ */
+#define U8_GET_OR_FFFD(s, start, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _u8_get_index=(i); \
+ U8_SET_CP_START(s, start, _u8_get_index); \
+ U8_NEXT_OR_FFFD(s, _u8_get_index, length, c); \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * The result is undefined if the offset points to a trail byte
+ * or an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_NEXT
+ * @stable ICU 2.4
+ */
+#define U8_NEXT_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[(i)++]; \
+ if(!U8_IS_SINGLE(c)) { \
+ if((c)<0xe0) { \
+ (c)=(((c)&0x1f)<<6)|((s)[(i)++]&0x3f); \
+ } else if((c)<0xf0) { \
+ /* no need for (c&0xf) because the upper bits are truncated after <<12 in the cast to (UChar) */ \
+ (c)=(UChar)(((c)<<12)|(((s)[i]&0x3f)<<6)|((s)[(i)+1]&0x3f)); \
+ (i)+=2; \
+ } else { \
+ (c)=(((c)&7)<<18)|(((s)[i]&0x3f)<<12)|(((s)[(i)+1]&0x3f)<<6)|((s)[(i)+2]&0x3f); \
+ (i)+=3; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_NEXT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_NEXT(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, U_SENTINEL)
+
+/**
+ * Get a code point from a string at a code point boundary offset,
+ * and advance the offset to the next code point boundary.
+ * (Post-incrementing forward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * The offset may point to the lead byte of a multi-byte sequence,
+ * in which case the macro will read the whole sequence.
+ * If the offset points to a trail byte or an illegal UTF-8 sequence, then
+ * c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_NEXT() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_NEXT
+ * @stable ICU 51
+ */
+#define U8_NEXT_OR_FFFD(s, i, length, c) U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, 0xfffd)
+
+/** @internal */
+#define U8_INTERNAL_NEXT_OR_SUB(s, i, length, c, sub) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[(i)++]; \
+ if(!U8_IS_SINGLE(c)) { \
+ uint8_t __t = 0; \
+ if((i)!=(length) && \
+ /* fetch/validate/assemble all but last trail byte */ \
+ ((c)>=0xe0 ? \
+ ((c)<0xf0 ? /* U+0800..U+FFFF except surrogates */ \
+ U8_LEAD3_T1_BITS[(c)&=0xf]&(1<<((__t=(s)[i])>>5)) && \
+ (__t&=0x3f, 1) \
+ : /* U+10000..U+10FFFF */ \
+ ((c)-=0xf0)<=4 && \
+ U8_LEAD4_T1_BITS[(__t=(s)[i])>>4]&(1<<(c)) && \
+ ((c)=((c)<<6)|(__t&0x3f), ++(i)!=(length)) && \
+ (__t=(s)[i]-0x80)<=0x3f) && \
+ /* valid second-to-last trail byte */ \
+ ((c)=((c)<<6)|__t, ++(i)!=(length)) \
+ : /* U+0080..U+07FF */ \
+ (c)>=0xc2 && ((c)&=0x1f, 1)) && \
+ /* last trail byte */ \
+ (__t=(s)[i]-0x80)<=0x3f && \
+ ((c)=((c)<<6)|__t, ++(i), 1)) { \
+ } else { \
+ (c)=(sub); /* ill-formed*/ \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Unsafe" macro, assumes a valid code point and sufficient space in the string.
+ * Otherwise, the result is undefined.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i string offset
+ * @param c code point to append
+ * @see U8_APPEND
+ * @stable ICU 2.4
+ */
+#define U8_APPEND_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ uint32_t __uc=(c); \
+ if(__uc<=0x7f) { \
+ (s)[(i)++]=(uint8_t)__uc; \
+ } else { \
+ if(__uc<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+ } else { \
+ if(__uc<=0xffff) { \
+ (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Append a code point to a string, overwriting 1 to 4 bytes.
+ * The offset points to the current end of the string contents
+ * and is advanced (post-increment).
+ * "Safe" macro, checks for a valid code point.
+ * If a non-ASCII code point is written, checks for sufficient space in the string.
+ * If the code point is not valid or trail bytes do not fit,
+ * then isError is set to true.
+ *
+ * @param s const uint8_t * string buffer
+ * @param i int32_t string offset, must be i<capacity
+ * @param capacity int32_t size of the string buffer
+ * @param c UChar32 code point to append
+ * @param isError output UBool set to true if an error occurs, otherwise not modified
+ * @see U8_APPEND_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_APPEND(s, i, capacity, c, isError) UPRV_BLOCK_MACRO_BEGIN { \
+ uint32_t __uc=(c); \
+ if(__uc<=0x7f) { \
+ (s)[(i)++]=(uint8_t)__uc; \
+ } else if(__uc<=0x7ff && (i)+1<(capacity)) { \
+ (s)[(i)++]=(uint8_t)((__uc>>6)|0xc0); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else if((__uc<=0xd7ff || (0xe000<=__uc && __uc<=0xffff)) && (i)+2<(capacity)) { \
+ (s)[(i)++]=(uint8_t)((__uc>>12)|0xe0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else if(0xffff<__uc && __uc<=0x10ffff && (i)+3<(capacity)) { \
+ (s)[(i)++]=(uint8_t)((__uc>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)(((__uc>>12)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)(((__uc>>6)&0x3f)|0x80); \
+ (s)[(i)++]=(uint8_t)((__uc&0x3f)|0x80); \
+ } else { \
+ (isError)=true; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_FWD_1
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)+=1+U8_COUNT_TRAIL_BYTES_UNSAFE((s)[i]); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the next.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @see U8_FWD_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_1(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ uint8_t __b=(s)[(i)++]; \
+ if(U8_IS_LEAD(__b) && (i)!=(length)) { \
+ uint8_t __t1=(s)[i]; \
+ if((0xe0<=__b && __b<0xf0)) { \
+ if(U8_IS_VALID_LEAD3_AND_T1(__b, __t1) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+ } else if(__b<0xe0) { \
+ if(U8_IS_TRAIL(__t1)) { \
+ ++(i); \
+ } \
+ } else /* c>=0xf0 */ { \
+ if(U8_IS_VALID_LEAD4_AND_T1(__b, __t1) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i]) && \
+ ++(i)!=(length) && U8_IS_TRAIL((s)[i])) { \
+ ++(i); \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_FWD_N
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Advance the string offset from one code point boundary to the n-th next one,
+ * i.e., move forward by n code points.
+ * (Post-incrementing iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param i int32_t string offset, must be i<length
+ * @param length int32_t string length
+ * @param n number of code points to skip
+ * @see U8_FWD_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_FWD_N(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0 && ((i)<(length) || ((length)<0 && (s)[i]!=0))) { \
+ U8_FWD_1(s, i, length); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_START
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ while(U8_IS_TRAIL((s)[i])) { --(i); } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary
+ * at the start of a code point.
+ * If the offset points to a UTF-8 trail byte,
+ * then the offset is moved backward to the corresponding lead byte.
+ * Otherwise, it is not modified.
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_TRUNCATE_IF_INCOMPLETE(), this macro always reads s[i].
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i
+ * @see U8_SET_CP_START_UNSAFE
+ * @see U8_TRUNCATE_IF_INCOMPLETE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_START(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U8_IS_TRAIL((s)[(i)])) { \
+ (i)=utf8_back1SafeBody(s, start, (i)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * If the string ends with a UTF-8 byte sequence that is valid so far
+ * but incomplete, then reduce the length of the string to end before
+ * the lead byte of that incomplete sequence.
+ * For example, if the string ends with E1 80, the length is reduced by 2.
+ *
+ * In all other cases (the string ends with a complete sequence, or it is not
+ * possible for any further trail byte to extend the trailing sequence)
+ * the length remains unchanged.
+ *
+ * Useful for processing text split across multiple buffers
+ * (save the incomplete sequence for later)
+ * and for optimizing iteration
+ * (check for string length only once per character).
+ *
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ * Unlike U8_SET_CP_START(), this macro never reads s[length].
+ *
+ * (In UTF-16, simply check for U16_IS_LEAD(last code unit).)
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param length int32_t string length (usually start<=length)
+ * @see U8_SET_CP_START
+ * @stable ICU 61
+ */
+#define U8_TRUNCATE_IF_INCOMPLETE(s, start, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if((length)>(start)) { \
+ uint8_t __b1=s[(length)-1]; \
+ if(U8_IS_SINGLE(__b1)) { \
+ /* common ASCII character */ \
+ } else if(U8_IS_LEAD(__b1)) { \
+ --(length); \
+ } else if(U8_IS_TRAIL(__b1) && ((length)-2)>=(start)) { \
+ uint8_t __b2=s[(length)-2]; \
+ if(0xe0<=__b2 && __b2<=0xf4) { \
+ if(__b2<0xf0 ? U8_IS_VALID_LEAD3_AND_T1(__b2, __b1) : \
+ U8_IS_VALID_LEAD4_AND_T1(__b2, __b1)) { \
+ (length)-=2; \
+ } \
+ } else if(U8_IS_TRAIL(__b2) && ((length)-3)>=(start)) { \
+ uint8_t __b3=s[(length)-3]; \
+ if(0xf0<=__b3 && __b3<=0xf4 && U8_IS_VALID_LEAD4_AND_T1(__b3, __b2)) { \
+ (length)-=3; \
+ } \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * The result is undefined if the offset is behind an illegal UTF-8 sequence.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param c output UChar32 variable
+ * @see U8_PREV
+ * @stable ICU 2.4
+ */
+#define U8_PREV_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if(U8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ U8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to a negative value.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to <0 in case of an error
+ * @see U8_PREV_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_PREV(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if(!U8_IS_SINGLE(c)) { \
+ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -1); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one
+ * and get the code point between them.
+ * (Pre-decrementing backward iteration.)
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The input offset may be the same as the string length.
+ * If the offset is behind a multi-byte sequence, then the macro will read
+ * the whole sequence.
+ * If the offset is behind a lead byte, then that itself
+ * will be returned as the code point.
+ * If the offset is behind an illegal UTF-8 sequence, then c is set to U+FFFD.
+ *
+ * This macro does not distinguish between a real U+FFFD in the text
+ * and U+FFFD returned for an ill-formed sequence.
+ * Use U8_PREV() if that distinction is important.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @param c output UChar32 variable, set to U+FFFD in case of an error
+ * @see U8_PREV
+ * @stable ICU 51
+ */
+#define U8_PREV_OR_FFFD(s, start, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(uint8_t)(s)[--(i)]; \
+ if(!U8_IS_SINGLE(c)) { \
+ (c)=utf8_prevCharSafeBody((const uint8_t *)s, start, &(i), c, -3); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_BACK_1
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ while(U8_IS_TRAIL((s)[--(i)])) {} \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the previous one.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<i
+ * @see U8_BACK_1_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_1(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(U8_IS_TRAIL((s)[--(i)])) { \
+ (i)=utf8_back1SafeBody(s, start, (i)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @param n number of code points to skip
+ * @see U8_BACK_N
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ U8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Move the string offset from one code point boundary to the n-th one before it,
+ * i.e., move backward by n code points.
+ * (Pre-decrementing backward iteration.)
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t index of the start of the string
+ * @param i int32_t string offset, must be start<i
+ * @param n number of code points to skip
+ * @see U8_BACK_N_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_BACK_N(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0 && (i)>(start)) { \
+ U8_BACK_1(s, start, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Unsafe" macro, assumes well-formed UTF-8.
+ *
+ * @param s const uint8_t * string
+ * @param i string offset
+ * @see U8_SET_CP_LIMIT
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ U8_BACK_1_UNSAFE(s, i); \
+ U8_FWD_1_UNSAFE(s, i); \
+} UPRV_BLOCK_MACRO_END
+
+/**
+ * Adjust a random-access offset to a code point boundary after a code point.
+ * If the offset is behind a partial multi-byte sequence,
+ * then the offset is incremented to behind the whole sequence.
+ * Otherwise, it is not modified.
+ * The input offset may be the same as the string length.
+ * "Safe" macro, checks for illegal sequences and for string boundaries.
+ *
+ * The length can be negative for a NUL-terminated string.
+ *
+ * @param s const uint8_t * string
+ * @param start int32_t starting string offset (usually 0)
+ * @param i int32_t string offset, must be start<=i<=length
+ * @param length int32_t string length
+ * @see U8_SET_CP_LIMIT_UNSAFE
+ * @stable ICU 2.4
+ */
+#define U8_SET_CP_LIMIT(s, start, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ if((start)<(i) && ((i)<(length) || (length)<0)) { \
+ U8_BACK_1(s, start, i); \
+ U8_FWD_1(s, i, length); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/utf_old.h b/intl/icu/source/common/unicode/utf_old.h
new file mode 100644
index 0000000000..6b868c7280
--- /dev/null
+++ b/intl/icu/source/common/unicode/utf_old.h
@@ -0,0 +1,1201 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2002-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utf_old.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2002sep21
+* created by: Markus W. Scherer
+*/
+
+/**
+ * \file
+ * \brief C API: Deprecated macros for Unicode string handling
+ *
+ * The macros in utf_old.h are all deprecated and their use discouraged.
+ * Some of the design principles behind the set of UTF macros
+ * have changed or proved impractical.
+ * Almost all of the old "UTF macros" are at least renamed.
+ * If you are looking for a new equivalent to an old macro, please see the
+ * comment at the old one.
+ *
+ * Brief summary of reasons for deprecation:
+ * - Switch on UTF_SIZE (selection of UTF-8/16/32 default string processing)
+ * was impractical.
+ * - Switch on UTF_SAFE etc. (selection of unsafe/safe/strict default string processing)
+ * was of little use and impractical.
+ * - Whole classes of macros became obsolete outside of the UTF_SIZE/UTF_SAFE
+ * selection framework: UTF32_ macros (all trivial)
+ * and UTF_ default and intermediate macros (all aliases).
+ * - The selection framework also caused many macro aliases.
+ * - Change in Unicode standard: "irregular" sequences (3.0) became illegal (3.2).
+ * - Change of language in Unicode standard:
+ * Growing distinction between internal x-bit Unicode strings and external UTF-x
+ * forms, with the former more lenient.
+ * Suggests renaming of UTF16_ macros to U16_.
+ * - The prefix "UTF_" without a width number confused some users.
+ * - "Safe" append macros needed the addition of an error indicator output.
+ * - "Safe" UTF-8 macros used legitimate (if rarely used) code point values
+ * to indicate error conditions.
+ * - The use of the "_CHAR" infix for code point operations confused some users.
+ *
+ * More details:
+ *
+ * Until ICU 2.2, utf.h theoretically allowed to choose among UTF-8/16/32
+ * for string processing, and among unsafe/safe/strict default macros for that.
+ *
+ * It proved nearly impossible to write non-trivial, high-performance code
+ * that is UTF-generic.
+ * Unsafe default macros would be dangerous for default string processing,
+ * and the main reason for the "strict" versions disappeared:
+ * Between Unicode 3.0 and 3.2 all "irregular" UTF-8 sequences became illegal.
+ * The only other conditions that "strict" checked for were non-characters,
+ * which are valid during processing. Only during text input/output should they
+ * be checked, and at that time other well-formedness checks may be
+ * necessary or useful as well.
+ * This can still be done by using U16_NEXT and U_IS_UNICODE_NONCHAR
+ * or U_IS_UNICODE_CHAR.
+ *
+ * The old UTF8_..._SAFE macros also used some normal Unicode code points
+ * to indicate malformed sequences.
+ * The new UTF8_ macros without suffix use negative values instead.
+ *
+ * The entire contents of utf32.h was moved here without replacement
+ * because all those macros were trivial and
+ * were meaningful only in the framework of choosing the UTF size.
+ *
+ * See Jitterbug 2150 and its discussion on the ICU mailing list
+ * in September 2002.
+ *
+ * <hr>
+ *
+ * <em>Obsolete part</em> of pre-ICU 2.4 utf.h file documentation:
+ *
+ * <p>The original concept for these files was for ICU to allow
+ * in principle to set which UTF (UTF-8/16/32) is used internally
+ * by defining UTF_SIZE to either 8, 16, or 32. utf.h would then define the UChar type
+ * accordingly. UTF-16 was the default.</p>
+ *
+ * <p>This concept has been abandoned.
+ * A lot of the ICU source code assumes UChar strings are in UTF-16.
+ * This is especially true for low-level code like
+ * conversion, normalization, and collation.
+ * The utf.h header enforces the default of UTF-16.
+ * The UTF-8 and UTF-32 macros remain for now for completeness and backward compatibility.</p>
+ *
+ * <p>Accordingly, utf.h defines UChar to be an unsigned 16-bit integer. If this matches wchar_t, then
+ * UChar is defined to be exactly wchar_t, otherwise uint16_t.</p>
+ *
+ * <p>UChar32 is defined to be a signed 32-bit integer (int32_t), large enough for a 21-bit
+ * Unicode code point (Unicode scalar value, 0..0x10ffff).
+ * Before ICU 2.4, the definition of UChar32 was similarly platform-dependent as
+ * the definition of UChar. For details see the documentation for UChar32 itself.</p>
+ *
+ * <p>utf.h also defines a number of C macros for handling single Unicode code points and
+ * for using UTF Unicode strings. It includes utf8.h, utf16.h, and utf32.h for the actual
+ * implementations of those macros and then aliases one set of them (for UTF-16) for general use.
+ * The UTF-specific macros have the UTF size in the macro name prefixes (UTF16_...), while
+ * the general alias macros always begin with UTF_...</p>
+ *
+ * <p>Many string operations can be done with or without error checking.
+ * Where such a distinction is useful, there are two versions of the macros, "unsafe" and "safe"
+ * ones with ..._UNSAFE and ..._SAFE suffixes. The unsafe macros are fast but may cause
+ * program failures if the strings are not well-formed. The safe macros have an additional, boolean
+ * parameter "strict". If strict is false, then only illegal sequences are detected.
+ * Otherwise, irregular sequences and non-characters are detected as well (like single surrogates).
+ * Safe macros return special error code points for illegal/irregular sequences:
+ * Typically, U+ffff, or values that would result in a code unit sequence of the same length
+ * as the erroneous input sequence.<br>
+ * Note that _UNSAFE macros have fewer parameters: They do not have the strictness parameter, and
+ * they do not have start/length parameters for boundary checking.</p>
+ *
+ * <p>Here, the macros are aliased in two steps:
+ * In the first step, the UTF-specific macros with UTF16_ prefix and _UNSAFE and _SAFE suffixes are
+ * aliased according to the UTF_SIZE to macros with UTF_ prefix and the same suffixes and signatures.
+ * Then, in a second step, the default, general alias macros are set to use either the unsafe or
+ * the safe/not strict (default) or the safe/strict macro;
+ * these general macros do not have a strictness parameter.</p>
+ *
+ * <p>It is possible to change the default choice for the general alias macros to be unsafe, safe/not strict or safe/strict.
+ * The default is safe/not strict. It is not recommended to select the unsafe macros as the basis for
+ * Unicode string handling in ICU! To select this, define UTF_SAFE, UTF_STRICT, or UTF_UNSAFE.</p>
+ *
+ * <p>For general use, one should use the default, general macros with UTF_ prefix and no _SAFE/_UNSAFE suffix.
+ * Only in some cases it may be necessary to control the choice of macro directly and use a less generic alias.
+ * For example, if it can be assumed that a string is well-formed and the index will stay within the bounds,
+ * then the _UNSAFE version may be used.
+ * If a UTF-8 string is to be processed, then the macros with UTF8_ prefixes need to be used.</p>
+ *
+ * <hr>
+ *
+ * Deprecated ICU 2.4. Use the macros in utf.h, utf16.h, utf8.h instead.
+ */
+
+#ifndef __UTF_OLD_H__
+#define __UTF_OLD_H__
+
+#include "unicode/utf.h"
+#include "unicode/utf8.h"
+#include "unicode/utf16.h"
+
+/**
+ * \def U_HIDE_OBSOLETE_UTF_OLD_H
+ *
+ * Hides the obsolete definitions in unicode/utf_old.h.
+ * Recommended to be set to 1 at compile time to make sure
+ * the long-deprecated macros are no longer used.
+ *
+ * For reasons for the deprecation see the utf_old.h file comments.
+ *
+ * @internal
+ */
+#ifndef U_HIDE_OBSOLETE_UTF_OLD_H
+# define U_HIDE_OBSOLETE_UTF_OLD_H 0
+#endif
+
+#if !defined(U_HIDE_DEPRECATED_API) && !U_HIDE_OBSOLETE_UTF_OLD_H
+
+/* Formerly utf.h, part 1 --------------------------------------------------- */
+
+#ifdef U_USE_UTF_DEPRECATES
+/**
+ * Unicode string and array offset and index type.
+ * ICU always counts Unicode code units (UChars) for
+ * string offsets, indexes, and lengths, not Unicode code points.
+ *
+ * @obsolete ICU 2.6. Use int32_t directly instead since this API will be removed in that release.
+ */
+typedef int32_t UTextOffset;
+#endif
+
+/** Number of bits in a Unicode string code unit - ICU uses 16-bit Unicode. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF_SIZE 16
+
+/**
+ * The default choice for general Unicode string macros is to use the ..._SAFE macro implementations
+ * with strict=false.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_SAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_UNSAFE
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#undef UTF_STRICT
+
+/**
+ * UTF8_ERROR_VALUE_1 and UTF8_ERROR_VALUE_2 are special error values for UTF-8,
+ * which need 1 or 2 bytes in UTF-8:
+ * \code
+ * U+0015 = NAK = Negative Acknowledge, C0 control character
+ * U+009f = highest C1 control character
+ * \endcode
+ *
+ * These are used by UTF8_..._SAFE macros so that they can return an error value
+ * that needs the same number of code units (bytes) as were seen by
+ * a macro. They should be tested with UTF_IS_ERROR() or UTF_IS_VALID().
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_1 0x15
+
+/**
+ * See documentation on UTF8_ERROR_VALUE_1 for details.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF8_ERROR_VALUE_2 0x9f
+
+/**
+ * Error value for all UTFs. This code point value will be set by macros with error
+ * checking if an error is detected.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ERROR_VALUE 0xffff
+
+/**
+ * Is a given 32-bit code an error value
+ * as returned by one of the macros for any UTF?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_ERROR(c) \
+ (((c)&0xfffe)==0xfffe || (c)==UTF8_ERROR_VALUE_1 || (c)==UTF8_ERROR_VALUE_2)
+
+/**
+ * This is a combined macro: Is c a valid Unicode value _and_ not an error code?
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_IS_VALID(c) \
+ (UTF_IS_UNICODE_CHAR(c) && \
+ (c)!=UTF8_ERROR_VALUE_1 && (c)!=UTF8_ERROR_VALUE_2)
+
+/**
+ * Is this code unit or code point a surrogate (U+d800..U+dfff)?
+ * @deprecated ICU 2.4. Renamed to U_IS_SURROGATE and U16_IS_SURROGATE, see utf_old.h.
+ */
+#define UTF_IS_SURROGATE(uchar) (((uchar)&0xfffff800)==0xd800)
+
+/**
+ * Is a given 32-bit code point a Unicode noncharacter?
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_NONCHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_NONCHAR(c) \
+ ((c)>=0xfdd0 && \
+ ((uint32_t)(c)<=0xfdef || ((c)&0xfffe)==0xfffe) && \
+ (uint32_t)(c)<=0x10ffff)
+
+/**
+ * Is a given 32-bit value a Unicode code point value (0..U+10ffff)
+ * that can be assigned a character?
+ *
+ * Code points that are not characters include:
+ * - single surrogate code points (U+d800..U+dfff, 2048 code points)
+ * - the last two code points on each plane (U+__fffe and U+__ffff, 34 code points)
+ * - U+fdd0..U+fdef (new with Unicode 3.1, 32 code points)
+ * - the highest Unicode code point value is U+10ffff
+ *
+ * This means that all code points below U+d800 are character code points,
+ * and that boundary is tested first for performance.
+ *
+ * @deprecated ICU 2.4. Renamed to U_IS_UNICODE_CHAR, see utf_old.h.
+ */
+#define UTF_IS_UNICODE_CHAR(c) \
+ ((uint32_t)(c)<0xd800 || \
+ ((uint32_t)(c)>0xdfff && \
+ (uint32_t)(c)<=0x10ffff && \
+ !UTF_IS_UNICODE_NONCHAR(c)))
+
+/* Formerly utf8.h ---------------------------------------------------------- */
+
+/**
+* \var utf8_countTrailBytes
+* Internal array with numbers of trail bytes for any given byte used in
+* lead byte position.
+*
+* This is internal since it is not meant to be called directly by external clients;
+* however it is called by public macros in this file and thus must remain stable,
+* and should not be hidden when other internal functions are hidden (otherwise
+* public macros would fail to compile).
+* @internal
+*/
+#ifdef U_UTF8_IMPL
+// No forward declaration if compiling utf_impl.cpp, which defines utf8_countTrailBytes.
+#elif defined(U_STATIC_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION)
+U_CAPI const uint8_t utf8_countTrailBytes[];
+#else
+U_CFUNC U_IMPORT const uint8_t utf8_countTrailBytes[];
+#endif
+
+/**
+ * Count the trail bytes for a UTF-8 lead byte.
+ * @deprecated ICU 2.4. Renamed to U8_COUNT_TRAIL_BYTES, see utf_old.h.
+ */
+#define UTF8_COUNT_TRAIL_BYTES(leadByte) (utf8_countTrailBytes[(uint8_t)leadByte])
+
+/**
+ * Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
+ * @deprecated ICU 2.4. Renamed to U8_MASK_LEAD_BYTE, see utf_old.h.
+ */
+#define UTF8_MASK_LEAD_BYTE(leadByte, countTrailBytes) ((leadByte)&=(1<<(6-(countTrailBytes)))-1)
+
+/** Is this this code point a single code unit (byte)? @deprecated ICU 2.4. Renamed to U8_IS_SINGLE, see utf_old.h. */
+#define UTF8_IS_SINGLE(uchar) (((uchar)&0x80)==0)
+/** Is this this code unit the lead code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_LEAD, see utf_old.h. */
+#define UTF8_IS_LEAD(uchar) ((uint8_t)((uchar)-0xc0)<0x3e)
+/** Is this this code unit a trailing code unit (byte) of a code point? @deprecated ICU 2.4. Renamed to U8_IS_TRAIL, see utf_old.h. */
+#define UTF8_IS_TRAIL(uchar) (((uchar)&0xc0)==0x80)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U8_LENGTH or test ((uint32_t)(c)>0x7f) instead, see utf_old.h. */
+#define UTF8_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0x7f)
+
+/**
+ * Given the lead character, how many bytes are taken by this code point.
+ * ICU does not deal with code points >0x10ffff
+ * unless necessary for advancing in the byte stream.
+ *
+ * These length macros take into account that for values >0x10ffff
+ * the UTF8_APPEND_CHAR_SAFE macros would write the error code point 0xffff
+ * with 3 bytes.
+ * Code point comparisons need to be in uint32_t because UChar32
+ * may be a signed type, and negative values must be recognized.
+ *
+ * @deprecated ICU 2.4. Use U8_LENGTH instead, see utf.h.
+ */
+#if 1
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)((c)-0x10000)>0xfffff ? 3 : 4) \
+ ) \
+ )
+#else
+# define UTF8_CHAR_LENGTH(c) \
+ ((uint32_t)(c)<=0x7f ? 1 : \
+ ((uint32_t)(c)<=0x7ff ? 2 : \
+ ((uint32_t)(c)<=0xffff ? 3 : \
+ ((uint32_t)(c)<=0x10ffff ? 4 : \
+ ((uint32_t)(c)<=0x3ffffff ? 5 : \
+ ((uint32_t)(c)<=0x7fffffff ? 6 : 3) \
+ ) \
+ ) \
+ ) \
+ ) \
+ )
+#endif
+
+/** The maximum number of bytes per code point. @deprecated ICU 2.4. Renamed to U8_MAX_LENGTH, see utf_old.h. */
+#define UTF8_MAX_CHAR_LENGTH 4
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF8_ARRAY_SIZE(size) ((5*(size))/2)
+
+/** @deprecated ICU 2.4. Renamed to U8_GET_UNSAFE, see utf_old.h. */
+#define UTF8_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _utf8_get_char_unsafe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_UNSAFE(s, _utf8_get_char_unsafe_index); \
+ UTF8_NEXT_CHAR_UNSAFE(s, _utf8_get_char_unsafe_index, c); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U8_GET instead, see utf_old.h. */
+#define UTF8_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t _utf8_get_char_safe_index=(int32_t)(i); \
+ UTF8_SET_CHAR_START_SAFE(s, start, _utf8_get_char_safe_index); \
+ UTF8_NEXT_CHAR_SAFE(s, _utf8_get_char_safe_index, length, c, strict); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_NEXT_UNSAFE, see utf_old.h. */
+#define UTF8_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if((uint8_t)((c)-0xc0)<0x35) { \
+ uint8_t __count=UTF8_COUNT_TRAIL_BYTES(c); \
+ UTF8_MASK_LEAD_BYTE(c, __count); \
+ switch(__count) { \
+ /* each following branch falls through to the next one */ \
+ case 3: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 2: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ case 1: \
+ (c)=((c)<<6)|((s)[(i)++]&0x3f); \
+ /* no other branches to optimize switch() */ \
+ break; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_APPEND_UNSAFE, see utf_old.h. */
+#define UTF8_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ if((uint32_t)(c)<=0x7ff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>6)|0xc0); \
+ } else { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint8_t)(((c)>>12)|0xe0); \
+ } else { \
+ (s)[(i)++]=(uint8_t)(((c)>>18)|0xf0); \
+ (s)[(i)++]=(uint8_t)((((c)>>12)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)((((c)>>6)&0x3f)|0x80); \
+ } \
+ (s)[(i)++]=(uint8_t)(((c)&0x3f)|0x80); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)+=1+UTF8_COUNT_TRAIL_BYTES((s)[i]); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF8_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ while(UTF8_IS_TRAIL((s)[i])) { --(i); } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U8_NEXT instead, see utf_old.h. */
+#define UTF8_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if((c)>=0x80) { \
+ if(UTF8_IS_LEAD(c)) { \
+ (c)=utf8_nextCharSafeBody(s, &(i), (int32_t)(length), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U8_APPEND instead, see utf_old.h. */
+#define UTF8_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0x7f) { \
+ (s)[(i)++]=(uint8_t)(c); \
+ } else { \
+ (i)=utf8_appendCharSafeBody(s, (int32_t)(i), (int32_t)(length), c, NULL); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_1, see utf_old.h. */
+#define UTF8_FWD_1_SAFE(s, i, length) U8_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U8_FWD_N, see utf_old.h. */
+#define UTF8_FWD_N_SAFE(s, i, length, n) U8_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_START, see utf_old.h. */
+#define UTF8_SET_CHAR_START_SAFE(s, start, i) U8_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_PREV_UNSAFE, see utf_old.h. */
+#define UTF8_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(UTF8_IS_TRAIL(c)) { \
+ uint8_t __b, __count=1, __shift=6; \
+\
+ /* c is a trail byte */ \
+ (c)&=0x3f; \
+ for(;;) { \
+ __b=(s)[--(i)]; \
+ if(__b>=0xc0) { \
+ UTF8_MASK_LEAD_BYTE(__b, __count); \
+ (c)|=(UChar32)__b<<__shift; \
+ break; \
+ } else { \
+ (c)|=(UChar32)(__b&0x3f)<<__shift; \
+ ++__count; \
+ __shift+=6; \
+ } \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ while(UTF8_IS_TRAIL((s)[--(i)])) {} \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF8_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ UTF8_BACK_1_UNSAFE(s, i); \
+ UTF8_FWD_1_UNSAFE(s, i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U8_PREV instead, see utf_old.h. */
+#define UTF8_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if((c)>=0x80) { \
+ if((c)<=0xbf) { \
+ (c)=utf8_prevCharSafeBody(s, start, &(i), c, strict); \
+ } else { \
+ (c)=UTF8_ERROR_VALUE_1; \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_1, see utf_old.h. */
+#define UTF8_BACK_1_SAFE(s, start, i) U8_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U8_BACK_N, see utf_old.h. */
+#define UTF8_BACK_N_SAFE(s, start, i, n) U8_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U8_SET_CP_LIMIT, see utf_old.h. */
+#define UTF8_SET_CHAR_LIMIT_SAFE(s, start, i, length) U8_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf16.h --------------------------------------------------------- */
+
+/** Is uchar a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h. */
+#define UTF_IS_FIRST_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xd800)
+
+/** Is uchar a second/trail surrogate? @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h. */
+#define UTF_IS_SECOND_SURROGATE(uchar) (((uchar)&0xfffffc00)==0xdc00)
+
+/** Assuming c is a surrogate, is it a first/lead surrogate? @deprecated ICU 2.4. Renamed to U_IS_SURROGATE_LEAD and U16_IS_SURROGATE_LEAD, see utf_old.h. */
+#define UTF_IS_SURROGATE_FIRST(c) (((c)&0x400)==0)
+
+/** Helper constant for UTF16_GET_PAIR_VALUE. @deprecated ICU 2.4. Renamed to U16_SURROGATE_OFFSET, see utf_old.h. */
+#define UTF_SURROGATE_OFFSET ((0xd800<<10UL)+0xdc00-0x10000)
+
+/** Get the UTF-32 value from the surrogate code units. @deprecated ICU 2.4. Renamed to U16_GET_SUPPLEMENTARY, see utf_old.h. */
+#define UTF16_GET_PAIR_VALUE(first, second) \
+ (((first)<<10UL)+(second)-UTF_SURROGATE_OFFSET)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF_FIRST_SURROGATE(supplementary) (UChar)(((supplementary)>>10)+0xd7c0)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF_SECOND_SURROGATE(supplementary) (UChar)(((supplementary)&0x3ff)|0xdc00)
+
+/** @deprecated ICU 2.4. Renamed to U16_LEAD, see utf_old.h. */
+#define UTF16_LEAD(supplementary) UTF_FIRST_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_TRAIL, see utf_old.h. */
+#define UTF16_TRAIL(supplementary) UTF_SECOND_SURROGATE(supplementary)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_SINGLE, see utf_old.h. */
+#define UTF16_IS_SINGLE(uchar) !UTF_IS_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_LEAD, see utf_old.h. */
+#define UTF16_IS_LEAD(uchar) UTF_IS_FIRST_SURROGATE(uchar)
+
+/** @deprecated ICU 2.4. Renamed to U16_IS_TRAIL, see utf_old.h. */
+#define UTF16_IS_TRAIL(uchar) UTF_IS_SECOND_SURROGATE(uchar)
+
+/** Does this scalar Unicode value need multiple code units for storage? @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead, see utf_old.h. */
+#define UTF16_NEED_MULTIPLE_UCHAR(c) ((uint32_t)(c)>0xffff)
+
+/** @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h. */
+#define UTF16_CHAR_LENGTH(c) ((uint32_t)(c)<=0xffff ? 1 : 2)
+
+/** @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h. */
+#define UTF16_MAX_CHAR_LENGTH 2
+
+/** Average number of code units compared to UTF-16. @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF16_ARRAY_SIZE(size) (size)
+
+/**
+ * Get a single code point from an offset that points to any
+ * of the code units that belong to that code point.
+ * Assume 0<=i<length.
+ *
+ * This could be used for iteration together with
+ * UTF16_CHAR_LENGTH() and UTF_IS_ERROR(),
+ * but the use of UTF16_NEXT_CHAR[_UNSAFE]() and
+ * UTF16_PREV_CHAR[_UNSAFE]() is more efficient for that.
+ * @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h.
+ */
+#define UTF16_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)+1]); \
+ } else { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[(i)-1], (c)); \
+ } \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(UTF_IS_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if(UTF_IS_SURROGATE_FIRST(c)) { \
+ if((i)+1<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)+1])) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else { \
+ if((i)-1>=(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF16_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((c), (s)[(i)++]); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF16_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)++])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF16_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_FWD_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_SECOND_SURROGATE((s)[i])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(UTF_IS_FIRST_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)<(length) && UTF_IS_SECOND_SURROGATE(__c2=(s)[(i)])) { \
+ ++(i); \
+ (c)=UTF16_GET_PAIR_VALUE((c), __c2); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched first surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched second surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF16_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0xffff) { \
+ (s)[(i)++]=(uint16_t)(c); \
+ } else if((uint32_t)(c)<=0x10ffff) { \
+ if((i)+1<(length)) { \
+ (s)[(i)++]=(uint16_t)(((c)>>10)+0xd7c0); \
+ (s)[(i)++]=(uint16_t)(((c)&0x3ff)|0xdc00); \
+ } else /* not enough space */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+ } else /* c>0x10ffff, write error value */ { \
+ (s)[(i)++]=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF16_FWD_1_SAFE(s, i, length) U16_FWD_1(s, i, length)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF16_FWD_N_SAFE(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF16_SET_CHAR_START_SAFE(s, start, i) U16_SET_CP_START(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF16_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ (c)=UTF16_GET_PAIR_VALUE((s)[--(i)], (c)); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_SECOND_SURROGATE((s)[--(i)])) { \
+ --(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF16_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ int32_t __N=(n); \
+ while(__N>0) { \
+ UTF16_BACK_1_UNSAFE(s, i); \
+ --__N; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ if(UTF_IS_FIRST_SURROGATE((s)[(i)-1])) { \
+ ++(i); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF16_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(UTF_IS_SECOND_SURROGATE(c)) { \
+ uint16_t __c2; \
+ if((i)>(start) && UTF_IS_FIRST_SURROGATE(__c2=(s)[(i)-1])) { \
+ --(i); \
+ (c)=UTF16_GET_PAIR_VALUE(__c2, (c)); \
+ /* strict: ((c)&0xfffe)==0xfffe is caught by UTF_IS_ERROR() and UTF_IS_UNICODE_CHAR() */ \
+ } else if(strict) {\
+ /* unmatched second surrogate */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+ } else if((strict) && !UTF_IS_UNICODE_CHAR(c)) { \
+ /* unmatched first surrogate or other non-character */ \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF16_BACK_1_SAFE(s, start, i) U16_BACK_1(s, start, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF16_BACK_N_SAFE(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+/* Formerly utf32.h --------------------------------------------------------- */
+
+/*
+* Old documentation:
+*
+* This file defines macros to deal with UTF-32 code units and code points.
+* Signatures and semantics are the same as for the similarly named macros
+* in utf16.h.
+* utf32.h is included by utf.h after unicode/umachine.h</p>
+* and some common definitions.
+* <p><b>Usage:</b> ICU coding guidelines for if() statements should be followed when using these macros.
+* Compound statements (curly braces {}) must be used for if-else-while...
+* bodies and all macro statements should be terminated with semicolon.</p>
+*/
+
+/* internal definitions ----------------------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SAFE(c, strict) \
+ (!(strict) ? \
+ (uint32_t)(c)<=0x10ffff : \
+ UTF_IS_UNICODE_CHAR(c))
+
+/*
+ * For the semantics of all of these macros, see utf16.h.
+ * The UTF-32 versions are trivial because any code point is
+ * encoded using exactly one code unit.
+ */
+
+/* single-code point definitions -------------------------------------------- */
+
+/* classes of code unit values */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_SINGLE(uchar) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_LEAD(uchar) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_IS_TRAIL(uchar) 0
+
+/* number of code units per code point */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEED_MULTIPLE_UCHAR(c) 0
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_CHAR_LENGTH(c) 1
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_MAX_CHAR_LENGTH 1
+
+/* average number of code units compared to UTF-16 */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_ARRAY_SIZE(size) (size)
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_GET_CHAR_SAFE(s, start, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[i]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with forward iteration --------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (s)[(i)++]=(c); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ ++(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)+=(n); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_NEXT_CHAR_SAFE(s, i, length, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[(i)++]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_APPEND_CHAR_SAFE(s, i, length, c) UPRV_BLOCK_MACRO_BEGIN { \
+ if((uint32_t)(c)<=0x10ffff) { \
+ (s)[(i)++]=(c); \
+ } else /* c>0x10ffff, write 0xfffd */ { \
+ (s)[(i)++]=0xfffd; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_1_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+ ++(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_FWD_N_SAFE(s, i, length, n) UPRV_BLOCK_MACRO_BEGIN { \
+ if(((i)+=(n))>(length)) { \
+ (i)=(length); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_START_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/* definitions with backward iteration -------------------------------------- */
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_UNSAFE(s, i, c) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+ --(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_UNSAFE(s, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)-=(n); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_UNSAFE(s, i) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_PREV_CHAR_SAFE(s, start, i, c, strict) UPRV_BLOCK_MACRO_BEGIN { \
+ (c)=(s)[--(i)]; \
+ if(!UTF32_IS_SAFE(c, strict)) { \
+ (c)=UTF_ERROR_VALUE; \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_1_SAFE(s, start, i) UPRV_BLOCK_MACRO_BEGIN { \
+ --(i); \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_BACK_N_SAFE(s, start, i, n) UPRV_BLOCK_MACRO_BEGIN { \
+ (i)-=(n); \
+ if((i)<(start)) { \
+ (i)=(start); \
+ } \
+} UPRV_BLOCK_MACRO_END
+
+/** @deprecated ICU 2.4. Obsolete, see utf_old.h. */
+#define UTF32_SET_CHAR_LIMIT_SAFE(s, i, length) UPRV_BLOCK_MACRO_BEGIN { \
+} UPRV_BLOCK_MACRO_END
+
+/* Formerly utf.h, part 2 --------------------------------------------------- */
+
+/**
+ * Estimate the number of code units for a string based on the number of UTF-16 code units.
+ *
+ * @deprecated ICU 2.4. Obsolete, see utf_old.h.
+ */
+#define UTF_ARRAY_SIZE(size) UTF16_ARRAY_SIZE(size)
+
+/** @deprecated ICU 2.4. Renamed to U16_GET_UNSAFE, see utf_old.h. */
+#define UTF_GET_CHAR_UNSAFE(s, i, c) UTF16_GET_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_GET instead, see utf_old.h. */
+#define UTF_GET_CHAR_SAFE(s, start, i, length, c, strict) UTF16_GET_CHAR_SAFE(s, start, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_NEXT_UNSAFE, see utf_old.h. */
+#define UTF_NEXT_CHAR_UNSAFE(s, i, c) UTF16_NEXT_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_NEXT instead, see utf_old.h. */
+#define UTF_NEXT_CHAR_SAFE(s, i, length, c, strict) UTF16_NEXT_CHAR_SAFE(s, i, length, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_APPEND_UNSAFE, see utf_old.h. */
+#define UTF_APPEND_CHAR_UNSAFE(s, i, c) UTF16_APPEND_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h. */
+#define UTF_APPEND_CHAR_SAFE(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1_UNSAFE, see utf_old.h. */
+#define UTF_FWD_1_UNSAFE(s, i) UTF16_FWD_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h. */
+#define UTF_FWD_1_SAFE(s, i, length) UTF16_FWD_1_SAFE(s, i, length)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N_UNSAFE, see utf_old.h. */
+#define UTF_FWD_N_UNSAFE(s, i, n) UTF16_FWD_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h. */
+#define UTF_FWD_N_SAFE(s, i, length, n) UTF16_FWD_N_SAFE(s, i, length, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_START_UNSAFE(s, i) UTF16_SET_CHAR_START_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h. */
+#define UTF_SET_CHAR_START_SAFE(s, start, i) UTF16_SET_CHAR_START_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_PREV_UNSAFE, see utf_old.h. */
+#define UTF_PREV_CHAR_UNSAFE(s, i, c) UTF16_PREV_CHAR_UNSAFE(s, i, c)
+
+/** @deprecated ICU 2.4. Use U16_PREV instead, see utf_old.h. */
+#define UTF_PREV_CHAR_SAFE(s, start, i, c, strict) UTF16_PREV_CHAR_SAFE(s, start, i, c, strict)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1_UNSAFE, see utf_old.h. */
+#define UTF_BACK_1_UNSAFE(s, i) UTF16_BACK_1_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h. */
+#define UTF_BACK_1_SAFE(s, start, i) UTF16_BACK_1_SAFE(s, start, i)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N_UNSAFE, see utf_old.h. */
+#define UTF_BACK_N_UNSAFE(s, i, n) UTF16_BACK_N_UNSAFE(s, i, n)
+
+/** @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h. */
+#define UTF_BACK_N_SAFE(s, start, i, n) UTF16_BACK_N_SAFE(s, start, i, n)
+
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT_UNSAFE, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_UNSAFE(s, i) UTF16_SET_CHAR_LIMIT_UNSAFE(s, i)
+
+/** @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h. */
+#define UTF_SET_CHAR_LIMIT_SAFE(s, start, i, length) UTF16_SET_CHAR_LIMIT_SAFE(s, start, i, length)
+
+/* Define default macros (UTF-16 "safe") ------------------------------------ */
+
+/**
+ * Does this code unit alone encode a code point (BMP, not a surrogate)?
+ * Same as UTF16_IS_SINGLE.
+ * @deprecated ICU 2.4. Renamed to U_IS_SINGLE and U16_IS_SINGLE, see utf_old.h.
+ */
+#define UTF_IS_SINGLE(uchar) U16_IS_SINGLE(uchar)
+
+/**
+ * Is this code unit the first one of several (a lead surrogate)?
+ * Same as UTF16_IS_LEAD.
+ * @deprecated ICU 2.4. Renamed to U_IS_LEAD and U16_IS_LEAD, see utf_old.h.
+ */
+#define UTF_IS_LEAD(uchar) U16_IS_LEAD(uchar)
+
+/**
+ * Is this code unit one of several but not the first one (a trail surrogate)?
+ * Same as UTF16_IS_TRAIL.
+ * @deprecated ICU 2.4. Renamed to U_IS_TRAIL and U16_IS_TRAIL, see utf_old.h.
+ */
+#define UTF_IS_TRAIL(uchar) U16_IS_TRAIL(uchar)
+
+/**
+ * Does this code point require multiple code units (is it a supplementary code point)?
+ * Same as UTF16_NEED_MULTIPLE_UCHAR.
+ * @deprecated ICU 2.4. Use U16_LENGTH or test ((uint32_t)(c)>0xffff) instead.
+ */
+#define UTF_NEED_MULTIPLE_UCHAR(c) UTF16_NEED_MULTIPLE_UCHAR(c)
+
+/**
+ * How many code units are used to encode this code point (1 or 2)?
+ * Same as UTF16_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_LENGTH, see utf_old.h.
+ */
+#define UTF_CHAR_LENGTH(c) U16_LENGTH(c)
+
+/**
+ * How many code units are used at most for any Unicode code point (2)?
+ * Same as UTF16_MAX_CHAR_LENGTH.
+ * @deprecated ICU 2.4. Renamed to U16_MAX_LENGTH, see utf_old.h.
+ */
+#define UTF_MAX_CHAR_LENGTH U16_MAX_LENGTH
+
+/**
+ * Set c to the code point that contains the code unit i.
+ * i could point to the lead or the trail surrogate for the code point.
+ * i is not modified.
+ * Same as UTF16_GET_CHAR.
+ * \pre 0<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_GET, see utf_old.h.
+ */
+#define UTF_GET_CHAR(s, start, i, length, c) U16_GET(s, start, i, length, c)
+
+/**
+ * Set c to the code point that starts at code unit i
+ * and advance i to beyond the code units of this code point (post-increment).
+ * i must point to the first code unit of a code point.
+ * Otherwise c is set to the trail unit (surrogate) itself.
+ * Same as UTF16_NEXT_CHAR.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_NEXT, see utf_old.h.
+ */
+#define UTF_NEXT_CHAR(s, i, length, c) U16_NEXT(s, i, length, c)
+
+/**
+ * Append the code units of code point c to the string at index i
+ * and advance i to beyond the new code units (post-increment).
+ * The code units beginning at index i will be overwritten.
+ * Same as UTF16_APPEND_CHAR.
+ * \pre 0<=c<=0x10ffff
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Use U16_APPEND instead, see utf_old.h.
+ */
+#define UTF_APPEND_CHAR(s, i, length, c) UTF16_APPEND_CHAR_SAFE(s, i, length, c)
+
+/**
+ * Advance i to beyond the code units of the code point that begins at i.
+ * I.e., advance i by one code point.
+ * Same as UTF16_FWD_1.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_1, see utf_old.h.
+ */
+#define UTF_FWD_1(s, i, length) U16_FWD_1(s, i, length)
+
+/**
+ * Advance i to beyond the code units of the n code points where the first one begins at i.
+ * I.e., advance i by n code points.
+ * Same as UT16_FWD_N.
+ * \pre 0<=i<length
+ * \post 0<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_FWD_N, see utf_old.h.
+ */
+#define UTF_FWD_N(s, i, length, n) U16_FWD_N(s, i, length, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points to the beginning
+ * of a code point.
+ * The input index points to any code unit of a code point and is moved to point to
+ * the first code unit of the same code point. i is never incremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is decremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_NEXT_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_START.
+ * \pre start<=i<length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_START, see utf_old.h.
+ */
+#define UTF_SET_CHAR_START(s, start, i) U16_SET_CP_START(s, start, i)
+
+/**
+ * Set c to the code point that has code units before i
+ * and move i backward (towards the beginning of the string)
+ * to the first code unit of this code point (pre-increment).
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_PREV_CHAR.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_PREV, see utf_old.h.
+ */
+#define UTF_PREV_CHAR(s, start, i, c) U16_PREV(s, start, i, c)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the code point that has code units before i.
+ * I.e., move i backward by one code point.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_1.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_1, see utf_old.h.
+ */
+#define UTF_BACK_1(s, start, i) U16_BACK_1(s, start, i)
+
+/**
+ * Move i backward (towards the beginning of the string)
+ * to the first code unit of the n code points that have code units before i.
+ * I.e., move i backward by n code points.
+ * i must point to the first code unit after the last unit of a code point (i==length is allowed).
+ * Same as UTF16_BACK_N.
+ * \pre start<i<=length
+ * \post start<=i<length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_BACK_N, see utf_old.h.
+ */
+#define UTF_BACK_N(s, start, i, n) U16_BACK_N(s, start, i, n)
+
+/**
+ * Take the random-access index i and adjust it so that it points beyond
+ * a code point. The input index points beyond any code unit
+ * of a code point and is moved to point beyond the last code unit of the same
+ * code point. i is never decremented.
+ * In other words, if i points to a trail surrogate that is preceded by a matching
+ * lead surrogate, then i is incremented. Otherwise it is not modified.
+ * This can be used to start an iteration with UTF_PREV_CHAR() from a random index.
+ * Same as UTF16_SET_CHAR_LIMIT.
+ * \pre start<i<=length
+ * \post start<i<=length
+ *
+ * @deprecated ICU 2.4. Renamed to U16_SET_CP_LIMIT, see utf_old.h.
+ */
+#define UTF_SET_CHAR_LIMIT(s, start, i, length) U16_SET_CP_LIMIT(s, start, i, length)
+
+#endif // !U_HIDE_DEPRECATED_API && !U_HIDE_OBSOLETE_UTF_OLD_H
+
+#endif
diff --git a/intl/icu/source/common/unicode/utrace.h b/intl/icu/source/common/unicode/utrace.h
new file mode 100644
index 0000000000..677486f473
--- /dev/null
+++ b/intl/icu/source/common/unicode/utrace.h
@@ -0,0 +1,506 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*
+* Copyright (C) 2003-2013, International Business Machines
+* Corporation and others. All Rights Reserved.
+*
+*******************************************************************************
+* file name: utrace.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2003aug06
+* created by: Markus W. Scherer
+*
+* Definitions for ICU tracing/logging.
+*
+*/
+
+#ifndef __UTRACE_H__
+#define __UTRACE_H__
+
+#include <stdarg.h>
+#include "unicode/utypes.h"
+
+/**
+ * \file
+ * \brief C API: Definitions for ICU tracing/logging.
+ *
+ * This provides API for debugging the internals of ICU without the use of
+ * a traditional debugger.
+ *
+ * By default, tracing is disabled in ICU. If you need to debug ICU with
+ * tracing, please compile ICU with the --enable-tracing configure option.
+ */
+
+U_CDECL_BEGIN
+
+/**
+ * Trace severity levels. Higher levels increase the verbosity of the trace output.
+ * @see utrace_setLevel
+ * @stable ICU 2.8
+ */
+typedef enum UTraceLevel {
+ /** Disable all tracing @stable ICU 2.8*/
+ UTRACE_OFF=-1,
+ /** Trace error conditions only @stable ICU 2.8*/
+ UTRACE_ERROR=0,
+ /** Trace errors and warnings @stable ICU 2.8*/
+ UTRACE_WARNING=3,
+ /** Trace opens and closes of ICU services @stable ICU 2.8*/
+ UTRACE_OPEN_CLOSE=5,
+ /** Trace an intermediate number of ICU operations @stable ICU 2.8*/
+ UTRACE_INFO=7,
+ /** Trace the maximum number of ICU operations @stable ICU 2.8*/
+ UTRACE_VERBOSE=9
+} UTraceLevel;
+
+/**
+ * These are the ICU functions that will be traced when tracing is enabled.
+ * @stable ICU 2.8
+ */
+typedef enum UTraceFunctionNumber {
+ UTRACE_FUNCTION_START=0,
+ UTRACE_U_INIT=UTRACE_FUNCTION_START,
+ UTRACE_U_CLEANUP,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal collation trace location.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_FUNCTION_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ UTRACE_CONVERSION_START=0x1000,
+ UTRACE_UCNV_OPEN=UTRACE_CONVERSION_START,
+ UTRACE_UCNV_OPEN_PACKAGE,
+ UTRACE_UCNV_OPEN_ALGORITHMIC,
+ UTRACE_UCNV_CLONE,
+ UTRACE_UCNV_CLOSE,
+ UTRACE_UCNV_FLUSH_CACHE,
+ UTRACE_UCNV_LOAD,
+ UTRACE_UCNV_UNLOAD,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal collation trace location.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_CONVERSION_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ UTRACE_COLLATION_START=0x2000,
+ UTRACE_UCOL_OPEN=UTRACE_COLLATION_START,
+ UTRACE_UCOL_CLOSE,
+ UTRACE_UCOL_STRCOLL,
+ UTRACE_UCOL_GET_SORTKEY,
+ UTRACE_UCOL_GETLOCALE,
+ UTRACE_UCOL_NEXTSORTKEYPART,
+ UTRACE_UCOL_STRCOLLITER,
+ UTRACE_UCOL_OPEN_FROM_SHORT_STRING,
+ UTRACE_UCOL_STRCOLLUTF8, /**< @stable ICU 50 */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal collation trace location.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_COLLATION_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /**
+ * The lowest resource/data location.
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_START=0x3000,
+
+ /**
+ * Indicates that a value was read from a resource bundle. Provides three
+ * C-style strings to UTraceData: type, file name, and resource path. The
+ * possible types are:
+ *
+ * - "string" (a string value was accessed)
+ * - "binary" (a binary value was accessed)
+ * - "intvector" (a integer vector value was accessed)
+ * - "int" (a signed integer value was accessed)
+ * - "uint" (a unsigned integer value was accessed)
+ * - "get" (a path was loaded, but the value was not accessed)
+ * - "getalias" (a path was loaded, and an alias was resolved)
+ *
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_RESOURCE=UTRACE_UDATA_START,
+
+ /**
+ * Indicates that a resource bundle was opened.
+ *
+ * Provides one C-style string to UTraceData: file name.
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_BUNDLE,
+
+ /**
+ * Indicates that a data file was opened, but not *.res files.
+ *
+ * Provides one C-style string to UTraceData: file name.
+ *
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_DATA_FILE,
+
+ /**
+ * Indicates that a *.res file was opened.
+ *
+ * This differs from UTRACE_UDATA_BUNDLE because a res file is typically
+ * opened only once per application runtime, but the bundle corresponding
+ * to that res file may be opened many times.
+ *
+ * Provides one C-style string to UTraceData: file name.
+ *
+ * @stable ICU 65
+ */
+ UTRACE_UDATA_RES_FILE,
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * One more than the highest normal resource/data trace location.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_RES_DATA_LIMIT,
+#endif // U_HIDE_INTERNAL_API
+
+ /**
+ * The lowest break iterator location.
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_START=0x4000,
+
+ /**
+ * Indicates that a character instance of break iterator was created.
+ *
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_CREATE_CHARACTER = UTRACE_UBRK_START,
+
+ /**
+ * Indicates that a word instance of break iterator was created.
+ *
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_CREATE_WORD,
+
+ /**
+ * Indicates that a line instance of break iterator was created.
+ *
+ * Provides one C-style string to UTraceData: the lb value ("",
+ * "loose", "strict", or "normal").
+ *
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_CREATE_LINE,
+
+ /**
+ * Indicates that a sentence instance of break iterator was created.
+ *
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_CREATE_SENTENCE,
+
+ /**
+ * Indicates that a title instance of break iterator was created.
+ *
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_CREATE_TITLE,
+
+ /**
+ * Indicates that an internal dictionary break engine was created.
+ *
+ * Provides one C-style string to UTraceData: the script code of what
+ * the break engine cover ("Hani", "Khmr", "Laoo", "Mymr", or "Thai").
+ *
+ * @stable ICU 67
+ */
+ UTRACE_UBRK_CREATE_BREAK_ENGINE,
+
+#ifndef U_HIDE_INTERNAL_API
+ /**
+ * One more than the highest normal break iterator trace location.
+ * @internal The numeric value may change over time, see ICU ticket #12420.
+ */
+ UTRACE_UBRK_LIMIT,
+#endif // U_HIDE_INTERNAL_API
+
+} UTraceFunctionNumber;
+
+/**
+ * Setter for the trace level.
+ * @param traceLevel A UTraceLevel value.
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrace_setLevel(int32_t traceLevel);
+
+/**
+ * Getter for the trace level.
+ * @return The UTraceLevel value being used by ICU.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_getLevel(void);
+
+/* Trace function pointers types ----------------------------- */
+
+/**
+ * Type signature for the trace function to be called when entering a function.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being entered.
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceEntry(const void *context, int32_t fnNumber);
+
+/**
+ * Type signature for the trace function to be called when exiting from a function.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being exited.
+ * @param fmt A formatting string that describes the number and types
+ * of arguments included with the variable args. The fmt
+ * string has the same form as the utrace_vformat format
+ * string.
+ * @param args A variable arguments list. Contents are described by
+ * the fmt parameter.
+ * @see utrace_vformat
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceExit(const void *context, int32_t fnNumber,
+ const char *fmt, va_list args);
+
+/**
+ * Type signature for the trace function to be called from within an ICU function
+ * to display data or messages.
+ * @param context value supplied at the time the trace functions are set.
+ * @param fnNumber Enum value indicating the ICU function being exited.
+ * @param level The current tracing level
+ * @param fmt A format string describing the tracing data that is supplied
+ * as variable args
+ * @param args The data being traced, passed as variable args.
+ * @stable ICU 2.8
+ */
+typedef void U_CALLCONV
+UTraceData(const void *context, int32_t fnNumber, int32_t level,
+ const char *fmt, va_list args);
+
+/**
+ * Set ICU Tracing functions. Installs application-provided tracing
+ * functions into ICU. After doing this, subsequent ICU operations
+ * will call back to the installed functions, providing a trace
+ * of the use of ICU. Passing a NULL pointer for a tracing function
+ * is allowed, and inhibits tracing action at points where that function
+ * would be called.
+ * <p>
+ * Tracing and Threads: Tracing functions are global to a process, and
+ * will be called in response to ICU operations performed by any
+ * thread. If tracing of an individual thread is desired, the
+ * tracing functions must themselves filter by checking that the
+ * current thread is the desired thread.
+ *
+ * @param context an uninterpreted pointer. Whatever is passed in
+ * here will in turn be passed to each of the tracing
+ * functions UTraceEntry, UTraceExit and UTraceData.
+ * ICU does not use or alter this pointer.
+ * @param e Callback function to be called on entry to a
+ * a traced ICU function.
+ * @param x Callback function to be called on exit from a
+ * traced ICU function.
+ * @param d Callback function to be called from within a
+ * traced ICU function, for the purpose of providing
+ * data to the trace.
+ *
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrace_setFunctions(const void *context,
+ UTraceEntry *e, UTraceExit *x, UTraceData *d);
+
+/**
+ * Get the currently installed ICU tracing functions. Note that a null function
+ * pointer will be returned if no trace function has been set.
+ *
+ * @param context The currently installed tracing context.
+ * @param e The currently installed UTraceEntry function.
+ * @param x The currently installed UTraceExit function.
+ * @param d The currently installed UTraceData function.
+ * @stable ICU 2.8
+ */
+U_CAPI void U_EXPORT2
+utrace_getFunctions(const void **context,
+ UTraceEntry **e, UTraceExit **x, UTraceData **d);
+
+
+
+/*
+ *
+ * ICU trace format string syntax
+ *
+ * Format Strings are passed to UTraceData functions, and define the
+ * number and types of the trace data being passed on each call.
+ *
+ * The UTraceData function, which is supplied by the application,
+ * not by ICU, can either forward the trace data (passed via
+ * varargs) and the format string back to ICU for formatting into
+ * a displayable string, or it can interpret the format itself,
+ * and do as it wishes with the trace data.
+ *
+ *
+ * Goals for the format string
+ * - basic data output
+ * - easy to use for trace programmer
+ * - sufficient provision for data types for trace output readability
+ * - well-defined types and binary portable APIs
+ *
+ * Non-goals
+ * - printf compatibility
+ * - fancy formatting
+ * - argument reordering and other internationalization features
+ *
+ * ICU trace format strings contain plain text with argument inserts,
+ * much like standard printf format strings.
+ * Each insert begins with a '%', then optionally contains a 'v',
+ * then exactly one type character.
+ * Two '%' in a row represent a '%' instead of an insert.
+ * The trace format strings need not have \n at the end.
+ *
+ *
+ * Types
+ * -----
+ *
+ * Type characters:
+ * - c A char character in the default codepage.
+ * - s A NUL-terminated char * string in the default codepage.
+ * - S A UChar * string. Requires two params, (ptr, length). Length=-1 for nul term.
+ * - b A byte (8-bit integer).
+ * - h A 16-bit integer. Also a 16 bit Unicode code unit.
+ * - d A 32-bit integer. Also a 20 bit Unicode code point value.
+ * - l A 64-bit integer.
+ * - p A data pointer.
+ *
+ * Vectors
+ * -------
+ *
+ * If the 'v' is not specified, then one item of the specified type
+ * is passed in.
+ * If the 'v' (for "vector") is specified, then a vector of items of the
+ * specified type is passed in, via a pointer to the first item
+ * and an int32_t value for the length of the vector.
+ * Length==-1 means zero or NUL termination. Works for vectors of all types.
+ *
+ * Note: %vS is a vector of (UChar *) strings. The strings must
+ * be nul terminated as there is no way to provide a
+ * separate length parameter for each string. The length
+ * parameter (required for all vectors) is the number of
+ * strings, not the length of the strings.
+ *
+ * Examples
+ * --------
+ *
+ * These examples show the parameters that will be passed to an application's
+ * UTraceData() function for various formats.
+ *
+ * - the precise formatting is up to the application!
+ * - the examples use type casts for arguments only to _show_ the types of
+ * arguments without needing variable declarations in the examples;
+ * the type casts will not be necessary in actual code
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "There is a character %c in the string %s.", // Format String
+ * (char)c, (const char *)s); // varargs parameters
+ * -> There is a character 0x42 'B' in the string "Bravo".
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "Vector of bytes %vb vector of chars %vc",
+ * (const uint8_t *)bytes, (int32_t)bytesLength,
+ * (const char *)chars, (int32_t)charsLength);
+ * -> Vector of bytes
+ * 42 63 64 3f [4]
+ * vector of chars
+ * "Bcd?"[4]
+ *
+ * UTraceDataFunc(context, fnNumber, level,
+ * "An int32_t %d and a whole bunch of them %vd",
+ * (int32_t)-5, (const int32_t *)ints, (int32_t)intsLength);
+ * -> An int32_t 0xfffffffb and a whole bunch of them
+ * fffffffb 00000005 0000010a [3]
+ *
+ */
+
+
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * back to this function to format the trace output in a
+ * human readable form. Note that a UTraceData function may choose
+ * to not format the data; it could, for example, save it in
+ * in the raw form it was received (more compact), leaving
+ * formatting for a later trace analysis tool.
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param args Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_vformat(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, va_list args);
+
+/**
+ * Trace output Formatter. An application's UTraceData tracing functions may call
+ * this function to format any additional trace data, beyond that
+ * provided by default, in human readable form with the same
+ * formatting conventions used by utrace_vformat().
+ * @param outBuf pointer to a buffer to receive the formatted output. Output
+ * will be nul terminated if there is space in the buffer -
+ * if the length of the requested output < the output buffer size.
+ * @param capacity Length of the output buffer.
+ * @param indent Number of spaces to indent the output. Intended to allow
+ * data displayed from nested functions to be indented for readability.
+ * @param fmt Format specification for the data to output
+ * @param ... Data to be formatted.
+ * @return Length of formatted output, including the terminating NUL.
+ * If buffer capacity is insufficient, the required capacity is returned.
+ * @stable ICU 2.8
+ */
+U_CAPI int32_t U_EXPORT2
+utrace_format(char *outBuf, int32_t capacity,
+ int32_t indent, const char *fmt, ...);
+
+
+
+/* Trace function numbers --------------------------------------------------- */
+
+/**
+ * Get the name of a function from its trace function number.
+ *
+ * @param fnNumber The trace number for an ICU function.
+ * @return The name string for the function.
+ *
+ * @see UTraceFunctionNumber
+ * @stable ICU 2.8
+ */
+U_CAPI const char * U_EXPORT2
+utrace_functionName(int32_t fnNumber);
+
+U_CDECL_END
+
+#endif
diff --git a/intl/icu/source/common/unicode/utypes.h b/intl/icu/source/common/unicode/utypes.h
new file mode 100644
index 0000000000..f890d5d1db
--- /dev/null
+++ b/intl/icu/source/common/unicode/utypes.h
@@ -0,0 +1,730 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 1996-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+**********************************************************************
+*
+* FILE NAME : UTYPES.H (formerly ptypes.h)
+*
+* Date Name Description
+* 12/11/96 helena Creation.
+* 02/27/97 aliu Added typedefs for UClassID, int8, int16, int32,
+* uint8, uint16, and uint32.
+* 04/01/97 aliu Added XP_CPLUSPLUS and modified to work under C as
+* well as C++.
+* Modified to use memcpy() for uprv_arrayCopy() fns.
+* 04/14/97 aliu Added TPlatformUtilities.
+* 05/07/97 aliu Added import/export specifiers (replacing the old
+* broken EXT_CLASS). Added version number for our
+* code. Cleaned up header.
+* 6/20/97 helena Java class name change.
+* 08/11/98 stephen UErrorCode changed from typedef to enum
+* 08/12/98 erm Changed T_ANALYTIC_PACKAGE_VERSION to 3
+* 08/14/98 stephen Added uprv_arrayCopy() for int8_t, int16_t, int32_t
+* 12/09/98 jfitz Added BUFFER_OVERFLOW_ERROR (bug 1100066)
+* 04/20/99 stephen Cleaned up & reworked for autoconf.
+* Renamed to utypes.h.
+* 05/05/99 stephen Changed to use <inttypes.h>
+* 12/07/99 helena Moved copyright notice string from ucnv_bld.h here.
+*******************************************************************************
+*/
+
+#ifndef UTYPES_H
+#define UTYPES_H
+
+
+#include "unicode/umachine.h"
+#include "unicode/uversion.h"
+#include "unicode/uconfig.h"
+#include <float.h>
+
+#if !U_NO_DEFAULT_INCLUDE_UTF_HEADERS
+# include "unicode/utf.h"
+#endif
+
+/*!
+ * \file
+ * \brief Basic definitions for ICU, for both C and C++ APIs
+ *
+ * This file defines basic types, constants, and enumerations directly or
+ * indirectly by including other header files, especially utf.h for the
+ * basic character and string definitions and umachine.h for consistent
+ * integer and other types.
+ */
+
+
+/**
+ * \def U_SHOW_CPLUSPLUS_API
+ * @internal
+ */
+#ifdef __cplusplus
+# ifndef U_SHOW_CPLUSPLUS_API
+# define U_SHOW_CPLUSPLUS_API 1
+# endif
+#else
+# undef U_SHOW_CPLUSPLUS_API
+# define U_SHOW_CPLUSPLUS_API 0
+#endif
+
+/** @{ API visibility control */
+
+/**
+ * \def U_HIDE_DRAFT_API
+ * Define this to 1 to request that draft API be "hidden"
+ * @internal
+ */
+/**
+ * \def U_HIDE_INTERNAL_API
+ * Define this to 1 to request that internal API be "hidden"
+ * @internal
+ */
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_DRAFT_API)
+#define U_HIDE_DRAFT_API 1
+#endif
+#if !U_DEFAULT_SHOW_DRAFT && !defined(U_SHOW_INTERNAL_API)
+#define U_HIDE_INTERNAL_API 1
+#endif
+
+/** @} */
+
+/*===========================================================================*/
+/* ICUDATA naming scheme */
+/*===========================================================================*/
+
+/**
+ * \def U_ICUDATA_TYPE_LETTER
+ *
+ * This is a platform-dependent string containing one letter:
+ * - b for big-endian, ASCII-family platforms
+ * - l for little-endian, ASCII-family platforms
+ * - e for big-endian, EBCDIC-family platforms
+ * This letter is part of the common data file name.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_ICUDATA_TYPE_LITLETTER
+ * The non-string form of U_ICUDATA_TYPE_LETTER
+ * @stable ICU 2.0
+ */
+#if U_CHARSET_FAMILY
+# if U_IS_BIG_ENDIAN
+ /* EBCDIC - should always be BE */
+# define U_ICUDATA_TYPE_LETTER "e"
+# define U_ICUDATA_TYPE_LITLETTER e
+# else
+# error "Don't know what to do with little endian EBCDIC!"
+# define U_ICUDATA_TYPE_LETTER "x"
+# define U_ICUDATA_TYPE_LITLETTER x
+# endif
+#else
+# if U_IS_BIG_ENDIAN
+ /* Big-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "b"
+# define U_ICUDATA_TYPE_LITLETTER b
+# else
+ /* Little-endian ASCII */
+# define U_ICUDATA_TYPE_LETTER "l"
+# define U_ICUDATA_TYPE_LITLETTER l
+# endif
+#endif
+
+/**
+ * A single string literal containing the icudata stub name. i.e. 'icudt18e' for
+ * ICU 1.8.x on EBCDIC, etc..
+ * @stable ICU 2.0
+ */
+#define U_ICUDATA_NAME "icudt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER
+#ifndef U_HIDE_INTERNAL_API
+#define U_USRDATA_NAME "usrdt" U_ICU_VERSION_SHORT U_ICUDATA_TYPE_LETTER /**< @internal */
+#define U_USE_USRDATA 0 /**< @internal */
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * U_ICU_ENTRY_POINT is the name of the DLL entry point to the ICU data library.
+ * Defined as a literal, not a string.
+ * Tricky Preprocessor use - ## operator replaces macro parameters with the literal string
+ * from the corresponding macro invocation, _before_ other macro substitutions.
+ * Need a nested \#defines to get the actual version numbers rather than
+ * the literal text U_ICU_VERSION_MAJOR_NUM into the name.
+ * The net result will be something of the form
+ * \#define U_ICU_ENTRY_POINT icudt19_dat
+ * @stable ICU 2.4
+ */
+#define U_ICUDATA_ENTRY_POINT U_DEF2_ICUDATA_ENTRY_POINT(U_ICU_VERSION_MAJOR_NUM,U_LIB_SUFFIX_C_NAME)
+
+#ifndef U_HIDE_INTERNAL_API
+/**
+ * Do not use. Note that it's OK for the 2nd argument to be undefined (literal).
+ * @internal
+ */
+#define U_DEF2_ICUDATA_ENTRY_POINT(major,suff) U_DEF_ICUDATA_ENTRY_POINT(major,suff)
+
+/**
+ * Do not use.
+ * @internal
+ */
+#ifndef U_DEF_ICUDATA_ENTRY_POINT
+/* affected by symbol renaming. See platform.h */
+#ifndef U_LIB_SUFFIX_C_NAME
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##major##_dat
+#else
+#define U_DEF_ICUDATA_ENTRY_POINT(major, suff) icudt##suff ## major##_dat
+#endif
+#endif
+#endif /* U_HIDE_INTERNAL_API */
+
+/**
+ * \def NULL
+ * Define NULL if necessary, to nullptr for C++ and to ((void *)0) for C.
+ * @stable ICU 2.0
+ */
+#ifndef NULL
+#ifdef __cplusplus
+#define NULL nullptr
+#else
+#define NULL ((void *)0)
+#endif
+#endif
+
+/*===========================================================================*/
+/* Calendar/TimeZone data types */
+/*===========================================================================*/
+
+/**
+ * Date and Time data type.
+ * This is a primitive data type that holds the date and time
+ * as the number of milliseconds since 1970-jan-01, 00:00 UTC.
+ * UTC leap seconds are ignored.
+ * @stable ICU 2.0
+ */
+typedef double UDate;
+
+/** The number of milliseconds per second @stable ICU 2.0 */
+#define U_MILLIS_PER_SECOND (1000)
+/** The number of milliseconds per minute @stable ICU 2.0 */
+#define U_MILLIS_PER_MINUTE (60000)
+/** The number of milliseconds per hour @stable ICU 2.0 */
+#define U_MILLIS_PER_HOUR (3600000)
+/** The number of milliseconds per day @stable ICU 2.0 */
+#define U_MILLIS_PER_DAY (86400000)
+
+/**
+ * Maximum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MAX DBL_MAX
+
+/**
+ * Minimum UDate value
+ * @stable ICU 4.8
+ */
+#define U_DATE_MIN -U_DATE_MAX
+
+/*===========================================================================*/
+/* Shared library/DLL import-export API control */
+/*===========================================================================*/
+
+/*
+ * Control of symbol import/export.
+ * ICU is separated into three libraries.
+ */
+
+/**
+ * \def U_COMBINED_IMPLEMENTATION
+ * Set to export library symbols from inside the ICU library
+ * when all of ICU is in a single library.
+ * This can be set as a compiler option while building ICU, and it
+ * needs to be the first one tested to override U_COMMON_API, U_I18N_API, etc.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_DATA_API
+ * Set to export library symbols from inside the stubdata library,
+ * and to import them from outside.
+ * @stable ICU 3.0
+ */
+
+/**
+ * \def U_COMMON_API
+ * Set to export library symbols from inside the common library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_I18N_API
+ * Set to export library symbols from inside the i18n library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUT_API
+ * Set to export library symbols from inside the layout engine library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_LAYOUTEX_API
+ * Set to export library symbols from inside the layout extensions library,
+ * and to import them from outside.
+ * @stable ICU 2.6
+ */
+
+/**
+ * \def U_IO_API
+ * Set to export library symbols from inside the ustdio library,
+ * and to import them from outside.
+ * @stable ICU 2.0
+ */
+
+/**
+ * \def U_TOOLUTIL_API
+ * Set to export library symbols from inside the toolutil library,
+ * and to import them from outside.
+ * @stable ICU 3.4
+ */
+
+#ifdef U_IN_DOXYGEN
+// This definition is required when generating the API docs.
+#define U_COMBINED_IMPLEMENTATION 1
+#endif
+
+#if defined(U_COMBINED_IMPLEMENTATION)
+#define U_DATA_API U_EXPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_EXPORT
+#define U_TOOLUTIL_API U_EXPORT
+#elif defined(U_STATIC_IMPLEMENTATION)
+#define U_DATA_API
+#define U_COMMON_API
+#define U_I18N_API
+#define U_LAYOUT_API
+#define U_LAYOUTEX_API
+#define U_IO_API
+#define U_TOOLUTIL_API
+#elif defined(U_COMMON_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_EXPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_I18N_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_EXPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUT_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_EXPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_LAYOUTEX_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_EXPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_IO_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_EXPORT
+#define U_TOOLUTIL_API U_IMPORT
+#elif defined(U_TOOLUTIL_IMPLEMENTATION)
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_EXPORT
+#else
+#define U_DATA_API U_IMPORT
+#define U_COMMON_API U_IMPORT
+#define U_I18N_API U_IMPORT
+#define U_LAYOUT_API U_IMPORT
+#define U_LAYOUTEX_API U_IMPORT
+#define U_IO_API U_IMPORT
+#define U_TOOLUTIL_API U_IMPORT
+#endif
+
+/**
+ * \def U_STANDARD_CPP_NAMESPACE
+ * Control of C++ Namespace
+ * @stable ICU 2.0
+ */
+#ifdef __cplusplus
+#define U_STANDARD_CPP_NAMESPACE ::
+#else
+#define U_STANDARD_CPP_NAMESPACE
+#endif
+
+/*===========================================================================*/
+/* UErrorCode */
+/*===========================================================================*/
+
+/**
+ * Standard ICU4C error code type, a substitute for exceptions.
+ *
+ * Initialize the UErrorCode with U_ZERO_ERROR, and check for success or
+ * failure using U_SUCCESS() or U_FAILURE():
+ *
+ * UErrorCode errorCode = U_ZERO_ERROR;
+ * // call ICU API that needs an error code parameter.
+ * if (U_FAILURE(errorCode)) {
+ * // An error occurred. Handle it here.
+ * }
+ *
+ * C++ code should use icu::ErrorCode, available in unicode/errorcode.h, or a
+ * suitable subclass.
+ *
+ * For more information, see:
+ * https://unicode-org.github.io/icu/userguide/dev/codingguidelines#details-about-icu-error-codes
+ *
+ * Note: By convention, ICU functions that take a reference (C++) or a pointer
+ * (C) to a UErrorCode first test:
+ *
+ * if (U_FAILURE(errorCode)) { return immediately; }
+ *
+ * so that in a chain of such functions the first one that sets an error code
+ * causes the following ones to not perform any operations.
+ *
+ * @stable ICU 2.0
+ */
+typedef enum UErrorCode {
+ /* The ordering of U_ERROR_INFO_START Vs U_USING_FALLBACK_WARNING looks weird
+ * and is that way because VC++ debugger displays first encountered constant,
+ * which is not the what the code is used for
+ */
+
+ U_USING_FALLBACK_WARNING = -128, /**< A resource bundle lookup returned a fallback result (not an error) */
+
+ U_ERROR_WARNING_START = -128, /**< Start of information results (semantically successful) */
+
+ U_USING_DEFAULT_WARNING = -127, /**< A resource bundle lookup returned a result from the root locale (not an error) */
+
+ U_SAFECLONE_ALLOCATED_WARNING = -126, /**< A SafeClone operation required allocating memory (informational only) */
+
+ U_STATE_OLD_WARNING = -125, /**< ICU has to use compatibility layer to construct the service. Expect performance/memory usage degradation. Consider upgrading */
+
+ U_STRING_NOT_TERMINATED_WARNING = -124,/**< An output string could not be NUL-terminated because output length==destCapacity. */
+
+ U_SORT_KEY_TOO_SHORT_WARNING = -123, /**< Number of levels requested in getBound is higher than the number of levels in the sort key */
+
+ U_AMBIGUOUS_ALIAS_WARNING = -122, /**< This converter alias can go to different converter implementations */
+
+ U_DIFFERENT_UCA_VERSION = -121, /**< ucol_open encountered a mismatch between UCA version and collator image version, so the collator was constructed from rules. No impact to further function */
+
+ U_PLUGIN_CHANGED_LEVEL_WARNING = -120, /**< A plugin caused a level change. May not be an error, but later plugins may not load. */
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal UErrorCode warning value.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_ERROR_WARNING_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ U_ZERO_ERROR = 0, /**< No error, no warning. */
+
+ U_ILLEGAL_ARGUMENT_ERROR = 1, /**< Start of codes indicating failure */
+ U_MISSING_RESOURCE_ERROR = 2, /**< The requested resource cannot be found */
+ U_INVALID_FORMAT_ERROR = 3, /**< Data format is not what is expected */
+ U_FILE_ACCESS_ERROR = 4, /**< The requested file cannot be found */
+ U_INTERNAL_PROGRAM_ERROR = 5, /**< Indicates a bug in the library code */
+ U_MESSAGE_PARSE_ERROR = 6, /**< Unable to parse a message (message format) */
+ U_MEMORY_ALLOCATION_ERROR = 7, /**< Memory allocation error */
+ U_INDEX_OUTOFBOUNDS_ERROR = 8, /**< Trying to access the index that is out of bounds */
+ U_PARSE_ERROR = 9, /**< Equivalent to Java ParseException */
+ U_INVALID_CHAR_FOUND = 10, /**< Character conversion: Unmappable input sequence. In other APIs: Invalid character. */
+ U_TRUNCATED_CHAR_FOUND = 11, /**< Character conversion: Incomplete input sequence. */
+ U_ILLEGAL_CHAR_FOUND = 12, /**< Character conversion: Illegal input sequence/combination of input units. */
+ U_INVALID_TABLE_FORMAT = 13, /**< Conversion table file found, but corrupted */
+ U_INVALID_TABLE_FILE = 14, /**< Conversion table file not found */
+ U_BUFFER_OVERFLOW_ERROR = 15, /**< A result would not fit in the supplied buffer */
+ U_UNSUPPORTED_ERROR = 16, /**< Requested operation not supported in current context */
+ U_RESOURCE_TYPE_MISMATCH = 17, /**< an operation is requested over a resource that does not support it */
+ U_ILLEGAL_ESCAPE_SEQUENCE = 18, /**< ISO-2022 illegal escape sequence */
+ U_UNSUPPORTED_ESCAPE_SEQUENCE = 19, /**< ISO-2022 unsupported escape sequence */
+ U_NO_SPACE_AVAILABLE = 20, /**< No space available for in-buffer expansion for Arabic shaping */
+ U_CE_NOT_FOUND_ERROR = 21, /**< Currently used only while setting variable top, but can be used generally */
+ U_PRIMARY_TOO_LONG_ERROR = 22, /**< User tried to set variable top to a primary that is longer than two bytes */
+ U_STATE_TOO_OLD_ERROR = 23, /**< ICU cannot construct a service from this state, as it is no longer supported */
+ U_TOO_MANY_ALIASES_ERROR = 24, /**< There are too many aliases in the path to the requested resource.
+ It is very possible that a circular alias definition has occurred */
+ U_ENUM_OUT_OF_SYNC_ERROR = 25, /**< UEnumeration out of sync with underlying collection */
+ U_INVARIANT_CONVERSION_ERROR = 26, /**< Unable to convert a UChar* string to char* with the invariant converter. */
+ U_INVALID_STATE_ERROR = 27, /**< Requested operation can not be completed with ICU in its current state */
+ U_COLLATOR_VERSION_MISMATCH = 28, /**< Collator version is not compatible with the base version */
+ U_USELESS_COLLATOR_ERROR = 29, /**< Collator is options only and no base is specified */
+ U_NO_WRITE_PERMISSION = 30, /**< Attempt to modify read-only or constant data. */
+ /**
+ * The input is impractically long for an operation.
+ * It is rejected because it may lead to problems such as excessive
+ * processing time, stack depth, or heap memory requirements.
+ *
+ * @stable ICU 68
+ */
+ U_INPUT_TOO_LONG_ERROR = 31,
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest standard error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_STANDARD_ERROR_LIMIT = 32,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10000 0x10100 are reserved for Transliterator.
+ */
+ U_BAD_VARIABLE_DEFINITION=0x10000,/**< Missing '$' or duplicate variable name */
+ U_PARSE_ERROR_START = 0x10000, /**< Start of Transliterator errors */
+ U_MALFORMED_RULE, /**< Elements of a rule are misplaced */
+ U_MALFORMED_SET, /**< A UnicodeSet pattern is invalid*/
+ U_MALFORMED_SYMBOL_REFERENCE, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_UNICODE_ESCAPE, /**< A Unicode escape pattern is invalid*/
+ U_MALFORMED_VARIABLE_DEFINITION, /**< A variable definition is invalid */
+ U_MALFORMED_VARIABLE_REFERENCE, /**< A variable reference is invalid */
+ U_MISMATCHED_SEGMENT_DELIMITERS, /**< UNUSED as of ICU 2.4 */
+ U_MISPLACED_ANCHOR_START, /**< A start anchor appears at an illegal position */
+ U_MISPLACED_CURSOR_OFFSET, /**< A cursor offset occurs at an illegal position */
+ U_MISPLACED_QUANTIFIER, /**< A quantifier appears after a segment close delimiter */
+ U_MISSING_OPERATOR, /**< A rule contains no operator */
+ U_MISSING_SEGMENT_CLOSE, /**< UNUSED as of ICU 2.4 */
+ U_MULTIPLE_ANTE_CONTEXTS, /**< More than one ante context */
+ U_MULTIPLE_CURSORS, /**< More than one cursor */
+ U_MULTIPLE_POST_CONTEXTS, /**< More than one post context */
+ U_TRAILING_BACKSLASH, /**< A dangling backslash */
+ U_UNDEFINED_SEGMENT_REFERENCE, /**< A segment reference does not correspond to a defined segment */
+ U_UNDEFINED_VARIABLE, /**< A variable reference does not correspond to a defined variable */
+ U_UNQUOTED_SPECIAL, /**< A special character was not quoted or escaped */
+ U_UNTERMINATED_QUOTE, /**< A closing single quote is missing */
+ U_RULE_MASK_ERROR, /**< A rule is hidden by an earlier more general rule */
+ U_MISPLACED_COMPOUND_FILTER, /**< A compound filter is in an invalid location */
+ U_MULTIPLE_COMPOUND_FILTERS, /**< More than one compound filter */
+ U_INVALID_RBT_SYNTAX, /**< A "::id" rule was passed to the RuleBasedTransliterator parser */
+ U_INVALID_PROPERTY_PATTERN, /**< UNUSED as of ICU 2.4 */
+ U_MALFORMED_PRAGMA, /**< A 'use' pragma is invalid */
+ U_UNCLOSED_SEGMENT, /**< A closing ')' is missing */
+ U_ILLEGAL_CHAR_IN_SEGMENT, /**< UNUSED as of ICU 2.4 */
+ U_VARIABLE_RANGE_EXHAUSTED, /**< Too many stand-ins generated for the given variable range */
+ U_VARIABLE_RANGE_OVERLAP, /**< The variable range overlaps characters used in rules */
+ U_ILLEGAL_CHARACTER, /**< A special character is outside its allowed context */
+ U_INTERNAL_TRANSLITERATOR_ERROR, /**< Internal transliterator system error */
+ U_INVALID_ID, /**< A "::id" rule specifies an unknown transliterator */
+ U_INVALID_FUNCTION, /**< A "&fn()" rule specifies an unknown transliterator */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal Transliterator error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_PARSE_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10100 0x10200 are reserved for the formatting API.
+ */
+ U_UNEXPECTED_TOKEN=0x10100, /**< Syntax error in format pattern */
+ U_FMT_PARSE_ERROR_START=0x10100, /**< Start of format library errors */
+ U_MULTIPLE_DECIMAL_SEPARATORS, /**< More than one decimal separator in number pattern */
+ U_MULTIPLE_DECIMAL_SEPERATORS = U_MULTIPLE_DECIMAL_SEPARATORS, /**< Typo: kept for backward compatibility. Use U_MULTIPLE_DECIMAL_SEPARATORS */
+ U_MULTIPLE_EXPONENTIAL_SYMBOLS, /**< More than one exponent symbol in number pattern */
+ U_MALFORMED_EXPONENTIAL_PATTERN, /**< Grouping symbol in exponent pattern */
+ U_MULTIPLE_PERCENT_SYMBOLS, /**< More than one percent symbol in number pattern */
+ U_MULTIPLE_PERMILL_SYMBOLS, /**< More than one permill symbol in number pattern */
+ U_MULTIPLE_PAD_SPECIFIERS, /**< More than one pad symbol in number pattern */
+ U_PATTERN_SYNTAX_ERROR, /**< Syntax error in format pattern */
+ U_ILLEGAL_PAD_POSITION, /**< Pad symbol misplaced in number pattern */
+ U_UNMATCHED_BRACES, /**< Braces do not match in message pattern */
+ U_UNSUPPORTED_PROPERTY, /**< UNUSED as of ICU 2.4 */
+ U_UNSUPPORTED_ATTRIBUTE, /**< UNUSED as of ICU 2.4 */
+ U_ARGUMENT_TYPE_MISMATCH, /**< Argument name and argument index mismatch in MessageFormat functions */
+ U_DUPLICATE_KEYWORD, /**< Duplicate keyword in PluralFormat */
+ U_UNDEFINED_KEYWORD, /**< Undefined Plural keyword */
+ U_DEFAULT_KEYWORD_MISSING, /**< Missing DEFAULT rule in plural rules */
+ U_DECIMAL_NUMBER_SYNTAX_ERROR, /**< Decimal number syntax error */
+ U_FORMAT_INEXACT_ERROR, /**< Cannot format a number exactly and rounding mode is ROUND_UNNECESSARY @stable ICU 4.8 */
+ U_NUMBER_ARG_OUTOFBOUNDS_ERROR, /**< The argument to a NumberFormatter helper method was out of bounds; the bounds are usually 0 to 999. @stable ICU 61 */
+ U_NUMBER_SKELETON_SYNTAX_ERROR, /**< The number skeleton passed to C++ NumberFormatter or C UNumberFormatter was invalid or contained a syntax error. @stable ICU 62 */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal formatting API error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_FMT_PARSE_ERROR_LIMIT = 0x10114,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10200 0x102ff are reserved for BreakIterator.
+ */
+ U_BRK_INTERNAL_ERROR=0x10200, /**< An internal error (bug) was detected. */
+ U_BRK_ERROR_START=0x10200, /**< Start of codes indicating Break Iterator failures */
+ U_BRK_HEX_DIGITS_EXPECTED, /**< Hex digits expected as part of a escaped char in a rule. */
+ U_BRK_SEMICOLON_EXPECTED, /**< Missing ';' at the end of a RBBI rule. */
+ U_BRK_RULE_SYNTAX, /**< Syntax error in RBBI rule. */
+ U_BRK_UNCLOSED_SET, /**< UnicodeSet writing an RBBI rule missing a closing ']'. */
+ U_BRK_ASSIGN_ERROR, /**< Syntax error in RBBI rule assignment statement. */
+ U_BRK_VARIABLE_REDFINITION, /**< RBBI rule $Variable redefined. */
+ U_BRK_MISMATCHED_PAREN, /**< Mis-matched parentheses in an RBBI rule. */
+ U_BRK_NEW_LINE_IN_QUOTED_STRING, /**< Missing closing quote in an RBBI rule. */
+ U_BRK_UNDEFINED_VARIABLE, /**< Use of an undefined $Variable in an RBBI rule. */
+ U_BRK_INIT_ERROR, /**< Initialization failure. Probable missing ICU Data. */
+ U_BRK_RULE_EMPTY_SET, /**< Rule contains an empty Unicode Set. */
+ U_BRK_UNRECOGNIZED_OPTION, /**< !!option in RBBI rules not recognized. */
+ U_BRK_MALFORMED_RULE_TAG, /**< The {nnn} tag on a rule is malformed */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal BreakIterator error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_BRK_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10300-0x103ff are reserved for regular expression related errors.
+ */
+ U_REGEX_INTERNAL_ERROR=0x10300, /**< An internal error (bug) was detected. */
+ U_REGEX_ERROR_START=0x10300, /**< Start of codes indicating Regexp failures */
+ U_REGEX_RULE_SYNTAX, /**< Syntax error in regexp pattern. */
+ U_REGEX_INVALID_STATE, /**< RegexMatcher in invalid state for requested operation */
+ U_REGEX_BAD_ESCAPE_SEQUENCE, /**< Unrecognized backslash escape sequence in pattern */
+ U_REGEX_PROPERTY_SYNTAX, /**< Incorrect Unicode property */
+ U_REGEX_UNIMPLEMENTED, /**< Use of regexp feature that is not yet implemented. */
+ U_REGEX_MISMATCHED_PAREN, /**< Incorrectly nested parentheses in regexp pattern. */
+ U_REGEX_NUMBER_TOO_BIG, /**< Decimal number is too large. */
+ U_REGEX_BAD_INTERVAL, /**< Error in {min,max} interval */
+ U_REGEX_MAX_LT_MIN, /**< In {min,max}, max is less than min. */
+ U_REGEX_INVALID_BACK_REF, /**< Back-reference to a non-existent capture group. */
+ U_REGEX_INVALID_FLAG, /**< Invalid value for match mode flags. */
+ U_REGEX_LOOK_BEHIND_LIMIT, /**< Look-Behind pattern matches must have a bounded maximum length. */
+ U_REGEX_SET_CONTAINS_STRING, /**< Regexps cannot have UnicodeSets containing strings.*/
+#ifndef U_HIDE_DEPRECATED_API
+ U_REGEX_OCTAL_TOO_BIG, /**< Octal character constants must be <= 0377. @deprecated ICU 54. This error cannot occur. */
+#endif /* U_HIDE_DEPRECATED_API */
+ U_REGEX_MISSING_CLOSE_BRACKET=U_REGEX_SET_CONTAINS_STRING+2, /**< Missing closing bracket on a bracket expression. */
+ U_REGEX_INVALID_RANGE, /**< In a character range [x-y], x is greater than y. */
+ U_REGEX_STACK_OVERFLOW, /**< Regular expression backtrack stack overflow. */
+ U_REGEX_TIME_OUT, /**< Maximum allowed match time exceeded */
+ U_REGEX_STOPPED_BY_CALLER, /**< Matching operation aborted by user callback fn. */
+ U_REGEX_PATTERN_TOO_BIG, /**< Pattern exceeds limits on size or complexity. @stable ICU 55 */
+ U_REGEX_INVALID_CAPTURE_GROUP_NAME, /**< Invalid capture group name. @stable ICU 55 */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal regular expression error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_REGEX_ERROR_LIMIT=U_REGEX_STOPPED_BY_CALLER+3,
+#endif // U_HIDE_DEPRECATED_API
+
+ /*
+ * Error codes in the range 0x10400-0x104ff are reserved for IDNA related error codes.
+ */
+ U_IDNA_PROHIBITED_ERROR=0x10400,
+ U_IDNA_ERROR_START=0x10400,
+ U_IDNA_UNASSIGNED_ERROR,
+ U_IDNA_CHECK_BIDI_ERROR,
+ U_IDNA_STD3_ASCII_RULES_ERROR,
+ U_IDNA_ACE_PREFIX_ERROR,
+ U_IDNA_VERIFICATION_ERROR,
+ U_IDNA_LABEL_TOO_LONG_ERROR,
+ U_IDNA_ZERO_LENGTH_LABEL_ERROR,
+ U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR,
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal IDNA error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_IDNA_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+ /*
+ * Aliases for StringPrep
+ */
+ U_STRINGPREP_PROHIBITED_ERROR = U_IDNA_PROHIBITED_ERROR,
+ U_STRINGPREP_UNASSIGNED_ERROR = U_IDNA_UNASSIGNED_ERROR,
+ U_STRINGPREP_CHECK_BIDI_ERROR = U_IDNA_CHECK_BIDI_ERROR,
+
+ /*
+ * Error codes in the range 0x10500-0x105ff are reserved for Plugin related error codes.
+ */
+ U_PLUGIN_ERROR_START=0x10500, /**< Start of codes indicating plugin failures */
+ U_PLUGIN_TOO_HIGH=0x10500, /**< The plugin's level is too high to be loaded right now. */
+ U_PLUGIN_DIDNT_SET_LEVEL, /**< The plugin didn't call uplug_setPlugLevel in response to a QUERY */
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal plug-in error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_PLUGIN_ERROR_LIMIT,
+#endif // U_HIDE_DEPRECATED_API
+
+#ifndef U_HIDE_DEPRECATED_API
+ /**
+ * One more than the highest normal error code.
+ * @deprecated ICU 58 The numeric value may change over time, see ICU ticket #12420.
+ */
+ U_ERROR_LIMIT=U_PLUGIN_ERROR_LIMIT
+#endif // U_HIDE_DEPRECATED_API
+} UErrorCode;
+
+/* Use the following to determine if an UErrorCode represents */
+/* operational success or failure. */
+
+#ifdef __cplusplus
+ /**
+ * Does the error code indicate success?
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_SUCCESS(UErrorCode code) { return (UBool)(code<=U_ZERO_ERROR); }
+ /**
+ * Does the error code indicate a failure?
+ * @stable ICU 2.0
+ */
+ static
+ inline UBool U_FAILURE(UErrorCode code) { return (UBool)(code>U_ZERO_ERROR); }
+#else
+ /**
+ * Does the error code indicate success?
+ * @stable ICU 2.0
+ */
+# define U_SUCCESS(x) ((x)<=U_ZERO_ERROR)
+ /**
+ * Does the error code indicate a failure?
+ * @stable ICU 2.0
+ */
+# define U_FAILURE(x) ((x)>U_ZERO_ERROR)
+#endif
+
+/**
+ * Return a string for a UErrorCode value.
+ * The string will be the same as the name of the error code constant
+ * in the UErrorCode enum above.
+ * @stable ICU 2.0
+ */
+U_CAPI const char * U_EXPORT2
+u_errorName(UErrorCode code);
+
+
+#endif /* _UTYPES */
diff --git a/intl/icu/source/common/unicode/uvernum.h b/intl/icu/source/common/unicode/uvernum.h
new file mode 100644
index 0000000000..f0fc671b4b
--- /dev/null
+++ b/intl/icu/source/common/unicode/uvernum.h
@@ -0,0 +1,191 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2000-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uvernum.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+* Updated by: Steven R. Loomis
+*
+*/
+
+/**
+ * \file
+ * \brief C API: definitions of ICU version numbers
+ *
+ * This file is included by uversion.h and other files. This file contains only
+ * macros and definitions. The actual version numbers are defined here.
+ */
+
+ /*
+ * IMPORTANT: When updating version, the following things need to be done:
+ * source/common/unicode/uvernum.h - this file: update major, minor,
+ * patchlevel, suffix, version, short version constants, namespace,
+ * renaming macro, and copyright
+ *
+ * The following files need to be updated as well, which can be done
+ * by running the UNIX makefile target 'update-windows-makefiles' in icu4c/source.
+ *
+ * source/allinone/Build.Windows.IcuVersion.props - Update the IcuMajorVersion
+ * source/data/makedata.mak - change U_ICUDATA_NAME so that it contains
+ * the new major/minor combination, and UNICODE_VERSION
+ * for the Unicode version.
+ */
+
+#ifndef UVERNUM_H
+#define UVERNUM_H
+
+/** The standard copyright notice that gets compiled into each library.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING \
+ " Copyright (C) 2016 and later: Unicode, Inc. and others. License & terms of use: http://www.unicode.org/copyright.html "
+
+/** The current ICU major version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_MAJOR_NUM 73
+
+/** The current ICU minor version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_MINOR_NUM 1
+
+/** The current ICU patchlevel version as an integer.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION_PATCHLEVEL_NUM 0
+
+/** The current ICU build level version as an integer.
+ * This value is for use by ICU clients. It defaults to 0.
+ * @stable ICU 4.0
+ */
+#ifndef U_ICU_VERSION_BUILDLEVEL_NUM
+#define U_ICU_VERSION_BUILDLEVEL_NUM 0
+#endif
+
+/** Glued version suffix for renamers
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SUFFIX _73
+
+/**
+ * \def U_DEF2_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/**
+ * \def U_DEF_ICU_ENTRY_POINT_RENAME
+ * @internal
+ */
+/** Glued version suffix function for renamers
+ * This value will change in the subsequent releases of ICU.
+ * If a custom suffix (such as matching library suffixes) is desired, this can be modified.
+ * Note that if present, platform.h may contain an earlier definition of this macro.
+ * \def U_ICU_ENTRY_POINT_RENAME
+ * @stable ICU 4.2
+ */
+/**
+ * Disable the version suffix. Use the custom suffix if exists.
+ * \def U_DISABLE_VERSION_SUFFIX
+ * @internal
+ */
+#ifndef U_DISABLE_VERSION_SUFFIX
+#define U_DISABLE_VERSION_SUFFIX 0
+#endif
+
+#ifndef U_ICU_ENTRY_POINT_RENAME
+#ifdef U_HAVE_LIB_SUFFIX
+# if !U_DISABLE_VERSION_SUFFIX
+# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z) x ## y ## z
+# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y,z) U_DEF_ICU_ENTRY_POINT_RENAME(x,y,z)
+# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX,U_LIB_SUFFIX_C_NAME)
+# else
+# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_LIB_SUFFIX_C_NAME)
+# endif
+#else
+# if !U_DISABLE_VERSION_SUFFIX
+# define U_DEF_ICU_ENTRY_POINT_RENAME(x,y) x ## y
+# define U_DEF2_ICU_ENTRY_POINT_RENAME(x,y) U_DEF_ICU_ENTRY_POINT_RENAME(x,y)
+# define U_ICU_ENTRY_POINT_RENAME(x) U_DEF2_ICU_ENTRY_POINT_RENAME(x,U_ICU_VERSION_SUFFIX)
+# else
+# define U_ICU_ENTRY_POINT_RENAME(x) x
+# endif
+#endif
+#endif
+
+/** The current ICU library version as a dotted-decimal string. The patchlevel
+ * only appears in this string if it non-zero.
+ * This value will change in the subsequent releases of ICU
+ * @stable ICU 2.4
+ */
+#define U_ICU_VERSION "73.1"
+
+/**
+ * The current ICU library major version number as a string, for library name suffixes.
+ * This value will change in subsequent releases of ICU.
+ *
+ * Until ICU 4.8, this was the combination of the single-digit major and minor ICU version numbers
+ * into one string without dots ("48").
+ * Since ICU 49, it is the double-digit major ICU version number.
+ * See https://unicode-org.github.io/icu/userguide/design#version-numbers-in-icu
+ *
+ * @stable ICU 2.6
+ */
+#define U_ICU_VERSION_SHORT "73"
+
+#ifndef U_HIDE_INTERNAL_API
+/** Data version in ICU4C.
+ * @internal ICU 4.4 Internal Use Only
+ **/
+#define U_ICU_DATA_VERSION "73.1"
+#endif /* U_HIDE_INTERNAL_API */
+
+/*===========================================================================
+ * ICU collation framework version information
+ * Version info that can be obtained from a collator is affected by these
+ * numbers in a secret and magic way. Please use collator version as whole
+ *===========================================================================
+ */
+
+/**
+ * Collation runtime version (sort key generator, strcoll).
+ * If the version is different, sort keys for the same string could be different.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_RUNTIME_VERSION 9
+
+/**
+ * Collation builder code version.
+ * When this is different, the same tailoring might result
+ * in assigning different collation elements to code points.
+ * This value may change in subsequent releases of ICU.
+ * @stable ICU 2.4
+ */
+#define UCOL_BUILDER_VERSION 9
+
+#ifndef U_HIDE_DEPRECATED_API
+/**
+ * Constant 1.
+ * This was intended to be the version of collation tailorings,
+ * but instead the tailoring data carries a version number.
+ * @deprecated ICU 54
+ */
+#define UCOL_TAILORINGS_VERSION 1
+#endif /* U_HIDE_DEPRECATED_API */
+
+#endif
diff --git a/intl/icu/source/common/unicode/uversion.h b/intl/icu/source/common/unicode/uversion.h
new file mode 100644
index 0000000000..113568df8c
--- /dev/null
+++ b/intl/icu/source/common/unicode/uversion.h
@@ -0,0 +1,187 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2000-2011, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+*
+* file name: uversion.h
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Created by: Vladimir Weinstein
+*
+* Gets included by utypes.h and Windows .rc files
+*/
+
+/**
+ * \file
+ * \brief C API: API for accessing ICU version numbers.
+ */
+/*===========================================================================*/
+/* Main ICU version information */
+/*===========================================================================*/
+
+#ifndef UVERSION_H
+#define UVERSION_H
+
+#include "unicode/umachine.h"
+
+/* Actual version info lives in uvernum.h */
+#include "unicode/uvernum.h"
+
+/** Maximum length of the copyright string.
+ * @stable ICU 2.4
+ */
+#define U_COPYRIGHT_STRING_LENGTH 128
+
+/** An ICU version consists of up to 4 numbers from 0..255.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_LENGTH 4
+
+/** In a string, ICU version fields are delimited by dots.
+ * @stable ICU 2.4
+ */
+#define U_VERSION_DELIMITER '.'
+
+/** The maximum length of an ICU version string.
+ * @stable ICU 2.4
+ */
+#define U_MAX_VERSION_STRING_LENGTH 20
+
+/** The binary form of a version on ICU APIs is an array of 4 uint8_t.
+ * To compare two versions, use memcmp(v1,v2,sizeof(UVersionInfo)).
+ * @stable ICU 2.4
+ */
+typedef uint8_t UVersionInfo[U_MAX_VERSION_LENGTH];
+
+/*===========================================================================*/
+/* C++ namespace if supported. Versioned unless versioning is disabled. */
+/*===========================================================================*/
+
+/* Define C++ namespace symbols. */
+#ifdef __cplusplus
+
+/**
+ * \def U_NAMESPACE_BEGIN
+ * This is used to begin a declaration of a public ICU C++ API within
+ * versioned-ICU-namespace block.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_END
+ * This is used to end a declaration of a public ICU C++ API.
+ * It ends the versioned-ICU-namespace block begun by U_NAMESPACE_BEGIN.
+ *
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_USE
+ * This is used to specify that the rest of the code uses the
+ * public ICU C++ API namespace.
+ * @stable ICU 2.4
+ */
+
+/**
+ * \def U_NAMESPACE_QUALIFIER
+ * This is used to qualify that a function or class is part of
+ * the public ICU C++ API namespace.
+ *
+ * This macro is unnecessary since ICU 49 requires namespace support.
+ * You can just use "icu::" instead.
+ * @stable ICU 2.4
+ */
+
+# if U_DISABLE_RENAMING
+# define U_ICU_NAMESPACE icu
+ namespace U_ICU_NAMESPACE { }
+# else
+# define U_ICU_NAMESPACE U_ICU_ENTRY_POINT_RENAME(icu)
+ namespace U_ICU_NAMESPACE { }
+ namespace icu = U_ICU_NAMESPACE;
+# endif
+
+# define U_NAMESPACE_BEGIN namespace U_ICU_NAMESPACE {
+# define U_NAMESPACE_END }
+# define U_NAMESPACE_USE using namespace U_ICU_NAMESPACE;
+# define U_NAMESPACE_QUALIFIER U_ICU_NAMESPACE::
+
+# ifndef U_USING_ICU_NAMESPACE
+# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || \
+ defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION) || \
+ defined(U_LAYOUTEX_IMPLEMENTATION) || defined(U_TOOLUTIL_IMPLEMENTATION)
+# define U_USING_ICU_NAMESPACE 0
+# else
+# define U_USING_ICU_NAMESPACE 0
+# endif
+# endif
+# if U_USING_ICU_NAMESPACE
+ U_NAMESPACE_USE
+# endif
+#endif /* __cplusplus */
+
+/*===========================================================================*/
+/* General version helper functions. Definitions in putil.c */
+/*===========================================================================*/
+
+/**
+ * Parse a string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A string with dotted-decimal version information,
+ * with up to four non-negative number fields with
+ * values of up to 255 each.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+u_versionFromString(UVersionInfo versionArray, const char *versionString);
+
+/**
+ * Parse a Unicode string with dotted-decimal version information and
+ * fill in a UVersionInfo structure with the result.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The destination structure for the version information.
+ * @param versionString A Unicode string with dotted-decimal version
+ * information, with up to four non-negative number
+ * fields with values of up to 255 each.
+ * @stable ICU 4.2
+ */
+U_CAPI void U_EXPORT2
+u_versionFromUString(UVersionInfo versionArray, const UChar *versionString);
+
+
+/**
+ * Write a string with dotted-decimal version information according
+ * to the input UVersionInfo.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray The version information to be written as a string.
+ * @param versionString A string buffer that will be filled in with
+ * a string corresponding to the numeric version
+ * information in versionArray.
+ * The buffer size must be at least U_MAX_VERSION_STRING_LENGTH.
+ * @stable ICU 2.4
+ */
+U_CAPI void U_EXPORT2
+u_versionToString(const UVersionInfo versionArray, char *versionString);
+
+/**
+ * Gets the ICU release version. The version array stores the version information
+ * for ICU. For example, release "1.3.31.2" is then represented as 0x01031F02.
+ * Definition of this function lives in putil.c
+ *
+ * @param versionArray the version # information, the result will be filled in
+ * @stable ICU 2.0
+ */
+U_CAPI void U_EXPORT2
+u_getVersion(UVersionInfo versionArray);
+#endif