1 files changed, 627 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/ucol.cpp b/intl/icu/source/i18n/ucol.cpp
new file mode 100644
index 0000000000..7c9fb00bf3
--- /dev/null
+++ b/intl/icu/source/i18n/ucol.cpp
@@ -0,0 +1,627 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+*   Copyright (C) 1996-2015, International Business Machines
+*   Corporation and others.  All Rights Reserved.
+*******************************************************************************
+*   file name:  ucol.cpp
+*   encoding:   UTF-8
+*   tab size:   8 (not used)
+*   indentation:4
+*
+* Modification history
+* Date        Name      Comments
+* 1996-1999   various members of ICU team maintained C API for collation framework
+* 02/16/2001  synwee    Added internal method getPrevSpecialCE
+* 03/01/2001  synwee    Added maxexpansion functionality.
+* 03/16/2001  weiv      Collation framework is rewritten in C and made UCA compliant
+* 2012-2014   markus    Rewritten in C++ again.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/coll.h"
+#include "unicode/tblcoll.h"
+#include "unicode/bytestream.h"
+#include "unicode/coleitr.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "collation.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "utracimp.h"
+
+U_NAMESPACE_USE
+
+/**
+ * Creates a collator from a binary image.
+ *
+ * Builds a RuleBasedCollator from (bin, length) together with the
+ * RuleBasedCollator obtained from base, and returns it as a UCollator.
+ *
+ * @param bin     binary collator data handed to the RuleBasedCollator constructor
+ * @param length  length value forwarded together with bin
+ * @param base    base collator handle, converted with rbcFromUCollator()
+ * @param status  in/out error code; it is dereferenced without a null check
+ * @return the new collator, or nullptr when status already held a failure,
+ *         when allocation failed (status becomes U_MEMORY_ALLOCATION_ERROR),
+ *         or when the constructor reported a failure through status
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length,
+                const UCollator *base,
+                UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return nullptr;
+    }
+    RuleBasedCollator *opened =
+            new RuleBasedCollator(bin, length, RuleBasedCollator::rbcFromUCollator(base), *status);
+    if(opened == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    if(!U_FAILURE(*status)) {
+        return opened->toUCollator();
+    }
+    // The constructor reported an error: discard the object instead of returning it.
+    delete opened;
+    return nullptr;
+}
+
+/**
+ * Writes the binary form of a collator into a caller-provided buffer by
+ * delegating to RuleBasedCollator::cloneBinary().
+ *
+ * @param coll      collator handle; a non-null handle that rbcFromUCollator()
+ *                  maps to nullptr is rejected with U_UNSUPPORTED_ERROR
+ * @param buffer    destination buffer, forwarded unchanged
+ * @param capacity  capacity of buffer, forwarded unchanged
+ * @param status    in/out error code; it is dereferenced without a null check
+ * @return the value returned by cloneBinary(), or 0 when status already held
+ *         a failure or the collator is unsupported
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+                 uint8_t *buffer, int32_t capacity,
+                 UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+    const RuleBasedCollator *ruleBased = RuleBasedCollator::rbcFromUCollator(coll);
+    // A null coll is deliberately not filtered out here; only the
+    // "non-null handle, but no RuleBasedCollator" case is reported as unsupported.
+    if(ruleBased != nullptr || coll == nullptr) {
+        return ruleBased->cloneBinary(buffer, capacity, *status);
+    }
+    *status = U_UNSUPPORTED_ERROR;
+    return 0;
+}
+
+/**
+ * Clones a collator. The stack-buffer argument is ignored (the parameter is
+ * unnamed); the copy always comes from Collator::clone().
+ *
+ * @param coll         collator to copy; nullptr yields U_ILLEGAL_ARGUMENT_ERROR
+ * @param pBufferSize  optional; when non-null, *pBufferSize is overwritten with 1.
+ *                     If it held 0 on entry the call only preflights and
+ *                     returns nullptr without cloning.
+ * @param status       in/out error code; nullptr or a failure value makes the
+ *                     function return nullptr immediately. On a successful
+ *                     clone with a non-null pBufferSize it is set to
+ *                     U_SAFECLONE_ALLOCATED_WARNING.
+ * @return the cloned collator, or nullptr (U_MEMORY_ALLOCATION_ERROR when
+ *         clone() returned nullptr)
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
+{
+    if (status == nullptr) {
+        return nullptr;
+    }
+    if (U_FAILURE(*status)) {
+        return nullptr;
+    }
+    if (coll == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    const bool sizeRequested = (pBufferSize != nullptr);
+    if (sizeRequested) {
+        const int32_t sizeOnEntry = *pBufferSize;
+        *pBufferSize = 1;
+        if (sizeOnEntry == 0) {
+            return nullptr;  // preflighting for deprecated functionality
+        }
+    }
+    Collator *copy = Collator::fromUCollator(coll)->clone();
+    if (copy == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    if (sizeRequested) {
+        *status = U_SAFECLONE_ALLOCATED_WARNING;
+    }
+    return copy->toUCollator();
+}
+
+/**
+ * Clones a collator by calling ucol_safeClone() with neither a stack buffer
+ * nor a buffer-size pointer.
+ *
+ * @param coll    collator to copy
+ * @param status  in/out error code, passed through to ucol_safeClone()
+ * @return whatever ucol_safeClone() returns
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_clone(const UCollator *coll, UErrorCode *status)
+{
+    void *const noStackBuffer = nullptr;
+    int32_t *const noBufferSize = nullptr;
+    return ucol_safeClone(coll, noStackBuffer, noBufferSize, status);
+}
+
+/**
+ * Destroys a collator. A nullptr handle is accepted and only traced.
+ *
+ * @param coll  collator to delete, or nullptr
+ */
+U_CAPI void U_EXPORT2
+ucol_close(UCollator *coll)
+{
+    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
+    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
+    if(coll != nullptr) {
+        Collator *doomed = Collator::fromUCollator(coll);
+        delete doomed;
+    }
+    UTRACE_EXIT();
+}
+
+/**
+ * Merges two sort keys level by level into dest.
+ *
+ * For each level the bytes of src1 are copied, then a 02 merge separator,
+ * then the bytes of src2. Levels are joined with a 01 level separator for as
+ * long as both keys have another level; whatever remains of the longer key is
+ * appended, including its terminating 00.
+ *
+ * @param src1          first sort key; must be non-null
+ * @param src1Length    length of src1 including the terminating 00, or -1 if
+ *                      the length is to be computed from the terminator
+ * @param src2          second sort key; same requirements as src1
+ * @param src2Length    length of src2; same conventions as src1Length
+ * @param dest          output buffer; may be nullptr only if destCapacity is 0
+ * @param destCapacity  capacity of dest in bytes; must not be negative
+ * @return the number of bytes written including the terminator; or
+ *         src1Length+src2Length when that does not fit into destCapacity
+ *         (nothing is written); or 0 for invalid arguments (a single 00 is
+ *         written to dest when possible)
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
+                   const uint8_t *src2, int32_t src2Length,
+                   uint8_t *dest, int32_t destCapacity) {
+    /*
+     * Validate the arguments. The checks are nested so that src2 is only
+     * inspected after src1 has been accepted, and the last byte of a key is
+     * only read when an explicit positive length was given.
+     */
+    bool argumentsValid = false;
+    if(src1!=nullptr && src1Length>=-1 && src1Length!=0 && (src1Length<=0 || src1[src1Length-1]==0)) {
+        if(src2!=nullptr && src2Length>=-1 && src2Length!=0 && (src2Length<=0 || src2[src2Length-1]==0)) {
+            argumentsValid = destCapacity>=0 && (destCapacity<=0 || dest!=nullptr);
+        }
+    }
+    if(!argumentsValid) {
+        /* error, attempt to write a zero byte and return 0 */
+        if(dest!=nullptr && destCapacity>0) {
+            *dest=0;
+        }
+        return 0;
+    }
+
+    /* resolve -1 lengths; the computed length includes the terminating zero */
+    if(src1Length<0) {
+        src1Length=static_cast<int32_t>(uprv_strlen(reinterpret_cast<const char *>(src1)))+1;
+    }
+    if(src2Length<0) {
+        src2Length=static_cast<int32_t>(uprv_strlen(reinterpret_cast<const char *>(src2)))+1;
+    }
+
+    /* the two terminators account for the merge separator and the final zero */
+    const int32_t mergedLength=src1Length+src2Length;
+    if(mergedLength>destCapacity) {
+        /* the merged sort key does not fit into the destination */
+        return mergedLength;
+    }
+
+    /* merge the sort keys with the same number of levels */
+    uint8_t *out=dest;
+    bool bothHaveNextLevel=true;
+    while(bothHaveNextLevel) {
+        uint8_t byte;
+
+        /* copy level from src1 not including 00 or 01 */
+        for(; (byte=*src1)>=2; ++src1) {
+            *out++=byte;
+        }
+
+        /* add a 02 merge separator */
+        *out++=2;
+
+        /* copy level from src2 not including 00 or 01 */
+        for(; (byte=*src2)>=2; ++src2) {
+            *out++=byte;
+        }
+
+        /* if both sort keys have another level, then add a 01 level separator and continue */
+        bothHaveNextLevel=(*src1==1 && *src2==1);
+        if(bothHaveNextLevel) {
+            ++src1;
+            ++src2;
+            *out++=1;
+        }
+    }
+
+    /*
+     * At least one sort key is finished now. If src1 still has content then
+     * src2 must be at its terminator, so src1 is the one to append;
+     * otherwise the rest of src2 (possibly just its terminator) is appended.
+     */
+    const uint8_t *remainder=(*src1!=0) ? src1 : src2;
+    uint8_t copied;
+    do {
+        copied=*remainder++;
+        *out++=copied;
+    } while(copied!=0);
+
+    /* the actual length might be less than mergedLength if either sort key contained illegally embedded zero bytes */
+    return static_cast<int32_t>(out-dest);
+}
+
+/**
+ * Computes a sort key by forwarding to Collator::getSortKey(), with tracing
+ * of the input string and of the produced key.
+ *
+ * @param coll          collator handle; it is not checked for nullptr here
+ * @param source        source string
+ * @param sourceLength  length of source; -1 is traced via u_strlen(source)
+ * @param result        output buffer for the key
+ * @param resultLength  capacity of result
+ * @return the value returned by Collator::getSortKey()
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getSortKey(const UCollator *coll,
+                const char16_t *source, int32_t sourceLength,
+                uint8_t *result, int32_t resultLength)
+{
+    UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
+            ((sourceLength==-1 && source!=nullptr) ? u_strlen(source) : sourceLength));
+    }
+
+    const Collator *impl = Collator::fromUCollator(coll);
+    const int32_t keyLength = impl->getSortKey(source, sourceLength, result, resultLength);
+
+    UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keyLength);
+    UTRACE_EXIT_VALUE(keyLength);
+    return keyLength;
+}
+
+/**
+ * Produces the next part of a sort key by forwarding to
+ * Collator::internalNextSortKeyPart(), with tracing before and after.
+ *
+ * @param coll    collator handle; it is not checked for nullptr here
+ * @param iter    iterator over the input text, forwarded unchanged
+ * @param state   two-element iteration state, forwarded unchanged
+ * @param dest    output buffer
+ * @param count   capacity of dest
+ * @param status  in/out error code; nullptr or a failure value makes the
+ *                function return 0 before anything else happens
+ * @return the value returned by internalNextSortKeyPart()
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_nextSortKeyPart(const UCollator *coll,
+                     UCharIterator *iter,
+                     uint32_t state[2],
+                     uint8_t *dest, int32_t count,
+                     UErrorCode *status)
+{
+    /* error checking */
+    if(status==nullptr) {
+        return 0;
+    }
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
+    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
+                  coll, iter, state[0], state[1], dest, count);
+
+    const Collator *impl = Collator::fromUCollator(coll);
+    const int32_t produced = impl->internalNextSortKeyPart(iter, state, dest, count, *status);
+
+    // Return number of meaningful sortkey bytes.
+    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
+                  dest, produced, state[0], state[1]);
+    UTRACE_EXIT_VALUE_STATUS(produced, *status);
+    return produced;
+}
+
+/**
+ * Produce a bound for a given sortkey and a number of levels.
+ *
+ * Copies the leading part of source, up to the point where noOfLevels level
+ * separators have been seen or the scan stops at the end of the key, then
+ * appends the extra bytes required by boundType and a terminating zero byte.
+ *
+ * @param source        sort key to derive the bound from; nullptr yields
+ *                      U_ILLEGAL_ARGUMENT_ERROR
+ * @param sourceLength  length of source, used by the end-of-key test
+ * @param boundType     UCOL_BOUND_LOWER, UCOL_BOUND_UPPER or
+ *                      UCOL_BOUND_UPPER_LONG; any other value yields
+ *                      U_ILLEGAL_ARGUMENT_ERROR once the buffer is large enough
+ * @param noOfLevels    number of level separators to consume
+ * @param result        output buffer; may be nullptr to query the needed size
+ * @param resultLength  capacity of result in bytes
+ * @param status        in/out error code; nullptr or a failure value makes the
+ *                      function return 0. Set to U_SORT_KEY_TOO_SHORT_WARNING
+ *                      when the key ends before noOfLevels separators were seen.
+ * @return the number of bytes written including the terminating zero; or the
+ *         number of bytes needed when result is nullptr or too small (nothing
+ *         is written in that case); or 0 on error
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getBound(const uint8_t       *source,
+        int32_t             sourceLength,
+        UColBoundMode       boundType,
+        uint32_t            noOfLevels,
+        uint8_t             *result,
+        int32_t             resultLength,
+        UErrorCode          *status)
+{
+    // consistency checks
+    if(status == nullptr || U_FAILURE(*status)) {
+        return 0;
+    }
+    if(source == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    int32_t sourceIndex = 0;
+    // Scan the string until we skip enough of the key OR reach the end of the key
+    // NOTE(review): the scan starts at index 1 and continues while the current
+    // byte is non-zero OR the index is below sourceLength, so it can read at
+    // and beyond source[sourceLength] -- confirm this is intended before
+    // relying on sourceLength as a hard limit.
+    do {
+        sourceIndex++;
+        if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) {
+            noOfLevels--;
+        }
+    } while (noOfLevels > 0
+        && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
+
+    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
+        && noOfLevels > 0) {
+            *status = U_SORT_KEY_TOO_SHORT_WARNING;
+    }
+
+
+    // READ ME: this code assumes that the values of the boundType
+    // enum will not change. They are set so that the enum value
+    // corresponds to the number of extra bytes each bound type
+    // needs.
+    //
+    // The output consists of the copied prefix (sourceIndex bytes), the
+    // boundType extra bytes, and one terminating zero byte. The capacity check
+    // must include that terminator; otherwise the final write below lands one
+    // byte past the end of result when resultLength == sourceIndex+boundType.
+    const int32_t requiredLength = sourceIndex+boundType+1;
+    if(result != nullptr && resultLength >= requiredLength) {
+        uprv_memcpy(result, source, sourceIndex);
+        switch(boundType) {
+            // Lower bound just gets terminated. No extra bytes
+        case UCOL_BOUND_LOWER: // = 0
+            break;
+            // Upper bound needs one extra byte
+        case UCOL_BOUND_UPPER: // = 1
+            result[sourceIndex++] = 2;
+            break;
+            // Upper long bound needs two extra bytes
+        case UCOL_BOUND_UPPER_LONG: // = 2
+            result[sourceIndex++] = 0xFF;
+            result[sourceIndex++] = 0xFF;
+            break;
+        default:
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        result[sourceIndex++] = 0;
+
+        return sourceIndex;
+    } else {
+        // Preflight / overflow: report how many bytes the bound needs.
+        return requiredLength;
+    }
+}
+
+/**
+ * Sets the max-variable group by forwarding to Collator::setMaxVariable().
+ *
+ * @param coll        collator handle; it is not checked for nullptr here
+ * @param group       reorder code forwarded unchanged
+ * @param pErrorCode  in/out error code; dereferenced without a null check.
+ *                    Nothing happens if it already holds a failure.
+ */
+U_CAPI void U_EXPORT2
+ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) {
+    if(!U_FAILURE(*pErrorCode)) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setMaxVariable(group, *pErrorCode);
+    }
+}
+
+/**
+ * Returns the result of Collator::getMaxVariable() for the given collator.
+ *
+ * @param coll  collator handle; it is not checked for nullptr here
+ */
+U_CAPI UColReorderCode U_EXPORT2
+ucol_getMaxVariable(const UCollator *coll) {
+    const Collator *impl = Collator::fromUCollator(coll);
+    return impl->getMaxVariable();
+}
+
+/**
+ * Sets the variable top from a string by forwarding to
+ * Collator::setVariableTop(varTop, len, status).
+ *
+ * @param coll    collator handle; nullptr makes the function return 0
+ * @param varTop  string forwarded unchanged
+ * @param len     length of varTop, forwarded unchanged
+ * @param status  in/out error code; dereferenced without a null check
+ * @return the value returned by Collator::setVariableTop(), or 0 when status
+ *         already held a failure or coll is nullptr
+ */
+U_CAPI uint32_t  U_EXPORT2
+ucol_setVariableTop(UCollator *coll, const char16_t *varTop, int32_t len, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+    if(coll == nullptr) {
+        return 0;
+    }
+    Collator *impl = Collator::fromUCollator(coll);
+    return impl->setVariableTop(varTop, len, *status);
+}
+
+/**
+ * Returns the result of Collator::getVariableTop() for the given collator.
+ *
+ * @param coll    collator handle; nullptr makes the function return 0
+ * @param status  in/out error code; dereferenced without a null check
+ * @return the variable top, or 0 when status already held a failure or coll
+ *         is nullptr
+ */
+U_CAPI uint32_t U_EXPORT2
+ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return 0;
+    }
+    if(coll == nullptr) {
+        return 0;
+    }
+    const Collator *impl = Collator::fromUCollator(coll);
+    return impl->getVariableTop(*status);
+}
+
+/**
+ * Sets the variable top from a numeric value by forwarding to
+ * Collator::setVariableTop(varTop, status).
+ *
+ * @param coll    collator handle; nullptr makes the call a no-op
+ * @param varTop  numeric variable-top value forwarded unchanged
+ * @param status  in/out error code; dereferenced without a null check.
+ *                Nothing happens if it already holds a failure.
+ */
+U_CAPI void  U_EXPORT2
+ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setVariableTop(varTop, *status);
+    }
+}
+
+/**
+ * Sets a collation attribute by forwarding to Collator::setAttribute().
+ *
+ * @param coll    collator handle; nullptr makes the call a no-op
+ * @param attr    attribute to set
+ * @param value   new attribute value
+ * @param status  in/out error code; dereferenced without a null check.
+ *                Nothing happens if it already holds a failure.
+ */
+U_CAPI void  U_EXPORT2
+ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setAttribute(attr, value, *status);
+    }
+}
+
+/**
+ * Reads a collation attribute by forwarding to Collator::getAttribute().
+ *
+ * @param coll    collator handle; nullptr makes the function return UCOL_DEFAULT
+ * @param attr    attribute to read
+ * @param status  in/out error code; dereferenced without a null check
+ * @return the attribute value, or UCOL_DEFAULT when status already held a
+ *         failure or coll is nullptr
+ */
+U_CAPI UColAttributeValue  U_EXPORT2
+ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return UCOL_DEFAULT;
+    }
+    if(coll == nullptr) {
+        return UCOL_DEFAULT;
+    }
+    const Collator *impl = Collator::fromUCollator(coll);
+    return impl->getAttribute(attr, *status);
+}
+
+/**
+ * Sets the UCOL_STRENGTH attribute through ucol_setAttribute().
+ * The error code of that call is local to this function and is discarded.
+ *
+ * @param coll      collator handle
+ * @param strength  new strength value
+ */
+U_CAPI void U_EXPORT2
+ucol_setStrength(UCollator *coll, UCollationStrength strength)
+{
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    ucol_setAttribute(coll, UCOL_STRENGTH, strength, &ignoredStatus);
+}
+
+/**
+ * Reads the UCOL_STRENGTH attribute through ucol_getAttribute().
+ * The error code of that call is local to this function and is discarded.
+ *
+ * @param coll  collator handle
+ * @return the value returned by ucol_getAttribute()
+ */
+U_CAPI UCollationStrength U_EXPORT2
+ucol_getStrength(const UCollator *coll)
+{
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    const UCollationStrength strength = ucol_getAttribute(coll, UCOL_STRENGTH, &ignoredStatus);
+    return strength;
+}
+
+/**
+ * Retrieves the reorder codes by forwarding to Collator::getReorderCodes().
+ *
+ * @param coll          collator handle; it is not checked for nullptr here
+ * @param dest          output array, forwarded unchanged
+ * @param destCapacity  capacity of dest, forwarded unchanged
+ * @param status        in/out error code; dereferenced without a null check
+ * @return the value returned by Collator::getReorderCodes(), or 0 when status
+ *         already held a failure
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getReorderCodes(const UCollator *coll,
+                     int32_t *dest, int32_t destCapacity,
+                     UErrorCode *status) {
+    if (U_FAILURE(*status)) {
+        return 0;
+    }
+    const Collator *impl = Collator::fromUCollator(coll);
+    return impl->getReorderCodes(dest, destCapacity, *status);
+}
+
+/**
+ * Sets the reorder codes by forwarding to Collator::setReorderCodes().
+ *
+ * @param coll                collator handle; it is not checked for nullptr here
+ * @param reorderCodes        array of reorder codes, forwarded unchanged
+ * @param reorderCodesLength  length of reorderCodes, forwarded unchanged
+ * @param status              in/out error code; dereferenced without a null
+ *                            check. Nothing happens if it already holds a failure.
+ */
+U_CAPI void U_EXPORT2
+ucol_setReorderCodes(UCollator* coll,
+                     const int32_t* reorderCodes, int32_t reorderCodesLength,
+                     UErrorCode *status) {
+    if (!U_FAILURE(*status)) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setReorderCodes(reorderCodes, reorderCodesLength, *status);
+    }
+}
+
+/**
+ * Forwards to the static Collator::getEquivalentReorderCodes().
+ * No check is performed on pErrorCode here; it is dereferenced directly.
+ *
+ * @param reorderCode   reorder code to look up
+ * @param dest          output array, forwarded unchanged
+ * @param destCapacity  capacity of dest, forwarded unchanged
+ * @param pErrorCode    in/out error code passed on by reference
+ * @return the value returned by Collator::getEquivalentReorderCodes()
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getEquivalentReorderCodes(int32_t reorderCode,
+                               int32_t* dest, int32_t destCapacity,
+                               UErrorCode *pErrorCode) {
+    UErrorCode &errorCode = *pErrorCode;
+    return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, errorCode);
+}
+
+/**
+ * Fills versionInfo by forwarding to Collator::getVersion().
+ *
+ * @param coll         collator handle; it is not checked for nullptr here
+ * @param versionInfo  output version array
+ */
+U_CAPI void U_EXPORT2
+ucol_getVersion(const UCollator* coll, UVersionInfo versionInfo)
+{
+    const Collator *impl = Collator::fromUCollator(coll);
+    impl->getVersion(versionInfo);
+}
+
+/**
+ * Compares the text of two character iterators by forwarding to
+ * Collator::compare(UCharIterator &, UCharIterator &, UErrorCode &).
+ *
+ * @param coll    collator handle
+ * @param sIter   iterator over the source text
+ * @param tIter   iterator over the target text
+ * @param status  in/out error code; nullptr or a failure value makes the
+ *                function return UCOL_EQUAL immediately. Set to
+ *                U_ILLEGAL_ARGUMENT_ERROR if coll, sIter or tIter is nullptr.
+ * @return the comparison result, or UCOL_EQUAL on any error
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcollIter(const UCollator *coll,
+                 UCharIterator *sIter,
+                 UCharIterator *tIter,
+                 UErrorCode *status)
+{
+    if(status == nullptr) {
+        return UCOL_EQUAL;
+    }
+    if(U_FAILURE(*status)) {
+        return UCOL_EQUAL;
+    }
+
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
+    UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
+
+    if(sIter != nullptr && tIter != nullptr && coll != nullptr) {
+        const Collator *impl = Collator::fromUCollator(coll);
+        UCollationResult order = impl->compare(*sIter, *tIter, *status);
+
+        UTRACE_EXIT_VALUE_STATUS(order, *status);
+        return order;
+    }
+
+    // At least one required pointer is missing.
+    *status = U_ILLEGAL_ARGUMENT_ERROR;
+    UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
+    return UCOL_EQUAL;
+}
+
+
+/**
+ * ucol_strcoll: main public API string comparison function.
+ *
+ * Forwards to Collator::compare(source, sourceLength, target, targetLength,
+ * status) with a local error code. That error code is passed to the exit
+ * trace macro but is not returned to the caller.
+ *
+ * @param coll          collator handle; it is not checked for nullptr here
+ * @param source        source string
+ * @param sourceLength  length of source, forwarded unchanged
+ * @param target        target string
+ * @param targetLength  length of target, forwarded unchanged
+ * @return the value returned by Collator::compare()
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcoll(const UCollator *coll,
+             const char16_t *source, int32_t sourceLength,
+             const char16_t *target, int32_t targetLength)
+{
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
+        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
+        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
+    }
+
+    UErrorCode localStatus = U_ZERO_ERROR;
+    const Collator *impl = Collator::fromUCollator(coll);
+    UCollationResult order =
+            impl->compare(source, sourceLength, target, targetLength, localStatus);
+    UTRACE_EXIT_VALUE_STATUS(order, localStatus);
+    return order;
+}
+
+/**
+ * Compares two strings passed as char buffers by forwarding to
+ * Collator::internalCompareUTF8().
+ *
+ * @param coll          collator handle; it is not checked for nullptr here
+ * @param source        source string
+ * @param sourceLength  length of source, forwarded unchanged
+ * @param target        target string
+ * @param targetLength  length of target, forwarded unchanged
+ * @param status        in/out error code; dereferenced without a null check.
+ *                      If it already holds a failure, UCOL_EQUAL is returned
+ *                      and no comparison takes place.
+ * @return the comparison result, or UCOL_EQUAL when status held a failure
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcollUTF8(const UCollator *coll,
+                 const char *source, int32_t sourceLength,
+                 const char *target, int32_t targetLength,
+                 UErrorCode *status)
+{
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
+        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
+        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
+    }
+
+    // The entry trace above runs even for a pre-existing failure; only the
+    // comparison itself is skipped.
+    if (U_FAILURE(*status)) {
+        UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status);
+        return UCOL_EQUAL;
+    }
+
+    const Collator *impl = Collator::fromUCollator(coll);
+    UCollationResult order =
+            impl->internalCompareUTF8(source, sourceLength, target, targetLength, *status);
+    UTRACE_EXIT_VALUE_STATUS(order, *status);
+    return order;
+}
+
+
+/**
+ * Convenience function for comparing strings.
+ *
+ * @return true exactly when ucol_strcoll() reports UCOL_GREATER for
+ *         (source, target)
+ */
+U_CAPI UBool U_EXPORT2
+ucol_greater(const UCollator *coll,
+             const char16_t *source, int32_t sourceLength,
+             const char16_t *target, int32_t targetLength)
+{
+    const UCollationResult order =
+            ucol_strcoll(coll, source, sourceLength, target, targetLength);
+    return order == UCOL_GREATER;
+}
+
+/**
+ * Convenience function for comparing strings.
+ *
+ * @return true exactly when ucol_strcoll() reports anything other than
+ *         UCOL_LESS for (source, target)
+ */
+U_CAPI UBool U_EXPORT2
+ucol_greaterOrEqual(const UCollator *coll,
+                    const char16_t *source, int32_t sourceLength,
+                    const char16_t *target, int32_t targetLength)
+{
+    const UCollationResult order =
+            ucol_strcoll(coll, source, sourceLength, target, targetLength);
+    return order != UCOL_LESS;
+}
+
+/**
+ * Convenience function for comparing strings.
+ *
+ * @return true exactly when ucol_strcoll() reports UCOL_EQUAL for
+ *         (source, target)
+ */
+U_CAPI UBool U_EXPORT2
+ucol_equal(const UCollator *coll,
+           const char16_t *source, int32_t sourceLength,
+           const char16_t *target, int32_t targetLength)
+{
+    const UCollationResult order =
+            ucol_strcoll(coll, source, sourceLength, target, targetLength);
+    return order == UCOL_EQUAL;
+}
+
+/**
+ * Derives a UCA version from the collator's overall version.
+ *
+ * The fields are unpacked from bytes 1 and 2 of Collator::getVersion():
+ * info[0] = v[1] >> 3, info[1] = v[1] & 7, info[2] = v[2] >> 6, info[3] = 0.
+ * info is left untouched when fromUCollator() yields nullptr.
+ *
+ * @param coll  collator handle
+ * @param info  output version array
+ */
+U_CAPI void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
+    const Collator *impl = Collator::fromUCollator(coll);
+    if(impl == nullptr) {
+        return;
+    }
+    UVersionInfo overall;
+    impl->getVersion(overall);
+    // Note: This is tied to how the current implementation encodes the UCA version
+    // in the overall getVersion().
+    // Alternatively, we could load the root collator and get at lower-level data from there.
+    // Either way, it will reflect the input collator's UCA version only
+    // if it is a known implementation.
+    // It would be cleaner to make this a virtual Collator method.
+    const uint8_t majorMinor = overall[1];
+    const uint8_t patchBits = overall[2];
+    info[0] = majorMinor >> 3;
+    info[1] = majorMinor & 7;
+    info[2] = patchBits >> 6;
+    info[3] = 0;
+}
+
+/**
+ * Returns a pointer to the collator's rule string and stores its length.
+ *
+ * @param coll    collator handle. A non-null handle that rbcFromUCollator()
+ *                maps to nullptr yields an empty string. A nullptr handle is
+ *                not guarded and is dereferenced.
+ * @param length  output: receives the rule string length; must not be nullptr
+ * @return the buffer of the rule string (asserted to be NUL-terminated), or a
+ *         pointer to a static NUL character for the empty-string case
+ */
+U_CAPI const char16_t * U_EXPORT2
+ucol_getRules(const UCollator *coll, int32_t *length) {
+    const RuleBasedCollator *ruleBased = RuleBasedCollator::rbcFromUCollator(coll);
+    if(ruleBased == nullptr && coll != nullptr) {
+        // Not a RuleBasedCollator: there are no rules to hand out.
+        static const char16_t kEmptyRules = 0;
+        *length = 0;
+        return &kEmptyRules;
+    }
+    // OK to crash if coll==nullptr: We do not want to check "this" pointers.
+    const UnicodeString &ruleText = ruleBased->getRules();
+    U_ASSERT(ruleText.getBuffer()[ruleText.length()] == 0);
+    *length = ruleText.length();
+    return ruleText.getBuffer();
+}
+
+/**
+ * Retrieves the rules selected by delta via RuleBasedCollator::getRules()
+ * and optionally extracts them into a caller buffer.
+ *
+ * @param coll       collator handle. A non-null handle that rbcFromUCollator()
+ *                   maps to nullptr leaves the rule string empty. A nullptr
+ *                   handle is not guarded and is dereferenced.
+ * @param delta      rule option forwarded to getRules()
+ * @param buffer     output buffer, or nullptr to only query the length
+ * @param bufferLen  capacity of buffer
+ * @return the result of UnicodeString::extract() when a buffer with positive
+ *         capacity is given (its error code is local and discarded);
+ *         otherwise the length of the rule string
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, char16_t *buffer, int32_t bufferLen) {
+    UnicodeString ruleText;
+    const RuleBasedCollator *ruleBased = RuleBasedCollator::rbcFromUCollator(coll);
+    const bool notRuleBased = (ruleBased == nullptr && coll != nullptr);
+    if(!notRuleBased) {
+        ruleBased->getRules(delta, ruleText);
+    }
+    if(buffer == nullptr || bufferLen <= 0) {
+        return ruleText.length();
+    }
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    return ruleText.extract(buffer, bufferLen, ignoredStatus);
+}
+
+/**
+ * Same as ucol_getLocaleByType(): all arguments are passed through and its
+ * result is returned unchanged.
+ */
+U_CAPI const char * U_EXPORT2
+ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
+    const char *localeId = ucol_getLocaleByType(coll, type, status);
+    return localeId;
+}
+
+/**
+ * Returns a locale ID of the requested type by forwarding to
+ * RuleBasedCollator::internalGetLocaleID().
+ *
+ * @param coll    collator handle. A non-null handle that rbcFromUCollator()
+ *                maps to nullptr yields U_UNSUPPORTED_ERROR. A nullptr handle
+ *                is not guarded and is dereferenced.
+ * @param type    locale type forwarded unchanged
+ * @param status  in/out error code; dereferenced without a null check
+ * @return the locale ID, or nullptr when status already held a failure or the
+ *         collator is unsupported
+ */
+U_CAPI const char * U_EXPORT2
+ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return nullptr;
+    }
+    UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
+    UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
+
+    const char *localeId = nullptr;
+    const RuleBasedCollator *ruleBased = RuleBasedCollator::rbcFromUCollator(coll);
+    if(ruleBased != nullptr || coll == nullptr) {
+        localeId = ruleBased->internalGetLocaleID(type, *status);
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+
+    UTRACE_DATA1(UTRACE_INFO, "result = %s", localeId);
+    UTRACE_EXIT_STATUS(*status);
+    return localeId;
+}
+
+/**
+ * Returns the set produced by Collator::getTailoredSet() as a USet.
+ *
+ * @param coll    collator handle; it is not checked for nullptr here
+ * @param status  in/out error code; dereferenced without a null check
+ * @return the set, or nullptr when status held a failure before or after the
+ *         call (a set returned alongside a failure is deleted)
+ */
+U_CAPI USet * U_EXPORT2
+ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return nullptr;
+    }
+    const Collator *impl = Collator::fromUCollator(coll);
+    UnicodeSet *tailored = impl->getTailoredSet(*status);
+    if(!U_FAILURE(*status)) {
+        return tailored->toUSet();
+    }
+    delete tailored;
+    return nullptr;
+}
+
+/**
+ * Tests two collators for equality.
+ *
+ * Identical handles compare equal without looking at the objects; otherwise
+ * the result is that of Collator::operator== on the two collators. Neither
+ * handle is checked for nullptr before that comparison.
+ */
+U_CAPI UBool U_EXPORT2
+ucol_equals(const UCollator *source, const UCollator *target) {
+    if(source == target) {
+        return true;
+    }
+    const Collator &lhs = *Collator::fromUCollator(source);
+    const Collator &rhs = *Collator::fromUCollator(target);
+    return lhs == rhs;
+}
+
+#endif /* #if !UCONFIG_NO_COLLATION */