diff options
Diffstat (limited to 'intl/icu/source/i18n/ucol.cpp')
-rw-r--r-- | intl/icu/source/i18n/ucol.cpp | 621 |
1 files changed, 621 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/ucol.cpp b/intl/icu/source/i18n/ucol.cpp new file mode 100644 index 0000000000..f59333ede3 --- /dev/null +++ b/intl/icu/source/i18n/ucol.cpp @@ -0,0 +1,621 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 1996-2015, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* file name: ucol.cpp +* encoding: UTF-8 +* tab size: 8 (not used) +* indentation:4 +* +* Modification history +* Date Name Comments +* 1996-1999 various members of ICU team maintained C API for collation framework +* 02/16/2001 synwee Added internal method getPrevSpecialCE +* 03/01/2001 synwee Added maxexpansion functionality. +* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant +* 2012-2014 markus Rewritten in C++ again. +*/ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_COLLATION + +#include "unicode/coll.h" +#include "unicode/tblcoll.h" +#include "unicode/bytestream.h" +#include "unicode/coleitr.h" +#include "unicode/ucoleitr.h" +#include "unicode/ustring.h" +#include "cmemory.h" +#include "collation.h" +#include "cstring.h" +#include "putilimp.h" +#include "uassert.h" +#include "utracimp.h" + +U_NAMESPACE_USE + +U_CAPI UCollator* U_EXPORT2 +ucol_openBinary(const uint8_t *bin, int32_t length, + const UCollator *base, + UErrorCode *status) +{ + if(U_FAILURE(*status)) { return NULL; } + RuleBasedCollator *coll = new RuleBasedCollator( + bin, length, + RuleBasedCollator::rbcFromUCollator(base), + *status); + if(coll == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return NULL; + } + if(U_FAILURE(*status)) { + delete coll; + return NULL; + } + return coll->toUCollator(); +} + +U_CAPI int32_t U_EXPORT2 +ucol_cloneBinary(const UCollator *coll, + uint8_t *buffer, int32_t capacity, + UErrorCode *status) +{ + if(U_FAILURE(*status)) { + return 0; + } + const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); + if(rbc == NULL && coll != NULL) { + *status = U_UNSUPPORTED_ERROR; + return 0; + } + return rbc->cloneBinary(buffer, capacity, *status); +} + +U_CAPI UCollator* U_EXPORT2 +ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status) +{ + if (status == NULL || U_FAILURE(*status)){ + return NULL; + } + if (coll == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return NULL; + } + if (pBufferSize != NULL) { + int32_t inputSize = *pBufferSize; + *pBufferSize = 1; + if (inputSize == 0) { + return NULL; // preflighting for deprecated functionality + } + } + Collator *newColl = Collator::fromUCollator(coll)->clone(); + if (newColl == NULL) { + *status = U_MEMORY_ALLOCATION_ERROR; + return nullptr; + } else { + *status = U_SAFECLONE_ALLOCATED_WARNING; + } + return newColl->toUCollator(); +} + +U_CAPI void U_EXPORT2 +ucol_close(UCollator *coll) +{ + UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE); + UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll); + if(coll != NULL) { + delete Collator::fromUCollator(coll); + } + UTRACE_EXIT(); +} + +U_CAPI int32_t U_EXPORT2 +ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length, + const uint8_t *src2, int32_t src2Length, + uint8_t *dest, int32_t destCapacity) { + /* check arguments */ + if( src1==NULL || src1Length<-1 || src1Length==0 || (src1Length>0 && src1[src1Length-1]!=0) || + src2==NULL || src2Length<-1 || src2Length==0 || (src2Length>0 && src2[src2Length-1]!=0) || + destCapacity<0 || (destCapacity>0 && dest==NULL) + ) { + /* error, attempt to write a zero byte and return 0 */ + if(dest!=NULL && destCapacity>0) { + *dest=0; + } + return 0; + } + + /* check lengths and capacity */ + if(src1Length<0) { + src1Length=(int32_t)uprv_strlen((const char *)src1)+1; + } + if(src2Length<0) { + src2Length=(int32_t)uprv_strlen((const char *)src2)+1; + } + + int32_t destLength=src1Length+src2Length; + if(destLength>destCapacity) { + /* the merged sort key does not fit into the destination */ + return destLength; + } + + /* merge the sort keys with the same number of levels */ + uint8_t *p=dest; + for(;;) { + /* copy level from src1 not including 00 or 01 */ + uint8_t b; + while((b=*src1)>=2) { + ++src1; + *p++=b; + } + + /* add a 02 merge separator */ + *p++=2; + + /* copy level from src2 not including 00 or 01 */ + while((b=*src2)>=2) { + ++src2; + *p++=b; + } + + /* if both sort keys have another level, then add a 01 level separator and continue */ + if(*src1==1 && *src2==1) { + ++src1; + ++src2; + *p++=1; + } else { + break; + } + } + + /* + * here, at least one sort key is finished now, but the other one + * might have some contents left from containing more levels; + * that contents is just appended to the result + */ + if(*src1!=0) { + /* src1 is not finished, therefore *src2==0, and src1 is appended */ + src2=src1; + } + /* append src2, "the other, unfinished sort key" */ + while((*p++=*src2++)!=0) {} + + /* the actual length might be less than destLength if either sort key contained illegally embedded zero bytes */ + return (int32_t)(p-dest); +} + +U_CAPI int32_t U_EXPORT2 +ucol_getSortKey(const UCollator *coll, + const UChar *source, + int32_t sourceLength, + uint8_t *result, + int32_t resultLength) +{ + UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY); + if (UTRACE_LEVEL(UTRACE_VERBOSE)) { + UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source, + ((sourceLength==-1 && source!=NULL) ? u_strlen(source) : sourceLength)); + } + + int32_t keySize = Collator::fromUCollator(coll)-> + getSortKey(source, sourceLength, result, resultLength); + + UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, keySize); + UTRACE_EXIT_VALUE(keySize); + return keySize; +} + +U_CAPI int32_t U_EXPORT2 +ucol_nextSortKeyPart(const UCollator *coll, + UCharIterator *iter, + uint32_t state[2], + uint8_t *dest, int32_t count, + UErrorCode *status) +{ + /* error checking */ + if(status==NULL || U_FAILURE(*status)) { + return 0; + } + UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART); + UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d", + coll, iter, state[0], state[1], dest, count); + + int32_t i = Collator::fromUCollator(coll)-> + internalNextSortKeyPart(iter, state, dest, count, *status); + + // Return number of meaningful sortkey bytes. + UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d", + dest,i, state[0], state[1]); + UTRACE_EXIT_VALUE_STATUS(i, *status); + return i; +} + +/** + * Produce a bound for a given sortkey and a number of levels. + */ +U_CAPI int32_t U_EXPORT2 +ucol_getBound(const uint8_t *source, + int32_t sourceLength, + UColBoundMode boundType, + uint32_t noOfLevels, + uint8_t *result, + int32_t resultLength, + UErrorCode *status) +{ + // consistency checks + if(status == NULL || U_FAILURE(*status)) { + return 0; + } + if(source == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + + int32_t sourceIndex = 0; + // Scan the string until we skip enough of the key OR reach the end of the key + do { + sourceIndex++; + if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) { + noOfLevels--; + } + } while (noOfLevels > 0 + && (source[sourceIndex] != 0 || sourceIndex < sourceLength)); + + if((source[sourceIndex] == 0 || sourceIndex == sourceLength) + && noOfLevels > 0) { + *status = U_SORT_KEY_TOO_SHORT_WARNING; + } + + + // READ ME: this code assumes that the values for boundType + // enum will not changes. They are set so that the enum value + // corresponds to the number of extra bytes each bound type + // needs. + if(result != NULL && resultLength >= sourceIndex+boundType) { + uprv_memcpy(result, source, sourceIndex); + switch(boundType) { + // Lower bound just gets terminated. No extra bytes + case UCOL_BOUND_LOWER: // = 0 + break; + // Upper bound needs one extra byte + case UCOL_BOUND_UPPER: // = 1 + result[sourceIndex++] = 2; + break; + // Upper long bound needs two extra bytes + case UCOL_BOUND_UPPER_LONG: // = 2 + result[sourceIndex++] = 0xFF; + result[sourceIndex++] = 0xFF; + break; + default: + *status = U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + result[sourceIndex++] = 0; + + return sourceIndex; + } else { + return sourceIndex+boundType+1; + } +} + +U_CAPI void U_EXPORT2 +ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) { + if(U_FAILURE(*pErrorCode)) { return; } + Collator::fromUCollator(coll)->setMaxVariable(group, *pErrorCode); +} + +U_CAPI UColReorderCode U_EXPORT2 +ucol_getMaxVariable(const UCollator *coll) { + return Collator::fromUCollator(coll)->getMaxVariable(); +} + +U_CAPI uint32_t U_EXPORT2 +ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) { + if(U_FAILURE(*status) || coll == NULL) { + return 0; + } + return Collator::fromUCollator(coll)->setVariableTop(varTop, len, *status); +} + +U_CAPI uint32_t U_EXPORT2 ucol_getVariableTop(const UCollator *coll, UErrorCode *status) { + if(U_FAILURE(*status) || coll == NULL) { + return 0; + } + return Collator::fromUCollator(coll)->getVariableTop(*status); +} + +U_CAPI void U_EXPORT2 +ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) { + if(U_FAILURE(*status) || coll == NULL) { + return; + } + Collator::fromUCollator(coll)->setVariableTop(varTop, *status); +} + +U_CAPI void U_EXPORT2 +ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) { + if(U_FAILURE(*status) || coll == NULL) { + return; + } + + Collator::fromUCollator(coll)->setAttribute(attr, value, *status); +} + +U_CAPI UColAttributeValue U_EXPORT2 +ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) { + if(U_FAILURE(*status) || coll == NULL) { + return UCOL_DEFAULT; + } + + return Collator::fromUCollator(coll)->getAttribute(attr, *status); +} + +U_CAPI void U_EXPORT2 +ucol_setStrength( UCollator *coll, + UCollationStrength strength) +{ + UErrorCode status = U_ZERO_ERROR; + ucol_setAttribute(coll, UCOL_STRENGTH, strength, &status); +} + +U_CAPI UCollationStrength U_EXPORT2 +ucol_getStrength(const UCollator *coll) +{ + UErrorCode status = U_ZERO_ERROR; + return ucol_getAttribute(coll, UCOL_STRENGTH, &status); +} + +U_CAPI int32_t U_EXPORT2 +ucol_getReorderCodes(const UCollator *coll, + int32_t *dest, + int32_t destCapacity, + UErrorCode *status) { + if (U_FAILURE(*status)) { + return 0; + } + + return Collator::fromUCollator(coll)->getReorderCodes(dest, destCapacity, *status); +} + +U_CAPI void U_EXPORT2 +ucol_setReorderCodes(UCollator* coll, + const int32_t* reorderCodes, + int32_t reorderCodesLength, + UErrorCode *status) { + if (U_FAILURE(*status)) { + return; + } + + Collator::fromUCollator(coll)->setReorderCodes(reorderCodes, reorderCodesLength, *status); +} + +U_CAPI int32_t U_EXPORT2 +ucol_getEquivalentReorderCodes(int32_t reorderCode, + int32_t* dest, + int32_t destCapacity, + UErrorCode *pErrorCode) { + return Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode); +} + +U_CAPI void U_EXPORT2 +ucol_getVersion(const UCollator* coll, + UVersionInfo versionInfo) +{ + Collator::fromUCollator(coll)->getVersion(versionInfo); +} + +U_CAPI UCollationResult U_EXPORT2 +ucol_strcollIter( const UCollator *coll, + UCharIterator *sIter, + UCharIterator *tIter, + UErrorCode *status) +{ + if(!status || U_FAILURE(*status)) { + return UCOL_EQUAL; + } + + UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER); + UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter); + + if(sIter == NULL || tIter == NULL || coll == NULL) { + *status = U_ILLEGAL_ARGUMENT_ERROR; + UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); + return UCOL_EQUAL; + } + + UCollationResult result = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status); + + UTRACE_EXIT_VALUE_STATUS(result, *status); + return result; +} + + +/* */ +/* ucol_strcoll Main public API string comparison function */ +/* */ +U_CAPI UCollationResult U_EXPORT2 +ucol_strcoll( const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength) +{ + UTRACE_ENTRY(UTRACE_UCOL_STRCOLL); + if (UTRACE_LEVEL(UTRACE_VERBOSE)) { + UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); + UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength); + UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength); + } + + UErrorCode status = U_ZERO_ERROR; + UCollationResult returnVal = Collator::fromUCollator(coll)-> + compare(source, sourceLength, target, targetLength, status); + UTRACE_EXIT_VALUE_STATUS(returnVal, status); + return returnVal; +} + +U_CAPI UCollationResult U_EXPORT2 +ucol_strcollUTF8( + const UCollator *coll, + const char *source, + int32_t sourceLength, + const char *target, + int32_t targetLength, + UErrorCode *status) +{ + UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8); + if (UTRACE_LEVEL(UTRACE_VERBOSE)) { + UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target); + UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength); + UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength); + } + + if (U_FAILURE(*status)) { + /* do nothing */ + UTRACE_EXIT_VALUE_STATUS(UCOL_EQUAL, *status); + return UCOL_EQUAL; + } + + UCollationResult returnVal = Collator::fromUCollator(coll)->internalCompareUTF8( + source, sourceLength, target, targetLength, *status); + UTRACE_EXIT_VALUE_STATUS(returnVal, *status); + return returnVal; +} + + +/* convenience function for comparing strings */ +U_CAPI UBool U_EXPORT2 +ucol_greater( const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength) +{ + return (ucol_strcoll(coll, source, sourceLength, target, targetLength) + == UCOL_GREATER); +} + +/* convenience function for comparing strings */ +U_CAPI UBool U_EXPORT2 +ucol_greaterOrEqual( const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength) +{ + return (ucol_strcoll(coll, source, sourceLength, target, targetLength) + != UCOL_LESS); +} + +/* convenience function for comparing strings */ +U_CAPI UBool U_EXPORT2 +ucol_equal( const UCollator *coll, + const UChar *source, + int32_t sourceLength, + const UChar *target, + int32_t targetLength) +{ + return (ucol_strcoll(coll, source, sourceLength, target, targetLength) + == UCOL_EQUAL); +} + +U_CAPI void U_EXPORT2 +ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) { + const Collator *c = Collator::fromUCollator(coll); + if(c != NULL) { + UVersionInfo v; + c->getVersion(v); + // Note: This is tied to how the current implementation encodes the UCA version + // in the overall getVersion(). + // Alternatively, we could load the root collator and get at lower-level data from there. + // Either way, it will reflect the input collator's UCA version only + // if it is a known implementation. + // It would be cleaner to make this a virtual Collator method. + info[0] = v[1] >> 3; + info[1] = v[1] & 7; + info[2] = v[2] >> 6; + info[3] = 0; + } +} + +U_CAPI const UChar * U_EXPORT2 +ucol_getRules(const UCollator *coll, int32_t *length) { + const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); + // OK to crash if coll==NULL: We do not want to check "this" pointers. + if(rbc != NULL || coll == NULL) { + const UnicodeString &rules = rbc->getRules(); + U_ASSERT(rules.getBuffer()[rules.length()] == 0); + *length = rules.length(); + return rules.getBuffer(); + } + static const UChar _NUL = 0; + *length = 0; + return &_NUL; +} + +U_CAPI int32_t U_EXPORT2 +ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) { + UnicodeString rules; + const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); + if(rbc != NULL || coll == NULL) { + rbc->getRules(delta, rules); + } + if(buffer != NULL && bufferLen > 0) { + UErrorCode errorCode = U_ZERO_ERROR; + return rules.extract(buffer, bufferLen, errorCode); + } else { + return rules.length(); + } +} + +U_CAPI const char * U_EXPORT2 +ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { + return ucol_getLocaleByType(coll, type, status); +} + +U_CAPI const char * U_EXPORT2 +ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) { + if(U_FAILURE(*status)) { + return NULL; + } + UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE); + UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll); + + const char *result; + const RuleBasedCollator *rbc = RuleBasedCollator::rbcFromUCollator(coll); + if(rbc == NULL && coll != NULL) { + *status = U_UNSUPPORTED_ERROR; + result = NULL; + } else { + result = rbc->internalGetLocaleID(type, *status); + } + + UTRACE_DATA1(UTRACE_INFO, "result = %s", result); + UTRACE_EXIT_STATUS(*status); + return result; +} + +U_CAPI USet * U_EXPORT2 +ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) { + if(U_FAILURE(*status)) { + return NULL; + } + UnicodeSet *set = Collator::fromUCollator(coll)->getTailoredSet(*status); + if(U_FAILURE(*status)) { + delete set; + return NULL; + } + return set->toUSet(); +} + +U_CAPI UBool U_EXPORT2 +ucol_equals(const UCollator *source, const UCollator *target) { + return source == target || + (*Collator::fromUCollator(source)) == (*Collator::fromUCollator(target)); +} + +#endif /* #if !UCONFIG_NO_COLLATION */ |