summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/ucol.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/ucol.cpp')
-rw-r--r--intl/icu/source/i18n/ucol.cpp621
1 files changed, 621 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/ucol.cpp b/intl/icu/source/i18n/ucol.cpp
new file mode 100644
index 0000000000..f59333ede3
--- /dev/null
+++ b/intl/icu/source/i18n/ucol.cpp
@@ -0,0 +1,621 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 1996-2015, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: ucol.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* Modification history
+* Date Name Comments
+* 1996-1999 various members of ICU team maintained C API for collation framework
+* 02/16/2001 synwee Added internal method getPrevSpecialCE
+* 03/01/2001 synwee Added maxexpansion functionality.
+* 03/16/2001 weiv Collation framework is rewritten in C and made UCA compliant
+* 2012-2014 markus Rewritten in C++ again.
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/coll.h"
+#include "unicode/tblcoll.h"
+#include "unicode/bytestream.h"
+#include "unicode/coleitr.h"
+#include "unicode/ucoleitr.h"
+#include "unicode/ustring.h"
+#include "cmemory.h"
+#include "collation.h"
+#include "cstring.h"
+#include "putilimp.h"
+#include "uassert.h"
+#include "utracimp.h"
+
+U_NAMESPACE_USE
+
+/*
+ * Creates a RuleBasedCollator from a binary image and returns it as a UCollator.
+ * base is converted with rbcFromUCollator() and handed to the constructor.
+ * Returns NULL if *status already signals failure, if the allocation fails
+ * (U_MEMORY_ALLOCATION_ERROR), or if the constructor reports an error, in which
+ * case the new object is deleted again.
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_openBinary(const uint8_t *bin, int32_t length,
+                const UCollator *base,
+                UErrorCode *status)
+{
+    if(U_FAILURE(*status)) {
+        return nullptr;
+    }
+    const RuleBasedCollator *baseCollator = RuleBasedCollator::rbcFromUCollator(base);
+    RuleBasedCollator *created = new RuleBasedCollator(bin, length, baseCollator, *status);
+    if(created == nullptr) {
+        *status = U_MEMORY_ALLOCATION_ERROR;
+        return nullptr;
+    }
+    if(!U_FAILURE(*status)) {
+        return created->toUCollator();
+    }
+    // The constructor reported an error: discard the object.
+    delete created;
+    return nullptr;
+}
+
+/*
+ * Writes the binary form of a collator through RuleBasedCollator::cloneBinary().
+ * Returns 0 if *status already signals failure, and 0 with U_UNSUPPORTED_ERROR
+ * if a non-NULL coll is not a RuleBasedCollator; otherwise returns the value
+ * produced by cloneBinary().
+ * A NULL coll is not rejected here; it reaches the cloneBinary() call unchanged.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_cloneBinary(const UCollator *coll,
+                 uint8_t *buffer, int32_t capacity,
+                 UErrorCode *status)
+{
+    if(!U_FAILURE(*status)) {
+        const RuleBasedCollator *impl = RuleBasedCollator::rbcFromUCollator(coll);
+        if(impl != nullptr || coll == nullptr) {
+            return impl->cloneBinary(buffer, capacity, *status);
+        }
+        *status = U_UNSUPPORTED_ERROR;
+    }
+    return 0;
+}
+
+/*
+ * Clones a collator through Collator::clone(); the stack buffer argument is unused.
+ * Returns NULL if status is NULL or already a failure, and NULL with
+ * U_ILLEGAL_ARGUMENT_ERROR if coll is NULL.
+ * After those checks a non-NULL pBufferSize is overwritten with 1; an incoming
+ * value of 0 is a preflight request and returns NULL without cloning.
+ * A successful clone sets *status to U_SAFECLONE_ALLOCATED_WARNING, a failed one
+ * sets U_MEMORY_ALLOCATION_ERROR and returns NULL.
+ */
+U_CAPI UCollator* U_EXPORT2
+ucol_safeClone(const UCollator *coll, void * /*stackBuffer*/, int32_t * pBufferSize, UErrorCode *status)
+{
+    if (status == nullptr || U_FAILURE(*status)) {
+        return nullptr;
+    }
+    if (coll == nullptr) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return nullptr;
+    }
+    if (pBufferSize != nullptr) {
+        const bool preflightOnly = (*pBufferSize == 0);
+        *pBufferSize = 1;
+        if (preflightOnly) {
+            return nullptr;  // preflighting for deprecated functionality
+        }
+    }
+    Collator *copy = Collator::fromUCollator(coll)->clone();
+    if (copy != nullptr) {
+        *status = U_SAFECLONE_ALLOCATED_WARNING;
+        return copy->toUCollator();
+    }
+    *status = U_MEMORY_ALLOCATION_ERROR;
+    return nullptr;
+}
+
+/*
+ * Destroys the Collator behind coll; a NULL coll is accepted and ignored.
+ * Entry, the pointer value and exit are reported through the trace macros.
+ */
+U_CAPI void U_EXPORT2
+ucol_close(UCollator *coll)
+{
+    UTRACE_ENTRY_OC(UTRACE_UCOL_CLOSE);
+    UTRACE_DATA1(UTRACE_INFO, "coll = %p", coll);
+    if(coll != nullptr) {
+        Collator *impl = Collator::fromUCollator(coll);
+        delete impl;
+    }
+    UTRACE_EXIT();
+}
+
+/*
+ * Merges two sort keys level by level into dest.
+ * For every level the bytes of src1, a 02 merge separator and the bytes of src2
+ * are written; levels are joined with 01 for as long as both keys have another one.
+ * A length of -1 means the key is NUL-terminated; a positive length must end in a 0 byte.
+ * Returns the number of bytes written including the terminating 0,
+ * or the sum of both key lengths if that does not fit into destCapacity (nothing is
+ * written then), or 0 for illegal arguments (dest[0] is zeroed if dest is usable).
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_mergeSortkeys(const uint8_t *src1, int32_t src1Length,
+                   const uint8_t *src2, int32_t src2Length,
+                   uint8_t *dest, int32_t destCapacity) {
+    /* check src1, then src2, then the destination; later checks are skipped after a failure */
+    bool illegal = src1==nullptr || src1Length<-1 || src1Length==0 ||
+                   (src1Length>0 && src1[src1Length-1]!=0);
+    if(!illegal) {
+        illegal = src2==nullptr || src2Length<-1 || src2Length==0 ||
+                  (src2Length>0 && src2[src2Length-1]!=0);
+    }
+    if(!illegal) {
+        illegal = destCapacity<0 || (destCapacity>0 && dest==nullptr);
+    }
+    if(illegal) {
+        /* error, attempt to write a zero byte and return 0 */
+        if(dest!=nullptr && destCapacity>0) {
+            dest[0]=0;
+        }
+        return 0;
+    }
+
+    /* resolve NUL-terminated keys; the computed lengths include the terminating 0 */
+    if(src1Length<0) {
+        src1Length=(int32_t)uprv_strlen((const char *)src1)+1;
+    }
+    if(src2Length<0) {
+        src2Length=(int32_t)uprv_strlen((const char *)src2)+1;
+    }
+
+    const int32_t required=src1Length+src2Length;
+    if(required>destCapacity) {
+        /* the merged sort key does not fit into the destination */
+        return required;
+    }
+
+    /* merge the levels that both sort keys have */
+    uint8_t *out=dest;
+    bool anotherLevel;
+    do {
+        /* level bytes of src1, stopping in front of 00 or 01 */
+        for(uint8_t byte=*src1; byte>=2; byte=*++src1) {
+            *out++=byte;
+        }
+
+        /* 02 merge separator */
+        *out++=2;
+
+        /* level bytes of src2, stopping in front of 00 or 01 */
+        for(uint8_t byte=*src2; byte>=2; byte=*++src2) {
+            *out++=byte;
+        }
+
+        /* only when both keys continue is a 01 level separator emitted */
+        anotherLevel=(*src1==1 && *src2==1);
+        if(anotherLevel) {
+            ++src1;
+            ++src2;
+            *out++=1;
+        }
+    } while(anotherLevel);
+
+    /*
+     * At least one key is finished here. If src1 still has content then
+     * *src2==0 and the rest of src1 is appended; otherwise the rest of src2.
+     */
+    const uint8_t *remainder=(*src1!=0) ? src1 : src2;
+    uint8_t byte;
+    do {
+        byte=*remainder++;
+        *out++=byte;
+    } while(byte!=0);
+
+    /* the actual length might be less than the sum if either sort key contained illegally embedded zero bytes */
+    return (int32_t)(out-dest);
+}
+
+/*
+ * Forwards to Collator::getSortKey() and returns its result.
+ * With verbose tracing on, the source string is traced first (its length is
+ * taken from u_strlen() when sourceLength is -1 and source is non-NULL),
+ * and the produced key afterwards.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getSortKey(const UCollator *coll,
+                const UChar *source,
+                int32_t sourceLength,
+                uint8_t *result,
+                int32_t resultLength)
+{
+    UTRACE_ENTRY(UTRACE_UCOL_GET_SORTKEY);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source string = %vh ", coll, source,
+            ((sourceLength!=-1 || source==nullptr) ? sourceLength : u_strlen(source)));
+    }
+
+    const Collator *impl = Collator::fromUCollator(coll);
+    const int32_t written = impl->getSortKey(source, sourceLength, result, resultLength);
+
+    UTRACE_DATA2(UTRACE_VERBOSE, "Sort Key = %vb", result, written);
+    UTRACE_EXIT_VALUE(written);
+    return written;
+}
+
+/*
+ * Forwards to Collator::internalNextSortKeyPart() and returns its result.
+ * Returns 0 without doing anything when status is NULL or already a failure.
+ * The arguments are traced on entry; dest, the result and state on exit.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_nextSortKeyPart(const UCollator *coll,
+                     UCharIterator *iter,
+                     uint32_t state[2],
+                     uint8_t *dest, int32_t count,
+                     UErrorCode *status)
+{
+    /* nothing to do without a usable status */
+    if(status==nullptr || U_FAILURE(*status)) {
+        return 0;
+    }
+    UTRACE_ENTRY(UTRACE_UCOL_NEXTSORTKEYPART);
+    UTRACE_DATA6(UTRACE_VERBOSE, "coll=%p, iter=%p, state=%d %d, dest=%p, count=%d",
+                 coll, iter, state[0], state[1], dest, count);
+
+    const Collator *impl = Collator::fromUCollator(coll);
+    const int32_t produced = impl->internalNextSortKeyPart(iter, state, dest, count, *status);
+
+    // Return number of meaningful sortkey bytes.
+    UTRACE_DATA4(UTRACE_VERBOSE, "dest = %vb, state=%d %d",
+                 dest, produced, state[0], state[1]);
+    UTRACE_EXIT_VALUE_STATUS(produced, *status);
+    return produced;
+}
+
+/**
+ * Produce a bound for a given sortkey and a number of levels.
+ *
+ * @param source       sort key bytes; NULL sets U_ILLEGAL_ARGUMENT_ERROR
+ * @param sourceLength length of source, used only as a limit for the level scan
+ * @param boundType    kind of bound; its enum value is the number of extra bytes
+ *                     appended before the terminator (0, 1 or 2)
+ * @param noOfLevels   number of level separators to skip in source
+ * @param result       output buffer; may be NULL when only the size is wanted
+ * @param resultLength capacity of result in bytes
+ * @param status       in/out error code; receives U_SORT_KEY_TOO_SHORT_WARNING when
+ *                     the key ends before noOfLevels separators were found
+ * @return the number of bytes written including the terminating 0; if result is
+ *         NULL or too small, the capacity that would be needed; 0 on error
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getBound(const uint8_t *source,
+        int32_t sourceLength,
+        UColBoundMode boundType,
+        uint32_t noOfLevels,
+        uint8_t *result,
+        int32_t resultLength,
+        UErrorCode *status)
+{
+    // consistency checks
+    if(status == NULL || U_FAILURE(*status)) {
+        return 0;
+    }
+    if(source == NULL) {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+        return 0;
+    }
+
+    int32_t sourceIndex = 0;
+    // Scan the string until we skip enough of the key OR reach the end of the key
+    // NOTE(review): the scan starts at index 1 and continues while the byte is
+    // non-zero OR the index is below sourceLength, so with an explicit sourceLength
+    // and too few levels it inspects source[sourceLength] - confirm this is intended.
+    do {
+        sourceIndex++;
+        if(source[sourceIndex] == Collation::LEVEL_SEPARATOR_BYTE) {
+            noOfLevels--;
+        }
+    } while (noOfLevels > 0
+        && (source[sourceIndex] != 0 || sourceIndex < sourceLength));
+
+    if((source[sourceIndex] == 0 || sourceIndex == sourceLength)
+        && noOfLevels > 0) {
+        *status = U_SORT_KEY_TOO_SHORT_WARNING;
+    }
+
+
+    // READ ME: this code assumes that the values for boundType
+    // enum will not changes. They are set so that the enum value
+    // corresponds to the number of extra bytes each bound type
+    // needs.
+    // Total output size: copied prefix + extra bound bytes + terminating 0.
+    const int32_t required = sourceIndex+boundType+1;
+    // The terminator must fit as well; comparing against sourceIndex+boundType
+    // alone would let the final 0 be written one byte past the buffer.
+    if(result != NULL && resultLength >= required) {
+        uprv_memcpy(result, source, sourceIndex);
+        switch(boundType) {
+            // Lower bound just gets terminated. No extra bytes
+        case UCOL_BOUND_LOWER: // = 0
+            break;
+            // Upper bound needs one extra byte
+        case UCOL_BOUND_UPPER: // = 1
+            result[sourceIndex++] = 2;
+            break;
+            // Upper long bound needs two extra bytes
+        case UCOL_BOUND_UPPER_LONG: // = 2
+            result[sourceIndex++] = 0xFF;
+            result[sourceIndex++] = 0xFF;
+            break;
+        default:
+            *status = U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        result[sourceIndex++] = 0;
+
+        return sourceIndex;
+    } else {
+        // preflight or insufficient capacity: report the size needed
+        return required;
+    }
+}
+
+/*
+ * Forwards to Collator::setMaxVariable(); does nothing if *pErrorCode
+ * already signals failure.
+ */
+U_CAPI void U_EXPORT2
+ucol_setMaxVariable(UCollator *coll, UColReorderCode group, UErrorCode *pErrorCode) {
+    if(!U_FAILURE(*pErrorCode)) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setMaxVariable(group, *pErrorCode);
+    }
+}
+
+/* Returns the value of Collator::getMaxVariable() for the collator behind coll. */
+U_CAPI UColReorderCode U_EXPORT2
+ucol_getMaxVariable(const UCollator *coll) {
+    const Collator *impl = Collator::fromUCollator(coll);
+    return impl->getMaxVariable();
+}
+
+/*
+ * Forwards to Collator::setVariableTop(varTop, len, status) and returns its result.
+ * Returns 0 if *status already signals failure or coll is NULL.
+ */
+U_CAPI uint32_t U_EXPORT2
+ucol_setVariableTop(UCollator *coll, const UChar *varTop, int32_t len, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        Collator *impl = Collator::fromUCollator(coll);
+        return impl->setVariableTop(varTop, len, *status);
+    }
+    return 0;
+}
+
+/*
+ * Forwards to Collator::getVariableTop() and returns its result.
+ * Returns 0 if *status already signals failure or coll is NULL.
+ */
+U_CAPI uint32_t U_EXPORT2
+ucol_getVariableTop(const UCollator *coll, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        const Collator *impl = Collator::fromUCollator(coll);
+        return impl->getVariableTop(*status);
+    }
+    return 0;
+}
+
+/*
+ * Forwards a numeric variable top to Collator::setVariableTop(varTop, status).
+ * Does nothing if *status already signals failure or coll is NULL.
+ */
+U_CAPI void U_EXPORT2
+ucol_restoreVariableTop(UCollator *coll, const uint32_t varTop, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setVariableTop(varTop, *status);
+    }
+}
+
+/*
+ * Forwards to Collator::setAttribute().
+ * Does nothing if *status already signals failure or coll is NULL.
+ */
+U_CAPI void U_EXPORT2
+ucol_setAttribute(UCollator *coll, UColAttribute attr, UColAttributeValue value, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setAttribute(attr, value, *status);
+    }
+}
+
+/*
+ * Forwards to Collator::getAttribute() and returns its result.
+ * Returns UCOL_DEFAULT if *status already signals failure or coll is NULL.
+ */
+U_CAPI UColAttributeValue U_EXPORT2
+ucol_getAttribute(const UCollator *coll, UColAttribute attr, UErrorCode *status) {
+    if(!U_FAILURE(*status) && coll != nullptr) {
+        const Collator *impl = Collator::fromUCollator(coll);
+        return impl->getAttribute(attr, *status);
+    }
+    return UCOL_DEFAULT;
+}
+
+/*
+ * Sets the UCOL_STRENGTH attribute through ucol_setAttribute().
+ * The error code of that call stays local and is not reported to the caller.
+ */
+U_CAPI void U_EXPORT2
+ucol_setStrength(UCollator *coll, UCollationStrength strength)
+{
+    UErrorCode discarded = U_ZERO_ERROR;
+    ucol_setAttribute(coll, UCOL_STRENGTH, strength, &discarded);
+}
+
+/*
+ * Reads the UCOL_STRENGTH attribute through ucol_getAttribute().
+ * The error code of that call stays local and is not reported to the caller.
+ */
+U_CAPI UCollationStrength U_EXPORT2
+ucol_getStrength(const UCollator *coll)
+{
+    UErrorCode discarded = U_ZERO_ERROR;
+    return ucol_getAttribute(coll, UCOL_STRENGTH, &discarded);
+}
+
+/*
+ * Forwards to Collator::getReorderCodes() and returns its result.
+ * Returns 0 if *status already signals failure.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getReorderCodes(const UCollator *coll,
+                     int32_t *dest,
+                     int32_t destCapacity,
+                     UErrorCode *status) {
+    if (!U_FAILURE(*status)) {
+        const Collator *impl = Collator::fromUCollator(coll);
+        return impl->getReorderCodes(dest, destCapacity, *status);
+    }
+    return 0;
+}
+
+/*
+ * Forwards to Collator::setReorderCodes().
+ * Does nothing if *status already signals failure.
+ */
+U_CAPI void U_EXPORT2
+ucol_setReorderCodes(UCollator* coll,
+                     const int32_t* reorderCodes,
+                     int32_t reorderCodesLength,
+                     UErrorCode *status) {
+    if (!U_FAILURE(*status)) {
+        Collator *impl = Collator::fromUCollator(coll);
+        impl->setReorderCodes(reorderCodes, reorderCodesLength, *status);
+    }
+}
+
+/*
+ * Forwards to the static Collator::getEquivalentReorderCodes() and returns its
+ * result. *pErrorCode is passed through without being inspected here.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getEquivalentReorderCodes(int32_t reorderCode,
+                               int32_t* dest,
+                               int32_t destCapacity,
+                               UErrorCode *pErrorCode) {
+    const int32_t count =
+        Collator::getEquivalentReorderCodes(reorderCode, dest, destCapacity, *pErrorCode);
+    return count;
+}
+
+/* Fills versionInfo through Collator::getVersion() of the collator behind coll. */
+U_CAPI void U_EXPORT2
+ucol_getVersion(const UCollator* coll, UVersionInfo versionInfo)
+{
+    const Collator *impl = Collator::fromUCollator(coll);
+    impl->getVersion(versionInfo);
+}
+
+/*
+ * Compares the text of two UCharIterators through Collator::compare().
+ * Returns UCOL_EQUAL if status is NULL or already a failure, and UCOL_EQUAL with
+ * U_ILLEGAL_ARGUMENT_ERROR if coll or either iterator is NULL.
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcollIter(const UCollator *coll,
+                 UCharIterator *sIter,
+                 UCharIterator *tIter,
+                 UErrorCode *status)
+{
+    if(status == nullptr || U_FAILURE(*status)) {
+        return UCOL_EQUAL;
+    }
+
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLITER);
+    UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, sIter=%p, tIter=%p", coll, sIter, tIter);
+
+    UCollationResult outcome = UCOL_EQUAL;
+    if(sIter != nullptr && tIter != nullptr && coll != nullptr) {
+        outcome = Collator::fromUCollator(coll)->compare(*sIter, *tIter, *status);
+    } else {
+        *status = U_ILLEGAL_ARGUMENT_ERROR;
+    }
+
+    UTRACE_EXIT_VALUE_STATUS(outcome, *status);
+    return outcome;
+}
+
+
+/*
+ * ucol_strcoll     Main public API string comparison function
+ *
+ * Forwards to Collator::compare() with a local error code and returns the
+ * comparison result; the error code is only reported to the exit trace.
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcoll(const UCollator *coll,
+             const UChar *source,
+             int32_t sourceLength,
+             const UChar *target,
+             int32_t targetLength)
+{
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLL);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
+        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vh ", source, sourceLength);
+        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vh ", target, targetLength);
+    }
+
+    UErrorCode localStatus = U_ZERO_ERROR;
+    const Collator *impl = Collator::fromUCollator(coll);
+    UCollationResult outcome =
+        impl->compare(source, sourceLength, target, targetLength, localStatus);
+    UTRACE_EXIT_VALUE_STATUS(outcome, localStatus);
+    return outcome;
+}
+
+/*
+ * Compares two UTF-8 strings through Collator::internalCompareUTF8().
+ * If *status already signals failure no comparison is made and UCOL_EQUAL is returned.
+ */
+U_CAPI UCollationResult U_EXPORT2
+ucol_strcollUTF8(const UCollator *coll,
+                 const char *source,
+                 int32_t sourceLength,
+                 const char *target,
+                 int32_t targetLength,
+                 UErrorCode *status)
+{
+    UTRACE_ENTRY(UTRACE_UCOL_STRCOLLUTF8);
+    if (UTRACE_LEVEL(UTRACE_VERBOSE)) {
+        UTRACE_DATA3(UTRACE_VERBOSE, "coll=%p, source=%p, target=%p", coll, source, target);
+        UTRACE_DATA2(UTRACE_VERBOSE, "source string = %vb ", source, sourceLength);
+        UTRACE_DATA2(UTRACE_VERBOSE, "target string = %vb ", target, targetLength);
+    }
+
+    UCollationResult outcome = UCOL_EQUAL;
+    if (!U_FAILURE(*status)) {
+        outcome = Collator::fromUCollator(coll)->internalCompareUTF8(
+            source, sourceLength, target, targetLength, *status);
+    }
+    /* on an incoming failure: do nothing, report UCOL_EQUAL */
+    UTRACE_EXIT_VALUE_STATUS(outcome, *status);
+    return outcome;
+}
+
+
+/* Convenience wrapper: true when ucol_strcoll() reports UCOL_GREATER for source vs. target. */
+U_CAPI UBool U_EXPORT2
+ucol_greater(const UCollator *coll,
+             const UChar *source,
+             int32_t sourceLength,
+             const UChar *target,
+             int32_t targetLength)
+{
+    const UCollationResult order =
+        ucol_strcoll(coll, source, sourceLength, target, targetLength);
+    return (order == UCOL_GREATER);
+}
+
+/* Convenience wrapper: true unless ucol_strcoll() reports UCOL_LESS for source vs. target. */
+U_CAPI UBool U_EXPORT2
+ucol_greaterOrEqual(const UCollator *coll,
+                    const UChar *source,
+                    int32_t sourceLength,
+                    const UChar *target,
+                    int32_t targetLength)
+{
+    const UCollationResult order =
+        ucol_strcoll(coll, source, sourceLength, target, targetLength);
+    return (order != UCOL_LESS);
+}
+
+/* Convenience wrapper: true when ucol_strcoll() reports UCOL_EQUAL for source vs. target. */
+U_CAPI UBool U_EXPORT2
+ucol_equal(const UCollator *coll,
+           const UChar *source,
+           int32_t sourceLength,
+           const UChar *target,
+           int32_t targetLength)
+{
+    const UCollationResult order =
+        ucol_strcoll(coll, source, sourceLength, target, targetLength);
+    return (order == UCOL_EQUAL);
+}
+
+/*
+ * Derives a UCA version from the collator's overall getVersion() value and
+ * stores it in info. info is left untouched when coll maps to a NULL Collator.
+ */
+U_CAPI void U_EXPORT2
+ucol_getUCAVersion(const UCollator* coll, UVersionInfo info) {
+    const Collator *impl = Collator::fromUCollator(coll);
+    if(impl == nullptr) {
+        return;
+    }
+    UVersionInfo overall;
+    impl->getVersion(overall);
+    // Note: This is tied to how the current implementation encodes the UCA version
+    // in the overall getVersion().
+    // Alternatively, we could load the root collator and get at lower-level data from there.
+    // Either way, it will reflect the input collator's UCA version only
+    // if it is a known implementation.
+    // It would be cleaner to make this a virtual Collator method.
+    info[0] = overall[1] >> 3;  // upper five bits of byte 1
+    info[1] = overall[1] & 7;   // lower three bits of byte 1
+    info[2] = overall[2] >> 6;  // upper two bits of byte 2
+    info[3] = 0;
+}
+
+/*
+ * Returns the rule string of a RuleBasedCollator and stores its length in *length.
+ * For a non-NULL coll that is not a RuleBasedCollator, a pointer to a static
+ * NUL character is returned and *length is set to 0.
+ */
+U_CAPI const UChar * U_EXPORT2
+ucol_getRules(const UCollator *coll, int32_t *length) {
+    const RuleBasedCollator *impl = RuleBasedCollator::rbcFromUCollator(coll);
+    if(impl == nullptr && coll != nullptr) {
+        static const UChar emptyRules = 0;
+        *length = 0;
+        return &emptyRules;
+    }
+    // OK to crash if coll==NULL: We do not want to check "this" pointers.
+    const UnicodeString &text = impl->getRules();
+    U_ASSERT(text.getBuffer()[text.length()] == 0);
+    *length = text.length();
+    return text.getBuffer();
+}
+
+/*
+ * Fetches rules via RuleBasedCollator::getRules(delta, ...) into a local string.
+ * With a non-NULL buffer and positive bufferLen the text is extracted into buffer
+ * and the value of extract() is returned; otherwise only the length is returned.
+ * The string stays empty for a non-NULL coll that is not a RuleBasedCollator.
+ */
+U_CAPI int32_t U_EXPORT2
+ucol_getRulesEx(const UCollator *coll, UColRuleOption delta, UChar *buffer, int32_t bufferLen) {
+    UnicodeString text;
+    const RuleBasedCollator *impl = RuleBasedCollator::rbcFromUCollator(coll);
+    if(impl != nullptr || coll == nullptr) {
+        impl->getRules(delta, text);
+    }
+    if(buffer == nullptr || bufferLen <= 0) {
+        return text.length();
+    }
+    UErrorCode discarded = U_ZERO_ERROR;
+    return text.extract(buffer, bufferLen, discarded);
+}
+
+/* Same as ucol_getLocaleByType(): passes all arguments through and returns its result. */
+U_CAPI const char * U_EXPORT2
+ucol_getLocale(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
+    const char *localeID = ucol_getLocaleByType(coll, type, status);
+    return localeID;
+}
+
+/*
+ * Returns the locale ID from RuleBasedCollator::internalGetLocaleID().
+ * Returns NULL if *status already signals failure, and NULL with
+ * U_UNSUPPORTED_ERROR if a non-NULL coll is not a RuleBasedCollator.
+ */
+U_CAPI const char * U_EXPORT2
+ucol_getLocaleByType(const UCollator *coll, ULocDataLocaleType type, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return nullptr;
+    }
+    UTRACE_ENTRY(UTRACE_UCOL_GETLOCALE);
+    UTRACE_DATA1(UTRACE_INFO, "coll=%p", coll);
+
+    const char *localeID = nullptr;
+    const RuleBasedCollator *impl = RuleBasedCollator::rbcFromUCollator(coll);
+    if(impl != nullptr || coll == nullptr) {
+        localeID = impl->internalGetLocaleID(type, *status);
+    } else {
+        *status = U_UNSUPPORTED_ERROR;
+    }
+
+    UTRACE_DATA1(UTRACE_INFO, "result = %s", localeID);
+    UTRACE_EXIT_STATUS(*status);
+    return localeID;
+}
+
+/*
+ * Returns the set from Collator::getTailoredSet() as a USet.
+ * Returns NULL if *status signals failure before the call; if it signals
+ * failure afterwards the set is deleted and NULL is returned.
+ */
+U_CAPI USet * U_EXPORT2
+ucol_getTailoredSet(const UCollator *coll, UErrorCode *status) {
+    if(U_FAILURE(*status)) {
+        return nullptr;
+    }
+    UnicodeSet *tailored = Collator::fromUCollator(coll)->getTailoredSet(*status);
+    if(!U_FAILURE(*status)) {
+        return tailored->toUSet();
+    }
+    delete tailored;
+    return nullptr;
+}
+
+/*
+ * True when both handles are the same pointer, otherwise the result of
+ * Collator::operator== on the two underlying collators.
+ */
+U_CAPI UBool U_EXPORT2
+ucol_equals(const UCollator *source, const UCollator *target) {
+    const bool sameHandle = (source == target);
+    return sameHandle ||
+        (*Collator::fromUCollator(source) == *Collator::fromUCollator(target));
+}
+
+#endif /* #if !UCONFIG_NO_COLLATION */