summaryrefslogtreecommitdiffstats
path: root/intl/components/src/Collator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/components/src/Collator.cpp')
-rw-r--r--intl/components/src/Collator.cpp305
1 files changed, 305 insertions, 0 deletions
diff --git a/intl/components/src/Collator.cpp b/intl/components/src/Collator.cpp
new file mode 100644
index 0000000000..93052932de
--- /dev/null
+++ b/intl/components/src/Collator.cpp
@@ -0,0 +1,305 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include <algorithm>
+#include <string.h>
+#include "mozilla/intl/Collator.h"
+
+namespace mozilla::intl {
+
+Collator::Collator(UCollator* aCollator) : mCollator(aCollator) {
+ MOZ_ASSERT(aCollator);
+}
+
+Collator::~Collator() {
+ if (mCollator.GetMut()) {
+ ucol_close(mCollator.GetMut());
+ }
+}
+
+Result<UniquePtr<Collator>, ICUError> Collator::TryCreate(const char* aLocale) {
+ UErrorCode status = U_ZERO_ERROR;
+ UCollator* collator = ucol_open(IcuLocale(aLocale), &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+ return MakeUnique<Collator>(collator);
+};
+
+int32_t Collator::CompareStrings(Span<const char16_t> aSource,
+ Span<const char16_t> aTarget) const {
+ switch (ucol_strcoll(mCollator.GetConst(), aSource.data(),
+ static_cast<int32_t>(aSource.size()), aTarget.data(),
+ static_cast<int32_t>(aTarget.size()))) {
+ case UCOL_LESS:
+ return -1;
+ case UCOL_EQUAL:
+ return 0;
+ case UCOL_GREATER:
+ return 1;
+ }
+ MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult");
+ return 0;
+}
+
+int32_t Collator::CompareSortKeys(Span<const uint8_t> aKey1,
+ Span<const uint8_t> aKey2) const {
+ size_t minLength = std::min(aKey1.Length(), aKey2.Length());
+ int32_t tmpResult = strncmp((const char*)aKey1.Elements(),
+ (const char*)aKey2.Elements(), minLength);
+ if (tmpResult < 0) {
+ return -1;
+ }
+ if (tmpResult > 0) {
+ return 1;
+ }
+ if (aKey1.Length() > minLength) {
+ // First string contains second one, so comes later, hence return > 0.
+ return 1;
+ }
+ if (aKey2.Length() > minLength) {
+ // First string is a substring of second one, so comes earlier,
+ // hence return < 0.
+ return -1;
+ }
+ return 0;
+}
+
+static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) {
+ switch (caseFirst) {
+ case Collator::CaseFirst::False:
+ return UCOL_OFF;
+ case Collator::CaseFirst::Upper:
+ return UCOL_UPPER_FIRST;
+ case Collator::CaseFirst::Lower:
+ return UCOL_LOWER_FIRST;
+ }
+
+ MOZ_ASSERT_UNREACHABLE();
+ return UCOL_DEFAULT;
+}
+
+// Define this as a macro to work around exposing the UColAttributeValue type to
+// the header file. Collation::Feature is private to the class.
+#define FEATURE_TO_ICU(featureICU, feature) \
+ switch (feature) { \
+ case Collator::Feature::On: \
+ (featureICU) = UCOL_ON; \
+ break; \
+ case Collator::Feature::Off: \
+ (featureICU) = UCOL_OFF; \
+ break; \
+ case Collator::Feature::Default: \
+ (featureICU) = UCOL_DEFAULT; \
+ break; \
+ }
+
+void Collator::SetStrength(Collator::Strength aStrength) {
+ UColAttributeValue strength;
+ switch (aStrength) {
+ case Collator::Strength::Default:
+ strength = UCOL_DEFAULT_STRENGTH;
+ break;
+ case Collator::Strength::Primary:
+ strength = UCOL_PRIMARY;
+ break;
+ case Collator::Strength::Secondary:
+ strength = UCOL_SECONDARY;
+ break;
+ case Collator::Strength::Tertiary:
+ strength = UCOL_TERTIARY;
+ break;
+ case Collator::Strength::Quaternary:
+ strength = UCOL_QUATERNARY;
+ break;
+ case Collator::Strength::Identical:
+ strength = UCOL_IDENTICAL;
+ break;
+ }
+
+ ucol_setStrength(mCollator.GetMut(), strength);
+}
+
+ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) {
+ UErrorCode status = U_ZERO_ERROR;
+ UColAttributeValue featureICU;
+ FEATURE_TO_ICU(featureICU, aFeature);
+ ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, featureICU, &status);
+ return ToICUResult(status);
+}
+
+ICUResult Collator::SetAlternateHandling(
+ Collator::AlternateHandling aAlternateHandling) {
+ UErrorCode status = U_ZERO_ERROR;
+ UColAttributeValue handling;
+ switch (aAlternateHandling) {
+ case Collator::AlternateHandling::NonIgnorable:
+ handling = UCOL_NON_IGNORABLE;
+ break;
+ case Collator::AlternateHandling::Shifted:
+ handling = UCOL_SHIFTED;
+ break;
+ case Collator::AlternateHandling::Default:
+ handling = UCOL_DEFAULT;
+ break;
+ }
+
+ ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling,
+ &status);
+ return ToICUResult(status);
+}
+
+ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) {
+ UErrorCode status = U_ZERO_ERROR;
+ UColAttributeValue featureICU;
+ FEATURE_TO_ICU(featureICU, aFeature);
+
+ ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, featureICU,
+ &status);
+ return ToICUResult(status);
+}
+
+ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) {
+ UErrorCode status = U_ZERO_ERROR;
+ UColAttributeValue featureICU;
+ FEATURE_TO_ICU(featureICU, aFeature);
+ ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, featureICU,
+ &status);
+ return ToICUResult(status);
+}
+
+ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) {
+ UErrorCode status = U_ZERO_ERROR;
+ ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST,
+ CaseFirstToICU(aCaseFirst), &status);
+ return ToICUResult(status);
+}
+
+ICUResult Collator::SetOptions(const Options& aOptions,
+ const Maybe<Options&> aPrevOptions) {
+ if (aPrevOptions &&
+ // Check the equality of the previous options.
+ aPrevOptions->sensitivity == aOptions.sensitivity &&
+ aPrevOptions->caseFirst == aOptions.caseFirst &&
+ aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation &&
+ aPrevOptions->numeric == aOptions.numeric) {
+ return Ok();
+ }
+
+ Collator::Strength strength = Collator::Strength::Default;
+ Collator::Feature caseLevel = Collator::Feature::Off;
+ switch (aOptions.sensitivity) {
+ case Collator::Sensitivity::Base:
+ strength = Collator::Strength::Primary;
+ break;
+ case Collator::Sensitivity::Accent:
+ strength = Collator::Strength::Secondary;
+ break;
+ case Collator::Sensitivity::Case:
+ caseLevel = Collator::Feature::On;
+ strength = Collator::Strength::Primary;
+ break;
+ case Collator::Sensitivity::Variant:
+ strength = Collator::Strength::Tertiary;
+ break;
+ }
+
+ SetStrength(strength);
+
+ ICUResult result = Ok();
+
+ // According to the ICU team, UCOL_SHIFTED causes punctuation to be
+ // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data
+ // Markup Language, "shifted" causes whitespace and punctuation to be
+ // ignored - that's a bit more than asked for, but there's no way to get
+ // less.
+ result = this->SetAlternateHandling(
+ aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted
+ : Collator::AlternateHandling::Default);
+ if (result.isErr()) {
+ return result;
+ }
+
+ result = SetCaseLevel(caseLevel);
+ if (result.isErr()) {
+ return result;
+ }
+
+ result = SetNumericCollation(aOptions.numeric ? Collator::Feature::On
+ : Collator::Feature::Off);
+ if (result.isErr()) {
+ return result;
+ }
+
+ // Normalization is always on to meet the canonical equivalence requirement.
+ result = SetNormalizationMode(Collator::Feature::On);
+ if (result.isErr()) {
+ return result;
+ }
+
+ result = SetCaseFirst(aOptions.caseFirst);
+ if (result.isErr()) {
+ return result;
+ }
+ return Ok();
+}
+
+#undef FEATURE_TO_ICU
+
+Result<Collator::CaseFirst, ICUError> Collator::GetCaseFirst() const {
+ UErrorCode status = U_ZERO_ERROR;
+ UColAttributeValue caseFirst =
+ ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status);
+ if (U_FAILURE(status)) {
+ return Err(ToICUError(status));
+ }
+
+ if (caseFirst == UCOL_OFF) {
+ return CaseFirst::False;
+ }
+ if (caseFirst == UCOL_UPPER_FIRST) {
+ return CaseFirst::Upper;
+ }
+ MOZ_ASSERT(caseFirst == UCOL_LOWER_FIRST);
+ return CaseFirst::Lower;
+}
+
+/* static */
+Result<Collator::Bcp47ExtEnumeration, ICUError>
+Collator::GetBcp47KeywordValuesForLocale(const char* aLocale,
+ CommonlyUsed aCommonlyUsed) {
+ UErrorCode status = U_ZERO_ERROR;
+ UEnumeration* enumeration = ucol_getKeywordValuesForLocale(
+ "collation", aLocale, static_cast<bool>(aCommonlyUsed), &status);
+
+ if (U_SUCCESS(status)) {
+ return Bcp47ExtEnumeration(enumeration);
+ }
+
+ return Err(ToICUError(status));
+}
+
+/* static */
+Result<Collator::Bcp47ExtEnumeration, ICUError>
+Collator::GetBcp47KeywordValues() {
+ UErrorCode status = U_ZERO_ERROR;
+ UEnumeration* enumeration = ucol_getKeywordValues("collation", &status);
+
+ if (U_SUCCESS(status)) {
+ return Bcp47ExtEnumeration(enumeration);
+ }
+
+ return Err(ToICUError(status));
+}
+
+/* static */
+SpanResult<char> Collator::KeywordValueToBcp47Extension(const char* aKeyword,
+ int32_t aLength) {
+ if (aKeyword == nullptr) {
+ return Err(InternalError{});
+ }
+ return MakeStringSpan(uloc_toUnicodeLocaleType("co", aKeyword));
+}
+
+} // namespace mozilla::intl