/* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include #include #include "mozilla/intl/Collator.h" namespace mozilla::intl { Collator::Collator(UCollator* aCollator) : mCollator(aCollator) { MOZ_ASSERT(aCollator); } Collator::~Collator() { if (mCollator.GetMut()) { ucol_close(mCollator.GetMut()); } } Result, ICUError> Collator::TryCreate(const char* aLocale) { UErrorCode status = U_ZERO_ERROR; UCollator* collator = ucol_open(IcuLocale(aLocale), &status); if (U_FAILURE(status)) { return Err(ToICUError(status)); } return MakeUnique(collator); }; int32_t Collator::CompareStrings(Span aSource, Span aTarget) const { switch (ucol_strcoll(mCollator.GetConst(), aSource.data(), static_cast(aSource.size()), aTarget.data(), static_cast(aTarget.size()))) { case UCOL_LESS: return -1; case UCOL_EQUAL: return 0; case UCOL_GREATER: return 1; } MOZ_ASSERT_UNREACHABLE("ucol_strcoll returned bad UCollationResult"); return 0; } int32_t Collator::CompareSortKeys(Span aKey1, Span aKey2) const { size_t minLength = std::min(aKey1.Length(), aKey2.Length()); int32_t tmpResult = strncmp((const char*)aKey1.Elements(), (const char*)aKey2.Elements(), minLength); if (tmpResult < 0) { return -1; } if (tmpResult > 0) { return 1; } if (aKey1.Length() > minLength) { // First string contains second one, so comes later, hence return > 0. return 1; } if (aKey2.Length() > minLength) { // First string is a substring of second one, so comes earlier, // hence return < 0. return -1; } return 0; } static UColAttributeValue CaseFirstToICU(Collator::CaseFirst caseFirst) { switch (caseFirst) { case Collator::CaseFirst::False: return UCOL_OFF; case Collator::CaseFirst::Upper: return UCOL_UPPER_FIRST; case Collator::CaseFirst::Lower: return UCOL_LOWER_FIRST; } MOZ_ASSERT_UNREACHABLE(); return UCOL_DEFAULT; } void Collator::SetStrength(Collator::Strength aStrength) { UColAttributeValue strength; switch (aStrength) { case Collator::Strength::Default: strength = UCOL_DEFAULT_STRENGTH; break; case Collator::Strength::Primary: strength = UCOL_PRIMARY; break; case Collator::Strength::Secondary: strength = UCOL_SECONDARY; break; case Collator::Strength::Tertiary: strength = UCOL_TERTIARY; break; case Collator::Strength::Quaternary: strength = UCOL_QUATERNARY; break; case Collator::Strength::Identical: strength = UCOL_IDENTICAL; break; } ucol_setStrength(mCollator.GetMut(), strength); } ICUResult Collator::SetCaseLevel(Collator::Feature aFeature) { UErrorCode status = U_ZERO_ERROR; ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_LEVEL, ToUColAttributeValue(aFeature), &status); return ToICUResult(status); } ICUResult Collator::SetAlternateHandling( Collator::AlternateHandling aAlternateHandling) { UErrorCode status = U_ZERO_ERROR; UColAttributeValue handling; switch (aAlternateHandling) { case Collator::AlternateHandling::NonIgnorable: handling = UCOL_NON_IGNORABLE; break; case Collator::AlternateHandling::Shifted: handling = UCOL_SHIFTED; break; case Collator::AlternateHandling::Default: handling = UCOL_DEFAULT; break; } ucol_setAttribute(mCollator.GetMut(), UCOL_ALTERNATE_HANDLING, handling, &status); return ToICUResult(status); } ICUResult Collator::SetNumericCollation(Collator::Feature aFeature) { UErrorCode status = U_ZERO_ERROR; ucol_setAttribute(mCollator.GetMut(), UCOL_NUMERIC_COLLATION, ToUColAttributeValue(aFeature), &status); return ToICUResult(status); } ICUResult Collator::SetNormalizationMode(Collator::Feature aFeature) { UErrorCode status = U_ZERO_ERROR; ucol_setAttribute(mCollator.GetMut(), UCOL_NORMALIZATION_MODE, ToUColAttributeValue(aFeature), &status); return ToICUResult(status); } ICUResult Collator::SetCaseFirst(Collator::CaseFirst aCaseFirst) { UErrorCode status = U_ZERO_ERROR; ucol_setAttribute(mCollator.GetMut(), UCOL_CASE_FIRST, CaseFirstToICU(aCaseFirst), &status); return ToICUResult(status); } ICUResult Collator::SetOptions(const Options& aOptions, const Maybe aPrevOptions) { if (aPrevOptions && // Check the equality of the previous options. aPrevOptions->sensitivity == aOptions.sensitivity && aPrevOptions->caseFirst == aOptions.caseFirst && aPrevOptions->ignorePunctuation == aOptions.ignorePunctuation && aPrevOptions->numeric == aOptions.numeric) { return Ok(); } Collator::Strength strength = Collator::Strength::Default; Collator::Feature caseLevel = Collator::Feature::Off; switch (aOptions.sensitivity) { case Collator::Sensitivity::Base: strength = Collator::Strength::Primary; break; case Collator::Sensitivity::Accent: strength = Collator::Strength::Secondary; break; case Collator::Sensitivity::Case: caseLevel = Collator::Feature::On; strength = Collator::Strength::Primary; break; case Collator::Sensitivity::Variant: strength = Collator::Strength::Tertiary; break; } SetStrength(strength); ICUResult result = Ok(); // According to the ICU team, UCOL_SHIFTED causes punctuation to be // ignored. Looking at Unicode Technical Report 35, Unicode Locale Data // Markup Language, "shifted" causes whitespace and punctuation to be // ignored - that's a bit more than asked for, but there's no way to get // less. result = this->SetAlternateHandling( aOptions.ignorePunctuation ? Collator::AlternateHandling::Shifted : Collator::AlternateHandling::NonIgnorable); if (result.isErr()) { return result; } result = SetCaseLevel(caseLevel); if (result.isErr()) { return result; } result = SetNumericCollation(aOptions.numeric ? Collator::Feature::On : Collator::Feature::Off); if (result.isErr()) { return result; } // Normalization is always on to meet the canonical equivalence requirement. result = SetNormalizationMode(Collator::Feature::On); if (result.isErr()) { return result; } result = SetCaseFirst(aOptions.caseFirst); if (result.isErr()) { return result; } return Ok(); } Result Collator::GetCaseFirst() const { UErrorCode status = U_ZERO_ERROR; UColAttributeValue caseFirst = ucol_getAttribute(mCollator.GetConst(), UCOL_CASE_FIRST, &status); if (U_FAILURE(status)) { return Err(ToICUError(status)); } if (caseFirst == UCOL_OFF) { return CaseFirst::False; } if (caseFirst == UCOL_UPPER_FIRST) { return CaseFirst::Upper; } MOZ_ASSERT(caseFirst == UCOL_LOWER_FIRST); return CaseFirst::Lower; } Result Collator::GetIgnorePunctuation() const { UErrorCode status = U_ZERO_ERROR; UColAttributeValue alternateHandling = ucol_getAttribute(mCollator.GetConst(), UCOL_ALTERNATE_HANDLING, &status); if (U_FAILURE(status)) { return Err(ToICUError(status)); } MOZ_ASSERT(alternateHandling == UCOL_SHIFTED || alternateHandling == UCOL_NON_IGNORABLE); return alternateHandling == UCOL_SHIFTED; } /* static */ Result Collator::GetBcp47KeywordValuesForLocale(const char* aLocale, CommonlyUsed aCommonlyUsed) { UErrorCode status = U_ZERO_ERROR; UEnumeration* enumeration = ucol_getKeywordValuesForLocale( "collation", aLocale, static_cast(aCommonlyUsed), &status); if (U_SUCCESS(status)) { return Bcp47ExtEnumeration(enumeration); } return Err(ToICUError(status)); } /* static */ Result Collator::GetBcp47KeywordValues() { UErrorCode status = U_ZERO_ERROR; UEnumeration* enumeration = ucol_getKeywordValues("collation", &status); if (U_SUCCESS(status)) { return Bcp47ExtEnumeration(enumeration); } return Err(ToICUError(status)); } /* static */ SpanResult Collator::KeywordValueToBcp47Extension(const char* aKeyword, int32_t aLength) { if (aKeyword == nullptr) { return Err(InternalError{}); } return MakeStringSpan(uloc_toUnicodeLocaleType("co", aKeyword)); } } // namespace mozilla::intl