diff options
Diffstat (limited to 'intl/icu/source/common/norm2allmodes.h')
-rw-r--r-- | intl/icu/source/common/norm2allmodes.h | 405 |
1 files changed, 405 insertions, 0 deletions
diff --git a/intl/icu/source/common/norm2allmodes.h b/intl/icu/source/common/norm2allmodes.h new file mode 100644 index 0000000000..6347fba9cb --- /dev/null +++ b/intl/icu/source/common/norm2allmodes.h @@ -0,0 +1,405 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* norm2allmodes.h +* +* created on: 2014sep07 +* created by: Markus W. Scherer +*/ + +#ifndef __NORM2ALLMODES_H__ +#define __NORM2ALLMODES_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/edits.h" +#include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" +#include "unicode/unistr.h" +#include "cpputils.h" +#include "normalizer2impl.h" + +U_NAMESPACE_BEGIN + +// Intermediate class: +// Has Normalizer2Impl and does boilerplate argument checking and setup. +class Normalizer2WithImpl : public Normalizer2 { +public: + Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} + virtual ~Normalizer2WithImpl(); + + // normalize + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + const char16_t *sArray=src.getBuffer(); + if(&dest==&src || sArray==nullptr) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + dest.setToBogus(); + return dest; + } + dest.remove(); + ReorderingBuffer buffer(impl, dest); + if(buffer.init(src.length(), errorCode)) { + normalize(sArray, sArray+src.length(), buffer, errorCode); + } + return dest; + } + virtual void + normalize(const char16_t *src, const char16_t *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + + // normalize and append + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const override { + return normalizeSecondAndAppend(first, second, true, errorCode); + } + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const override { + return normalizeSecondAndAppend(first, second, false, errorCode); + } + UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(first, errorCode); + if(U_FAILURE(errorCode)) { + return first; + } + const char16_t *secondArray=second.getBuffer(); + if(&first==&second || secondArray==nullptr) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return first; + } + int32_t firstLength=first.length(); + UnicodeString safeMiddle; + { + ReorderingBuffer buffer(impl, first); + if(buffer.init(firstLength+second.length(), errorCode)) { + normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, + safeMiddle, buffer, errorCode); + } + } // The ReorderingBuffer destructor finalizes the first string. + if(U_FAILURE(errorCode)) { + // Restore the modified suffix of the first string. + first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); + } + return first; + } + virtual void + normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + virtual UBool + getDecomposition(UChar32 c, UnicodeString &decomposition) const override { + char16_t buffer[4]; + int32_t length; + const char16_t *d=impl.getDecomposition(c, buffer, length); + if(d==nullptr) { + return false; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) + } else { + decomposition.setTo(false, d, length); // read-only alias + } + return true; + } + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override { + char16_t buffer[30]; + int32_t length; + const char16_t *d=impl.getRawDecomposition(c, buffer, length); + if(d==nullptr) { + return false; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) + } else { + decomposition.setTo(false, d, length); // read-only alias + } + return true; + } + virtual UChar32 + composePair(UChar32 a, UChar32 b) const override { + return impl.composePair(a, b); + } + + virtual uint8_t + getCombiningClass(UChar32 c) const override { + return impl.getCC(impl.getNorm16(c)); + } + + // quick checks + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + return false; + } + const char16_t *sArray=s.getBuffer(); + if(sArray==nullptr) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return false; + } + const char16_t *sLimit=sArray+s.length(); + return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override { + return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; + } + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + return 0; + } + const char16_t *sArray=s.getBuffer(); + if(sArray==nullptr) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); + } + virtual const char16_t * + spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0; + + virtual UNormalizationCheckResult getQuickCheck(UChar32) const { + return UNORM_YES; + } + + const Normalizer2Impl &impl; +}; + +class DecomposeNormalizer2 : public Normalizer2WithImpl { +public: + DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~DecomposeNormalizer2(); + +private: + virtual void + normalize(const char16_t *src, const char16_t *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const override { + impl.decompose(src, limit, &buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + virtual void + normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const override { + impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); + } + + void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const override { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); + impl.decomposeUTF8(options, s, s + src.length(), &sink, edits, errorCode); + sink.Flush(); + } + virtual UBool + isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + return false; + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); + const uint8_t *sLimit = s + sp.length(); + return sLimit == impl.decomposeUTF8(0, s, sLimit, nullptr, nullptr, errorCode); + } + + virtual const char16_t * + spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override { + return impl.decompose(src, limit, nullptr, errorCode); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override { + return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; + } + virtual UBool hasBoundaryBefore(UChar32 c) const override { + return impl.hasDecompBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const override { + return impl.hasDecompBoundaryAfter(c); + } + virtual UBool isInert(UChar32 c) const override { + return impl.isDecompInert(c); + } +}; + +class ComposeNormalizer2 : public Normalizer2WithImpl { +public: + ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : + Normalizer2WithImpl(ni), onlyContiguous(fcc) {} + virtual ~ComposeNormalizer2(); + +private: + virtual void + normalize(const char16_t *src, const char16_t *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const override { + impl.compose(src, limit, onlyContiguous, true, buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + + void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const override { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); + impl.composeUTF8(options, onlyContiguous, s, s + src.length(), + &sink, edits, errorCode); + sink.Flush(); + } + + virtual void + normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const override { + impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); + } + + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + return false; + } + const char16_t *sArray=s.getBuffer(); + if(sArray==nullptr) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return false; + } + UnicodeString temp; + ReorderingBuffer buffer(impl, temp); + if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization + return false; + } + return impl.compose(sArray, sArray+s.length(), onlyContiguous, false, buffer, errorCode); + } + virtual UBool + isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + return false; + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); + return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override { + if(U_FAILURE(errorCode)) { + return UNORM_MAYBE; + } + const char16_t *sArray=s.getBuffer(); + if(sArray==nullptr) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UNORM_MAYBE; + } + UNormalizationCheckResult qcResult=UNORM_YES; + impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); + return qcResult; + } + virtual const char16_t * + spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override { + return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override { + return impl.getCompQuickCheck(impl.getNorm16(c)); + } + virtual UBool hasBoundaryBefore(UChar32 c) const override { + return impl.hasCompBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const override { + return impl.hasCompBoundaryAfter(c, onlyContiguous); + } + virtual UBool isInert(UChar32 c) const override { + return impl.isCompInert(c, onlyContiguous); + } + + const UBool onlyContiguous; +}; + +class FCDNormalizer2 : public Normalizer2WithImpl { +public: + FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~FCDNormalizer2(); + +private: + virtual void + normalize(const char16_t *src, const char16_t *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const override { + impl.makeFCD(src, limit, &buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + virtual void + normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const override { + impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); + } + virtual const char16_t * + spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override { + return impl.makeFCD(src, limit, nullptr, errorCode); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UBool hasBoundaryBefore(UChar32 c) const override { + return impl.hasFCDBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const override { + return impl.hasFCDBoundaryAfter(c); + } + virtual UBool isInert(UChar32 c) const override { + return impl.isFCDInert(c); + } +}; + +struct Norm2AllModes : public UMemory { + Norm2AllModes(Normalizer2Impl *i) + : impl(i), comp(*i, false), decomp(*i), fcd(*i), fcc(*i, true) {} + ~Norm2AllModes(); + + static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); + static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); + static Norm2AllModes *createInstance(const char *packageName, + const char *name, + UErrorCode &errorCode); + + static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); + + Normalizer2Impl *impl; + ComposeNormalizer2 comp; + DecomposeNormalizer2 decomp; + FCDNormalizer2 fcd; + ComposeNormalizer2 fcc; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION +#endif // __NORM2ALLMODES_H__ |