diff options
Diffstat (limited to 'intl/icu/source/common/norm2allmodes.h')
-rw-r--r-- | intl/icu/source/common/norm2allmodes.h | 369 |
1 files changed, 369 insertions, 0 deletions
diff --git a/intl/icu/source/common/norm2allmodes.h b/intl/icu/source/common/norm2allmodes.h new file mode 100644 index 0000000000..682ece28f1 --- /dev/null +++ b/intl/icu/source/common/norm2allmodes.h @@ -0,0 +1,369 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +******************************************************************************* +* Copyright (C) 2014, International Business Machines +* Corporation and others. All Rights Reserved. +******************************************************************************* +* norm2allmodes.h +* +* created on: 2014sep07 +* created by: Markus W. Scherer +*/ + +#ifndef __NORM2ALLMODES_H__ +#define __NORM2ALLMODES_H__ + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_NORMALIZATION + +#include "unicode/edits.h" +#include "unicode/normalizer2.h" +#include "unicode/stringoptions.h" +#include "unicode/unistr.h" +#include "cpputils.h" +#include "normalizer2impl.h" + +U_NAMESPACE_BEGIN + +// Intermediate class: +// Has Normalizer2Impl and does boilerplate argument checking and setup. +class Normalizer2WithImpl : public Normalizer2 { +public: + Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {} + virtual ~Normalizer2WithImpl(); + + // normalize + virtual UnicodeString & + normalize(const UnicodeString &src, + UnicodeString &dest, + UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + dest.setToBogus(); + return dest; + } + const UChar *sArray=src.getBuffer(); + if(&dest==&src || sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + dest.setToBogus(); + return dest; + } + dest.remove(); + ReorderingBuffer buffer(impl, dest); + if(buffer.init(src.length(), errorCode)) { + normalize(sArray, sArray+src.length(), buffer, errorCode); + } + return dest; + } + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + + // normalize and append + virtual UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, TRUE, errorCode); + } + virtual UnicodeString & + append(UnicodeString &first, + const UnicodeString &second, + UErrorCode &errorCode) const { + return normalizeSecondAndAppend(first, second, FALSE, errorCode); + } + UnicodeString & + normalizeSecondAndAppend(UnicodeString &first, + const UnicodeString &second, + UBool doNormalize, + UErrorCode &errorCode) const { + uprv_checkCanGetBuffer(first, errorCode); + if(U_FAILURE(errorCode)) { + return first; + } + const UChar *secondArray=second.getBuffer(); + if(&first==&second || secondArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return first; + } + int32_t firstLength=first.length(); + UnicodeString safeMiddle; + { + ReorderingBuffer buffer(impl, first); + if(buffer.init(firstLength+second.length(), errorCode)) { + normalizeAndAppend(secondArray, secondArray+second.length(), doNormalize, + safeMiddle, buffer, errorCode); + } + } // The ReorderingBuffer destructor finalizes the first string. + if(U_FAILURE(errorCode)) { + // Restore the modified suffix of the first string. + first.replace(firstLength-safeMiddle.length(), 0x7fffffff, safeMiddle); + } + return first; + } + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0; + virtual UBool + getDecomposition(UChar32 c, UnicodeString &decomposition) const { + UChar buffer[4]; + int32_t length; + const UChar *d=impl.getDecomposition(c, buffer, length); + if(d==NULL) { + return FALSE; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (Jamos from Hangul syllable c) + } else { + decomposition.setTo(FALSE, d, length); // read-only alias + } + return TRUE; + } + virtual UBool + getRawDecomposition(UChar32 c, UnicodeString &decomposition) const { + UChar buffer[30]; + int32_t length; + const UChar *d=impl.getRawDecomposition(c, buffer, length); + if(d==NULL) { + return FALSE; + } + if(d==buffer) { + decomposition.setTo(buffer, length); // copy the string (algorithmic decomposition) + } else { + decomposition.setTo(FALSE, d, length); // read-only alias + } + return TRUE; + } + virtual UChar32 + composePair(UChar32 a, UChar32 b) const { + return impl.composePair(a, b); + } + + virtual uint8_t + getCombiningClass(UChar32 c) const { + return impl.getCC(impl.getNorm16(c)); + } + + // quick checks + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + const UChar *sLimit=sArray+s.length(); + return sLimit==spanQuickCheckYes(sArray, sLimit, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const { + return Normalizer2WithImpl::isNormalized(s, errorCode) ? UNORM_YES : UNORM_NO; + } + virtual int32_t + spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const { + if(U_FAILURE(errorCode)) { + return 0; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return 0; + } + return (int32_t)(spanQuickCheckYes(sArray, sArray+s.length(), errorCode)-sArray); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const = 0; + + virtual UNormalizationCheckResult getQuickCheck(UChar32) const { + return UNORM_YES; + } + + const Normalizer2Impl &impl; +}; + +class DecomposeNormalizer2 : public Normalizer2WithImpl { +public: + DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~DecomposeNormalizer2(); + +private: + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.decompose(src, limit, &buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return impl.decompose(src, limit, NULL, errorCode); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const { + return impl.isDecompYes(impl.getNorm16(c)) ? UNORM_YES : UNORM_NO; + } + virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasDecompBoundaryBefore(c); } + virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasDecompBoundaryAfter(c); } + virtual UBool isInert(UChar32 c) const { return impl.isDecompInert(c); } +}; + +class ComposeNormalizer2 : public Normalizer2WithImpl { +public: + ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) : + Normalizer2WithImpl(ni), onlyContiguous(fcc) {} + virtual ~ComposeNormalizer2(); + +private: + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { + impl.compose(src, limit, onlyContiguous, TRUE, buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + + void + normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink, + Edits *edits, UErrorCode &errorCode) const U_OVERRIDE { + if (U_FAILURE(errorCode)) { + return; + } + if (edits != nullptr && (options & U_EDITS_NO_RESET) == 0) { + edits->reset(); + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(src.data()); + impl.composeUTF8(options, onlyContiguous, s, s + src.length(), + &sink, edits, errorCode); + sink.Flush(); + } + + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const U_OVERRIDE { + impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode); + } + + virtual UBool + isNormalized(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return FALSE; + } + UnicodeString temp; + ReorderingBuffer buffer(impl, temp); + if(!buffer.init(5, errorCode)) { // small destCapacity for substring normalization + return FALSE; + } + return impl.compose(sArray, sArray+s.length(), onlyContiguous, FALSE, buffer, errorCode); + } + virtual UBool + isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return FALSE; + } + const uint8_t *s = reinterpret_cast<const uint8_t *>(sp.data()); + return impl.composeUTF8(0, onlyContiguous, s, s + sp.length(), nullptr, nullptr, errorCode); + } + virtual UNormalizationCheckResult + quickCheck(const UnicodeString &s, UErrorCode &errorCode) const U_OVERRIDE { + if(U_FAILURE(errorCode)) { + return UNORM_MAYBE; + } + const UChar *sArray=s.getBuffer(); + if(sArray==NULL) { + errorCode=U_ILLEGAL_ARGUMENT_ERROR; + return UNORM_MAYBE; + } + UNormalizationCheckResult qcResult=UNORM_YES; + impl.composeQuickCheck(sArray, sArray+s.length(), onlyContiguous, &qcResult); + return qcResult; + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &) const U_OVERRIDE { + return impl.composeQuickCheck(src, limit, onlyContiguous, NULL); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const U_OVERRIDE { + return impl.getCompQuickCheck(impl.getNorm16(c)); + } + virtual UBool hasBoundaryBefore(UChar32 c) const U_OVERRIDE { + return impl.hasCompBoundaryBefore(c); + } + virtual UBool hasBoundaryAfter(UChar32 c) const U_OVERRIDE { + return impl.hasCompBoundaryAfter(c, onlyContiguous); + } + virtual UBool isInert(UChar32 c) const U_OVERRIDE { + return impl.isCompInert(c, onlyContiguous); + } + + const UBool onlyContiguous; +}; + +class FCDNormalizer2 : public Normalizer2WithImpl { +public: + FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {} + virtual ~FCDNormalizer2(); + +private: + virtual void + normalize(const UChar *src, const UChar *limit, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.makeFCD(src, limit, &buffer, errorCode); + } + using Normalizer2WithImpl::normalize; // Avoid warning about hiding base class function. + virtual void + normalizeAndAppend(const UChar *src, const UChar *limit, UBool doNormalize, + UnicodeString &safeMiddle, + ReorderingBuffer &buffer, UErrorCode &errorCode) const { + impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode); + } + virtual const UChar * + spanQuickCheckYes(const UChar *src, const UChar *limit, UErrorCode &errorCode) const { + return impl.makeFCD(src, limit, NULL, errorCode); + } + using Normalizer2WithImpl::spanQuickCheckYes; // Avoid warning about hiding base class function. + virtual UBool hasBoundaryBefore(UChar32 c) const { return impl.hasFCDBoundaryBefore(c); } + virtual UBool hasBoundaryAfter(UChar32 c) const { return impl.hasFCDBoundaryAfter(c); } + virtual UBool isInert(UChar32 c) const { return impl.isFCDInert(c); } +}; + +struct Norm2AllModes : public UMemory { + Norm2AllModes(Normalizer2Impl *i) + : impl(i), comp(*i, FALSE), decomp(*i), fcd(*i), fcc(*i, TRUE) {} + ~Norm2AllModes(); + + static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode); + static Norm2AllModes *createNFCInstance(UErrorCode &errorCode); + static Norm2AllModes *createInstance(const char *packageName, + const char *name, + UErrorCode &errorCode); + + static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode); + static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode); + + Normalizer2Impl *impl; + ComposeNormalizer2 comp; + DecomposeNormalizer2 decomp; + FCDNormalizer2 fcd; + ComposeNormalizer2 fcc; +}; + +U_NAMESPACE_END + +#endif // !UCONFIG_NO_NORMALIZATION +#endif // __NORM2ALLMODES_H__ |