summary refs log tree commit diff stats
path: root/intl/icu/source/common/norm2allmodes.h
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/common/norm2allmodes.h')
-rw-r--r-- intl/icu/source/common/norm2allmodes.h 405
1 files changed, 405 insertions, 0 deletions
diff --git a/intl/icu/source/common/norm2allmodes.h b/intl/icu/source/common/norm2allmodes.h
new file mode 100644
index 0000000000..6347fba9cb
--- /dev/null
+++ b/intl/icu/source/common/norm2allmodes.h
@@ -0,0 +1,405 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2014, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* norm2allmodes.h
+*
+* created on: 2014sep07
+* created by: Markus W. Scherer
+*/
+
+#ifndef __NORM2ALLMODES_H__
+#define __NORM2ALLMODES_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_NORMALIZATION
+
+#include "unicode/edits.h"
+#include "unicode/normalizer2.h"
+#include "unicode/stringoptions.h"
+#include "unicode/unistr.h"
+#include "cpputils.h"
+#include "normalizer2impl.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * Intermediate base class for the concrete normalizer modes in this header.
+ * It holds a reference to a Normalizer2Impl, performs the boilerplate
+ * argument checking and buffer setup for the UnicodeString-based API,
+ * and forwards the real work to pure virtual pointer-range functions
+ * that each subclass implements.
+ */
+class Normalizer2WithImpl : public Normalizer2 {
+public:
+    Normalizer2WithImpl(const Normalizer2Impl &ni) : impl(ni) {}
+    virtual ~Normalizer2WithImpl();
+
+    // normalize
+
+    /**
+     * Normalizes src into dest and returns dest.
+     * dest is set to bogus when errorCode already indicates a failure on entry.
+     * When src and dest are the same object, or src has no readable buffer,
+     * errorCode is set to U_ILLEGAL_ARGUMENT_ERROR and dest is set to bogus.
+     */
+    virtual UnicodeString &
+    normalize(const UnicodeString &src,
+              UnicodeString &dest,
+              UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            dest.setToBogus();
+            return dest;
+        }
+        const char16_t *const srcChars=src.getBuffer();
+        if(srcChars==nullptr || &dest==&src) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            dest.setToBogus();
+            return dest;
+        }
+        dest.remove();
+        ReorderingBuffer buffer(impl, dest);
+        if(!buffer.init(src.length(), errorCode)) {
+            return dest;
+        }
+        normalize(srcChars, srcChars+src.length(), buffer, errorCode);
+        return dest;
+    }
+    /** Mode-specific normalization of [src, limit) into buffer. */
+    virtual void
+    normalize(const char16_t *src, const char16_t *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
+
+    // normalize and append
+
+    /** Same as the four-argument overload with doNormalize=true. */
+    virtual UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UErrorCode &errorCode) const override {
+        return normalizeSecondAndAppend(first, second, true, errorCode);
+    }
+    /** Same as the four-argument normalizeSecondAndAppend() with doNormalize=false. */
+    virtual UnicodeString &
+    append(UnicodeString &first,
+           const UnicodeString &second,
+           UErrorCode &errorCode) const override {
+        return normalizeSecondAndAppend(first, second, false, errorCode);
+    }
+    /**
+     * Shared implementation of normalizeSecondAndAppend() and append().
+     * Appends second to first via normalizeAndAppend(), passing doNormalize through.
+     * When first and second are the same object, or second has no readable
+     * buffer, errorCode is set to U_ILLEGAL_ARGUMENT_ERROR and first is
+     * returned unchanged.
+     * If errorCode indicates a failure afterwards, the tail of first is
+     * replaced with the contents of safeMiddle.
+     * @return first
+     */
+    UnicodeString &
+    normalizeSecondAndAppend(UnicodeString &first,
+                             const UnicodeString &second,
+                             UBool doNormalize,
+                             UErrorCode &errorCode) const {
+        uprv_checkCanGetBuffer(first, errorCode);
+        if(U_FAILURE(errorCode)) {
+            return first;
+        }
+        const char16_t *const secondChars=second.getBuffer();
+        if(secondChars==nullptr || &first==&second) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return first;
+        }
+        const int32_t originalLength=first.length();
+        UnicodeString safeMiddle;
+        {
+            ReorderingBuffer buffer(impl, first);
+            if(buffer.init(originalLength+second.length(), errorCode)) {
+                const char16_t *const secondLimit=secondChars+second.length();
+                normalizeAndAppend(secondChars, secondLimit, doNormalize,
+                                   safeMiddle, buffer, errorCode);
+            }
+        }  // Leaving this scope runs the ReorderingBuffer destructor, which finalizes first.
+        if(U_FAILURE(errorCode)) {
+            // Put back the suffix of first that was modified.
+            const int32_t restoreStart=originalLength-safeMiddle.length();
+            first.replace(restoreStart, 0x7fffffff, safeMiddle);
+        }
+        return first;
+    }
+    /** Mode-specific append of [src, limit) to buffer; see normalizeSecondAndAppend(). */
+    virtual void
+    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const = 0;
+    /**
+     * Fetches the decomposition of c from impl.
+     * @return false if impl reports none (nullptr), otherwise true with
+     *         decomposition set.
+     */
+    virtual UBool
+    getDecomposition(UChar32 c, UnicodeString &decomposition) const override {
+        char16_t scratch[4];
+        int32_t length;
+        const char16_t *const mapping=impl.getDecomposition(c, scratch, length);
+        if(mapping==nullptr) {
+            return false;
+        }
+        if(mapping!=scratch) {
+            decomposition.setTo(false, mapping, length);  // read-only alias
+        } else {
+            // The result was written into the local array, so it must be copied
+            // (Jamos from Hangul syllable c).
+            decomposition.setTo(scratch, length);
+        }
+        return true;
+    }
+    /**
+     * Fetches the raw decomposition of c from impl.
+     * @return false if impl reports none (nullptr), otherwise true with
+     *         decomposition set.
+     */
+    virtual UBool
+    getRawDecomposition(UChar32 c, UnicodeString &decomposition) const override {
+        char16_t scratch[30];
+        int32_t length;
+        const char16_t *const mapping=impl.getRawDecomposition(c, scratch, length);
+        if(mapping==nullptr) {
+            return false;
+        }
+        if(mapping!=scratch) {
+            decomposition.setTo(false, mapping, length);  // read-only alias
+        } else {
+            // The result was written into the local array, so it must be copied
+            // (algorithmic decomposition).
+            decomposition.setTo(scratch, length);
+        }
+        return true;
+    }
+    /** Forwards to impl.composePair(). */
+    virtual UChar32
+    composePair(UChar32 a, UChar32 b) const override {
+        return impl.composePair(a, b);
+    }
+
+    /** Returns impl.getCC() of the norm16 value that impl reports for c. */
+    virtual uint8_t
+    getCombiningClass(UChar32 c) const override {
+        return impl.getCC(impl.getNorm16(c));
+    }
+
+    // quick checks
+
+    /**
+     * Returns true if spanQuickCheckYes() spans all of s.
+     * Returns false on a pre-existing failure; a string without a readable
+     * buffer sets U_ILLEGAL_ARGUMENT_ERROR and returns false.
+     */
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const char16_t *const sChars=s.getBuffer();
+        if(sChars==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return false;
+        }
+        const char16_t *const sLimit=sChars+s.length();
+        const char16_t *const spanEnd=spanQuickCheckYes(sChars, sLimit, errorCode);
+        return spanEnd==sLimit;
+    }
+    /**
+     * Maps this class's own isNormalized() (called non-virtually)
+     * to UNORM_YES or UNORM_NO.
+     */
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
+        if(Normalizer2WithImpl::isNormalized(s, errorCode)) {
+            return UNORM_YES;
+        }
+        return UNORM_NO;
+    }
+    /**
+     * Returns the index in s at which the pointer-range spanQuickCheckYes() stops.
+     * Returns 0 on a pre-existing failure; a string without a readable
+     * buffer sets U_ILLEGAL_ARGUMENT_ERROR and returns 0.
+     */
+    virtual int32_t
+    spanQuickCheckYes(const UnicodeString &s, UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            return 0;
+        }
+        const char16_t *const sChars=s.getBuffer();
+        if(sChars==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return 0;
+        }
+        const char16_t *const spanEnd=spanQuickCheckYes(sChars, sChars+s.length(), errorCode);
+        return static_cast<int32_t>(spanEnd-sChars);
+    }
+    /** Mode-specific quick-check span over [src, limit); returns a pointer into that range. */
+    virtual const char16_t *
+    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const = 0;
+
+    /** Per-code-point quick check; this default answers UNORM_YES for every input. */
+    virtual UNormalizationCheckResult getQuickCheck(UChar32) const {
+        return UNORM_YES;
+    }
+
+    const Normalizer2Impl &impl;
+};
+
+/**
+ * Normalizer2WithImpl subclass whose operations forward to the
+ * decompose*() and *Decomp*() functions of the Normalizer2Impl.
+ */
+class DecomposeNormalizer2 : public Normalizer2WithImpl {
+public:
+    DecomposeNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+    virtual ~DecomposeNormalizer2();
+
+private:
+    /** Decomposes [src, limit) into buffer via impl.decompose(). */
+    virtual void
+    normalize(const char16_t *src, const char16_t *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+        impl.decompose(src, limit, &buffer, errorCode);
+    }
+    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
+    /** Forwards to impl.decomposeAndAppend(). */
+    virtual void
+    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+        impl.decomposeAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
+    }
+
+    /**
+     * Decomposes the UTF-8 text in src into sink, then flushes sink.
+     * Does nothing if errorCode already indicates a failure.
+     * A non-null edits is reset first unless options contains U_EDITS_NO_RESET.
+     */
+    void
+    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                  Edits *edits, UErrorCode &errorCode) const override {
+        if (U_FAILURE(errorCode)) {
+            return;
+        }
+        if (edits != nullptr) {
+            if ((options & U_EDITS_NO_RESET) == 0) {
+                edits->reset();
+            }
+        }
+        const uint8_t *const start = reinterpret_cast<const uint8_t *>(src.data());
+        const uint8_t *const limit = start + src.length();
+        impl.decomposeUTF8(options, start, limit, &sink, edits, errorCode);
+        sink.Flush();
+    }
+    /**
+     * Returns true if impl.decomposeUTF8(), called without a sink,
+     * returns the end of sp. Returns false on a pre-existing failure.
+     */
+    virtual UBool
+    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const uint8_t *const start = reinterpret_cast<const uint8_t *>(sp.data());
+        const uint8_t *const limit = start + sp.length();
+        const uint8_t *const spanEnd =
+            impl.decomposeUTF8(0, start, limit, nullptr, nullptr, errorCode);
+        return spanEnd == limit;
+    }
+
+    /** Returns what impl.decompose() returns when called without an output buffer. */
+    virtual const char16_t *
+    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
+        return impl.decompose(src, limit, nullptr, errorCode);
+    }
+    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
+    /** UNORM_YES if impl.isDecompYes() holds for the norm16 value of c, else UNORM_NO. */
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
+        if(impl.isDecompYes(impl.getNorm16(c))) {
+            return UNORM_YES;
+        }
+        return UNORM_NO;
+    }
+    /** Forwards to impl.hasDecompBoundaryBefore(). */
+    virtual UBool hasBoundaryBefore(UChar32 c) const override {
+        return impl.hasDecompBoundaryBefore(c);
+    }
+    /** Forwards to impl.hasDecompBoundaryAfter(). */
+    virtual UBool hasBoundaryAfter(UChar32 c) const override {
+        return impl.hasDecompBoundaryAfter(c);
+    }
+    /** Forwards to impl.isDecompInert(). */
+    virtual UBool isInert(UChar32 c) const override {
+        return impl.isDecompInert(c);
+    }
+};
+
+/**
+ * Normalizer2WithImpl subclass whose operations forward to the
+ * compose*() and *Comp*() functions of the Normalizer2Impl.
+ * The onlyContiguous flag given at construction (constructor parameter "fcc")
+ * is passed through to most of those calls.
+ */
+class ComposeNormalizer2 : public Normalizer2WithImpl {
+public:
+    ComposeNormalizer2(const Normalizer2Impl &ni, UBool fcc) :
+        Normalizer2WithImpl(ni), onlyContiguous(fcc) {}
+    virtual ~ComposeNormalizer2();
+
+private:
+    /** Composes [src, limit) into buffer via impl.compose() with its write flag set to true. */
+    virtual void
+    normalize(const char16_t *src, const char16_t *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+        impl.compose(src, limit, onlyContiguous, true, buffer, errorCode);
+    }
+    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
+
+    /**
+     * Composes the UTF-8 text in src into sink, then flushes sink.
+     * Does nothing if errorCode already indicates a failure.
+     * A non-null edits is reset first unless options contains U_EDITS_NO_RESET.
+     */
+    void
+    normalizeUTF8(uint32_t options, StringPiece src, ByteSink &sink,
+                  Edits *edits, UErrorCode &errorCode) const override {
+        if (U_FAILURE(errorCode)) {
+            return;
+        }
+        if (edits != nullptr) {
+            if ((options & U_EDITS_NO_RESET) == 0) {
+                edits->reset();
+            }
+        }
+        const uint8_t *const start = reinterpret_cast<const uint8_t *>(src.data());
+        const uint8_t *const limit = start + src.length();
+        impl.composeUTF8(options, onlyContiguous, start, limit,
+                         &sink, edits, errorCode);
+        sink.Flush();
+    }
+
+    /** Forwards to impl.composeAndAppend(), adding onlyContiguous. */
+    virtual void
+    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+        impl.composeAndAppend(src, limit, doNormalize, onlyContiguous, safeMiddle, buffer, errorCode);
+    }
+
+    /**
+     * Returns the result of impl.compose() called with its write flag set to
+     * false and a small temporary buffer.
+     * Returns false on a pre-existing failure or if the temporary buffer
+     * cannot be initialized; a string without a readable buffer sets
+     * U_ILLEGAL_ARGUMENT_ERROR and returns false.
+     */
+    virtual UBool
+    isNormalized(const UnicodeString &s, UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const char16_t *const sChars=s.getBuffer();
+        if(sChars==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return false;
+        }
+        UnicodeString temp;
+        ReorderingBuffer buffer(impl, temp);
+        // small destCapacity for substring normalization
+        if(buffer.init(5, errorCode)) {
+            return impl.compose(sChars, sChars+s.length(), onlyContiguous, false, buffer, errorCode);
+        }
+        return false;
+    }
+    /**
+     * Returns the result of impl.composeUTF8() called without sink or edits.
+     * Returns false on a pre-existing failure.
+     */
+    virtual UBool
+    isNormalizedUTF8(StringPiece sp, UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            return false;
+        }
+        const uint8_t *const start = reinterpret_cast<const uint8_t *>(sp.data());
+        const uint8_t *const limit = start + sp.length();
+        return impl.composeUTF8(0, onlyContiguous, start, limit, nullptr, nullptr, errorCode);
+    }
+    /**
+     * Runs impl.composeQuickCheck() over s and returns the result value it
+     * stores; that value starts out as UNORM_YES.
+     * Returns UNORM_MAYBE on a pre-existing failure; a string without a
+     * readable buffer sets U_ILLEGAL_ARGUMENT_ERROR and returns UNORM_MAYBE.
+     */
+    virtual UNormalizationCheckResult
+    quickCheck(const UnicodeString &s, UErrorCode &errorCode) const override {
+        if(U_FAILURE(errorCode)) {
+            return UNORM_MAYBE;
+        }
+        const char16_t *const sChars=s.getBuffer();
+        if(sChars==nullptr) {
+            errorCode=U_ILLEGAL_ARGUMENT_ERROR;
+            return UNORM_MAYBE;
+        }
+        UNormalizationCheckResult result=UNORM_YES;
+        impl.composeQuickCheck(sChars, sChars+s.length(), onlyContiguous, &result);
+        return result;
+    }
+    /**
+     * Returns what impl.composeQuickCheck() returns when no result pointer
+     * is passed. The error code argument is not used.
+     */
+    virtual const char16_t *
+    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &) const override {
+        return impl.composeQuickCheck(src, limit, onlyContiguous, nullptr);
+    }
+    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
+    /** Returns impl.getCompQuickCheck() for the norm16 value of c. */
+    virtual UNormalizationCheckResult getQuickCheck(UChar32 c) const override {
+        return impl.getCompQuickCheck(impl.getNorm16(c));
+    }
+    /** Forwards to impl.hasCompBoundaryBefore(). */
+    virtual UBool hasBoundaryBefore(UChar32 c) const override {
+        return impl.hasCompBoundaryBefore(c);
+    }
+    /** Forwards to impl.hasCompBoundaryAfter(), adding onlyContiguous. */
+    virtual UBool hasBoundaryAfter(UChar32 c) const override {
+        return impl.hasCompBoundaryAfter(c, onlyContiguous);
+    }
+    /** Forwards to impl.isCompInert(), adding onlyContiguous. */
+    virtual UBool isInert(UChar32 c) const override {
+        return impl.isCompInert(c, onlyContiguous);
+    }
+
+    // Set once from the constructor's "fcc" argument.
+    const UBool onlyContiguous;
+};
+
+/**
+ * Normalizer2WithImpl subclass whose operations forward to the
+ * makeFCD*() and *FCD*() functions of the Normalizer2Impl.
+ */
+class FCDNormalizer2 : public Normalizer2WithImpl {
+public:
+    FCDNormalizer2(const Normalizer2Impl &ni) : Normalizer2WithImpl(ni) {}
+    virtual ~FCDNormalizer2();
+
+private:
+    /** Processes [src, limit) into buffer via impl.makeFCD(). */
+    virtual void
+    normalize(const char16_t *src, const char16_t *limit,
+              ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+        ReorderingBuffer *const dest=&buffer;
+        impl.makeFCD(src, limit, dest, errorCode);
+    }
+    using Normalizer2WithImpl::normalize;  // Avoid warning about hiding base class function.
+    /** Forwards to impl.makeFCDAndAppend(). */
+    virtual void
+    normalizeAndAppend(const char16_t *src, const char16_t *limit, UBool doNormalize,
+                       UnicodeString &safeMiddle,
+                       ReorderingBuffer &buffer, UErrorCode &errorCode) const override {
+        impl.makeFCDAndAppend(src, limit, doNormalize, safeMiddle, buffer, errorCode);
+    }
+    /** Returns what impl.makeFCD() returns when called without an output buffer. */
+    virtual const char16_t *
+    spanQuickCheckYes(const char16_t *src, const char16_t *limit, UErrorCode &errorCode) const override {
+        const char16_t *const spanEnd=impl.makeFCD(src, limit, nullptr, errorCode);
+        return spanEnd;
+    }
+    using Normalizer2WithImpl::spanQuickCheckYes;  // Avoid warning about hiding base class function.
+    /** Forwards to impl.hasFCDBoundaryBefore(). */
+    virtual UBool hasBoundaryBefore(UChar32 c) const override {
+        return impl.hasFCDBoundaryBefore(c);
+    }
+    /** Forwards to impl.hasFCDBoundaryAfter(). */
+    virtual UBool hasBoundaryAfter(UChar32 c) const override {
+        return impl.hasFCDBoundaryAfter(c);
+    }
+    /** Forwards to impl.isFCDInert(). */
+    virtual UBool isInert(UChar32 c) const override {
+        return impl.isFCDInert(c);
+    }
+};
+
+/**
+ * Bundles one Normalizer2Impl pointer with four normalizer objects that all
+ * refer to that same implementation: two ComposeNormalizer2 instances
+ * (comp with onlyContiguous=false, fcc with onlyContiguous=true),
+ * one DecomposeNormalizer2 and one FCDNormalizer2.
+ */
+struct Norm2AllModes : public UMemory {
+    /** i is dereferenced immediately, so it must not be null. */
+    Norm2AllModes(Normalizer2Impl *i)
+            : impl(i),
+              comp(*i, false),
+              decomp(*i),
+              fcd(*i),
+              fcc(*i, true) {}
+    // NOTE(review): defined out of line; whether it deletes impl is not
+    // visible from this header. Confirm in the implementation file.
+    ~Norm2AllModes();
+
+    // Factory functions, defined elsewhere.
+    static Norm2AllModes *createInstance(Normalizer2Impl *impl, UErrorCode &errorCode);
+    static Norm2AllModes *createNFCInstance(UErrorCode &errorCode);
+    static Norm2AllModes *createInstance(const char *packageName,
+                                         const char *name,
+                                         UErrorCode &errorCode);
+
+    // Accessors returning const instances, defined elsewhere.
+    static const Norm2AllModes *getNFCInstance(UErrorCode &errorCode);
+    static const Norm2AllModes *getNFKCInstance(UErrorCode &errorCode);
+    static const Norm2AllModes *getNFKC_CFInstance(UErrorCode &errorCode);
+
+    // Keep this declaration order: it is the member initialization order
+    // and matches the constructor's initializer list.
+    Normalizer2Impl *impl;
+    ComposeNormalizer2 comp;
+    DecomposeNormalizer2 decomp;
+    FCDNormalizer2 fcd;
+    ComposeNormalizer2 fcc;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_NORMALIZATION
+#endif // __NORM2ALLMODES_H__