summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/uitercollationiterator.h
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/uitercollationiterator.h')
-rw-r--r--intl/icu/source/i18n/uitercollationiterator.h162
1 files changed, 162 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/uitercollationiterator.h b/intl/icu/source/i18n/uitercollationiterator.h
new file mode 100644
index 0000000000..23d8562a87
--- /dev/null
+++ b/intl/icu/source/i18n/uitercollationiterator.h
@@ -0,0 +1,162 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2012-2016, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* uitercollationiterator.h
+*
+* created on: 2012sep23 (from utf16collationiterator.h)
+* created by: Markus W. Scherer
+*/
+
+#ifndef __UITERCOLLATIONITERATOR_H__
+#define __UITERCOLLATIONITERATOR_H__
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION
+
+#include "unicode/uiter.h"
+#include "cmemory.h"
+#include "collation.h"
+#include "collationdata.h"
+#include "collationiterator.h"
+#include "normalizer2impl.h"
+
+U_NAMESPACE_BEGIN
+
+/**
+ * UCharIterator-based collation element and character iterator.
+ * Handles normalized text inline, with length or NUL-terminated.
+ * Unnormalized text is handled by a subclass.
+ */
+class U_I18N_API UIterCollationIterator : public CollationIterator {
+public:
+ UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
+ : CollationIterator(d, numeric), iter(ui) {}
+
+ virtual ~UIterCollationIterator();
+
+ virtual void resetToOffset(int32_t newOffset) override;
+
+ virtual int32_t getOffset() const override;
+
+ virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
+
+ virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
+
+protected:
+ virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
+
+ virtual char16_t handleGetTrailSurrogate() override;
+
+ virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
+
+ virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
+
+ UCharIterator &iter;
+};
+
+/**
+ * Incrementally checks the input text for FCD and normalizes where necessary.
+ */
+class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
+public:
+ FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
+ : UIterCollationIterator(data, numeric, ui),
+ state(ITER_CHECK_FWD), start(startIndex),
+ nfcImpl(data->nfcImpl) {}
+
+ virtual ~FCDUIterCollationIterator();
+
+ virtual void resetToOffset(int32_t newOffset) override;
+
+ virtual int32_t getOffset() const override;
+
+ virtual UChar32 nextCodePoint(UErrorCode &errorCode) override;
+
+ virtual UChar32 previousCodePoint(UErrorCode &errorCode) override;
+
+protected:
+ virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode) override;
+
+ virtual char16_t handleGetTrailSurrogate() override;
+
+
+ virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
+
+ virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) override;
+
+private:
+ /**
+ * Switches to forward checking if possible.
+ */
+ void switchToForward();
+
+ /**
+ * Extends the FCD text segment forward or normalizes around pos.
+ * @return true if success
+ */
+ UBool nextSegment(UErrorCode &errorCode);
+
+ /**
+ * Switches to backward checking.
+ */
+ void switchToBackward();
+
+ /**
+ * Extends the FCD text segment backward or normalizes around pos.
+ * @return true if success
+ */
+ UBool previousSegment(UErrorCode &errorCode);
+
+ UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
+
+ enum State {
+ /**
+ * The input text [start..(iter index)[ passes the FCD check.
+ * Moving forward checks incrementally.
+ * pos & limit are undefined.
+ */
+ ITER_CHECK_FWD,
+ /**
+ * The input text [(iter index)..limit[ passes the FCD check.
+ * Moving backward checks incrementally.
+ * start & pos are undefined.
+ */
+ ITER_CHECK_BWD,
+ /**
+ * The input text [start..limit[ passes the FCD check.
+ * pos tracks the current text index.
+ */
+ ITER_IN_FCD_SEGMENT,
+ /**
+ * The input text [start..limit[ failed the FCD check and was normalized.
+ * pos tracks the current index in the normalized string.
+ * The text iterator is at the limit index.
+ */
+ IN_NORM_ITER_AT_LIMIT,
+ /**
+ * The input text [start..limit[ failed the FCD check and was normalized.
+ * pos tracks the current index in the normalized string.
+ * The text iterator is at the start index.
+ */
+ IN_NORM_ITER_AT_START
+ };
+
+ State state;
+
+ int32_t start;
+ int32_t pos;
+ int32_t limit;
+
+ const Normalizer2Impl &nfcImpl;
+ UnicodeString normalized;
+};
+
+U_NAMESPACE_END
+
+#endif // !UCONFIG_NO_COLLATION
+#endif // __UITERCOLLATIONITERATOR_H__