summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/i18n/search.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/i18n/search.cpp')
-rw-r--r--intl/icu/source/i18n/search.cpp445
1 files changed, 445 insertions, 0 deletions
diff --git a/intl/icu/source/i18n/search.cpp b/intl/icu/source/i18n/search.cpp
new file mode 100644
index 0000000000..ec5028ca81
--- /dev/null
+++ b/intl/icu/source/i18n/search.cpp
@@ -0,0 +1,445 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+**********************************************************************
+* Copyright (C) 2001-2008,2010 IBM and others. All rights reserved.
+**********************************************************************
+* Date Name Description
+* 03/22/2000 helena Creation.
+**********************************************************************
+*/
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/brkiter.h"
+#include "unicode/schriter.h"
+#include "unicode/search.h"
+#include "usrchimp.h"
+#include "cmemory.h"
+
+// public constructors and destructors -----------------------------------
+U_NAMESPACE_BEGIN
+
+// Copy constructor.
+// Allocates a private USearch record for the new object and copies the
+// break iterator pointer, the text, the search attributes, the current match
+// and the iteration direction/reset flags from `other`.
+// NOTE(review): the result of uprv_malloc() is not checked here, and this
+// constructor has no UErrorCode through which a failure could be reported.
+SearchIterator::SearchIterator(const SearchIterator &other)
+    : UObject(other)
+{
+    m_breakiterator_ = other.m_breakiterator_;
+    m_text_ = other.m_text_;
+    m_search_ = (USearch *)uprv_malloc(sizeof(USearch));
+    m_search_->breakIter = other.m_search_->breakIter;
+    m_search_->isCanonicalMatch = other.m_search_->isCanonicalMatch;
+    m_search_->isOverlap = other.m_search_->isOverlap;
+    m_search_->elementComparisonType = other.m_search_->elementComparisonType;
+    m_search_->matchedIndex = other.m_search_->matchedIndex;
+    m_search_->matchedLength = other.m_search_->matchedLength;
+    // The record comes from uprv_malloc(), so every field that next() and
+    // previous() read has to be set explicitly; the direction and reset flags
+    // are part of the iteration state that goes with the copied match.
+    m_search_->isForwardSearching = other.m_search_->isForwardSearching;
+    m_search_->reset = other.m_search_->reset;
+    // Refer to this object's own copy of the text, not to the buffer held by
+    // `other`, so the pointer stays valid when `other` is changed or
+    // destroyed.
+    m_search_->text = m_text_.getBuffer();
+    m_search_->textLength = m_text_.length();
+}
+
+// Destructor. Releases the USearch record that the constructors obtained
+// with uprv_malloc(); nothing is done when the pointer is null.
+SearchIterator::~SearchIterator()
+{
+    if (m_search_ == nullptr) {
+        return;
+    }
+    uprv_free(m_search_);
+}
+
+// public get and set methods ----------------------------------------
+
+// Sets one search attribute.
+//   attribute  which attribute to change (overlap, canonical match or
+//              element comparison)
+//   value      new value for that attribute
+//   status     in/out error code; the call does nothing when it already
+//              holds a failure. Set to U_ILLEGAL_ARGUMENT_ERROR when
+//              `attribute` is not one of the handled attributes or when
+//              `value` is USEARCH_ATTRIBUTE_VALUE_COUNT.
+// Arguments are validated before any state is modified, so a rejected call
+// leaves the current attribute values untouched and never replaces an
+// incoming failure code.
+void SearchIterator::setAttribute(USearchAttribute attribute,
+                                  USearchAttributeValue value,
+                                  UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (value == USEARCH_ATTRIBUTE_VALUE_COUNT) {
+        // the count sentinel is not a usable value for any attribute
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    switch (attribute)
+    {
+    case USEARCH_OVERLAP :
+        m_search_->isOverlap = (value == USEARCH_ON);
+        break;
+    case USEARCH_CANONICAL_MATCH :
+        m_search_->isCanonicalMatch = (value == USEARCH_ON);
+        break;
+    case USEARCH_ELEMENT_COMPARISON :
+        // only the two wildcard modes are stored as-is; any other value
+        // selects 0
+        if (value == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD ||
+            value == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD) {
+            m_search_->elementComparisonType = (int16_t)value;
+        } else {
+            m_search_->elementComparisonType = 0;
+        }
+        break;
+    default:
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        break;
+    }
+}
+
+// Reports the current value of one search attribute.
+// Overlap and canonical match are reported as USEARCH_ON / USEARCH_OFF.
+// Element comparison is reported as the stored wildcard mode, or as
+// USEARCH_STANDARD_ELEMENT_COMPARISON when no wildcard mode is stored.
+// Any other attribute yields USEARCH_DEFAULT.
+USearchAttributeValue SearchIterator::getAttribute(
+                                          USearchAttribute attribute) const
+{
+    if (attribute == USEARCH_OVERLAP) {
+        return m_search_->isOverlap ? USEARCH_ON : USEARCH_OFF;
+    }
+    if (attribute == USEARCH_CANONICAL_MATCH) {
+        return m_search_->isCanonicalMatch ? USEARCH_ON : USEARCH_OFF;
+    }
+    if (attribute == USEARCH_ELEMENT_COMPARISON) {
+        const int16_t comparison = m_search_->elementComparisonType;
+        const bool isWildcardMode =
+            comparison == USEARCH_PATTERN_BASE_WEIGHT_IS_WILDCARD ||
+            comparison == USEARCH_ANY_BASE_WEIGHT_IS_WILDCARD;
+        return isWildcardMode ? (USearchAttributeValue)comparison
+                              : USEARCH_STANDARD_ELEMENT_COMPARISON;
+    }
+    return USEARCH_DEFAULT;
+}
+
+// Returns the stored start index of the current match. The stored value is
+// USEARCH_DONE after setMatchNotFound() has run.
+int32_t SearchIterator::getMatchedStart() const
+{
+    const int32_t start = m_search_->matchedIndex;
+    return start;
+}
+
+// Returns the stored length of the current match. The stored value is 0
+// after setMatchNotFound() has run.
+int32_t SearchIterator::getMatchedLength() const
+{
+    const int32_t length = m_search_->matchedLength;
+    return length;
+}
+
+// Copies the currently matched substring of the text into `result`.
+// `result` is emptied when there is no match (start is USEARCH_DONE) or
+// when the match length is zero.
+void SearchIterator::getMatchedText(UnicodeString &result) const
+{
+    const int32_t start = m_search_->matchedIndex;
+    const int32_t length = m_search_->matchedLength;
+    if (start == USEARCH_DONE || length == 0) {
+        result.remove();
+        return;
+    }
+    result.setTo(m_search_->text + start, length);
+}
+
+// Installs `breakiter` as the break iterator of this object.
+// Nothing is changed when `status` already holds a failure.
+void SearchIterator::setBreakIterator(BreakIterator *breakiter,
+                                      UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    // The constructors that take a BreakIterator all cast it to a
+    // UBreakIterator and pass it to the corresponding usearch_openFromXXX
+    // routine, so the same cast is used here. A UBreakIterator is a
+    // BreakIterator, so any subclass of BreakIterator should work.
+    m_search_->breakIter = (UBreakIterator *) breakiter;
+    m_breakiterator_ = breakiter;
+}
+
+// Returns the break iterator pointer held by this object; it is null when
+// none has been supplied.
+const BreakIterator * SearchIterator::getBreakIterator() const
+{
+    const BreakIterator *current = m_breakiterator_;
+    return current;
+}
+
+// Replaces the text to be searched.
+// An empty `text` is rejected with U_ILLEGAL_ARGUMENT_ERROR and the current
+// text is kept. Nothing is done when `status` already holds a failure.
+void SearchIterator::setText(const UnicodeString &text, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    if (text.length() == 0) {
+        status = U_ILLEGAL_ARGUMENT_ERROR;
+        return;
+    }
+    // keep a private copy and let the search record refer to that copy
+    m_text_ = text;
+    m_search_->text = m_text_.getBuffer();
+    m_search_->textLength = m_text_.length();
+}
+
+// Replaces the text to be searched with the text of a CharacterIterator.
+// The text is first extracted into a temporary string and then handed to the
+// UnicodeString overload, which validates it. This way a rejected (empty)
+// text leaves m_text_ untouched, and the text pointer and length kept in the
+// search record keep describing a buffer that is still alive.
+// Nothing is done when `status` already holds a failure.
+void SearchIterator::setText(CharacterIterator &text, UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return;
+    }
+    UnicodeString extracted;
+    text.getText(extracted);
+    setText(extracted, status);
+}
+
+// Returns a reference to this object's own copy of the text being searched.
+const UnicodeString & SearchIterator::getText() const
+{
+    const UnicodeString &current = m_text_;
+    return current;
+}
+
+// operator overloading ----------------------------------------------
+
+// Equality test.
+// Two iterators are equal when they are the same object, or when they hold
+// the same break iterator pointer, the same attributes, the same match, the
+// same offset and texts of identical length and content. The text content is
+// only compared once everything else has matched.
+bool SearchIterator::operator==(const SearchIterator &that) const
+{
+    if (this == &that) {
+        return true;
+    }
+    const USearch *lhs = m_search_;
+    const USearch *rhs = that.m_search_;
+    if (m_breakiterator_ != that.m_breakiterator_) {
+        return false;
+    }
+    if (lhs->isCanonicalMatch != rhs->isCanonicalMatch ||
+        lhs->isOverlap != rhs->isOverlap ||
+        lhs->elementComparisonType != rhs->elementComparisonType) {
+        return false;
+    }
+    if (lhs->matchedIndex != rhs->matchedIndex ||
+        lhs->matchedLength != rhs->matchedLength ||
+        lhs->textLength != rhs->textLength) {
+        return false;
+    }
+    if (getOffset() != that.getOffset()) {
+        return false;
+    }
+    if (lhs->textLength == 0) {
+        // both texts are empty, there is nothing left to compare
+        return true;
+    }
+    return uprv_memcmp(lhs->text, rhs->text,
+                       lhs->textLength * sizeof(char16_t)) == 0;
+}
+
+// public methods ----------------------------------------------------
+
+// Moves the offset to 0 and returns the result of a forward search from
+// there. Returns USEARCH_DONE without searching when `status` already holds
+// a failure.
+int32_t SearchIterator::first(UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        setOffset(0, status);
+        return handleNext(0, status);
+    }
+    return USEARCH_DONE;
+}
+
+// Moves the offset to `position` and returns the result of a forward search
+// from there. Returns USEARCH_DONE without searching when `status` already
+// holds a failure.
+int32_t SearchIterator::following(int32_t position,
+                                  UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        setOffset(position, status);
+        return handleNext(position, status);
+    }
+    return USEARCH_DONE;
+}
+
+// Moves the offset to the end of the text and returns the result of a
+// backward search from there. Returns USEARCH_DONE without searching when
+// `status` already holds a failure.
+int32_t SearchIterator::last(UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        setOffset(m_search_->textLength, status);
+        return handlePrev(m_search_->textLength, status);
+    }
+    return USEARCH_DONE;
+}
+
+// Moves the offset to `position` and returns the result of a backward search
+// from there. Returns USEARCH_DONE without searching when `status` already
+// holds a failure.
+int32_t SearchIterator::preceding(int32_t position,
+                                  UErrorCode &status)
+{
+    if (U_SUCCESS(status)) {
+        setOffset(position, status);
+        return handlePrev(position, status);
+    }
+    return USEARCH_DONE;
+}
+
+// Advances to the next match in the forward direction.
+// Returns the value produced by handleNext(), the current match start when
+// the direction has just been switched while a match is active, or
+// USEARCH_DONE when `status` holds a failure or the end of the text has
+// already been reached.
+int32_t SearchIterator::next(UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return USEARCH_DONE;
+    }
+    int32_t position = getOffset();
+    const int32_t lastMatch = m_search_->matchedIndex;
+    const int32_t lastLength = m_search_->matchedLength;
+    m_search_->reset = false;
+    if (!m_search_->isForwardSearching) {
+        // switching direction.
+        // if matchedIndex == USEARCH_DONE, it means that either a
+        // setOffset has been called or that previous ran off the text
+        // string. the iterator would have been set to offset 0 if a
+        // match is not found.
+        m_search_->isForwardSearching = true;
+        if (m_search_->matchedIndex != USEARCH_DONE) {
+            // there's no need to set the collation element iterator
+            // the next call to next will set the offset.
+            return lastMatch;
+        }
+    }
+    else {
+        const int32_t endOfText = m_search_->textLength;
+        const bool matchReachesEnd = lastMatch != USEARCH_DONE &&
+                                     lastMatch + lastLength >= endOfText;
+        if (position == endOfText || lastMatch == endOfText ||
+            matchReachesEnd) {
+            // not enough characters to match
+            setMatchNotFound();
+            return USEARCH_DONE;
+        }
+    }
+    // a match length of 0 means we are at the start of the iteration and
+    // the offset is used unchanged
+    if (lastLength > 0) {
+        position += m_search_->isOverlap ? 1 : lastLength;
+    }
+    return handleNext(position, status);
+}
+
+// Moves to the previous match in the backward direction.
+// Returns the value produced by handlePrev(), the current match start when
+// the direction has just been switched while a match is active, or
+// USEARCH_DONE when `status` holds a failure or the start of the text has
+// already been reached.
+int32_t SearchIterator::previous(UErrorCode &status)
+{
+    if (U_FAILURE(status)) {
+        return USEARCH_DONE;
+    }
+    int32_t position;
+    if (m_search_->reset) {
+        // first call after a reset: start from the end of the text
+        position = m_search_->textLength;
+        m_search_->isForwardSearching = false;
+        m_search_->reset = false;
+        setOffset(position, status);
+    }
+    else {
+        position = getOffset();
+    }
+    int32_t lastMatch = m_search_->matchedIndex;
+    if (m_search_->isForwardSearching) {
+        // switching direction.
+        // if matchedIndex == USEARCH_DONE, it means that either a
+        // setOffset has been called or that next ran off the text
+        // string. the iterator would have been set to offset textLength if
+        // a match is not found.
+        m_search_->isForwardSearching = false;
+        if (lastMatch != USEARCH_DONE) {
+            return lastMatch;
+        }
+    }
+    else if (position == 0 || lastMatch == 0) {
+        // not enough characters to match
+        setMatchNotFound();
+        return USEARCH_DONE;
+    }
+    if (lastMatch == USEARCH_DONE) {
+        return handlePrev(position, status);
+    }
+    if (m_search_->isOverlap) {
+        // with overlapping enabled the backward search starts at
+        // matchedIndex + matchedLength - 2 instead of matchedIndex
+        lastMatch += m_search_->matchedLength - 2;
+    }
+    return handlePrev(lastMatch, status);
+}
+
+// Puts the iterator back into its initial state: no current match, offset 0,
+// overlap and canonical match switched off, element comparison type 0,
+// forward direction, and the reset flag raised.
+// The status of the internal setOffset() call is not reported to the caller.
+void SearchIterator::reset()
+{
+    setMatchNotFound();
+    UErrorCode ignoredStatus = U_ZERO_ERROR;
+    setOffset(0, ignoredStatus);
+    USearch *search = m_search_;
+    search->isOverlap = false;
+    search->isCanonicalMatch = false;
+    search->elementComparisonType = 0;
+    search->isForwardSearching = true;
+    search->reset = true;
+}
+
+// protected constructors and destructors -----------------------------
+
+// Default constructor. Allocates the USearch record and fills it with the
+// initial state: no break iterator, no text, all attributes off, forward
+// direction, reset flag raised and no current match.
+SearchIterator::SearchIterator()
+{
+    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
+    // text
+    search->text = nullptr;
+    search->textLength = 0;
+    // attributes
+    search->breakIter = nullptr;
+    search->isOverlap = false;
+    search->isCanonicalMatch = false;
+    search->elementComparisonType = 0;
+    // iteration state
+    search->isForwardSearching = true;
+    search->reset = true;
+    search->matchedIndex = USEARCH_DONE;
+    search->matchedLength = 0;
+    m_search_ = search;
+    m_breakiterator_ = nullptr;
+}
+
+// Constructs an iterator over a copy of `text`.
+// `breakiter` is stored in m_breakiterator_; the breakIter field of the
+// USearch record is left null by this constructor. The record starts with
+// all attributes off, forward direction, reset flag raised and no match.
+SearchIterator::SearchIterator(const UnicodeString &text,
+                               BreakIterator *breakiter) :
+                               m_breakiterator_(breakiter),
+                               m_text_(text)
+{
+    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
+    // the record refers to this object's own copy of the text
+    search->text = m_text_.getBuffer();
+    search->textLength = text.length();
+    // attributes
+    search->breakIter = nullptr;
+    search->isOverlap = false;
+    search->isCanonicalMatch = false;
+    search->elementComparisonType = 0;
+    // iteration state
+    search->isForwardSearching = true;
+    search->reset = true;
+    search->matchedIndex = USEARCH_DONE;
+    search->matchedLength = 0;
+    m_search_ = search;
+}
+
+// Constructs an iterator over the text obtained from a CharacterIterator.
+// `breakiter` is stored in m_breakiterator_; the breakIter field of the
+// USearch record is left null by this constructor. The record starts with
+// all attributes off, forward direction, reset flag raised and no match.
+SearchIterator::SearchIterator(CharacterIterator &text,
+                               BreakIterator *breakiter) :
+                               m_breakiterator_(breakiter)
+{
+    USearch *search = (USearch *)uprv_malloc(sizeof(USearch));
+    // attributes
+    search->breakIter = nullptr;
+    search->isOverlap = false;
+    search->isCanonicalMatch = false;
+    search->elementComparisonType = 0;
+    // iteration state
+    search->isForwardSearching = true;
+    search->reset = true;
+    search->matchedIndex = USEARCH_DONE;
+    search->matchedLength = 0;
+    // pull the text into m_text_ and let the record refer to that copy
+    text.getText(m_text_);
+    search->text = m_text_.getBuffer();
+    search->textLength = m_text_.length();
+    m_search_ = search;
+}
+
+// protected methods ------------------------------------------------------
+
+// Assignment operator.
+// Copies the break iterator pointer, the text, the search attributes, the
+// current match and the iteration direction/reset flags from `that` into the
+// USearch record this object already owns. Self-assignment is a no-op.
+SearchIterator & SearchIterator::operator=(const SearchIterator &that)
+{
+    if (this != &that) {
+        m_breakiterator_ = that.m_breakiterator_;
+        m_text_ = that.m_text_;
+        m_search_->breakIter = that.m_search_->breakIter;
+        m_search_->isCanonicalMatch = that.m_search_->isCanonicalMatch;
+        m_search_->isOverlap = that.m_search_->isOverlap;
+        m_search_->elementComparisonType = that.m_search_->elementComparisonType;
+        m_search_->matchedIndex = that.m_search_->matchedIndex;
+        m_search_->matchedLength = that.m_search_->matchedLength;
+        // next() and previous() interpret the match together with these
+        // flags, so they are copied along with it.
+        m_search_->isForwardSearching = that.m_search_->isForwardSearching;
+        m_search_->reset = that.m_search_->reset;
+        // Refer to this object's own copy of the text, not to the buffer
+        // held by `that`, so the pointer stays valid when `that` is changed
+        // or destroyed.
+        m_search_->text = m_text_.getBuffer();
+        m_search_->textLength = m_text_.length();
+    }
+    return *this;
+}
+
+// Records `length` as the length of the current match.
+void SearchIterator::setMatchLength(int32_t length)
+{
+    USearch *search = m_search_;
+    search->matchedLength = length;
+}
+
+// Records `position` as the start index of the current match.
+void SearchIterator::setMatchStart(int32_t position)
+{
+    USearch *search = m_search_;
+    search->matchedIndex = position;
+}
+
+// Marks the iterator as having no current match: the match start becomes
+// USEARCH_DONE, the match length becomes 0, and the offset is moved to the
+// end of the text when searching forward or to 0 when searching backward.
+void SearchIterator::setMatchNotFound()
+{
+    setMatchStart(USEARCH_DONE);
+    setMatchLength(0);
+    // by default no errors should be returned here since offsets are within
+    // range.
+    UErrorCode status = U_ZERO_ERROR;
+    const int32_t restingOffset =
+        m_search_->isForwardSearching ? m_search_->textLength : 0;
+    setOffset(restingOffset, status);
+}
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_COLLATION && !UCONFIG_NO_BREAK_ITERATION */