summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/bytestrieiterator.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'intl/icu/source/common/bytestrieiterator.cpp')
-rw-r--r--intl/icu/source/common/bytestrieiterator.cpp214
1 files changed, 214 insertions, 0 deletions
diff --git a/intl/icu/source/common/bytestrieiterator.cpp b/intl/icu/source/common/bytestrieiterator.cpp
new file mode 100644
index 0000000000..65f54be48a
--- /dev/null
+++ b/intl/icu/source/common/bytestrieiterator.cpp
@@ -0,0 +1,214 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+*******************************************************************************
+* Copyright (C) 2010-2012, International Business Machines
+* Corporation and others. All Rights Reserved.
+*******************************************************************************
+* file name: bytestrieiterator.cpp
+* encoding: UTF-8
+* tab size: 8 (not used)
+* indentation:4
+*
+* created on: 2010nov03
+* created by: Markus W. Scherer
+*/
+
+#include "unicode/utypes.h"
+#include "unicode/bytestrie.h"
+#include "unicode/stringpiece.h"
+#include "charstr.h"
+#include "uvectr32.h"
+
+U_NAMESPACE_BEGIN
+
+BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
+ UErrorCode &errorCode)
+ : bytes_(static_cast<const uint8_t *>(trieBytes)),
+ pos_(bytes_), initialPos_(bytes_),
+ remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
+ str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
+ // a public API header for which we would want it to depend only on
+ // other public headers.
+ // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
+ // via the CharString and UVector32 implementations, so this additional
+ // cost is minimal.
+ str_=new CharString();
+ stack_=new UVector32(errorCode);
+ if(U_SUCCESS(errorCode) && (str_==nullptr || stack_==nullptr)) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ }
+}
+
+BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
+ UErrorCode &errorCode)
+ : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
+ remainingMatchLength_(trie.remainingMatchLength_),
+ initialRemainingMatchLength_(trie.remainingMatchLength_),
+ str_(nullptr), maxLength_(maxStringLength), value_(0), stack_(nullptr) {
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ str_=new CharString();
+ stack_=new UVector32(errorCode);
+ if(U_FAILURE(errorCode)) {
+ return;
+ }
+ if(str_==nullptr || stack_==nullptr) {
+ errorCode=U_MEMORY_ALLOCATION_ERROR;
+ return;
+ }
+ int32_t length=remainingMatchLength_; // Actual remaining match length minus 1.
+ if(length>=0) {
+ // Pending linear-match node, append remaining bytes to str_.
+ ++length;
+ if(maxLength_>0 && length>maxLength_) {
+ length=maxLength_; // This will leave remainingMatchLength>=0 as a signal.
+ }
+ str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
+ pos_+=length;
+ remainingMatchLength_-=length;
+ }
+}
+
+BytesTrie::Iterator::~Iterator() {
+ delete str_;
+ delete stack_;
+}
+
+BytesTrie::Iterator &
+BytesTrie::Iterator::reset() {
+ pos_=initialPos_;
+ remainingMatchLength_=initialRemainingMatchLength_;
+ int32_t length=remainingMatchLength_+1; // Remaining match length.
+ if(maxLength_>0 && length>maxLength_) {
+ length=maxLength_;
+ }
+ str_->truncate(length);
+ pos_+=length;
+ remainingMatchLength_-=length;
+ stack_->setSize(0);
+ return *this;
+}
+
+UBool
+BytesTrie::Iterator::hasNext() const { return pos_!=nullptr || !stack_->isEmpty(); }
+
+UBool
+BytesTrie::Iterator::next(UErrorCode &errorCode) {
+ if(U_FAILURE(errorCode)) {
+ return false;
+ }
+ const uint8_t *pos=pos_;
+ if(pos==nullptr) {
+ if(stack_->isEmpty()) {
+ return false;
+ }
+ // Pop the state off the stack and continue with the next outbound edge of
+ // the branch node.
+ int32_t stackSize=stack_->size();
+ int32_t length=stack_->elementAti(stackSize-1);
+ pos=bytes_+stack_->elementAti(stackSize-2);
+ stack_->setSize(stackSize-2);
+ str_->truncate(length&0xffff);
+ length=(int32_t)((uint32_t)length>>16);
+ if(length>1) {
+ pos=branchNext(pos, length, errorCode);
+ if(pos==nullptr) {
+ return true; // Reached a final value.
+ }
+ } else {
+ str_->append((char)*pos++, errorCode);
+ }
+ }
+ if(remainingMatchLength_>=0) {
+ // We only get here if we started in a pending linear-match node
+ // with more than maxLength remaining bytes.
+ return truncateAndStop();
+ }
+ for(;;) {
+ int32_t node=*pos++;
+ if(node>=kMinValueLead) {
+ // Deliver value for the byte sequence so far.
+ UBool isFinal=(UBool)(node&kValueIsFinal);
+ value_=readValue(pos, node>>1);
+ if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
+ pos_=nullptr;
+ } else {
+ pos_=skipValue(pos, node);
+ }
+ return true;
+ }
+ if(maxLength_>0 && str_->length()==maxLength_) {
+ return truncateAndStop();
+ }
+ if(node<kMinLinearMatch) {
+ if(node==0) {
+ node=*pos++;
+ }
+ pos=branchNext(pos, node+1, errorCode);
+ if(pos==nullptr) {
+ return true; // Reached a final value.
+ }
+ } else {
+ // Linear-match node, append length bytes to str_.
+ int32_t length=node-kMinLinearMatch+1;
+ if(maxLength_>0 && str_->length()+length>maxLength_) {
+ str_->append(reinterpret_cast<const char *>(pos),
+ maxLength_-str_->length(), errorCode);
+ return truncateAndStop();
+ }
+ str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
+ pos+=length;
+ }
+ }
+}
+
+StringPiece
+BytesTrie::Iterator::getString() const {
+ return str_ == nullptr ? StringPiece() : str_->toStringPiece();
+}
+
+UBool
+BytesTrie::Iterator::truncateAndStop() {
+ pos_=nullptr;
+ value_=-1; // no real value for str
+ return true;
+}
+
+// Branch node, needs to take the first outbound edge and push state for the rest.
+const uint8_t *
+BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
+ while(length>kMaxBranchLinearSubNodeLength) {
+ ++pos; // ignore the comparison byte
+ // Push state for the greater-or-equal edge.
+ stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
+ stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
+ // Follow the less-than edge.
+ length>>=1;
+ pos=jumpByDelta(pos);
+ }
+ // List of key-value pairs where values are either final values or jump deltas.
+ // Read the first (key, value) pair.
+ uint8_t trieByte=*pos++;
+ int32_t node=*pos++;
+ UBool isFinal=(UBool)(node&kValueIsFinal);
+ int32_t value=readValue(pos, node>>1);
+ pos=skipValue(pos, node);
+ stack_->addElement((int32_t)(pos-bytes_), errorCode);
+ stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
+ str_->append((char)trieByte, errorCode);
+ if(isFinal) {
+ pos_=nullptr;
+ value_=value;
+ return nullptr;
+ } else {
+ return pos+value;
+ }
+}
+
+U_NAMESPACE_END