diff options
Diffstat (limited to 'intl/icu/source/common/rbbinode.cpp')
-rw-r--r-- | intl/icu/source/common/rbbinode.cpp | 373 |
1 files changed, 373 insertions, 0 deletions
diff --git a/intl/icu/source/common/rbbinode.cpp b/intl/icu/source/common/rbbinode.cpp new file mode 100644 index 0000000000..7aa75d5ffb --- /dev/null +++ b/intl/icu/source/common/rbbinode.cpp @@ -0,0 +1,373 @@ +// © 2016 and later: Unicode, Inc. and others. +// License & terms of use: http://www.unicode.org/copyright.html +/* +*************************************************************************** +* Copyright (C) 2002-2016 International Business Machines Corporation * +* and others. All rights reserved. * +*************************************************************************** +*/ + +// +// File: rbbinode.cpp +// +// Implementation of class RBBINode, which represents a node in the +// tree generated when parsing the Rules Based Break Iterator rules. +// +// This "Class" is actually closer to a struct. +// Code using it is expected to directly access fields much of the time. +// + +#include "unicode/utypes.h" + +#if !UCONFIG_NO_BREAK_ITERATION + +#include "unicode/unistr.h" +#include "unicode/uniset.h" +#include "unicode/uchar.h" +#include "unicode/parsepos.h" + +#include "cstr.h" +#include "uvector.h" + +#include "rbbirb.h" +#include "rbbinode.h" + +#include "uassert.h" + + +U_NAMESPACE_BEGIN + +#ifdef RBBI_DEBUG +static int gLastSerial = 0; +#endif + + +//------------------------------------------------------------------------- +// +// Constructor. Just set the fields to reasonable default values. +// +//------------------------------------------------------------------------- +RBBINode::RBBINode(NodeType t) : UMemory() { +#ifdef RBBI_DEBUG + fSerialNum = ++gLastSerial; +#endif + fType = t; + fParent = nullptr; + fLeftChild = nullptr; + fRightChild = nullptr; + fInputSet = nullptr; + fFirstPos = 0; + fLastPos = 0; + fNullable = false; + fLookAheadEnd = false; + fRuleRoot = false; + fChainIn = false; + fVal = 0; + fPrecedence = precZero; + + UErrorCode status = U_ZERO_ERROR; + fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere + fLastPosSet = new UVector(status); + fFollowPos = new UVector(status); + if (t==opCat) {fPrecedence = precOpCat;} + else if (t==opOr) {fPrecedence = precOpOr;} + else if (t==opStart) {fPrecedence = precStart;} + else if (t==opLParen) {fPrecedence = precLParen;} + +} + + +RBBINode::RBBINode(const RBBINode &other) : UMemory(other) { +#ifdef RBBI_DEBUG + fSerialNum = ++gLastSerial; +#endif + fType = other.fType; + fParent = nullptr; + fLeftChild = nullptr; + fRightChild = nullptr; + fInputSet = other.fInputSet; + fPrecedence = other.fPrecedence; + fText = other.fText; + fFirstPos = other.fFirstPos; + fLastPos = other.fLastPos; + fNullable = other.fNullable; + fVal = other.fVal; + fRuleRoot = false; + fChainIn = other.fChainIn; + UErrorCode status = U_ZERO_ERROR; + fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere + fLastPosSet = new UVector(status); + fFollowPos = new UVector(status); +} + + +//------------------------------------------------------------------------- +// +// Destructor. Deletes both this node AND any child nodes, +// except in the case of variable reference nodes. For +// these, the l. child points back to the definition, which +// is common for all references to the variable, meaning +// it can't be deleted here. +// +//------------------------------------------------------------------------- +RBBINode::~RBBINode() { + // printf("deleting node %8x serial %4d\n", this, this->fSerialNum); + delete fInputSet; + fInputSet = nullptr; + + switch (this->fType) { + case varRef: + case setRef: + // for these node types, multiple instances point to the same "children" + // Storage ownership of children handled elsewhere. Don't delete here. + break; + + default: + delete fLeftChild; + fLeftChild = nullptr; + delete fRightChild; + fRightChild = nullptr; + } + + + delete fFirstPosSet; + delete fLastPosSet; + delete fFollowPos; + +} + + +//------------------------------------------------------------------------- +// +// cloneTree Make a copy of the subtree rooted at this node. +// Discard any variable references encountered along the way, +// and replace with copies of the variable's definitions. +// Used to replicate the expression underneath variable +// references in preparation for generating the DFA tables. +// +//------------------------------------------------------------------------- +RBBINode *RBBINode::cloneTree() { + RBBINode *n; + + if (fType == RBBINode::varRef) { + // If the current node is a variable reference, skip over it + // and clone the definition of the variable instead. + n = fLeftChild->cloneTree(); + } else if (fType == RBBINode::uset) { + n = this; + } else { + n = new RBBINode(*this); + // Check for null pointer. + if (n != nullptr) { + if (fLeftChild != nullptr) { + n->fLeftChild = fLeftChild->cloneTree(); + n->fLeftChild->fParent = n; + } + if (fRightChild != nullptr) { + n->fRightChild = fRightChild->cloneTree(); + n->fRightChild->fParent = n; + } + } + } + return n; +} + + + +//------------------------------------------------------------------------- +// +// flattenVariables Walk a parse tree, replacing any variable +// references with a copy of the variable's definition. +// Aside from variables, the tree is not changed. +// +// Return the root of the tree. If the root was not a variable +// reference, it remains unchanged - the root we started with +// is the root we return. If, however, the root was a variable +// reference, the root of the newly cloned replacement tree will +// be returned, and the original tree deleted. +// +// This function works by recursively walking the tree +// without doing anything until a variable reference is +// found, then calling cloneTree() at that point. Any +// nested references are handled by cloneTree(), not here. +// +//------------------------------------------------------------------------- +RBBINode *RBBINode::flattenVariables() { + if (fType == varRef) { + RBBINode *retNode = fLeftChild->cloneTree(); + if (retNode != nullptr) { + retNode->fRuleRoot = this->fRuleRoot; + retNode->fChainIn = this->fChainIn; + } + delete this; // TODO: undefined behavior. Fix. + return retNode; + } + + if (fLeftChild != nullptr) { + fLeftChild = fLeftChild->flattenVariables(); + fLeftChild->fParent = this; + } + if (fRightChild != nullptr) { + fRightChild = fRightChild->flattenVariables(); + fRightChild->fParent = this; + } + return this; +} + + +//------------------------------------------------------------------------- +// +// flattenSets Walk the parse tree, replacing any nodes of type setRef +// with a copy of the expression tree for the set. A set's +// equivalent expression tree is precomputed and saved as +// the left child of the uset node. +// +//------------------------------------------------------------------------- +void RBBINode::flattenSets() { + U_ASSERT(fType != setRef); + + if (fLeftChild != nullptr) { + if (fLeftChild->fType==setRef) { + RBBINode *setRefNode = fLeftChild; + RBBINode *usetNode = setRefNode->fLeftChild; + RBBINode *replTree = usetNode->fLeftChild; + fLeftChild = replTree->cloneTree(); + fLeftChild->fParent = this; + delete setRefNode; + } else { + fLeftChild->flattenSets(); + } + } + + if (fRightChild != nullptr) { + if (fRightChild->fType==setRef) { + RBBINode *setRefNode = fRightChild; + RBBINode *usetNode = setRefNode->fLeftChild; + RBBINode *replTree = usetNode->fLeftChild; + fRightChild = replTree->cloneTree(); + fRightChild->fParent = this; + delete setRefNode; + } else { + fRightChild->flattenSets(); + } + } +} + + + +//------------------------------------------------------------------------- +// +// findNodes() Locate all the nodes of the specified type, starting +// at the specified root. +// +//------------------------------------------------------------------------- +void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) { + /* test for buffer overflows */ + if (U_FAILURE(status)) { + return; + } + U_ASSERT(!dest->hasDeleter()); + if (fType == kind) { + dest->addElement(this, status); + } + if (fLeftChild != nullptr) { + fLeftChild->findNodes(dest, kind, status); + } + if (fRightChild != nullptr) { + fRightChild->findNodes(dest, kind, status); + } +} + + +//------------------------------------------------------------------------- +// +// print. Print out a single node, for debugging. +// +//------------------------------------------------------------------------- +#ifdef RBBI_DEBUG + +static int32_t serial(const RBBINode *node) { + return (node == nullptr? -1 : node->fSerialNum); +} + + +void RBBINode::printNode(const RBBINode *node) { + static const char * const nodeTypeNames[] = { + "setRef", + "uset", + "varRef", + "leafChar", + "lookAhead", + "tag", + "endMark", + "opStart", + "opCat", + "opOr", + "opStar", + "opPlus", + "opQuestion", + "opBreak", + "opReverse", + "opLParen" + }; + + if (node==nullptr) { + RBBIDebugPrintf("%10p", (void *)node); + } else { + RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ", + (void *)node, node->fSerialNum, nodeTypeNames[node->fType], + node->fRuleRoot?'R':' ', node->fChainIn?'C':' ', + serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent), + node->fFirstPos, node->fVal); + if (node->fType == varRef) { + RBBI_DEBUG_printUnicodeString(node->fText); + } + } + RBBIDebugPrintf("\n"); +} +#endif + + +#ifdef RBBI_DEBUG +U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) { + RBBIDebugPrintf("%*s", minWidth, CStr(s)()); +} +#endif + + +//------------------------------------------------------------------------- +// +// print. Print out the tree of nodes rooted at "this" +// +//------------------------------------------------------------------------- +#ifdef RBBI_DEBUG +void RBBINode::printNodeHeader() { + RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n"); +} + +void RBBINode::printTree(const RBBINode *node, UBool printHeading) { + if (printHeading) { + printNodeHeader(); + } + printNode(node); + if (node != nullptr) { + // Only dump the definition under a variable reference if asked to. + // Unconditionally dump children of all other node types. + if (node->fType != varRef) { + if (node->fLeftChild != nullptr) { + printTree(node->fLeftChild, false); + } + + if (node->fRightChild != nullptr) { + printTree(node->fRightChild, false); + } + } + } +} +#endif + + + +U_NAMESPACE_END + +#endif /* #if !UCONFIG_NO_BREAK_ITERATION */ |