summaryrefslogtreecommitdiffstats
path: root/intl/icu/source/common/rbbinode.cpp
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--intl/icu/source/common/rbbinode.cpp372
1 files changed, 372 insertions, 0 deletions
diff --git a/intl/icu/source/common/rbbinode.cpp b/intl/icu/source/common/rbbinode.cpp
new file mode 100644
index 0000000000..69d84151fe
--- /dev/null
+++ b/intl/icu/source/common/rbbinode.cpp
@@ -0,0 +1,372 @@
+// © 2016 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+/*
+***************************************************************************
+* Copyright (C) 2002-2016 International Business Machines Corporation *
+* and others. All rights reserved. *
+***************************************************************************
+*/
+
+//
+// File: rbbinode.cpp
+//
+// Implementation of class RBBINode, which represents a node in the
+// tree generated when parsing the Rules Based Break Iterator rules.
+//
+// This "Class" is actually closer to a struct.
+// Code using it is expected to directly access fields much of the time.
+//
+
+#include "unicode/utypes.h"
+
+#if !UCONFIG_NO_BREAK_ITERATION
+
+#include "unicode/unistr.h"
+#include "unicode/uniset.h"
+#include "unicode/uchar.h"
+#include "unicode/parsepos.h"
+
+#include "cstr.h"
+#include "uvector.h"
+
+#include "rbbirb.h"
+#include "rbbinode.h"
+
+#include "uassert.h"
+
+
+U_NAMESPACE_BEGIN
+
+#ifdef RBBI_DEBUG
+static int gLastSerial = 0;
+#endif
+
+
+//-------------------------------------------------------------------------
+//
+// Constructor. Just set the fields to reasonable default values.
+//
+//-------------------------------------------------------------------------
+RBBINode::RBBINode(NodeType t) : UMemory() {
+#ifdef RBBI_DEBUG
+ fSerialNum = ++gLastSerial;
+#endif
+ fType = t;
+ fParent = NULL;
+ fLeftChild = NULL;
+ fRightChild = NULL;
+ fInputSet = NULL;
+ fFirstPos = 0;
+ fLastPos = 0;
+ fNullable = FALSE;
+ fLookAheadEnd = FALSE;
+ fRuleRoot = FALSE;
+ fChainIn = FALSE;
+ fVal = 0;
+ fPrecedence = precZero;
+
+ UErrorCode status = U_ZERO_ERROR;
+ fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
+ fLastPosSet = new UVector(status);
+ fFollowPos = new UVector(status);
+ if (t==opCat) {fPrecedence = precOpCat;}
+ else if (t==opOr) {fPrecedence = precOpOr;}
+ else if (t==opStart) {fPrecedence = precStart;}
+ else if (t==opLParen) {fPrecedence = precLParen;}
+
+}
+
+
+RBBINode::RBBINode(const RBBINode &other) : UMemory(other) {
+#ifdef RBBI_DEBUG
+ fSerialNum = ++gLastSerial;
+#endif
+ fType = other.fType;
+ fParent = NULL;
+ fLeftChild = NULL;
+ fRightChild = NULL;
+ fInputSet = other.fInputSet;
+ fPrecedence = other.fPrecedence;
+ fText = other.fText;
+ fFirstPos = other.fFirstPos;
+ fLastPos = other.fLastPos;
+ fNullable = other.fNullable;
+ fVal = other.fVal;
+ fRuleRoot = FALSE;
+ fChainIn = other.fChainIn;
+ UErrorCode status = U_ZERO_ERROR;
+ fFirstPosSet = new UVector(status); // TODO - get a real status from somewhere
+ fLastPosSet = new UVector(status);
+ fFollowPos = new UVector(status);
+}
+
+
+//-------------------------------------------------------------------------
+//
+// Destructor. Deletes both this node AND any child nodes,
+// except in the case of variable reference nodes. For
+// these, the l. child points back to the definition, which
+// is common for all references to the variable, meaning
+// it can't be deleted here.
+//
+//-------------------------------------------------------------------------
+RBBINode::~RBBINode() {
+ // printf("deleting node %8x serial %4d\n", this, this->fSerialNum);
+ delete fInputSet;
+ fInputSet = NULL;
+
+ switch (this->fType) {
+ case varRef:
+ case setRef:
+ // for these node types, multiple instances point to the same "children"
+ // Storage ownership of children handled elsewhere. Don't delete here.
+ break;
+
+ default:
+ delete fLeftChild;
+ fLeftChild = NULL;
+ delete fRightChild;
+ fRightChild = NULL;
+ }
+
+
+ delete fFirstPosSet;
+ delete fLastPosSet;
+ delete fFollowPos;
+
+}
+
+
+//-------------------------------------------------------------------------
+//
+// cloneTree Make a copy of the subtree rooted at this node.
+// Discard any variable references encountered along the way,
+// and replace with copies of the variable's definitions.
+// Used to replicate the expression underneath variable
+// references in preparation for generating the DFA tables.
+//
+//-------------------------------------------------------------------------
+RBBINode *RBBINode::cloneTree() {
+ RBBINode *n;
+
+ if (fType == RBBINode::varRef) {
+ // If the current node is a variable reference, skip over it
+ // and clone the definition of the variable instead.
+ n = fLeftChild->cloneTree();
+ } else if (fType == RBBINode::uset) {
+ n = this;
+ } else {
+ n = new RBBINode(*this);
+ // Check for null pointer.
+ if (n != NULL) {
+ if (fLeftChild != NULL) {
+ n->fLeftChild = fLeftChild->cloneTree();
+ n->fLeftChild->fParent = n;
+ }
+ if (fRightChild != NULL) {
+ n->fRightChild = fRightChild->cloneTree();
+ n->fRightChild->fParent = n;
+ }
+ }
+ }
+ return n;
+}
+
+
+
+//-------------------------------------------------------------------------
+//
+// flattenVariables Walk a parse tree, replacing any variable
+// references with a copy of the variable's definition.
+// Aside from variables, the tree is not changed.
+//
+// Return the root of the tree. If the root was not a variable
+// reference, it remains unchanged - the root we started with
+// is the root we return. If, however, the root was a variable
+// reference, the root of the newly cloned replacement tree will
+// be returned, and the original tree deleted.
+//
+// This function works by recursively walking the tree
+// without doing anything until a variable reference is
+// found, then calling cloneTree() at that point. Any
+// nested references are handled by cloneTree(), not here.
+//
+//-------------------------------------------------------------------------
+RBBINode *RBBINode::flattenVariables() {
+ if (fType == varRef) {
+ RBBINode *retNode = fLeftChild->cloneTree();
+ if (retNode != NULL) {
+ retNode->fRuleRoot = this->fRuleRoot;
+ retNode->fChainIn = this->fChainIn;
+ }
+ delete this; // TODO: undefined behavior. Fix.
+ return retNode;
+ }
+
+ if (fLeftChild != NULL) {
+ fLeftChild = fLeftChild->flattenVariables();
+ fLeftChild->fParent = this;
+ }
+ if (fRightChild != NULL) {
+ fRightChild = fRightChild->flattenVariables();
+ fRightChild->fParent = this;
+ }
+ return this;
+}
+
+
+//-------------------------------------------------------------------------
+//
+// flattenSets Walk the parse tree, replacing any nodes of type setRef
+// with a copy of the expression tree for the set. A set's
+// equivalent expression tree is precomputed and saved as
+// the left child of the uset node.
+//
+//-------------------------------------------------------------------------
+void RBBINode::flattenSets() {
+ U_ASSERT(fType != setRef);
+
+ if (fLeftChild != NULL) {
+ if (fLeftChild->fType==setRef) {
+ RBBINode *setRefNode = fLeftChild;
+ RBBINode *usetNode = setRefNode->fLeftChild;
+ RBBINode *replTree = usetNode->fLeftChild;
+ fLeftChild = replTree->cloneTree();
+ fLeftChild->fParent = this;
+ delete setRefNode;
+ } else {
+ fLeftChild->flattenSets();
+ }
+ }
+
+ if (fRightChild != NULL) {
+ if (fRightChild->fType==setRef) {
+ RBBINode *setRefNode = fRightChild;
+ RBBINode *usetNode = setRefNode->fLeftChild;
+ RBBINode *replTree = usetNode->fLeftChild;
+ fRightChild = replTree->cloneTree();
+ fRightChild->fParent = this;
+ delete setRefNode;
+ } else {
+ fRightChild->flattenSets();
+ }
+ }
+}
+
+
+
+//-------------------------------------------------------------------------
+//
+// findNodes() Locate all the nodes of the specified type, starting
+// at the specified root.
+//
+//-------------------------------------------------------------------------
+void RBBINode::findNodes(UVector *dest, RBBINode::NodeType kind, UErrorCode &status) {
+ /* test for buffer overflows */
+ if (U_FAILURE(status)) {
+ return;
+ }
+ if (fType == kind) {
+ dest->addElement(this, status);
+ }
+ if (fLeftChild != NULL) {
+ fLeftChild->findNodes(dest, kind, status);
+ }
+ if (fRightChild != NULL) {
+ fRightChild->findNodes(dest, kind, status);
+ }
+}
+
+
+//-------------------------------------------------------------------------
+//
+// print. Print out a single node, for debugging.
+//
+//-------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+
+static int32_t serial(const RBBINode *node) {
+ return (node == NULL? -1 : node->fSerialNum);
+}
+
+
+void RBBINode::printNode(const RBBINode *node) {
+ static const char * const nodeTypeNames[] = {
+ "setRef",
+ "uset",
+ "varRef",
+ "leafChar",
+ "lookAhead",
+ "tag",
+ "endMark",
+ "opStart",
+ "opCat",
+ "opOr",
+ "opStar",
+ "opPlus",
+ "opQuestion",
+ "opBreak",
+ "opReverse",
+ "opLParen"
+ };
+
+ if (node==NULL) {
+ RBBIDebugPrintf("%10p", (void *)node);
+ } else {
+ RBBIDebugPrintf("%10p %5d %12s %c%c %5d %5d %5d %6d %d ",
+ (void *)node, node->fSerialNum, nodeTypeNames[node->fType],
+ node->fRuleRoot?'R':' ', node->fChainIn?'C':' ',
+ serial(node->fLeftChild), serial(node->fRightChild), serial(node->fParent),
+ node->fFirstPos, node->fVal);
+ if (node->fType == varRef) {
+ RBBI_DEBUG_printUnicodeString(node->fText);
+ }
+ }
+ RBBIDebugPrintf("\n");
+}
+#endif
+
+
+#ifdef RBBI_DEBUG
+U_CFUNC void RBBI_DEBUG_printUnicodeString(const UnicodeString &s, int minWidth) {
+ RBBIDebugPrintf("%*s", minWidth, CStr(s)());
+}
+#endif
+
+
+//-------------------------------------------------------------------------
+//
+// print. Print out the tree of nodes rooted at "this"
+//
+//-------------------------------------------------------------------------
+#ifdef RBBI_DEBUG
+void RBBINode::printNodeHeader() {
+ RBBIDebugPrintf(" Address serial type LeftChild RightChild Parent position value\n");
+}
+
+void RBBINode::printTree(const RBBINode *node, UBool printHeading) {
+ if (printHeading) {
+ printNodeHeader();
+ }
+ printNode(node);
+ if (node != NULL) {
+ // Only dump the definition under a variable reference if asked to.
+ // Unconditinally dump children of all other node types.
+ if (node->fType != varRef) {
+ if (node->fLeftChild != NULL) {
+ printTree(node->fLeftChild, FALSE);
+ }
+
+ if (node->fRightChild != NULL) {
+ printTree(node->fRightChild, FALSE);
+ }
+ }
+ }
+}
+#endif
+
+
+
+U_NAMESPACE_END
+
+#endif /* #if !UCONFIG_NO_BREAK_ITERATION */