1 files changed, 127 insertions, 0 deletions
diff --git a/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java b/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java
new file mode 100644
index 000000000..622fedf9e
--- /dev/null
+++ b/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java
@@ -0,0 +1,127 @@
+/*
+ * This file is part of the LibreOffice project.
+ *
+ * This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/.
+ *
+ * This file incorporates work covered by the following license notice:
+ *
+ *   Licensed to the Apache Software Foundation (ASF) under one or more
+ *   contributor license agreements. See the NOTICE file distributed
+ *   with this work for additional information regarding copyright
+ *   ownership. The ASF licenses this file to you under the Apache
+ *   License, Version 2.0 (the "License"); you may not use this file
+ *   except in compliance with the License. You may obtain a copy of
+ *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
+ */
+
+package org.openoffice.xmerge.merger.diff;
+
+import org.w3c.dom.Node;
+
+import org.openoffice.xmerge.converter.xml.OfficeConstants;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * This is a parser that returns a character array for difference purposes.
+ *
+ * <p>It will use depth first search to traverse all the characters inside the
+ * text {@code Node} under a given {@code Node} (most likely to be a paragraph
+ * {@code Node}).</p>
+ *
+ * <p>Note: The XML tree is parsed only once, so this parser holds a snapshot
+ * of that tree. That means that even if the tree is modified later, the
+ * cached text {@code Node} list will not be updated accordingly. For this
+ * reason and for performance reasons this parser does not support any
+ * operation methods such as insert, remove or replace. The main purpose of
+ * this parser is to be used for computing differences, not for merging.
+ * </p>
+ */
+public class CharacterParser {
+
+    private final TextNodeIterator textNodes;
+    private int currentPosition = 0;
+    private final List<TextNodeEntry> nodeList_;
+    private char[] charArray;
+
+    /**
+     * Standard constructor.
+     *
+     * @param  node  The initial root {@code Node}.
+     */
+    public CharacterParser(Node node) {
+        textNodes = new TextNodeIterator(node);
+        nodeList_ = new ArrayList<TextNodeEntry>();
+
+        parseNodes();
+    }
+
+    /**
+     * Returns the {@code Node} pointer with the given character position.
+     *
+     * @return  The {@code Node} pointer with the given character position.
+     */
+    public List<TextNodeEntry> getNodeList() {
+        // will go through the nodeList to find the corresponding node
+        return  nodeList_;
+    }
+
+    /**
+     *  Returns the character array representation of the text.
+     *
+     *  @return  The character array representation of the text.
+     */
+    public char[] getCharArray() {
+        return charArray;
+    }
+
+    private void parseNodes() {
+
+        StringBuffer strBuf = new StringBuffer();
+
+        /* create the character array by iterate the textnode iterator */
+        Node currentNode = (Node)(textNodes.start());
+        for (;
+             currentNode != null;
+             currentNode = (Node)(textNodes.next())) {
+
+            // add the text value into the array
+            String textValue  = null;
+            String nodeName = currentNode.getNodeName();
+
+            // TODO: Space node have a count attribute which is not handled!
+            if (currentNode.getNodeType() == Node.TEXT_NODE) {
+                textValue = currentNode.getNodeValue();
+            } else if (nodeName.equals(OfficeConstants.TAG_SPACE)) {
+                textValue = " ";
+            } else if (nodeName.equals(OfficeConstants.TAG_TAB_STOP)) {
+                textValue = "\t";
+            }
+
+            if (textValue != null) {
+                strBuf.append(textValue);
+                addNewNodeEntry(textValue.length(), currentNode);
+            }
+        }
+
+        charArray = strBuf.toString().toCharArray();
+    }
+
+    /**
+     * Adds a new {@code Node} entry.
+     *
+     * @param  textLen  The text length.
+     * @param  node     The {@code Node}.
+     */
+    private void addNewNodeEntry(int textLen, Node node) {
+
+        TextNodeEntry nodeEntry = new TextNodeEntry(currentPosition,
+                                      currentPosition + textLen - 1, node);
+        currentPosition         = currentPosition + textLen;
+
+        nodeList_.add(nodeEntry);
+    }
+}