diff options
Diffstat (limited to 'xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java')
-rw-r--r-- | xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java | 127 |
1 files changed, 127 insertions, 0 deletions
diff --git a/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java b/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java new file mode 100644 index 000000000..622fedf9e --- /dev/null +++ b/xmerge/source/xmerge/java/org/openoffice/xmerge/merger/diff/CharacterParser.java @@ -0,0 +1,127 @@ +/* + * This file is part of the LibreOffice project. + * + * This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. + * + * This file incorporates work covered by the following license notice: + * + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed + * with this work for additional information regarding copyright + * ownership. The ASF licenses this file to you under the Apache + * License, Version 2.0 (the "License"); you may not use this file + * except in compliance with the License. You may obtain a copy of + * the License at http://www.apache.org/licenses/LICENSE-2.0 . + */ + +package org.openoffice.xmerge.merger.diff; + +import org.w3c.dom.Node; + +import org.openoffice.xmerge.converter.xml.OfficeConstants; + +import java.util.ArrayList; +import java.util.List; + +/** + * This is a parser to return a character array for difference purpose. + * + * <p>It will use depth first search to traverse all the characters inside the + * text {@code Node} under a given {@code Node} (most likely to be a paragraph + * {@code Node}).</p> + * + * <p>Note: Once the XML Tree is parsed, then the {@code Iterator} will be a + * snapshot of that tree. That means even the tree is modified later, then + * the cached paragraph {@code Node} list will not be updated accordingly. For + * this reason and for performance reasons this {@code Iterator} does not + * support any operation methods such as insert, remove or replace. The main + * purpose of this {@code Iterator} is to be used with difference, not with + * merge.</p> + */ +public class CharacterParser { + + private final TextNodeIterator textNodes; + private int currentPosition = 0; + private final List<TextNodeEntry> nodeList_; + private char[] charArray; + + /** + * Standard constructor. + * + * @param node The initial root {@code Node}. + */ + public CharacterParser(Node node) { + textNodes = new TextNodeIterator(node); + nodeList_ = new ArrayList<TextNodeEntry>(); + + parseNodes(); + } + + /** + * Returns the {@code Node} pointer with the given character position. + * + * @return The {@code Node} pointer with the given character position. + */ + public List<TextNodeEntry> getNodeList() { + // will go through the nodeList to find the corresponding node + return nodeList_; + } + + /** + * Returns the character array representation of the text. + * + * @return The character array representation of the text. + */ + public char[] getCharArray() { + return charArray; + } + + private void parseNodes() { + + StringBuffer strBuf = new StringBuffer(); + + /* create the character array by iterate the textnode iterator */ + Node currentNode = (Node)(textNodes.start()); + for (; + currentNode != null; + currentNode = (Node)(textNodes.next())) { + + // add the text value into the array + String textValue = null; + String nodeName = currentNode.getNodeName(); + + // TODO: Space node have a count attribute which is not handled! + if (currentNode.getNodeType() == Node.TEXT_NODE) { + textValue = currentNode.getNodeValue(); + } else if (nodeName.equals(OfficeConstants.TAG_SPACE)) { + textValue = " "; + } else if (nodeName.equals(OfficeConstants.TAG_TAB_STOP)) { + textValue = "\t"; + } + + if (textValue != null) { + strBuf.append(textValue); + addNewNodeEntry(textValue.length(), currentNode); + } + } + + charArray = strBuf.toString().toCharArray(); + } + + /** + * Adds a new {@code Node} entry. + * + * @param textLen The text length. + * @param node The {@code Node}. + */ + private void addNewNodeEntry(int textLen, Node node) { + + TextNodeEntry nodeEntry = new TextNodeEntry(currentPosition, + currentPosition + textLen - 1, node); + currentPosition = currentPosition + textLen; + + nodeList_.add(nodeEntry); + } +} |