/* * This file is part of the LibreOffice project. * * This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. * * This file incorporates work covered by the following license notice: * * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed * with this work for additional information regarding copyright * ownership. The ASF licenses this file to you under the Apache * License, Version 2.0 (the "License"); you may not use this file * except in compliance with the License. You may obtain a copy of * the License at http://www.apache.org/licenses/LICENSE-2.0 . */ package org.openoffice.xmerge.merger.diff; import org.w3c.dom.Node; import org.openoffice.xmerge.converter.xml.OfficeConstants; import java.util.ArrayList; import java.util.List; /** * This is a parser to return a character array for difference purpose. * *

It uses a depth-first search to traverse all the characters inside the * text {@code Node}s under a given {@code Node} (most likely a paragraph * {@code Node}).

* *

Note: Once the XML tree is parsed, the {@code Iterator} is a * snapshot of that tree. That means that even if the tree is modified later, * the cached paragraph {@code Node} list will not be updated accordingly. For * this reason, and for performance reasons, this {@code Iterator} does not * support any modifying operations such as insert, remove or replace. The main * purpose of this {@code Iterator} is to be used for computing differences, * not for merging.

*/ public class CharacterParser { private final TextNodeIterator textNodes; private int currentPosition = 0; private final List nodeList_; private char[] charArray; /** * Standard constructor. * * @param node The initial root {@code Node}. */ public CharacterParser(Node node) { textNodes = new TextNodeIterator(node); nodeList_ = new ArrayList(); parseNodes(); } /** * Returns the {@code Node} pointer with the given character position. * * @return The {@code Node} pointer with the given character position. */ public List getNodeList() { // will go through the nodeList to find the corresponding node return nodeList_; } /** * Returns the character array representation of the text. * * @return The character array representation of the text. */ public char[] getCharArray() { return charArray; } private void parseNodes() { StringBuffer strBuf = new StringBuffer(); /* create the character array by iterate the textnode iterator */ Node currentNode = (Node)(textNodes.start()); for (; currentNode != null; currentNode = (Node)(textNodes.next())) { // add the text value into the array String textValue = null; String nodeName = currentNode.getNodeName(); // TODO: Space node have a count attribute which is not handled! if (currentNode.getNodeType() == Node.TEXT_NODE) { textValue = currentNode.getNodeValue(); } else if (nodeName.equals(OfficeConstants.TAG_SPACE)) { textValue = " "; } else if (nodeName.equals(OfficeConstants.TAG_TAB_STOP)) { textValue = "\t"; } if (textValue != null) { strBuf.append(textValue); addNewNodeEntry(textValue.length(), currentNode); } } charArray = strBuf.toString().toCharArray(); } /** * Adds a new {@code Node} entry. * * @param textLen The text length. * @param node The {@code Node}. */ private void addNewNodeEntry(int textLen, Node node) { TextNodeEntry nodeEntry = new TextNodeEntry(currentPosition, currentPosition + textLen - 1, node); currentPosition = currentPosition + textLen; nodeList_.add(nodeEntry); } }