blob: 622fedf9edceaef9b42ad0dcb6df3f5cd2fafabf (
plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
|
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
package org.openoffice.xmerge.merger.diff;
import org.w3c.dom.Node;
import org.openoffice.xmerge.converter.xml.OfficeConstants;
import java.util.ArrayList;
import java.util.List;
/**
* This is a parser to return a character array for difference purpose.
*
* <p>It will use depth first search to traverse all the characters inside the
* text {@code Node} under a given {@code Node} (most likely to be a paragraph
* {@code Node}).</p>
*
* <p>Note: Once the XML Tree is parsed, then the {@code Iterator} will be a
* snapshot of that tree. That means even the tree is modified later, then
* the cached paragraph {@code Node} list will not be updated accordingly. For
* this reason and for performance reasons this {@code Iterator} does not
* support any operation methods such as insert, remove or replace. The main
* purpose of this {@code Iterator} is to be used with difference, not with
* merge.</p>
*/
public class CharacterParser {
private final TextNodeIterator textNodes;
private int currentPosition = 0;
private final List<TextNodeEntry> nodeList_;
private char[] charArray;
/**
* Standard constructor.
*
* @param node The initial root {@code Node}.
*/
public CharacterParser(Node node) {
textNodes = new TextNodeIterator(node);
nodeList_ = new ArrayList<TextNodeEntry>();
parseNodes();
}
/**
* Returns the {@code Node} pointer with the given character position.
*
* @return The {@code Node} pointer with the given character position.
*/
public List<TextNodeEntry> getNodeList() {
// will go through the nodeList to find the corresponding node
return nodeList_;
}
/**
* Returns the character array representation of the text.
*
* @return The character array representation of the text.
*/
public char[] getCharArray() {
return charArray;
}
private void parseNodes() {
StringBuffer strBuf = new StringBuffer();
/* create the character array by iterate the textnode iterator */
Node currentNode = (Node)(textNodes.start());
for (;
currentNode != null;
currentNode = (Node)(textNodes.next())) {
// add the text value into the array
String textValue = null;
String nodeName = currentNode.getNodeName();
// TODO: Space node have a count attribute which is not handled!
if (currentNode.getNodeType() == Node.TEXT_NODE) {
textValue = currentNode.getNodeValue();
} else if (nodeName.equals(OfficeConstants.TAG_SPACE)) {
textValue = " ";
} else if (nodeName.equals(OfficeConstants.TAG_TAB_STOP)) {
textValue = "\t";
}
if (textValue != null) {
strBuf.append(textValue);
addNewNodeEntry(textValue.length(), currentNode);
}
}
charArray = strBuf.toString().toCharArray();
}
/**
* Adds a new {@code Node} entry.
*
* @param textLen The text length.
* @param node The {@code Node}.
*/
private void addNewNodeEntry(int textLen, Node node) {
TextNodeEntry nodeEntry = new TextNodeEntry(currentPosition,
currentPosition + textLen - 1, node);
currentPosition = currentPosition + textLen;
nodeList_.add(nodeEntry);
}
}
|