1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
|
#!/usr/bin/env python
# coding=utf-8
#
# Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com
# 2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""Splits a text element into lines, words, chars.
Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2),
flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns)
The code is structured as follows. For each selected text element:
- preprocess_text_element duplicates the element, converts flowroots to plain text elements,
and simplifies manual kerning if requested (only for split_words and split_chars, for all,
possibly nested, children) using simplify_nested_tspans.
- if split lines: split_lines copies all top-level tspans from the previous step into their own
text element, which is otherwise a duplicate of the original text element (thus preserving
style and transforms), see append_splitted_element
- if split words or chars: split_words_or_chars: the text is recursively processed. For each tspan,
the content and tail is split (words: at spaces, chars: after each character) into their own
tspan, again using append_splitted_element. The method keeps track of the horizontal and vertical
coordinate, incrementing it with the number of characters and a multiple of font size.
"""
import re as regex
from typing import Union, Callable
import inkex
from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement
from inkex.units import parse_unit
from inkex.localization import inkex_gettext as _
# Kinds of text container the splitting methods accept: flowed text (FlowRoot) or a plain
# text element.
TextLike = Union[FlowRoot, TextElement]
class TextSplit(inkex.EffectExtension):
    """Split selected text elements into one text element per line, word or character.

    While an element is being processed, the instance acts as a small state machine:
    ``current_root`` is the preprocessed working copy, ``current_x`` / ``current_y`` is the
    position at which the next split-off element is created, and ``mode`` is the callback
    that splits a chunk of plain text (by words or by characters).
    """

    def __init__(self):
        """Initialize the state machine."""
        super().__init__()
        # Callback applied to every text/tail chunk (process_plain_words or
        # process_plain_chars); assigned in effect().
        self.mode: Callable
        # Kerning gap, in multiples of the font size, above which a new word starts.
        self.separation: float = 1
        # Estimated horizontal advance per character, in multiples of the font size.
        self.fs_multiplier: float = 0.25
        # Position at which the next split-off element is placed.
        self.current_x: float = 0
        self.current_y: float = 0
        # Whether preprocess_text_element should simplify manual kerns.
        self.process_kerns: bool = True
        # Preprocessed working copy of the element that is currently being split.
        self.current_root: TextLike
        # Font size of the element that was entered most recently.
        self.current_fontsize: float = 0

    def add_arguments(self, pars):
        """Register the command line options of the extension on ``pars``."""
        pars.add_argument("--tab", help="The selected UI tab when OK was pressed")
        pars.add_argument(
            "-t",
            "--splittype",
            default="line",
            choices=["letter", "word", "line"],
            help="type of split",
        )
        pars.add_argument(
            "-p",
            "--preserve",
            type=inkex.Boolean,
            default=True,
            help="Preserve original",
        )
        pars.add_argument(
            "-s",
            "--separation",
            type=float,
            default=1,
            help="Threshold for separating text with manual kerns in multiples of "
            "font-size",
        )

    def effect(self):
        """Split every selected text element / flow root according to the options.

        Elements that raise a ``TypeError`` during processing are reported through
        ``inkex.errormsg`` and skipped.
        """
        split_type = self.options.splittype
        preserve = self.options.preserve

        # only text elements and flow roots of the selection are processed
        for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot):
            try:
                self.separation = self.options.separation
                if split_type == "line":
                    node = self.split_lines(elem)
                elif split_type == "word":
                    self.mode = self.process_plain_words
                    node = self.split_words_or_chars(elem)
                else:
                    # With a zero threshold, any change in x between neighbouring kerned
                    # characters starts a new chunk.
                    self.separation = 0
                    self.mode = self.process_plain_chars
                    node = self.split_words_or_chars(elem)

                # The preprocessed working copy was only scaffolding for the split.
                node.getparent().remove(node)

                if not preserve:
                    elem.getparent().remove(elem)
            except TypeError as err:
                inkex.errormsg(err)  # if an element can not be processed

    @staticmethod
    def get_font_size(element):
        """Return the ``font-size`` value of the element's specified style."""
        return element.specified_style()("font-size")

    @staticmethod
    def get_line_height(element: ShapeElement):
        """Return the line height of an element as given by ``get_line_height_uu``."""
        return element.get_line_height_uu()

    def simplify_child_tspans(self, element: TextElement):
        """Replace manually kerned text in child tspans by one nested tspan per word.

        A child tspan counts as manually kerned if it has text and its ``x`` attribute
        lists at least two values. Its characters are grouped into words: a new word
        starts wherever two consecutive x values differ by more than
        ``self.separation`` times the font size. Each word is put into a nested tspan
        whose ``x`` is the x value of the word's first character; afterwards the ``x``
        attribute and the text of the child itself are removed. Every child tspan is
        then processed recursively.
        """

        def add_word(parent, start, end, xvals, content):
            """Append a tspan holding content[start:end], positioned at xvals[start]."""
            wordspan = Tspan(x=str(xvals[start]))
            wordspan.text = content[start:end]
            parent.add(wordspan)

        for child in list(element):
            if not isinstance(child, Tspan):
                continue
            # process manual kerns
            xvals = [
                float(value)
                for value in regex.split(r"[,\s]", child.get("x") or "")
                if value
            ]
            content = child.text
            if content and len(xvals) >= 2:
                separation = self.separation * self.get_font_size(child)
                # Gaps can only be evaluated where both a character and an x value
                # exist. As before, the last such position is never tested: it is
                # always merged into the final word together with any characters
                # that have no x value of their own.
                last = min(len(content), len(xvals)) - 1
                word_start = 0
                for i in range(1, last):
                    if abs(xvals[i] - xvals[i - 1]) > separation:
                        add_word(child, word_start, i, xvals, content)
                        word_start = i
                # Consume the entire remaining string exactly once. (Continuing the
                # scan past this point used to index xvals out of range when the text
                # was longer than the x list, and appended empty tspans when it was
                # shorter.)
                add_word(child, word_start, len(content), xvals, content)
                child.pop("x")
                child.text = None
            # process child elements
            self.simplify_child_tspans(child)

    def preprocess_text_element(self, element: TextElement):
        """Return a working copy of ``element`` that can be split like simple text.

        * A ``FlowRoot`` is converted to a new ``TextElement`` inserted right after it:
          every ``FlowPara`` child becomes a tspan at the x position of the flow
          rectangle, with y advancing by 1.25 font sizes for the first line and by
          the line height for every further line (no automatic wrapping).
        * Any other element is duplicated, the duplicate is appended to the parent of
          the original, and ``shape-inside`` is removed from its style.

        In both cases ``direction`` is removed from the style of the copy and of its
        direct children, and manual kerns (one x value per letter) are simplified
        with ``simplify_child_tspans`` if ``self.process_kerns`` is set (not needed
        when splitting lines).

        Raises:
            TypeError: if the flow region of a ``FlowRoot`` is missing or its first
                child is not a rectangle. Nothing has been added to the document
                in that case.
        """
        oldelement = element
        if isinstance(element, FlowRoot):
            # Validate before touching the document so that a failure does not leave an
            # empty text element behind.
            region = oldelement.findone("svg:flowRegion")
            flowref = region[0] if region is not None and len(region) > 0 else None
            if not isinstance(flowref, Rectangle):
                raise TypeError(
                    _(
                        "Element {} uses a flow region that is not a rectangle. "
                        "First unflow text."
                    ).format(oldelement.get_id())
                )
            element = TextElement()
            oldelement.addnext(element)
            element.style = oldelement.style
            element.transform = oldelement.transform
            # x and y are both converted with unittouu so that values carrying a unit
            # are handled the same way for both coordinates.
            flowx = element.unittouu(flowref.get("x"))
            flowy = element.unittouu(flowref.get("y"))
            first = True
            for child in oldelement:
                if isinstance(child, FlowPara):
                    # convert the flowpara "line" (note: no automatic wrapping)
                    # to a tspan and set the y coordinate.
                    # future FlowRoot improvements could add a better conversion.
                    newchild = Tspan()
                    element.append(newchild)
                    newchild.text = child.text
                    newchild.style = child.style
                    newchild.transform = child.transform
                    newchild.set("x", flowx)
                    if first:
                        flowy += self.get_font_size(child) * 1.25
                        first = False
                    else:
                        flowy += self.get_line_height(child)
                    newchild.set("y", str(flowy))
        else:
            element = oldelement.duplicate()
            oldelement.getparent().append(element)
            element.style.pop("shape-inside", None)
        # Real support for RTL text is missing, but we can emulate it by just removing the
        # attribute. However, line breaks will be misaligned.
        element.style.pop("direction", None)
        for child in element:
            child.style.pop("direction", None)
        if self.process_kerns:
            self.simplify_child_tspans(element)
        return element

    def append_splitted_element(self, text, prototype=None):
        """Create a new text element as next sibling of ``self.current_root``.

        Args:
            text: either a Tspan that is a direct child of ``self.current_root``; it is
                moved into a duplicate of the root from which all other tspans are
                removed, and the duplicate takes over the tspan's x and y. Or a
                string, for which a new text element with a single tspan is created
                at (``self.current_x``, ``self.current_y``).
            prototype: element that style and transform are taken from if ``text`` is
                a string (required in that case).
        """
        if isinstance(text, Tspan) and text.getparent() == self.current_root:
            # we just move the tspan to a new text element.
            elem = self.current_root.duplicate()
            elem.remove_all(Tspan)
            elem.append(text)
            elem.set("x", text.get("x"))
            elem.set("y", text.get("y"))
        else:
            elem = TextElement(x=str(self.current_x), y=str(self.current_y))
            # transfer the style from all parents, including the text element (if there's a
            # style to the text element's parent applied, it will be duplicated, but that
            # doesn't really matter)
            elem.style = prototype.specified_style()
            # The new element becomes a child of the root's parent, but there might be
            # nested tspans between the prototype and the root. Compose the transforms from
            # the prototype upwards until the parent of the root is reached. The parent and
            # its ancestors are deliberately excluded: they apply to the new element anyway.
            # (Inverting only the parent's own transform against the fully composed
            # transform applied the transforms of all higher ancestors twice.)
            parent = self.current_root.getparent()
            transform = inkex.Transform()
            node = prototype
            while node is not None and node is not parent:
                transform = node.transform @ transform
                node = node.getparent()
            elem.transform = transform
            tsp = Tspan(x=str(self.current_x), y=str(self.current_y))
            tsp.text = text
            elem.add(tsp)
        self.current_root.addnext(elem)

    def split_lines(self, element: TextLike) -> TextElement:
        """Split a text into its lines (top-level children of the preprocessed copy).

        Returns the preprocessed working copy, whose children have been moved into
        their own text elements.
        """
        self.process_kerns = False
        preprocessed = self.preprocess_text_element(element)
        self.current_root = preprocessed
        # Now we only have to copy each tspan into its own text element.
        for child in list(preprocessed):
            self.append_splitted_element(child)
        return preprocessed

    def process_plain_text(self, element, splitted):
        """Append one new text element per non-empty item of ``splitted``.

        Elements are created at (``self.current_x``, ``self.current_y``) with
        ``element`` as prototype for style and transform. After every item, including
        empty ones, ``self.current_x`` advances by
        ``font size * (len(item) + 1) * self.fs_multiplier``.
        Does nothing if ``splitted`` is None.
        """
        if splitted is None:
            return
        for word in splitted:
            if word != "":
                self.append_splitted_element(word, element)
            # +1 since for words, we lost a space
            self.current_x += (
                self.current_fontsize * (len(word) + 1) * self.fs_multiplier
            )

    def process_plain_words(self, element, text):
        """Split ``text`` at spaces and hand the words to process_plain_text."""
        self.fs_multiplier = 0.4
        if text is not None:
            self.process_plain_text(element, text.split(" "))

    def process_plain_chars(self, element, text):
        """Hand ``text`` to process_plain_text character by character."""
        self.fs_multiplier = 0.25
        self.process_plain_text(element, text)

    def split_words_or_chars(self, element: TextLike) -> TextElement:
        """Split a text into words or characters, depending on ``self.mode``.

        The preprocessed copy is walked recursively; the text and the tails of the
        children of every element are passed to ``self.mode``. Returns the
        preprocessed working copy.
        """
        self.process_kerns = True
        preprocessed = self.preprocess_text_element(element)

        def process_element(element) -> None:
            """Update the current position / font size from ``element``, then split its
            text, its tspan children (recursively) and the tails of its children."""
            x_raw = element.get("x")
            y_raw = element.get("y")
            # an explicit coordinate on the element resets the running position
            if x_raw is not None:
                self.current_x = element.root.unittouu(x_raw)
            if y_raw is not None:
                self.current_y = element.root.unittouu(y_raw)
            self.current_fontsize = self.get_font_size(element)
            self.mode(element, element.text)
            for elem in element:
                if isinstance(elem, Tspan):
                    process_element(elem)
                # the tail of a child is text of (and styled by) the enclosing element
                self.mode(element, elem.tail)

        self.current_root = preprocessed
        process_element(preprocessed)
        return preprocessed
if __name__ == "__main__":
    # Run the extension when the file is executed as a script.
    TextSplit().run()
|