share/extensions/text_split.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313

#!/usr/bin/env python
# coding=utf-8
#
# Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com
#               2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
"""Splits a text element into lines, words, chars.
Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2),
flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns)

The code is structured as follows. For each selected text element:
 - preprocess_text_element duplicates the element, converts flowroots to plain text elements,
   and simplifies manual kerning if requested (only for split_words and split_chars, for all,
   possibly nested, children) using simplify_child_tspans.
 - if split lines: split_lines copies all top-level tspans from the previous step into their own
   text element, which is otherwise a duplicate of the original text element (thus preserving
   style and transforms), see append_splitted_element
 - if split words or chars: split_words_or_chars: the text is recursively processed. For each tspan,
   the content and tail is split (words: at spaces, chars: after each character) into their own
   tspan, again using append_splitted_element. The method keeps track of the horizontal and vertical
   coordinate, incrementing it with the number of characters and a multiple of font size.
"""

import re as regex
from typing import Union, Callable

import inkex
from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement
from inkex.units import parse_unit
from inkex.localization import inkex_gettext as _

# Element types that split_lines and split_words_or_chars accept as their input.
TextLike = Union[FlowRoot, TextElement]


class TextSplit(inkex.EffectExtension):
    """Split text up."""

    def __init__(self):
        """Set up the mutable state that the splitting helpers share."""
        super().__init__()
        # Declared here only; both are assigned before a split actually starts
        # (mode in effect(), current_root in the split_* methods).
        self.mode: Callable
        self.current_root: TextLike
        # Position and font size used while emitting words / characters.
        self.current_x: float = 0
        self.current_y: float = 0
        self.current_fontsize: float = 0
        # Parameters steering kern detection and the horizontal advance estimate.
        self.process_kerns: bool = True
        self.separation: float = 1
        self.fs_multiplier: float = 0.25

    def add_arguments(self, pars):
        """Register the command line options of this extension on the parser ``pars``.

        Options: ``--tab`` (UI state, unused by the effect), ``--splittype``
        (letter/word/line), ``--preserve`` (keep the original element) and
        ``--separation`` (kerning threshold in multiples of the font size).
        """
        pars.add_argument("--tab", help="The selected UI tab when OK was pressed")
        pars.add_argument(
            "-t",
            "--splittype",
            default="line",
            choices=["letter", "word", "line"],
            help="type of split",
        )
        pars.add_argument(
            "-p",
            "--preserve",
            type=inkex.Boolean,
            default=True,
            help="Preserve original",
        )
        pars.add_argument(
            "-s",
            "--separation",
            type=float,
            default=1,
            # The two literals are concatenated by the parser, so the first one has
            # to end with a space (it used to read "multiples offont-size").
            help="Threshold for separating text with manual kerns in multiples of "
            "font-size",
        )

    def effect(self):
        """Split every selected text / flowed text element as requested by the options.

        Elements that cannot be processed (a TypeError is raised while splitting) are
        reported through inkex.errormsg and skipped.
        """
        split_type = self.options.splittype
        preserve = self.options.preserve

        # only text and flowed text elements of the selection are considered
        for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot):
            try:
                self.separation = self.options.separation
                if split_type == "line":
                    leftover = self.split_lines(elem)
                else:
                    if split_type != "word":
                        # splitting into letters: threshold is forced to zero
                        self.separation = 0
                        self.mode = self.process_plain_chars
                    else:
                        self.mode = self.process_plain_words
                    leftover = self.split_words_or_chars(elem)

                # the preprocessed working copy returned by the split is discarded
                leftover.getparent().remove(leftover)

                if not (preserve or leftover is None):
                    elem.getparent().remove(elem)
            except TypeError as err:
                # the element could not be processed; tell the user and carry on
                inkex.errormsg(err)

    @staticmethod
    def get_font_size(element):
        """Look up "font-size" on the specified style of ``element`` and return it"""
        specified = element.specified_style()
        return specified("font-size")

    @staticmethod
    def get_line_height(element: ShapeElement):
        """Return what ``element.get_line_height_uu()`` reports as the line height"""
        line_height = element.get_line_height_uu()
        return line_height

    def simplify_child_tspans(self, element: TextElement):
        """Replaces manual kerns on the child tspans of element by one tspan per word.

        A child Tspan is rewritten if it has text and its ``x`` attribute lists at least
        two values. A new word starts wherever two consecutive x values differ by more
        than ``self.separation`` times the font size of the child. Every word is added
        to the child as a nested Tspan positioned at the x value of its first
        character; the child's own ``x`` attribute and text are removed afterwards.
        Finally the method recurses into every child Tspan.

        element: element whose Tspan children are rewritten in place.
        """
        for child in list(element):
            # process manual kerns
            if not isinstance(child, Tspan):
                continue
            xvals = [
                float(token)
                for token in regex.split(r"[,\s]", child.get("x") or "")
                if token
            ]
            content = child.text
            if content and len(xvals) >= 2:
                separation = self.separation * self.get_font_size(child)
                # A gap can only be tested where both a character and an x value
                # exist. As before, everything from the last such index onwards is
                # taken as the tail of the final word.
                last_usable = min(len(content), len(xvals)) - 1
                starts = [0]
                starts.extend(
                    i
                    for i in range(1, last_usable)
                    if abs(xvals[i] - xvals[i - 1]) > separation
                )
                # The final word always runs to the end of the text and is emitted
                # exactly once. Previously the loop kept going after consuming the
                # remainder, which appended empty tspans or indexed past xvals.
                ends = starts[1:] + [len(content)]
                for start, end in zip(starts, ends):
                    wordspan = Tspan(x=str(xvals[start]))
                    wordspan.text = content[start:end]
                    child.add(wordspan)
                child.pop("x")
                child.text = None
            # process child elements
            self.simplify_child_tspans(child)

    def preprocess_text_element(self, element: TextElement):
        """Processes a text element and returns an element containing tspans with x and y coordinate,
        possibly nested (for Inkscape-type kerning), so that the actual splitting can work as if the
        text was a simple text. Manual kerns (one x value per letter) are converted to spaces
        if requested (not necessary for "split characters")

        For a FlowRoot a new TextElement is inserted after it and filled with one tspan
        per FlowPara child; any other element is duplicated and the duplicate is appended
        to the parent. The "shape-inside" and "direction" style entries are dropped from
        the result.

        Returns the new working element; the input element itself is left untouched.
        Raises TypeError if a FlowRoot does not use a rectangle as its flow region.
        """

        oldelement = element
        if isinstance(oldelement, FlowRoot):
            # Validate the flow region before touching the document, so a failure
            # neither leaves an empty text element behind nor reports the id of that
            # helper element instead of the flow root's.
            region = oldelement.findone("svg:flowRegion")
            flowref = region[0] if region is not None and len(region) > 0 else None
            if not isinstance(flowref, Rectangle):
                raise TypeError(
                    _(
                        "Element {} uses a flow region that is not a rectangle. "
                        "First unflow text."
                    ).format(oldelement.get_id())
                )
            element = TextElement()
            oldelement.addnext(element)
            element.style = oldelement.style
            element.transform = oldelement.transform
            flowx = element.unittouu(flowref.get("x"))
            # parsed like x, so that a value carrying a unit does not crash float()
            flowy = element.unittouu(flowref.get("y"))
            first = True
            for child in oldelement:
                if isinstance(child, FlowPara):
                    # convert the flowpara "line" (note: no automatic wrapping)
                    # to a tspan and set the y coordinate.
                    # future FlowRoot improvements could add a better conversion.
                    newchild = Tspan()
                    element.append(newchild)
                    newchild.text = child.text
                    newchild.style = child.style
                    newchild.transform = child.transform
                    newchild.set("x", flowx)
                    if first:
                        # first line: offset from the top of the region by 1.25 * font size
                        flowy += self.get_font_size(child) * 1.25
                        first = False
                    else:
                        flowy += self.get_line_height(child)
                    newchild.set("y", str(flowy))

        else:
            element = oldelement.duplicate()
            oldelement.getparent().append(element)

        element.style.pop("shape-inside", None)

        # Real support for RTL text is missing, but we can emulate it by just removing the
        # attribute. However, line breaks will be misaligned.
        element.style.pop("direction", None)
        for child in element:
            child.style.pop("direction", None)

        if self.process_kerns:
            self.simplify_child_tspans(element)
        return element

    def append_splitted_element(self, text, prototype=None):
        """Adds a new text element right after self.current_root.

        text: either a Tspan that is a direct child of self.current_root - it is then
            moved into a duplicate of the root that has been stripped of its tspans - or
            a string, which is placed at (self.current_x, self.current_y)
        prototype: only used if text is a string; style and transform of the new element
            are derived from it"""

        root = self.current_root
        if isinstance(text, Tspan) and text.getparent() == root:
            # move the existing line into its own copy of the root element
            new_text = root.duplicate()
            new_text.remove_all(Tspan)
            new_text.append(text)
            for axis in ("x", "y"):
                new_text.set(axis, text.get(axis))
        else:
            position = {"x": str(self.current_x), "y": str(self.current_y)}
            new_text = TextElement(**position)
            # the specified style already contains everything inherited from the
            # ancestors of the prototype
            new_text.style = prototype.specified_style()
            # the new element becomes a sibling of the root, so the transform of the
            # root's parent is taken out of the prototype's composed transform
            parent_inverse = -root.getparent().transform
            new_text.transform = parent_inverse @ prototype.composed_transform()
            span = Tspan(**position)
            span.text = text
            new_text.add(span)
        root.addnext(new_text)

    def split_lines(self, element: TextLike) -> TextElement:
        """Splits a text into its lines.

        The element is preprocessed without kern simplification, then every top-level
        tspan of the preprocessed copy is moved into a text element of its own.

        Returns the preprocessed copy (now without the moved tspans).
        """
        self.process_kerns = False
        preprocessed = self.preprocess_text_element(element)
        self.current_root = preprocessed
        # Now we only have to copy each tspan into its own text element.
        for child in list(preprocessed):
            # Children that are not tspans cannot be moved as a line: they would be
            # treated as a plain string without a prototype by
            # append_splitted_element and crash there, so they are skipped.
            if isinstance(child, Tspan):
                self.append_splitted_element(child)

        return preprocessed

    def process_plain_text(self, element, splitted):
        """Emits one new text element per non-empty item of splitted (an iterable of
        strings, or None for "nothing to do"), using element as the prototype for style
        and transform. self.current_x is advanced after every item, empty or not."""
        if splitted is not None:
            for piece in splitted:
                if piece:
                    self.append_splitted_element(piece, element)
                # one extra character per item: for words this accounts for the space
                # that was dropped by the split
                advance = self.current_fontsize * (len(piece) + 1) * self.fs_multiplier
                self.current_x += advance

    def process_plain_words(self, element, text):
        """Splits text at single spaces and hands the pieces to process_plain_text"""
        self.fs_multiplier = 0.4
        if text is None:
            return
        words = text.split(" ")
        self.process_plain_text(element, words)

    def process_plain_chars(self, element, text):
        """Hands text to process_plain_text unchanged, so it is split per character"""
        self.fs_multiplier = 0.25
        # iterating a string yields its characters; None is dealt with by the callee
        characters = text
        self.process_plain_text(element, characters)

    def split_words_or_chars(self, element: TextLike) -> TextElement:
        """Splits a text into its words or characters, depending on self.mode.

        The element is preprocessed with kern simplification enabled and then walked
        recursively: for each node, its text and the tail of each of its children are
        passed to self.mode, which emits the new text elements and advances
        self.current_x. x / y attributes found on the way reset the current position.

        Returns the preprocessed copy of the element.
        """
        self.process_kerns = True
        preprocessed = self.preprocess_text_element(element)
        self.current_root = preprocessed

        def process_element(node) -> None:
            """Emit the text of node, then recurse into its tspans and emit their tails."""
            raw_x, raw_y = node.get("x"), node.get("y")
            if raw_x is not None:
                self.current_x = node.root.unittouu(raw_x)
            if raw_y is not None:
                self.current_y = node.root.unittouu(raw_y)
            fontsize = self.get_font_size(node)
            self.current_fontsize = fontsize
            self.mode(node, node.text)

            for child in node:
                if isinstance(child, Tspan):
                    process_element(child)
                    # The tail belongs to node, not to the nested tspan, so its
                    # advance has to be estimated with the font size of node again.
                    self.current_fontsize = fontsize
                self.mode(node, child.tail)

        process_element(preprocessed)
        return preprocessed


# Run the extension when this file is executed as a script.
if __name__ == "__main__":
    TextSplit().run()