diff options
Diffstat (limited to '')
-rwxr-xr-x | share/extensions/text_split.py | 313 |
1 files changed, 313 insertions, 0 deletions
#!/usr/bin/env python
# coding=utf-8
#
# Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com
#               2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""Splits a text element into lines, words, chars.
Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2),
flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns)

The code is structured as follows. For each selected text element:
 - preprocess_text_element duplicates the element, converts flowroots to plain text elements,
   and simplifies manual kerning if requested (only for split_words and split_chars, for all,
   possibly nested, children) using simplify_child_tspans.
 - if split lines: split_lines copies all top-level tspans from the previous step into their own
   text element, which is otherwise a duplicate of the original text element (thus preserving
   style and transforms), see append_splitted_element
 - if split words or chars: split_words_or_chars: the text is recursively processed. For each tspan,
   the content and tail is split (words: at spaces, chars: after each character) into their own
   tspan, again using append_splitted_element. The method keeps track of the horizontal and vertical
   coordinate, incrementing it with the number of characters and a multiple of font size.
"""

import re as regex
from typing import Union, Callable

import inkex
from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement
from inkex.units import parse_unit
from inkex.localization import inkex_gettext as _

TextLike = Union[FlowRoot, TextElement]


class TextSplit(inkex.EffectExtension):
    """Split text up."""

    def __init__(self):
        """Initialize State machine"""
        super().__init__()
        # callable(element, text) that splits one run of plain text; selected in effect()
        self.mode: Callable
        # threshold (in multiples of the font size) above which a manual kern counts as a gap
        self.separation: float = 1
        # estimated advance per character, in multiples of the font size
        self.fs_multiplier: float = 0.25
        # running insertion point for the next generated text element
        self.current_x: float = 0
        self.current_y: float = 0
        # whether preprocess_text_element should call simplify_child_tspans
        self.process_kerns: bool = True
        # the preprocessed working copy that new elements are inserted after
        self.current_root: TextLike
        self.current_fontsize: float = 0

    def add_arguments(self, pars):
        """Register the command line options of the extension."""
        pars.add_argument("--tab", help="The selected UI tab when OK was pressed")
        pars.add_argument(
            "-t",
            "--splittype",
            default="line",
            choices=["letter", "word", "line"],
            help="type of split",
        )
        pars.add_argument(
            "-p",
            "--preserve",
            type=inkex.Boolean,
            default=True,
            help="Preserve original",
        )
        pars.add_argument(
            "-s",
            "--separation",
            type=float,
            default=1,
            # note the trailing space: the two literals are concatenated
            help="Threshold for separating text with manual kerns in multiples of "
            "font-size",
        )

    def effect(self):
        """Applies the effect to every selected TextElement / FlowRoot.

        Elements that raise a TypeError while being processed are reported with
        inkex.errormsg and skipped; the remaining selection is still processed."""

        split_type = self.options.splittype
        preserve = self.options.preserve

        # checks if the selected elements are text nodes
        for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot):
            try:
                # reset per element, since letter mode overrides it below
                self.separation = self.options.separation
                if split_type == "line":
                    node = self.split_lines(elem)
                elif split_type == "word":
                    self.mode = self.process_plain_words
                    node = self.split_words_or_chars(elem)
                else:
                    # every manually kerned character is its own "word"
                    self.separation = 0
                    self.mode = self.process_plain_chars
                    node = self.split_words_or_chars(elem)

                # the preprocessed working copy is only scaffolding for the split
                node.getparent().remove(node)

                if not preserve:
                    elem.getparent().remove(elem)
            except TypeError as err:
                inkex.errormsg(err)  # if an element can not be processed

    @staticmethod
    def get_font_size(element):
        """get the font size of an element (value of "font-size" in its specified style)"""
        return element.specified_style()("font-size")

    @staticmethod
    def get_line_height(element: ShapeElement):
        """get the line height of an element (delegates to get_line_height_uu)"""
        return element.get_line_height_uu()

    def simplify_child_tspans(self, element: TextElement):
        """Checks all child tspans if they have manual kerns.
        If it does, try to find words (characters with a distance > separation * font-size).
        Each word is appended to the child as a nested tspan that carries the x coordinate
        of the word's first character; afterwards the per-character x list and the plain
        text of the child are removed. Nested tspans are processed recursively."""
        for child in list(element):
            # process manual kerns
            if not isinstance(child, Tspan):
                continue
            xvals = list(
                map(float, filter(len, regex.split(r"[,\s]", child.get("x") or "")))
            )
            content = child.text
            if content not in [None, ""] and len(xvals) >= 2:
                fsize = self.get_font_size(child)
                separation = self.separation * fsize
                current_word_start = 0
                for i in range(1, max(len(content), len(xvals))):
                    # reaching the last character or the last x value ends the scan;
                    # everything that is left forms the final word
                    at_end = i >= len(content) - 1 or i >= len(xvals) - 1
                    if at_end or abs(xvals[i] - xvals[i - 1]) > separation:
                        word_end = len(content) if at_end else i
                        wordspan = Tspan(x=str(xvals[current_word_start]))
                        wordspan.text = content[current_word_start:word_end]
                        child.add(wordspan)
                        current_word_start = word_end
                    if at_end:
                        # without this, a length mismatch between content and xvals
                        # would index past xvals or emit empty tspans
                        break
                child.pop("x")
                child.text = None
            # process child elements
            self.simplify_child_tspans(child)

    def preprocess_text_element(self, element: TextElement):
        """Processes a text element and returns an element containing tspans with x and y coordinate,
        possibly nested (for Inkscape-type kerning), so that the actual splitting can work as if the
        text was a simple text. Manual kerns (one x value per letter) are converted to spaces
        if requested (not necessary for "split characters")

        Raises TypeError if element is a FlowRoot whose flow region is not a rectangle."""

        oldelement = element
        if isinstance(element, FlowRoot):
            # validate before touching the document, so a failure leaves nothing behind
            region = oldelement.findone("svg:flowRegion")
            flowref = region[0] if region is not None and len(region) > 0 else None
            if not isinstance(flowref, Rectangle):
                raise TypeError(
                    _(
                        "Element {} uses a flow region that is not a rectangle. "
                        "First unflow text."
                    ).format(oldelement.get_id())
                )
            element = TextElement()
            oldelement.addnext(element)
            element.style = oldelement.style
            element.transform = oldelement.transform
            # both coordinates may carry a unit, so let unittouu parse them
            flowx = element.unittouu(flowref.get("x"))
            flowy = element.unittouu(flowref.get("y"))
            first = True
            for child in oldelement:
                if isinstance(child, FlowPara):
                    # convert the flowpara "line" (note: no automatic wrapping)
                    # to a tspan and set the y coordinate.
                    # future FlowRoot improvements could add a better conversion.
                    newchild = Tspan()
                    element.append(newchild)
                    newchild.text = child.text
                    newchild.style = child.style
                    newchild.transform = child.transform
                    newchild.set("x", str(flowx))
                    if first:
                        flowy += self.get_font_size(child) * 1.25
                        first = False
                    else:
                        flowy += self.get_line_height(child)
                    newchild.set("y", str(flowy))

        else:
            element = oldelement.duplicate()
            oldelement.getparent().append(element)

        element.style.pop("shape-inside", None)

        # Real support for RTL text is missing, but we can emulate it by just removing the
        # attribute. However, line breaks will be misaligned.
        element.style.pop("direction", None)
        for child in element:
            child.style.pop("direction", None)

        if self.process_kerns:
            self.simplify_child_tspans(element)
        return element

    def append_splitted_element(self, text, prototype=None):
        """Creates a new text element, sibling to self.current_root, at (self.current_x,
        self.current_y) with content text.

        text: either a Tspan that should be moved to a new text element - in this case, text is
            a direct child of element; or a string
        prototype: if text is a string, style and transform will be taken from prototype"""

        if isinstance(text, Tspan) and text.getparent() == self.current_root:
            # we just move the tspan to a new text element.
            elem = self.current_root.duplicate()
            elem.remove_all(Tspan)
            elem.append(text)
            elem.set("x", text.get("x"))
            elem.set("y", text.get("y"))
        else:
            elem = TextElement(x=str(self.current_x), y=str(self.current_y))
            # transfer the style from all parents, including the text element (if there's a style to
            # the text element's parent applied, it will be duplicated, but that doesn't really
            # matter)
            elem.style = prototype.specified_style()
            # the element will be appended to the parent of element, but there might be nested
            # tspans between the prototype and the element. The next line says
            # "compose transforms until you reach the parent of element"
            elem.transform = (
                -self.current_root.getparent().transform
            ) @ prototype.composed_transform()
            tsp = Tspan(x=str(self.current_x), y=str(self.current_y))
            tsp.text = text
            elem.add(tsp)
        self.current_root.addnext(elem)

    def split_lines(self, element: TextLike) -> TextElement:
        """Splits a text into its lines.

        Returns the preprocessed working copy (the caller removes it from the document)."""
        self.process_kerns = False
        preprocessed = self.preprocess_text_element(element)
        self.current_root = preprocessed
        # Now we only have to copy each tspan into its own text element.
        # Iterate over a copy: append_splitted_element moves children out of preprocessed.
        for child in list(preprocessed):
            self.append_splitted_element(child)

        return preprocessed

    def process_plain_text(self, element, splitted):
        """Appends new text elements to as sibling root for each element of splitted, starting at
        self.current_x, self.current_y, incrementing those, with prototype element (that
        styles and transforms will be taken from)"""
        if splitted is None:
            return
        for word in splitted:
            if word != "":
                self.append_splitted_element(word, element)
            # +1 since for words, we lost a space
            self.current_x += (
                self.current_fontsize * (len(word) + 1) * self.fs_multiplier
            )

    def process_plain_words(self, element, text):
        """Calls process_plain_text for splitting words"""
        self.fs_multiplier = 0.4
        if text is not None:
            self.process_plain_text(element, text.split(" "))

    def process_plain_chars(self, element, text):
        """Calls process_plain_text for splitting characters"""
        self.fs_multiplier = 0.25
        # a string iterates character by character; None is handled by process_plain_text
        self.process_plain_text(element, text)

    def split_words_or_chars(self, element: TextLike) -> TextElement:
        """Splits a text into its words or characters, depending on self.mode.

        Returns the preprocessed working copy (the caller removes it from the document)."""
        self.process_kerns = True
        preprocessed = self.preprocess_text_element(element)

        def process_element(element) -> None:
            """Split the text of element, then recurse into its tspans and their tails."""
            # an explicit coordinate on the element resets the running insertion point
            raw_x = element.get("x")
            raw_y = element.get("y")
            if raw_x is not None:
                self.current_x = element.root.unittouu(raw_x)
            if raw_y is not None:
                self.current_y = element.root.unittouu(raw_y)
            self.current_fontsize = self.get_font_size(element)
            self.mode(element, element.text)

            for elem in element:
                if isinstance(elem, Tspan):
                    process_element(elem)
                # the tail belongs to (and is styled like) the enclosing element
                self.mode(element, elem.tail)

        self.current_root = preprocessed
        process_element(preprocessed)
        return preprocessed


if __name__ == "__main__":
    TextSplit().run()