#!/usr/bin/env python
# coding=utf-8
#
# Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com
#               2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
"""Splits a text element into lines, words, chars.

Supports all text elements that Inkscape can create, such as normal text,
shape-inside (SVG2), flowroot (SVG1.2), inline-size, manual kerns, and nested tspans
(with possibly different kerns)

The code is structured as follows. For each selected text element:

- preprocess_text_element duplicates the element, converts flowroots to plain text
  elements, and simplifies manual kerning if requested (only for split_words and
  split_chars, for all, possibly nested, children) using simplify_child_tspans.
- if split lines: split_lines copies all top-level tspans from the previous step into
  their own text element, which is otherwise a duplicate of the original text element
  (thus preserving style and transforms), see append_splitted_element
- if split words or chars: split_words_or_chars: the text is recursively processed.
  For each tspan, the content and tail is split (words: at spaces, chars: after each
  character) into their own tspan, again using append_splitted_element.
  The method keeps track of the horizontal and vertical coordinate, incrementing it
  with the number of characters and a multiple of font size.
"""

import re as regex
from typing import Union, Callable

import inkex
from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement
from inkex.units import parse_unit
from inkex.localization import inkex_gettext as _

TextLike = Union[FlowRoot, TextElement]


class TextSplit(inkex.EffectExtension):
    """Split text up."""

    def __init__(self):
        """Initialize State machine"""
        super().__init__()
        # callback used by split_words_or_chars to split one plain string;
        # assigned in effect() (process_plain_words or process_plain_chars)
        self.mode: Callable
        # kerning threshold in multiples of the font size (see --separation)
        self.separation: float = 1
        # horizontal advance per character, in multiples of the font size
        self.fs_multiplier: float = 0.25
        # position at which the next splitted element is created
        self.current_x: float = 0
        self.current_y: float = 0
        # whether preprocess_text_element should simplify manual kerns
        self.process_kerns: bool = True
        # the preprocessed text element; new elements become its siblings
        self.current_root: TextLike
        # font size used to advance current_x
        self.current_fontsize: float = 0

    def add_arguments(self, pars):
        """Register the command line options of the extension"""
        pars.add_argument("--tab", help="The selected UI tab when OK was pressed")
        pars.add_argument(
            "-t",
            "--splittype",
            default="line",
            choices=["letter", "word", "line"],
            help="type of split",
        )
        pars.add_argument(
            "-p",
            "--preserve",
            type=inkex.Boolean,
            default=True,
            help="Preserve original",
        )
        pars.add_argument(
            "-s",
            "--separation",
            type=float,
            default=1,
            # note the trailing space: the two literals are concatenated
            help="Threshold for separating text with manual kerns in multiples of "
            "font-size",
        )

    def effect(self):
        """Applies the effect"""

        split_type = self.options.splittype
        preserve = self.options.preserve

        # checks if the selected elements are text nodes
        for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot):
            try:
                self.separation = self.options.separation
                if split_type == "line":
                    node = self.split_lines(elem)
                elif split_type == "word":
                    self.mode = self.process_plain_words
                    node = self.split_words_or_chars(elem)
                else:
                    # every character is split, so no kerning threshold applies
                    self.separation = 0
                    self.mode = self.process_plain_chars
                    node = self.split_words_or_chars(elem)

                # the preprocessed working copy is no longer needed
                node.getparent().remove(node)

                if not preserve:
                    elem.getparent().remove(elem)
            except TypeError as err:
                inkex.errormsg(err)  # if an element can not be processed

    @staticmethod
    def get_font_size(element):
        """get the font size of an element (looked up on its specified style)"""
        return element.specified_style()("font-size")

    @staticmethod
    def get_line_height(element: ShapeElement):
        """get the line height of an element"""
        return element.get_line_height_uu()

    def simplify_child_tspans(self, element: TextElement):
        """Checks all child tspans if they have manual kerns (more than one x value).

        If a tspan does, its text is cut into words: a new word starts wherever two
        consecutive x values differ by more than separation * font-size. Each word is
        added to the tspan as a nested tspan carrying the x value of the word's first
        character; the tspan's own text and x attribute are then removed.
        All child tspans are processed recursively."""
        for child in list(element):
            # process manual kerns
            if not isinstance(child, Tspan):
                continue
            xvals = [
                float(value)
                for value in regex.split(r"[,\s]", child.get("x") or "")
                if value
            ]

            content = child.text
            if content not in [None, ""] and len(xvals) >= 2:
                fsize = self.get_font_size(child)
                separation = self.separation * fsize
                current_word_start = 0
                for i in range(1, max(len(content), len(xvals))):
                    end = i
                    if i >= len(content) - 1 or i >= len(xvals) - 1:
                        # consume the entire remaining string
                        end = len(content)
                    if (
                        end == len(content)
                        or abs(xvals[end] - xvals[end - 1]) > separation
                    ):
                        wordspan = Tspan(x=str(xvals[current_word_start]))
                        wordspan.text = content[current_word_start:end]
                        child.add(wordspan)
                        current_word_start = end
                    if end == len(content):
                        # everything is consumed. Without stopping here, texts with
                        # a different number of characters and x values would yield
                        # empty tspans or index xvals out of range.
                        break
                child.pop("x")
                child.text = None
            # process child elements
            self.simplify_child_tspans(child)

    def preprocess_text_element(self, element: TextElement):
        """Processes a text element and returns an element containing tspans with x and
        y coordinate, possibly nested (for Inkscape-type kerning), so that the actual
        splitting can work as if the text was a simple text.
        Manual kerns (one x value per letter) are converted to spaces if requested
        (not necessary for "split characters")

        Raises TypeError if element is a FlowRoot whose flow region is not a
        rectangle."""

        oldelement = element
        if isinstance(element, FlowRoot):
            # Validate the flow region before touching the document, so that a
            # failure does not leave an empty text element behind.
            flowregion = oldelement.findone("svg:flowRegion")
            flowref = None
            if flowregion is not None and len(flowregion) > 0:
                flowref = flowregion[0]
            if not isinstance(flowref, Rectangle):
                raise TypeError(
                    _(
                        "Element {} uses a flow region that is not a rectangle. "
                        "First unflow text."
                    ).format(oldelement.get_id())
                )
            # x and y may carry units; a missing attribute is treated as 0
            flowx = oldelement.unittouu(flowref.get("x", "0"))
            flowy = oldelement.unittouu(flowref.get("y", "0"))

            element = TextElement()
            oldelement.addnext(element)
            element.style = oldelement.style
            element.transform = oldelement.transform

            first = True
            for child in oldelement:
                if isinstance(child, FlowPara):
                    # convert the flowpara "line" (note: no automatic wrapping)
                    # to a tspan and set the y coordinate.
                    # future FlowRoot improvements could add a better conversion.
                    newchild = Tspan()
                    element.append(newchild)
                    newchild.text = child.text
                    newchild.style = child.style
                    newchild.transform = child.transform
                    newchild.set("x", str(flowx))
                    if first:
                        flowy += self.get_font_size(child) * 1.25
                        first = False
                    else:
                        flowy += self.get_line_height(child)
                    newchild.set("y", str(flowy))
        else:
            element = oldelement.duplicate()
            oldelement.getparent().append(element)

        element.style.pop("shape-inside", None)
        # Real support for RTL text is missing, but we can emulate it by just removing
        # the attribute. However, line breaks will be misaligned.
        element.style.pop("direction", None)
        for child in element:
            child.style.pop("direction", None)
        if self.process_kerns:
            self.simplify_child_tspans(element)

        return element

    def append_splitted_element(self, text, prototype=None):
        """Creates a new text element, sibling to self.current_root, at
        (self.current_x, self.current_y) with content text.

        text: either a Tspan that should be moved to a new text element - in this
            case, text is a direct child of element; or a string
        prototype: if text is a string, style and transform will be taken from
            prototype"""
        if isinstance(text, Tspan) and text.getparent() == self.current_root:
            # we just move the tspan to a new text element.
            elem = self.current_root.duplicate()
            elem.remove_all(Tspan)
            elem.append(text)
            elem.set("x", text.get("x"))
            elem.set("y", text.get("y"))
        else:
            elem = TextElement(x=str(self.current_x), y=str(self.current_y))
            # transfer the style from all parents, including the text element (if
            # there's a style to the text element's parent applied, it will be
            # duplicated, but that doesn't really matter)
            elem.style = prototype.specified_style()
            # the element will be appended to the parent of element, but there might
            # be nested tspans between the prototype and the element. The next line
            # says "compose transforms until you reach the parent of element"
            elem.transform = (
                -self.current_root.getparent().transform
            ) @ prototype.composed_transform()
            tsp = Tspan(x=str(self.current_x), y=str(self.current_y))
            tsp.text = text
            elem.add(tsp)
        self.current_root.addnext(elem)

    def split_lines(self, element: TextLike) -> TextElement:
        """Splits a text into its lines

        Returns the preprocessed working copy of element (the caller removes it)."""
        self.process_kerns = False
        preprocessed = self.preprocess_text_element(element)
        self.current_root = preprocessed

        # Now we only have to copy each tspan into its own text element.
        for child in list(preprocessed):
            # Only tspans are lines. Any other child would be treated as a plain
            # string without a prototype by append_splitted_element and fail.
            if isinstance(child, Tspan):
                self.append_splitted_element(child)
        return preprocessed

    def process_plain_text(self, element, splitted):
        """Appends new text elements as siblings of the root for each element of
        splitted, starting at self.current_x, self.current_y, incrementing those,
        with prototype element (that styles and transforms will be taken from)"""
        if splitted is None:
            return
        for word in splitted:
            if word != "":
                self.append_splitted_element(word, element)
            # +1 since for words, we lost a space
            self.current_x += (
                self.current_fontsize * (len(word) + 1) * self.fs_multiplier
            )

    def process_plain_words(self, element, text):
        """Calls process_plain_text for splitting words"""
        self.fs_multiplier = 0.4
        if text is not None:
            self.process_plain_text(element, text.split(" "))

    def process_plain_chars(self, element, text):
        """Calls process_plain_text for splitting characters"""
        self.fs_multiplier = 0.25
        # iterating over a string yields its characters; None is handled by
        # process_plain_text
        self.process_plain_text(element, text)

    def split_words_or_chars(self, element: TextLike) -> TextElement:
        """Splits a text into its words or characters, depending on self.mode

        Returns the preprocessed working copy of element (the caller removes it)."""
        self.process_kerns = True
        preprocessed = self.preprocess_text_element(element)

        def process_element(element) -> None:
            """Split the text of element, then recursively its tspans and their
            tails"""
            for attribute in "xy":
                value = element.get(attribute)
                if value is None:
                    # no explicit coordinate: continue at the running position
                    continue
                coordinate = element.root.unittouu(value)
                if attribute == "x":
                    self.current_x = coordinate
                else:
                    self.current_y = coordinate

            self.current_fontsize = self.get_font_size(element)
            self.mode(element, element.text)
            for elem in element:
                if isinstance(elem, Tspan):
                    process_element(elem)
                    # The tail is styled by element, not by the nested tspan, so
                    # advance with the font size of element again.
                    self.current_fontsize = self.get_font_size(element)
                    self.mode(element, elem.tail)

        self.current_root = preprocessed
        process_element(preprocessed)
        return preprocessed


if __name__ == "__main__":
    TextSplit().run()