inkscape/share/extensions/text_split.py

#!/usr/bin/env python3
# coding=utf-8
#
# Copyright (C) 2009 Karlisson Bezerra, contato@nerdson.com
#               2021 Jonathan Neuhauser, jonathan.neuhauser@outlook.com
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
#
"""Splits a text element into lines, words, chars.
Supports all text elements that Inkscape can create, such as normal text, shape-inside (SVG2),
flowroot (SVG1.2), inline-size, manual kerns, and nested tspans (with possibly different kerns)

The code is structured as followed. For each selected text element:
 - preprocess_text_element duplicates the element, converts flowroots to plain text elements,
   and simplifies manual kerning if requested (only for split_words and split_chars, for all,
   possibly nested, children) using simplify_nested_tspans.
 - if split lines: split_lines copies all top-level tspans from the previous step into their own
   text element, which is otherwise a duplicate of the original text element (thus preserving
   style and transforms), see append_splitted_element
 - if split words or chars: split_words_or_chars: the text is recursively processed. For each tspan,
   the content and tail is split (words: at spaces, chars: after each character) into their own
   tspan, again using append_splitted_element. The method keeps track of the horizontal and vertical
   coordinate, incrementing it with the number of characters and a multiple of font size.
"""

import re as regex
from typing import Union, Callable

import inkex
from inkex import TextElement, FlowRoot, FlowPara, Tspan, Rectangle, ShapeElement
from inkex.units import parse_unit
from inkex.localization import inkex_gettext as _

TextLike = Union[FlowRoot, TextElement]


class TextSplit(inkex.EffectExtension):
    """Split text up."""

    def __init__(self):
        """Initialize State machine"""
        super().__init__()
        self.mode: Callable
        self.separation: float = 1
        self.fs_multiplier: float = 0.25
        self.current_x: float = 0
        self.current_y: float = 0
        self.process_kerns: bool = True
        self.current_root: TextLike
        self.current_fontsize: float = 0

    def add_arguments(self, pars):
        pars.add_argument("--tab", help="The selected UI tab when OK was pressed")
        pars.add_argument(
            "-t",
            "--splittype",
            default="line",
            choices=["letter", "word", "line"],
            help="type of split",
        )
        pars.add_argument(
            "-p",
            "--preserve",
            type=inkex.Boolean,
            default=True,
            help="Preserve original",
        )
        pars.add_argument(
            "-s",
            "--separation",
            type=float,
            default=1,
            help="Threshold for separating text with manual kerns in multiples of"
            "font-size",
        )

    def effect(self):
        """Applies the effect"""

        split_type = self.options.splittype
        preserve = self.options.preserve

        # checks if the selected elements are text nodes
        for elem in self.svg.selection.filter_nonzero(TextElement, FlowRoot):
            try:
                self.separation = self.options.separation
                if split_type == "line":
                    node = self.split_lines(elem)
                elif split_type == "word":
                    self.mode = self.process_plain_words
                    node = self.split_words_or_chars(elem)
                else:
                    self.separation = 0
                    self.mode = self.process_plain_chars
                    node = self.split_words_or_chars(elem)

                node.getparent().remove(node)

                if not preserve and node is not None:
                    elem.getparent().remove(elem)
            except TypeError as err:
                inkex.errormsg(err)  # if an element can not be processed

    @staticmethod
    def get_font_size(element):
        """get the font size of an element"""
        return element.get_computed_style("font-size")

    @staticmethod
    def get_line_height(element: ShapeElement):
        """get the line height of an element"""
        return element.get_line_height_uu()

    def simplify_child_tspans(self, element: TextElement):
        """Checks all child tspans if they have manual kerns.
        If it does, try to find words (characters with a distance > separation * font-size).
        Then concatenate the words with spaces, set this string as a new text and"""
        for child in list(element):
            # process manual kerns
            if not isinstance(child, Tspan):
                continue
            xvals = list(
                map(float, filter(len, regex.split(r"[,\s]", child.get("x") or "")))
            )
            content = child.text
            if content not in [None, ""] and len(xvals) >= 2:
                fsize = self.get_font_size(child)
                separation = self.separation * fsize
                current_word_start = 0
                for i in range(1, max(len(content), len(xvals))):
                    if i >= len(content) - 1 or i >= len(xvals) - 1:
                        # consume the entire remaining string
                        i = len(content)
                    if i == len(content) or abs(xvals[i] - xvals[i - 1]) > separation:
                        wordspan = Tspan(x=str(xvals[current_word_start]))
                        wordspan.text = content[current_word_start:i]
                        child.add(wordspan)
                        current_word_start = i
                child.pop("x")
                child.text = None
            # process child elements
            self.simplify_child_tspans(child)

    def preprocess_text_element(self, element: TextElement):
        """Processes a text element and returns an element containing tspans with x and y coordinate,
        possibly nested (for Inkscape-type kerning), so that the actual splitting can work as if the
        text was a simple text. Manual kerns (one x value per letter) are converted to spaces
        if requested (not necessary for "split characters")"""

        oldelement = element
        if isinstance(element, FlowRoot):
            element = TextElement()
            oldelement.addnext(element)
            element.style = oldelement.style
            element.transform = oldelement.transform
            flowref = oldelement.findone("svg:flowRegion")[0]
            if isinstance(flowref, Rectangle):
                flowx = element.unittouu(flowref.get("x"))
                flowy = element.unittouu(float(flowref.get("y")))
                first = True
            else:
                raise TypeError(
                    _(
                        "Element {} uses a flow region that is not a rectangle. "
                        "First unflow text."
                    ).format(element.get_id())
                )
            for child in oldelement:
                if isinstance(child, FlowPara):
                    # convert the flowpara "line" (note: no automatic wrapping)
                    # to a tspan and set the y coordinate.
                    # future FlowRoot improvements could add a better conversion.
                    newchild = Tspan()
                    element.append(newchild)
                    newchild.text = child.text
                    newchild.style = child.style
                    newchild.transform = child.transform
                    newchild.set("x", flowx)
                    if first:
                        flowy += self.get_font_size(child) * 1.25
                        first = False
                    else:
                        flowy += self.get_line_height(child)
                    newchild.set("y", str(flowy))

        else:
            element = oldelement.duplicate()
            oldelement.getparent().append(element)

        element.style.pop("shape-inside", None)

        # Real support for RTL text is missing, but we can emulate it by just removing the
        # attribute. However, line breaks will be misaligned.
        element.style.pop("direction", None)
        for child in element:
            child.style.pop("direction", None)

        if self.process_kerns:
            self.simplify_child_tspans(element)
        return element

    def append_splitted_element(self, text, prototype=None):
        """Creates a new text element, sibling to self.current_root, at (self.current_x,
        self.current_y) with content text.

        text: either a Tspan that should be moved to a new text element - in this case, text is
            a direct child of element; or a string
        prototype: if text is a string, style and transform will be taken from prototype
        """

        if isinstance(text, Tspan) and text.getparent() == self.current_root:
            # we just move the tspan to a new text element.
            elem = self.current_root.duplicate()
            elem.remove_all(Tspan)
            elem.append(text)
            elem.set("x", text.get("x"))
            elem.set("y", text.get("y"))
        else:
            elem = TextElement(x=str(self.current_x), y=str(self.current_y))
            # transfer the style from all parents, including the text element (if there's a style to
            # the text element's parent applied, it will be duplicated, but that doesn't really
            # matter)
            elem.style = prototype.specified_style()
            # the element will be appended to the parent of element, but there might be nested
            # tspans between the prototype and the element. The next line says
            # "compose transforms until you reach the parent of element"
            elem.transform = (
                -self.current_root.getparent().transform
            ) @ prototype.composed_transform()
            tsp = Tspan(x=str(self.current_x), y=str(self.current_y))
            tsp.text = text
            elem.add(tsp)
        self.current_root.addnext(elem)

    def split_lines(self, element: TextLike) -> TextElement:
        """Splits a text into its lines"""
        self.process_kerns = False
        preprocessed = self.preprocess_text_element(element)
        self.current_root = preprocessed
        # Now we only have to copy each tspan into its own text element.
        for child in list(preprocessed):
            self.append_splitted_element(child)

        return preprocessed

    def process_plain_text(self, element, splitted):
        """Appends new text elements to as sibling root for each element of splitted, starting at
        self.current_x, self.current_y, incrementing those, with prototype element (that
        styles and transforms will be taken from)"""
        if splitted is None:
            return
        for word in splitted:
            if word != "":
                self.append_splitted_element(word, element)
            # +1 since for words, we lost a space
            self.current_x += (
                self.current_fontsize * (len(word) + 1) * self.fs_multiplier
            )

    def process_plain_words(self, element, text):
        """Calls process_plain_text for splitting words"""
        self.fs_multiplier = 0.4
        if text is not None:
            self.process_plain_text(element, text.split(" "))

    def process_plain_chars(self, element, text):
        """Calls process_plain_text for splitting characters"""
        self.fs_multiplier = 0.25
        self.process_plain_text(element, text)

    def split_words_or_chars(self, element: TextLike) -> TextElement:
        """Splits a text into its lines"""
        self.process_kerns = True
        preprocessed = self.preprocess_text_element(element)

        def process_element(element) -> float:
            elem_coords = {
                i: (
                    element.root.unittouu(element.get(i))
                    if element.get(i) is not None
                    else None
                )
                for i in "xy"
            }
            if elem_coords["x"] is not None:
                self.current_x = elem_coords["x"]
            if elem_coords["y"] is not None:
                self.current_y = elem_coords["y"]
            self.current_fontsize = self.get_font_size(element)
            current_x = self.mode(element, element.text)

            for elem in element:
                if isinstance(elem, Tspan):
                    current_x = process_element(elem)
                current_x = self.mode(element, elem.tail)
            return current_x

        self.current_root = preprocessed
        process_element(preprocessed)
        return preprocessed


if __name__ == "__main__":
    TextSplit().run()