summaryrefslogtreecommitdiffstats
path: root/lib/mbsalign.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 16:11:47 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 16:11:47 +0000
commit758f820bcc0f68aeebac1717e537ca13a320b909 (patch)
tree48111ece75cf4f98316848b37a7e26356e00669e /lib/mbsalign.c
parentInitial commit. (diff)
downloadcoreutils-758f820bcc0f68aeebac1717e537ca13a320b909.tar.xz
coreutils-758f820bcc0f68aeebac1717e537ca13a320b909.zip
Adding upstream version 9.1.upstream/9.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/mbsalign.c')
-rw-r--r--lib/mbsalign.c276
1 files changed, 276 insertions, 0 deletions
diff --git a/lib/mbsalign.c b/lib/mbsalign.c
new file mode 100644
index 0000000..38face2
--- /dev/null
+++ b/lib/mbsalign.c
@@ -0,0 +1,276 @@
+/* Align/Truncate a string in a given screen width
+ Copyright (C) 2009-2022 Free Software Foundation, Inc.
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+/* Written by Pádraig Brady. */
+
+#include <config.h>
+#include "mbsalign.h"
+
+#include "minmax.h"
+
+#include <stdlib.h>
+#include <string.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <wchar.h>
+#include <wctype.h>
+
+/* Replace non printable chars.
+ Note \t and \n etc. are non printable.
+ Return 1 if replacement made, 0 otherwise. */
+
+static bool
+wc_ensure_printable (wchar_t *wchars)
+{
+ bool replaced = false;
+ wchar_t *wc = wchars;
+ while (*wc)
+ {
+ if (!iswprint ((wint_t) *wc))
+ {
+ *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
+ replaced = true;
+ }
+ wc++;
+ }
+ return replaced;
+}
+
+/* Truncate wchar string to width cells.
+ * Returns number of cells used. */
+
+static size_t
+wc_truncate (wchar_t *wc, size_t width)
+{
+ size_t cells = 0;
+ int next_cells = 0;
+
+ while (*wc)
+ {
+ next_cells = wcwidth (*wc);
+ if (next_cells == -1) /* non printable */
+ {
+ *wc = 0xFFFD; /* L'\uFFFD' (replacement char) */
+ next_cells = 1;
+ }
+ if (cells + next_cells > width)
+ break;
+ cells += next_cells;
+ wc++;
+ }
+ *wc = L'\0';
+ return cells;
+}
+
+/* Write N_SPACES space characters to DEST while ensuring
+ nothing is written beyond DEST_END. A terminating NUL
+ is always added to DEST.
+ A pointer to the terminating NUL is returned. */
+
+static char *
+mbs_align_pad (char *dest, char const *dest_end, size_t n_spaces)
+{
+ /* FIXME: Should we pad with "figure space" (\u2007)
+ if non ascii data present? */
+ while (n_spaces-- && (dest < dest_end))
+ *dest++ = ' ';
+ *dest = '\0';
+ return dest;
+}
+
+/* Align a string, SRC, in a field of *WIDTH columns, handling multi-byte
+ characters; write the result into the DEST_SIZE-byte buffer, DEST.
+ ALIGNMENT specifies whether to left- or right-justify or to center.
+ If SRC requires more than *WIDTH columns, truncate it to fit.
+ When centering, the number of trailing spaces may be one less than the
+ number of leading spaces.
+ Return the length in bytes required for the final result, not counting
+ the trailing NUL. A return value of DEST_SIZE or larger means there
+ wasn't enough space. DEST will be NUL terminated in any case.
+ Return SIZE_MAX upon error (invalid multi-byte sequence in SRC,
+ or malloc failure), unless MBA_UNIBYTE_FALLBACK is specified.
+ Update *WIDTH to indicate how many columns were used before padding. */
+
+size_t
+mbsalign (char const *src, char *dest, size_t dest_size,
+ size_t *width, mbs_align_t align, int flags)
+{
+ size_t ret = SIZE_MAX;
+ size_t src_size = strlen (src) + 1;
+ char *newstr = NULL;
+ wchar_t *str_wc = NULL;
+ char const *str_to_print = src;
+ size_t n_cols = src_size - 1;
+ size_t n_used_bytes = n_cols; /* Not including NUL */
+ size_t n_spaces = 0;
+ bool conversion = false;
+ bool wc_enabled = false;
+
+ /* In multi-byte locales convert to wide characters
+ to allow easy truncation. Also determine number
+ of screen columns used. */
+ if (!(flags & MBA_UNIBYTE_ONLY) && MB_CUR_MAX > 1)
+ {
+ size_t src_chars = mbstowcs (NULL, src, 0);
+ if (src_chars == SIZE_MAX)
+ {
+ if (flags & MBA_UNIBYTE_FALLBACK)
+ goto mbsalign_unibyte;
+ else
+ goto mbsalign_cleanup;
+ }
+ src_chars += 1; /* make space for NUL */
+ str_wc = malloc (src_chars * sizeof (wchar_t));
+ if (str_wc == NULL)
+ {
+ if (flags & MBA_UNIBYTE_FALLBACK)
+ goto mbsalign_unibyte;
+ else
+ goto mbsalign_cleanup;
+ }
+ if (mbstowcs (str_wc, src, src_chars) != 0)
+ {
+ str_wc[src_chars - 1] = L'\0';
+ wc_enabled = true;
+ conversion = wc_ensure_printable (str_wc);
+ n_cols = wcswidth (str_wc, src_chars);
+ }
+ }
+
+ /* If we transformed or need to truncate the source string
+ then create a modified copy of it. */
+ if (wc_enabled && (conversion || (n_cols > *width)))
+ {
+ if (conversion)
+ {
+ /* May have increased the size by converting
+ \t to \uFFFD for example. */
+ src_size = wcstombs (NULL, str_wc, 0) + 1;
+ }
+ newstr = malloc (src_size);
+ if (newstr == NULL)
+ {
+ if (flags & MBA_UNIBYTE_FALLBACK)
+ goto mbsalign_unibyte;
+ else
+ goto mbsalign_cleanup;
+ }
+ str_to_print = newstr;
+ n_cols = wc_truncate (str_wc, *width);
+ n_used_bytes = wcstombs (newstr, str_wc, src_size);
+ }
+
+mbsalign_unibyte:
+
+ if (n_cols > *width) /* Unibyte truncation required. */
+ {
+ n_cols = *width;
+ n_used_bytes = n_cols;
+ }
+
+ if (*width > n_cols) /* Padding required. */
+ n_spaces = *width - n_cols;
+
+ /* indicate to caller how many cells needed (not including padding). */
+ *width = n_cols;
+
+ {
+ size_t start_spaces, end_spaces;
+
+ switch (align)
+ {
+ case MBS_ALIGN_LEFT:
+ start_spaces = 0;
+ end_spaces = n_spaces;
+ break;
+ case MBS_ALIGN_RIGHT:
+ start_spaces = n_spaces;
+ end_spaces = 0;
+ break;
+ case MBS_ALIGN_CENTER:
+ default:
+ start_spaces = n_spaces / 2 + n_spaces % 2;
+ end_spaces = n_spaces / 2;
+ break;
+ }
+
+ if (flags & MBA_NO_LEFT_PAD)
+ start_spaces = 0;
+ if (flags & MBA_NO_RIGHT_PAD)
+ end_spaces = 0;
+
+ /* Write as much NUL terminated output to DEST as possible. */
+ if (dest_size != 0)
+ {
+ size_t space_left;
+ char *dest_end = dest + dest_size - 1;
+
+ dest = mbs_align_pad (dest, dest_end, start_spaces);
+ space_left = dest_end - dest;
+ dest = mempcpy (dest, str_to_print, MIN (n_used_bytes, space_left));
+ mbs_align_pad (dest, dest_end, end_spaces);
+ }
+
+ /* indicate to caller how many bytes needed (not including NUL). */
+ ret = n_used_bytes + ((start_spaces + end_spaces) * 1);
+ }
+
+mbsalign_cleanup:
+
+ free (str_wc);
+ free (newstr);
+
+ return ret;
+}
+
+/* A wrapper around mbsalign() to dynamically allocate the
+ minimum amount of memory to store the result.
+ Return NULL on failure. */
+
+char *
+ambsalign (char const *src, size_t *width, mbs_align_t align, int flags)
+{
+ size_t orig_width = *width;
+ size_t size = *width; /* Start with enough for unibyte mode. */
+ size_t req = size;
+ char *buf = NULL;
+
+ while (req >= size)
+ {
+ char *nbuf;
+ size = req + 1; /* Space for NUL. */
+ nbuf = realloc (buf, size);
+ if (nbuf == NULL)
+ {
+ free (buf);
+ buf = NULL;
+ break;
+ }
+ buf = nbuf;
+ *width = orig_width;
+ req = mbsalign (src, buf, size, width, align, flags);
+ if (req == SIZE_MAX)
+ {
+ free (buf);
+ buf = NULL;
+ break;
+ }
+ }
+
+ return buf;
+}