summaryrefslogtreecommitdiffstats
path: root/lib/sh/casemod.c
diff options
context:
space:
mode:
Diffstat (limited to 'lib/sh/casemod.c')
-rw-r--r--lib/sh/casemod.c273
1 files changed, 273 insertions, 0 deletions
diff --git a/lib/sh/casemod.c b/lib/sh/casemod.c
new file mode 100644
index 0000000..bdd96f8
--- /dev/null
+++ b/lib/sh/casemod.c
@@ -0,0 +1,273 @@
+/* casemod.c -- functions to change case of strings */
+
+/* Copyright (C) 2008-2020 Free Software Foundation, Inc.
+
+ This file is part of GNU Bash, the Bourne Again SHell.
+
+ Bash is free software: you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ Bash is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with Bash. If not, see <http://www.gnu.org/licenses/>.
+*/
+
+#if defined (HAVE_CONFIG_H)
+# include <config.h>
+#endif
+
+#if defined (HAVE_UNISTD_H)
+# include <unistd.h>
+#endif /* HAVE_UNISTD_H */
+
+#include <stdc.h>
+
+#include <bashansi.h>
+#include <bashintl.h>
+#include <bashtypes.h>
+
+#include <stdio.h>
+#include <ctype.h>
+#include <xmalloc.h>
+
+#include <shmbchar.h>
+#include <shmbutil.h>
+#include <chartypes.h>
+#include <typemax.h>
+
+#include <glob/strmatch.h>
+
+#define _to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc))
+#define _to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc))
+
+#if !defined (HANDLE_MULTIBYTE)
+# define cval(s, i) ((s)[(i)])
+# define iswalnum(c) (isalnum(c))
+# define TOGGLE(x) (ISUPPER (x) ? tolower ((unsigned char)x) : (TOUPPER (x)))
+#else
+# define TOGGLE(x) (iswupper (x) ? towlower (x) : (_to_wupper(x)))
+#endif
+
+/* These must agree with the defines in externs.h */
+#define CASE_NOOP 0x0000
+#define CASE_LOWER 0x0001
+#define CASE_UPPER 0x0002
+#define CASE_CAPITALIZE 0x0004
+#define CASE_UNCAP 0x0008
+#define CASE_TOGGLE 0x0010
+#define CASE_TOGGLEALL 0x0020
+#define CASE_UPFIRST 0x0040
+#define CASE_LOWFIRST 0x0080
+
+#define CASE_USEWORDS 0x1000 /* modify behavior to act on words in passed string */
+
+extern char *substring PARAMS((char *, int, int));
+
+#ifndef UCHAR_MAX
+# define UCHAR_MAX TYPE_MAXIMUM(unsigned char)
+#endif
+
+#if defined (HANDLE_MULTIBYTE)
+static wchar_t
+cval (s, i)
+ char *s;
+ int i;
+{
+ size_t tmp;
+ wchar_t wc;
+ int l;
+ mbstate_t mps;
+
+ if (MB_CUR_MAX == 1 || is_basic (s[i]))
+ return ((wchar_t)s[i]);
+ l = strlen (s);
+ if (i >= (l - 1))
+ return ((wchar_t)s[i]);
+ memset (&mps, 0, sizeof (mbstate_t));
+ tmp = mbrtowc (&wc, s + i, l - i, &mps);
+ if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp))
+ return ((wchar_t)s[i]);
+ return wc;
+}
+#endif
+
+/* Modify the case of characters in STRING matching PAT based on the value of
+ FLAGS. If PAT is null, modify the case of each character */
+char *
+sh_modcase (string, pat, flags)
+ const char *string;
+ char *pat;
+ int flags;
+{
+ int start, next, end, retind;
+ int inword, c, nc, nop, match, usewords;
+ char *ret, *s;
+ wchar_t wc;
+ int mb_cur_max;
+#if defined (HANDLE_MULTIBYTE)
+ wchar_t nwc;
+ char mb[MB_LEN_MAX+1];
+ int mlen;
+ size_t m;
+ mbstate_t state;
+#endif
+
+ if (string == 0 || *string == 0)
+ {
+ ret = (char *)xmalloc (1);
+ ret[0] = '\0';
+ return ret;
+ }
+
+#if defined (HANDLE_MULTIBYTE)
+ memset (&state, 0, sizeof (mbstate_t));
+#endif
+
+ start = 0;
+ end = strlen (string);
+ mb_cur_max = MB_CUR_MAX;
+
+ ret = (char *)xmalloc (2*end + 1);
+ retind = 0;
+
+ /* See if we are supposed to split on alphanumerics and operate on each word */
+ usewords = (flags & CASE_USEWORDS);
+ flags &= ~CASE_USEWORDS;
+
+ inword = 0;
+ while (start < end)
+ {
+ wc = cval ((char *)string, start);
+
+ if (iswalnum (wc) == 0)
+ inword = 0;
+
+ if (pat)
+ {
+ next = start;
+ ADVANCE_CHAR (string, end, next);
+ s = substring ((char *)string, start, next);
+ match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH;
+ free (s);
+ if (match == 0)
+ {
+ /* copy unmatched portion */
+ memcpy (ret + retind, string + start, next - start);
+ retind += next - start;
+ start = next;
+ inword = 1;
+ continue;
+ }
+ }
+
+ /* XXX - for now, the toggling operators work on the individual
+ words in the string, breaking on alphanumerics. Should I
+ leave the capitalization operators to do that also? */
+ if (flags == CASE_CAPITALIZE)
+ {
+ if (usewords)
+ nop = inword ? CASE_LOWER : CASE_UPPER;
+ else
+ nop = (start > 0) ? CASE_LOWER : CASE_UPPER;
+ inword = 1;
+ }
+ else if (flags == CASE_UNCAP)
+ {
+ if (usewords)
+ nop = inword ? CASE_UPPER : CASE_LOWER;
+ else
+ nop = (start > 0) ? CASE_UPPER : CASE_LOWER;
+ inword = 1;
+ }
+ else if (flags == CASE_UPFIRST)
+ {
+ if (usewords)
+ nop = inword ? CASE_NOOP : CASE_UPPER;
+ else
+ nop = (start > 0) ? CASE_NOOP : CASE_UPPER;
+ inword = 1;
+ }
+ else if (flags == CASE_LOWFIRST)
+ {
+ if (usewords)
+ nop = inword ? CASE_NOOP : CASE_LOWER;
+ else
+ nop = (start > 0) ? CASE_NOOP : CASE_LOWER;
+ inword = 1;
+ }
+ else if (flags == CASE_TOGGLE)
+ {
+ nop = inword ? CASE_NOOP : CASE_TOGGLE;
+ inword = 1;
+ }
+ else
+ nop = flags;
+
+ /* Can't short-circuit, some locales have multibyte upper and lower
+ case equivalents of single-byte ascii characters (e.g., Turkish) */
+ if (mb_cur_max == 1)
+ {
+singlebyte:
+ switch (nop)
+ {
+ default:
+ case CASE_NOOP: nc = wc; break;
+ case CASE_UPPER: nc = TOUPPER (wc); break;
+ case CASE_LOWER: nc = TOLOWER (wc); break;
+ case CASE_TOGGLEALL:
+ case CASE_TOGGLE: nc = TOGGLE (wc); break;
+ }
+ ret[retind++] = nc;
+ }
+#if defined (HANDLE_MULTIBYTE)
+ else
+ {
+ m = mbrtowc (&wc, string + start, end - start, &state);
+ /* Have to go through wide case conversion even for single-byte
+ chars, to accommodate single-byte characters where the
+ corresponding upper or lower case equivalent is multibyte. */
+ if (MB_INVALIDCH (m))
+ {
+ wc = (unsigned char)string[start];
+ goto singlebyte;
+ }
+ else if (MB_NULLWCH (m))
+ wc = L'\0';
+ switch (nop)
+ {
+ default:
+ case CASE_NOOP: nwc = wc; break;
+ case CASE_UPPER: nwc = _to_wupper (wc); break;
+ case CASE_LOWER: nwc = _to_wlower (wc); break;
+ case CASE_TOGGLEALL:
+ case CASE_TOGGLE: nwc = TOGGLE (wc); break;
+ }
+
+ /* We don't have to convert `wide' characters that are in the
+ unsigned char range back to single-byte `multibyte' characters. */
+ if ((int)nwc <= UCHAR_MAX && is_basic ((int)nwc))
+ ret[retind++] = nwc;
+ else
+ {
+ mlen = wcrtomb (mb, nwc, &state);
+ if (mlen > 0)
+ mb[mlen] = '\0';
+ /* Don't assume the same width */
+ strncpy (ret + retind, mb, mlen);
+ retind += mlen;
+ }
+ }
+#endif
+
+ ADVANCE_CHAR (string, end, start);
+ }
+
+ ret[retind] = '\0';
+ return ret;
+}