diff options
Diffstat (limited to 'lib/sh/casemod.c')
-rw-r--r-- | lib/sh/casemod.c | 273 |
1 files changed, 273 insertions, 0 deletions
diff --git a/lib/sh/casemod.c b/lib/sh/casemod.c new file mode 100644 index 0000000..bdd96f8 --- /dev/null +++ b/lib/sh/casemod.c @@ -0,0 +1,273 @@ +/* casemod.c -- functions to change case of strings */ + +/* Copyright (C) 2008-2020 Free Software Foundation, Inc. + + This file is part of GNU Bash, the Bourne Again SHell. + + Bash is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + Bash is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with Bash. If not, see <http://www.gnu.org/licenses/>. +*/ + +#if defined (HAVE_CONFIG_H) +# include <config.h> +#endif + +#if defined (HAVE_UNISTD_H) +# include <unistd.h> +#endif /* HAVE_UNISTD_H */ + +#include <stdc.h> + +#include <bashansi.h> +#include <bashintl.h> +#include <bashtypes.h> + +#include <stdio.h> +#include <ctype.h> +#include <xmalloc.h> + +#include <shmbchar.h> +#include <shmbutil.h> +#include <chartypes.h> +#include <typemax.h> + +#include <glob/strmatch.h> + +#define _to_wupper(wc) (iswlower (wc) ? towupper (wc) : (wc)) +#define _to_wlower(wc) (iswupper (wc) ? towlower (wc) : (wc)) + +#if !defined (HANDLE_MULTIBYTE) +# define cval(s, i) ((s)[(i)]) +# define iswalnum(c) (isalnum(c)) +# define TOGGLE(x) (ISUPPER (x) ? tolower ((unsigned char)x) : (TOUPPER (x))) +#else +# define TOGGLE(x) (iswupper (x) ? towlower (x) : (_to_wupper(x))) +#endif + +/* These must agree with the defines in externs.h */ +#define CASE_NOOP 0x0000 +#define CASE_LOWER 0x0001 +#define CASE_UPPER 0x0002 +#define CASE_CAPITALIZE 0x0004 +#define CASE_UNCAP 0x0008 +#define CASE_TOGGLE 0x0010 +#define CASE_TOGGLEALL 0x0020 +#define CASE_UPFIRST 0x0040 +#define CASE_LOWFIRST 0x0080 + +#define CASE_USEWORDS 0x1000 /* modify behavior to act on words in passed string */ + +extern char *substring PARAMS((char *, int, int)); + +#ifndef UCHAR_MAX +# define UCHAR_MAX TYPE_MAXIMUM(unsigned char) +#endif + +#if defined (HANDLE_MULTIBYTE) +static wchar_t +cval (s, i) + char *s; + int i; +{ + size_t tmp; + wchar_t wc; + int l; + mbstate_t mps; + + if (MB_CUR_MAX == 1 || is_basic (s[i])) + return ((wchar_t)s[i]); + l = strlen (s); + if (i >= (l - 1)) + return ((wchar_t)s[i]); + memset (&mps, 0, sizeof (mbstate_t)); + tmp = mbrtowc (&wc, s + i, l - i, &mps); + if (MB_INVALIDCH (tmp) || MB_NULLWCH (tmp)) + return ((wchar_t)s[i]); + return wc; +} +#endif + +/* Modify the case of characters in STRING matching PAT based on the value of + FLAGS. If PAT is null, modify the case of each character */ +char * +sh_modcase (string, pat, flags) + const char *string; + char *pat; + int flags; +{ + int start, next, end, retind; + int inword, c, nc, nop, match, usewords; + char *ret, *s; + wchar_t wc; + int mb_cur_max; +#if defined (HANDLE_MULTIBYTE) + wchar_t nwc; + char mb[MB_LEN_MAX+1]; + int mlen; + size_t m; + mbstate_t state; +#endif + + if (string == 0 || *string == 0) + { + ret = (char *)xmalloc (1); + ret[0] = '\0'; + return ret; + } + +#if defined (HANDLE_MULTIBYTE) + memset (&state, 0, sizeof (mbstate_t)); +#endif + + start = 0; + end = strlen (string); + mb_cur_max = MB_CUR_MAX; + + ret = (char *)xmalloc (2*end + 1); + retind = 0; + + /* See if we are supposed to split on alphanumerics and operate on each word */ + usewords = (flags & CASE_USEWORDS); + flags &= ~CASE_USEWORDS; + + inword = 0; + while (start < end) + { + wc = cval ((char *)string, start); + + if (iswalnum (wc) == 0) + inword = 0; + + if (pat) + { + next = start; + ADVANCE_CHAR (string, end, next); + s = substring ((char *)string, start, next); + match = strmatch (pat, s, FNM_EXTMATCH) != FNM_NOMATCH; + free (s); + if (match == 0) + { + /* copy unmatched portion */ + memcpy (ret + retind, string + start, next - start); + retind += next - start; + start = next; + inword = 1; + continue; + } + } + + /* XXX - for now, the toggling operators work on the individual + words in the string, breaking on alphanumerics. Should I + leave the capitalization operators to do that also? */ + if (flags == CASE_CAPITALIZE) + { + if (usewords) + nop = inword ? CASE_LOWER : CASE_UPPER; + else + nop = (start > 0) ? CASE_LOWER : CASE_UPPER; + inword = 1; + } + else if (flags == CASE_UNCAP) + { + if (usewords) + nop = inword ? CASE_UPPER : CASE_LOWER; + else + nop = (start > 0) ? CASE_UPPER : CASE_LOWER; + inword = 1; + } + else if (flags == CASE_UPFIRST) + { + if (usewords) + nop = inword ? CASE_NOOP : CASE_UPPER; + else + nop = (start > 0) ? CASE_NOOP : CASE_UPPER; + inword = 1; + } + else if (flags == CASE_LOWFIRST) + { + if (usewords) + nop = inword ? CASE_NOOP : CASE_LOWER; + else + nop = (start > 0) ? CASE_NOOP : CASE_LOWER; + inword = 1; + } + else if (flags == CASE_TOGGLE) + { + nop = inword ? CASE_NOOP : CASE_TOGGLE; + inword = 1; + } + else + nop = flags; + + /* Can't short-circuit, some locales have multibyte upper and lower + case equivalents of single-byte ascii characters (e.g., Turkish) */ + if (mb_cur_max == 1) + { +singlebyte: + switch (nop) + { + default: + case CASE_NOOP: nc = wc; break; + case CASE_UPPER: nc = TOUPPER (wc); break; + case CASE_LOWER: nc = TOLOWER (wc); break; + case CASE_TOGGLEALL: + case CASE_TOGGLE: nc = TOGGLE (wc); break; + } + ret[retind++] = nc; + } +#if defined (HANDLE_MULTIBYTE) + else + { + m = mbrtowc (&wc, string + start, end - start, &state); + /* Have to go through wide case conversion even for single-byte + chars, to accommodate single-byte characters where the + corresponding upper or lower case equivalent is multibyte. */ + if (MB_INVALIDCH (m)) + { + wc = (unsigned char)string[start]; + goto singlebyte; + } + else if (MB_NULLWCH (m)) + wc = L'\0'; + switch (nop) + { + default: + case CASE_NOOP: nwc = wc; break; + case CASE_UPPER: nwc = _to_wupper (wc); break; + case CASE_LOWER: nwc = _to_wlower (wc); break; + case CASE_TOGGLEALL: + case CASE_TOGGLE: nwc = TOGGLE (wc); break; + } + + /* We don't have to convert `wide' characters that are in the + unsigned char range back to single-byte `multibyte' characters. */ + if ((int)nwc <= UCHAR_MAX && is_basic ((int)nwc)) + ret[retind++] = nwc; + else + { + mlen = wcrtomb (mb, nwc, &state); + if (mlen > 0) + mb[mlen] = '\0'; + /* Don't assume the same width */ + strncpy (ret + retind, mb, mlen); + retind += mlen; + } + } +#endif + + ADVANCE_CHAR (string, end, start); + } + + ret[retind] = '\0'; + return ret; +} |