diff options
Diffstat (limited to 'lib/sh/shmbchar.c')
-rw-r--r-- | lib/sh/shmbchar.c | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/lib/sh/shmbchar.c b/lib/sh/shmbchar.c new file mode 100644 index 0000000..f2f2582 --- /dev/null +++ b/lib/sh/shmbchar.c @@ -0,0 +1,137 @@ +/* Copyright (C) 2001, 2006, 2009, 2010, 2012, 2015-2018 Free Software Foundation, Inc. + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. */ + + +#include <config.h> + +#if defined (HANDLE_MULTIBYTE) +#include <stdlib.h> +#include <limits.h> + +#include <errno.h> + +#include <shmbutil.h> +#include <shmbchar.h> + +#ifndef errno +extern int errno; +#endif + +#if IS_BASIC_ASCII + +/* Bit table of characters in the ISO C "basic character set". */ +const unsigned int is_basic_table [UCHAR_MAX / 32 + 1] = +{ + 0x00001a00, /* '\t' '\v' '\f' */ + 0xffffffef, /* ' '...'#' '%'...'?' */ + 0xfffffffe, /* 'A'...'Z' '[' '\\' ']' '^' '_' */ + 0x7ffffffe /* 'a'...'z' '{' '|' '}' '~' */ + /* The remaining bits are 0. */ +}; + +#endif /* IS_BASIC_ASCII */ + +extern int locale_utf8locale; + +extern char *utf8_mbsmbchar (const char *); +extern int utf8_mblen (const char *, size_t); + +/* Count the number of characters in S, counting multi-byte characters as a + single character. */ +size_t +mbstrlen (s) + const char *s; +{ + size_t clen, nc; + mbstate_t mbs = { 0 }, mbsbak = { 0 }; + int f, mb_cur_max; + + nc = 0; + mb_cur_max = MB_CUR_MAX; + while (*s && (clen = (f = is_basic (*s)) ? 1 : mbrlen(s, mb_cur_max, &mbs)) != 0) + { + if (MB_INVALIDCH(clen)) + { + clen = 1; /* assume single byte */ + mbs = mbsbak; + } + + if (f == 0) + mbsbak = mbs; + + s += clen; + nc++; + } + return nc; +} + +/* Return pointer to first multibyte char in S, or NULL if none. */ +/* XXX - if we know that the locale is UTF-8, we can just check whether or + not any byte has the eighth bit turned on */ +char * +mbsmbchar (s) + const char *s; +{ + char *t; + size_t clen; + mbstate_t mbs = { 0 }; + int mb_cur_max; + + if (locale_utf8locale) + return (utf8_mbsmbchar (s)); /* XXX */ + + mb_cur_max = MB_CUR_MAX; + for (t = (char *)s; *t; t++) + { + if (is_basic (*t)) + continue; + + if (locale_utf8locale) /* not used if above code active */ + clen = utf8_mblen (t, mb_cur_max); + else + clen = mbrlen (t, mb_cur_max, &mbs); + + if (clen == 0) + return 0; + if (MB_INVALIDCH(clen)) + continue; + + if (clen > 1) + return t; + } + return 0; +} + +int +sh_mbsnlen(src, srclen, maxlen) + const char *src; + size_t srclen; + int maxlen; +{ + int count; + int sind; + DECLARE_MBSTATE; + + for (sind = count = 0; src[sind]; ) + { + count++; /* number of multibyte characters */ + ADVANCE_CHAR (src, srclen, sind); + if (sind > maxlen) + break; + } + + return count; +} +#endif |