/* shmbutil.h -- utility functions for multibyte characters. */
/* Copyright (C) 2002-2019 Free Software Foundation, Inc.
This file is part of GNU Bash, the Bourne Again SHell.
Bash is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
Bash is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with Bash. If not, see .
*/
#if !defined (_SH_MBUTIL_H_)
#define _SH_MBUTIL_H_
#include "stdc.h"
/* Include config.h for HANDLE_MULTIBYTE */
#include
#if defined (HANDLE_MULTIBYTE)
#include "shmbchar.h"
extern size_t xwcsrtombs PARAMS((char *, const wchar_t **, size_t, mbstate_t *));
extern size_t xmbsrtowcs PARAMS((wchar_t *, const char **, size_t, mbstate_t *));
extern size_t xdupmbstowcs PARAMS((wchar_t **, char ***, const char *));
extern size_t mbstrlen PARAMS((const char *));
extern char *xstrchr PARAMS((const char *, int));
extern int locale_mb_cur_max; /* XXX */
extern int locale_utf8locale; /* XXX */
#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x) ((x) == (size_t)-1 || (x) == (size_t)-2)
#define MB_NULLWCH(x) ((x) == 0)
#endif
#define MBSLEN(s) (((s) && (s)[0]) ? ((s)[1] ? mbstrlen (s) : 1) : 0)
#define MB_STRLEN(s) ((MB_CUR_MAX > 1) ? MBSLEN (s) : STRLEN (s))
#define MBLEN(s, n) ((MB_CUR_MAX > 1) ? mblen ((s), (n)) : 1)
#define MBRLEN(s, n, p) ((MB_CUR_MAX > 1) ? mbrlen ((s), (n), (p)) : 1)
#define UTF8_SINGLEBYTE(c) (((c) & 0x80) == 0)
#define UTF8_MBFIRSTCHAR(c) (((c) & 0xc0) == 0xc0)
#define UTF8_MBCHAR(c) (((c) & 0xc0) == 0x80)
#else /* !HANDLE_MULTIBYTE */
#undef MB_LEN_MAX
#undef MB_CUR_MAX
#define MB_LEN_MAX 1
#define MB_CUR_MAX 1
#undef xstrchr
#define xstrchr(s, c) strchr(s, c)
#ifndef MB_INVALIDCH
#define MB_INVALIDCH(x) (0)
#define MB_NULLWCH(x) (0)
#endif
#define MB_STRLEN(s) (STRLEN(s))
#define MBLEN(s, n) 1
#define MBRLEN(s, n, p) 1
#ifndef wchar_t
# define wchar_t int
#endif
#define UTF8_SINGLEBYTE(c) (1)
#define UTF8_MBFIRSTCHAR(c) (0)
#endif /* !HANDLE_MULTIBYTE */
/* Declare and initialize a multibyte state. Call must be terminated
with `;'. */
#if defined (HANDLE_MULTIBYTE)
# define DECLARE_MBSTATE \
mbstate_t state; \
memset (&state, '\0', sizeof (mbstate_t))
#else
# define DECLARE_MBSTATE
#endif /* !HANDLE_MULTIBYTE */
/* Initialize or reinitialize a multibyte state named `state'. Call must be
terminated with `;'. */
#if defined (HANDLE_MULTIBYTE)
# define INITIALIZE_MBSTATE memset (&state, '\0', sizeof (mbstate_t))
#else
# define INITIALIZE_MBSTATE
#endif /* !HANDLE_MULTIBYTE */
/* Advance one (possibly multi-byte) character in string _STR of length
_STRSIZE, starting at index _I. STATE must have already been declared. */
#if defined (HANDLE_MULTIBYTE)
# define ADVANCE_CHAR(_str, _strsize, _i) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _f; \
\
_f = is_basic ((_str)[_i]); \
if (_f) \
mblength = 1; \
else if (locale_utf8locale && (((_str)[_i] & 0x80) == 0)) \
mblength = (_str)[_i] != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_str) + (_i), (_strsize) - (_i), &state); \
} \
\
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
(_i)++; \
} \
else if (mblength == 0) \
(_i)++; \
else \
(_i) += mblength; \
} \
else \
(_i)++; \
} \
while (0)
#else
# define ADVANCE_CHAR(_str, _strsize, _i) (_i)++
#endif /* !HANDLE_MULTIBYTE */
/* Advance one (possibly multibyte) character in the string _STR of length
_STRSIZE.
SPECIAL: assume that _STR will be incremented by 1 after this call. */
#if defined (HANDLE_MULTIBYTE)
# define ADVANCE_CHAR_P(_str, _strsize) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _f; \
\
_f = is_basic (*(_str)); \
if (_f) \
mblength = 1; \
else if (locale_utf8locale && ((*(_str) & 0x80) == 0)) \
mblength = *(_str) != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_str), (_strsize), &state); \
} \
\
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
mblength = 1; \
} \
else \
(_str) += (mblength < 1) ? 0 : (mblength - 1); \
} \
} \
while (0)
#else
# define ADVANCE_CHAR_P(_str, _strsize)
#endif /* !HANDLE_MULTIBYTE */
/* Back up one (possibly multi-byte) character in string _STR of length
_STRSIZE, starting at index _I. STATE must have already been declared. */
#if defined (HANDLE_MULTIBYTE)
# define BACKUP_CHAR(_str, _strsize, _i) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _x, _p; /* _x == temp index into string, _p == prev index */ \
\
_x = _p = 0; \
while (_x < (_i)) \
{ \
state_bak = state; \
mblength = mbrlen ((_str) + (_x), (_strsize) - (_x), &state); \
\
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
_x++; \
} \
else if (mblength == 0) \
_x++; \
else \
{ \
_p = _x; /* _p == start of prev mbchar */ \
_x += mblength; \
} \
} \
(_i) = _p; \
} \
else \
(_i)--; \
} \
while (0)
#else
# define BACKUP_CHAR(_str, _strsize, _i) (_i)--
#endif /* !HANDLE_MULTIBYTE */
/* Back up one (possibly multibyte) character in the string _BASE of length
_STRSIZE starting at _STR (_BASE <= _STR <= (_BASE + _STRSIZE) ).
SPECIAL: DO NOT assume that _STR will be decremented by 1 after this call. */
#if defined (HANDLE_MULTIBYTE)
# define BACKUP_CHAR_P(_base, _strsize, _str) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
char *_x, _p; /* _x == temp pointer into string, _p == prev pointer */ \
\
_x = _p = _base; \
while (_x < (_str)) \
{ \
state_bak = state; \
mblength = mbrlen (_x, (_strsize) - _x, &state); \
\
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
_x++; \
} \
else if (mblength == 0) \
_x++; \
else \
{ \
_p = _x; /* _p == start of prev mbchar */ \
_x += mblength; \
} \
} \
(_str) = _p; \
} \
else \
(_str)--; \
} \
while (0)
#else
# define BACKUP_CHAR_P(_base, _strsize, _str) (_str)--
#endif /* !HANDLE_MULTIBYTE */
/* Copy a single character from the string _SRC to the string _DST.
_SRCEND is a pointer to the end of _SRC. */
#if defined (HANDLE_MULTIBYTE)
# define COPY_CHAR_P(_dst, _src, _srcend) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _k; \
\
_k = is_basic (*(_src)); \
if (_k) \
mblength = 1; \
else if (locale_utf8locale && ((*(_src) & 0x80) == 0)) \
mblength = *(_src) != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_src), (_srcend) - (_src), &state); \
} \
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
mblength = 1; \
} \
else \
mblength = (mblength < 1) ? 1 : mblength; \
\
for (_k = 0; _k < mblength; _k++) \
*(_dst)++ = *(_src)++; \
} \
else \
*(_dst)++ = *(_src)++; \
} \
while (0)
#else
# define COPY_CHAR_P(_dst, _src, _srcend) *(_dst)++ = *(_src)++
#endif /* !HANDLE_MULTIBYTE */
/* Copy a single character from the string _SRC at index _SI to the string
_DST at index _DI. _SRCEND is a pointer to the end of _SRC. */
#if defined (HANDLE_MULTIBYTE)
# define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _k; \
\
_k = is_basic ((_src)[(_si)]); \
if (_k) \
mblength = 1; \
else if (locale_utf8locale && ((_src)[(_si)] & 0x80) == 0) \
mblength = (_src)[(_si)] != 0; \
else \
{\
state_bak = state; \
mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src)+(_si)), &state); \
} \
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
mblength = 1; \
} \
else \
mblength = (mblength < 1) ? 1 : mblength; \
\
for (_k = 0; _k < mblength; _k++) \
_dst[_di++] = _src[_si++]; \
} \
else \
_dst[_di++] = _src[_si++]; \
} \
while (0)
#else
# define COPY_CHAR_I(_dst, _di, _src, _srcend, _si) _dst[_di++] = _src[_si++]
#endif /* !HANDLE_MULTIBYTE */
/****************************************************************
* *
* The following are only guaranteed to work in subst.c *
* *
****************************************************************/
#if defined (HANDLE_MULTIBYTE)
# define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _i; \
\
_i = is_basic ((_src)[(_si)]); \
if (_i) \
mblength = 1; \
else if (locale_utf8locale && ((_src)[(_si)] & 0x80) == 0) \
mblength = (_src)[(_si)] != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_src) + (_si), (_slen) - (_si), &state); \
} \
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
mblength = 1; \
} \
else \
mblength = (mblength < 1) ? 1 : mblength; \
\
temp = xmalloc (mblength + 2); \
temp[0] = _escchar; \
for (_i = 0; _i < mblength; _i++) \
temp[_i + 1] = _src[_si++]; \
temp[mblength + 1] = '\0'; \
\
goto add_string; \
} \
else \
{ \
_dst[0] = _escchar; \
_dst[1] = _sc; \
} \
} \
while (0)
#else
# define SCOPY_CHAR_I(_dst, _escchar, _sc, _src, _si, _slen) \
_dst[0] = _escchar; \
_dst[1] = _sc
#endif /* !HANDLE_MULTIBYTE */
#if defined (HANDLE_MULTIBYTE)
# define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
mbstate_t state_bak; \
size_t mblength; \
int _i; \
\
_i = is_basic (*((_src) + (_si))); \
if (_i) \
mblength = 1; \
else if (locale_utf8locale && (((_src)[_si] & 0x80) == 0)) \
mblength = (_src)[_si] != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_src) + (_si), (_srcend) - ((_src) + (_si)), &state); \
} \
if (mblength == (size_t)-2 || mblength == (size_t)-1) \
{ \
state = state_bak; \
mblength = 1; \
} \
else \
mblength = (mblength < 1) ? 1 : mblength; \
\
FASTCOPY(((_src) + (_si)), (_dst), mblength); \
\
(_dst) += mblength; \
(_si) += mblength; \
} \
else \
{ \
*(_dst)++ = _src[(_si)]; \
(_si)++; \
} \
} \
while (0)
#else
# define SCOPY_CHAR_M(_dst, _src, _srcend, _si) \
*(_dst)++ = _src[(_si)]; \
(_si)++
#endif /* !HANDLE_MULTIBYTE */
#if HANDLE_MULTIBYTE
# define SADD_MBCHAR(_dst, _src, _si, _srcsize) \
do \
{ \
if (locale_mb_cur_max > 1) \
{ \
int i; \
mbstate_t state_bak; \
size_t mblength; \
\
i = is_basic (*((_src) + (_si))); \
if (i) \
mblength = 1; \
else if (locale_utf8locale && (((_src)[_si] & 0x80) == 0)) \
mblength = (_src)[_si] != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
} \
if (mblength == (size_t)-1 || mblength == (size_t)-2) \
{ \
state = state_bak; \
mblength = 1; \
} \
if (mblength < 1) \
mblength = 1; \
\
_dst = (char *)xmalloc (mblength + 1); \
for (i = 0; i < mblength; i++) \
(_dst)[i] = (_src)[(_si)++]; \
(_dst)[mblength] = '\0'; \
\
goto add_string; \
} \
} \
while (0)
#else
# define SADD_MBCHAR(_dst, _src, _si, _srcsize)
#endif
/* Watch out when using this -- it's just straight textual substitution */
#if defined (HANDLE_MULTIBYTE)
# define SADD_MBQCHAR_BODY(_dst, _src, _si, _srcsize) \
\
int i; \
mbstate_t state_bak; \
size_t mblength; \
\
i = is_basic (*((_src) + (_si))); \
if (i) \
mblength = 1; \
else if (locale_utf8locale && (((_src)[_si] & 0x80) == 0)) \
mblength = (_src)[_si] != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
} \
if (mblength == (size_t)-1 || mblength == (size_t)-2) \
{ \
state = state_bak; \
mblength = 1; \
} \
if (mblength < 1) \
mblength = 1; \
\
(_dst) = (char *)xmalloc (mblength + 2); \
(_dst)[0] = CTLESC; \
for (i = 0; i < mblength; i++) \
(_dst)[i+1] = (_src)[(_si)++]; \
(_dst)[mblength+1] = '\0'; \
\
goto add_string
# define SADD_MBCHAR_BODY(_dst, _src, _si, _srcsize) \
\
int i; \
mbstate_t state_bak; \
size_t mblength; \
\
i = is_basic (*((_src) + (_si))); \
if (i) \
mblength = 1; \
else if (locale_utf8locale && (((_src)[_si] & 0x80) == 0)) \
mblength = (_src)[_si] != 0; \
else \
{ \
state_bak = state; \
mblength = mbrlen ((_src) + (_si), (_srcsize) - (_si), &state); \
} \
if (mblength == (size_t)-1 || mblength == (size_t)-2) \
{ \
state = state_bak; \
mblength = 1; \
} \
if (mblength < 1) \
mblength = 1; \
\
(_dst) = (char *)xmalloc (mblength + 1); \
for (i = 0; i < mblength; i++) \
(_dst)[i+1] = (_src)[(_si)++]; \
(_dst)[mblength+1] = '\0'; \
\
goto add_string
#endif /* HANDLE_MULTIBYTE */
#endif /* _SH_MBUTIL_H_ */