summaryrefslogtreecommitdiffstats
path: root/lib/sh/mbschr.c
blob: 639962d46317785baabae86bac6623b101ad2cc6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
/* mbschr.c - strchr(3) that handles multibyte characters. */

/* Copyright (C) 2002 Free Software Foundation, Inc.

   This file is part of GNU Bash, the Bourne Again SHell.

   Bash is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   Bash is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with Bash.  If not, see <http://www.gnu.org/licenses/>.
*/

#include <config.h>

#ifdef HAVE_STDLIB_H
#  include <stdlib.h>
#endif

#include "bashansi.h"
#include "shmbutil.h"

extern int locale_mb_cur_max;
extern int locale_utf8locale;

#undef mbschr

extern char *utf8_mbschr (const char *, int);	/* XXX */

/* In some locales, the non-first bytes of some multibyte characters have
   the same values as some ASCII characters.  Faced with these strings, a
   legacy strchr() might return the wrong value. */

char *
#if defined (PROTOTYPES)
mbschr (const char *s, int c)
#else
mbschr (s, c)
     const char *s;
     int c;
#endif
{
#if HANDLE_MULTIBYTE
  char *pos;
  mbstate_t state;
  size_t strlength, mblength;

  if (locale_utf8locale && c < 0x80)
    return (utf8_mbschr (s, c));		/* XXX */

  /* The locale encodings with said weird property are BIG5, BIG5-HKSCS,
     GBK, GB18030, SHIFT_JIS, and JOHAB.  They exhibit the problem only
     when c >= 0x30.  We can therefore use the faster bytewise search if
     c <= 0x30. */
  if ((unsigned char)c >= '0' && locale_mb_cur_max > 1)
    {
      pos = (char *)s;
      memset (&state, '\0', sizeof(mbstate_t));
      strlength = strlen (s);

      while (strlength > 0)
	{
	  if (is_basic (*pos))
	    mblength = 1;
	  else
	    {
	      mblength = mbrlen (pos, strlength, &state);
	      if (mblength == (size_t)-2 || mblength == (size_t)-1 || mblength == (size_t)0)
	        mblength = 1;
	    }

	  if (mblength == 1 && c == (unsigned char)*pos)
	    return pos;

	  strlength -= mblength;
	  pos += mblength;
	}

      return ((char *)NULL);
    }
  else
#endif
  return (strchr (s, c));
}