summaryrefslogtreecommitdiffstats
path: root/man3/mbstowcs.3
blob: 6a0d6559aa4344e542873325921467c739522310 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
'\" t
.\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
.\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
.\"
.\" SPDX-License-Identifier: GPL-2.0-or-later
.\"
.\" References consulted:
.\"   GNU glibc-2 source code and manual
.\"   Dinkumware C library reference http://www.dinkumware.com/
.\"   OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
.\"   ISO/IEC 9899:1999
.\"
.TH mbstowcs 3 2023-11-14 "Linux man-pages 6.7"
.SH NAME
mbstowcs \- convert a multibyte string to a wide-character string
.SH LIBRARY
Standard C library
.RI ( libc ", " \-lc )
.SH SYNOPSIS
.nf
.B #include <stdlib.h>
.P
.BI "size_t mbstowcs(wchar_t " dest "[restrict ." dsize "], \
const char *restrict " src ,
.BI "                size_t " dsize );
.fi
.SH DESCRIPTION
If
.I dest
is not NULL,
convert the
multibyte string
.I src
to a wide-character string starting at
.IR dest .
At most
.I dsize
wide characters are written to
.IR dest .
The sequence of characters in the string
.I src
shall begin in the initial shift state.
The conversion can stop for three reasons:
.IP \[bu] 3
An invalid multibyte sequence has been encountered.
In this case,
.I (size_t)\ \-1
is returned.
.IP \[bu]
.I dsize
non-L\[aq]\e0\[aq] wide characters have been stored at
.IR dest .
In this case, the number of wide characters written to
.I dest
is returned, but the
shift state at this point is lost.
.IP \[bu]
The multibyte string has been completely converted, including the
terminating null character (\[aq]\e0\[aq]).
In this case, the number of wide characters written to
.IR dest ,
excluding the terminating null wide character, is returned.
.P
If
.I dest
is NULL,
.I dsize
is ignored, and the conversion proceeds as
above, except that the converted wide characters are not written out to memory,
and that no length limit exists.
.P
To avoid the second case above
(conversion stopping because
.I dsize
wide characters have already been stored),
the programmer should make sure
.I dsize
is
greater than or equal to
.IR "mbstowcs(NULL, src, 0) + 1" .
.P
The programmer must ensure that there is room for at least
.I dsize
wide
characters at
.IR dest .
.SH RETURN VALUE
.BR mbstowcs ()
returns the number of wide characters that make
up the converted part of the wide-character string, not including the
terminating null wide character.
If an invalid multibyte sequence was
encountered,
.I (size_t)\ \-1
is returned.
.SH ATTRIBUTES
For an explanation of the terms used in this section, see
.BR attributes (7).
.TS
allbox;
lbx lb lb
l l l.
Interface	Attribute	Value
T{
.na
.nh
.BR mbstowcs ()
T}	Thread safety	MT-Safe
.TE
.SH VERSIONS
The function
.BR mbsrtowcs (3)
provides a better interface to the same
functionality.
.SH STANDARDS
C11, POSIX.1-2008.
.SH HISTORY
POSIX.1-2001, C99.
.SH NOTES
The behavior of
.BR mbstowcs ()
depends on the
.B LC_CTYPE
category of the
current locale.
.SH EXAMPLES
The program below illustrates the use of
.BR mbstowcs (),
as well as some of the wide-character classification functions.
An example run is the following:
.P
.in +4n
.EX
$ ./t_mbstowcs de_DE.UTF\-8 Grüße!
Length of source string (excluding terminator):
    8 bytes
    6 multibyte characters
\&
Wide character string is: Grüße! (6 characters)
    G alpha upper
    r alpha lower
    ü alpha lower
    ß alpha lower
    e alpha lower
    ! !alpha
.EE
.in
.SS Program source
\&
.\" SRC BEGIN (mbstowcs.c)
.EX
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <wchar.h>
#include <wctype.h>
\&
int
main(int argc, char *argv[])
{
    size_t mbslen;      /* Number of multibyte characters in source */
    wchar_t *wcs;       /* Pointer to converted wide character string */
\&
    if (argc < 3) {
        fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]);
        exit(EXIT_FAILURE);
    }
\&
    /* Apply the specified locale. */
\&
    if (setlocale(LC_ALL, argv[1]) == NULL) {
        perror("setlocale");
        exit(EXIT_FAILURE);
    }
\&
    /* Calculate the length required to hold argv[2] converted to
       a wide character string. */
\&
    mbslen = mbstowcs(NULL, argv[2], 0);
    if (mbslen == (size_t) \-1) {
        perror("mbstowcs");
        exit(EXIT_FAILURE);
    }
\&
    /* Describe the source string to the user. */
\&
    printf("Length of source string (excluding terminator):\en");
    printf("    %zu bytes\en", strlen(argv[2]));
    printf("    %zu multibyte characters\en\en", mbslen);
\&
    /* Allocate wide character string of the desired size.  Add 1
       to allow for terminating null wide character (L\[aq]\e0\[aq]). */
\&
    wcs = calloc(mbslen + 1, sizeof(*wcs));
    if (wcs == NULL) {
        perror("calloc");
        exit(EXIT_FAILURE);
    }
\&
    /* Convert the multibyte character string in argv[2] to a
       wide character string. */
\&
    if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
        perror("mbstowcs");
        exit(EXIT_FAILURE);
    }
\&
    printf("Wide character string is: %ls (%zu characters)\en",
           wcs, mbslen);
\&
    /* Now do some inspection of the classes of the characters in
       the wide character string. */
\&
    for (wchar_t *wp = wcs; *wp != 0; wp++) {
        printf("    %lc ", (wint_t) *wp);
\&
        if (!iswalpha(*wp))
            printf("!");
        printf("alpha ");
\&
        if (iswalpha(*wp)) {
            if (iswupper(*wp))
                printf("upper ");
\&
            if (iswlower(*wp))
                printf("lower ");
        }
\&
        putchar(\[aq]\en\[aq]);
    }
\&
    exit(EXIT_SUCCESS);
}
.EE
.\" SRC END
.SH SEE ALSO
.BR mblen (3),
.BR mbsrtowcs (3),
.BR mbtowc (3),
.BR wcstombs (3),
.BR wctomb (3)