summaryrefslogtreecommitdiffstats
path: root/man/man3/mbstowcs.3
diff options
context:
space:
mode:
Diffstat (limited to 'man/man3/mbstowcs.3')
-rw-r--r--man/man3/mbstowcs.3237
1 files changed, 237 insertions, 0 deletions
diff --git a/man/man3/mbstowcs.3 b/man/man3/mbstowcs.3
new file mode 100644
index 0000000..f1c4ea5
--- /dev/null
+++ b/man/man3/mbstowcs.3
@@ -0,0 +1,237 @@
+'\" t
+.\" Copyright (c) Bruno Haible <haible@clisp.cons.org>
+.\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com>
+.\"
+.\" SPDX-License-Identifier: GPL-2.0-or-later
+.\"
+.\" References consulted:
+.\" GNU glibc-2 source code and manual
+.\" Dinkumware C library reference http://www.dinkumware.com/
+.\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html
+.\" ISO/IEC 9899:1999
+.\"
+.TH mbstowcs 3 2024-05-02 "Linux man-pages (unreleased)"
+.SH NAME
+mbstowcs \- convert a multibyte string to a wide-character string
+.SH LIBRARY
+Standard C library
+.RI ( libc ", " \-lc )
+.SH SYNOPSIS
+.nf
+.B #include <stdlib.h>
+.P
+.BI "size_t mbstowcs(wchar_t " dest "[restrict ." dsize "], \
+const char *restrict " src ,
+.BI " size_t " dsize );
+.fi
+.SH DESCRIPTION
+If
+.I dest
+is not NULL,
+convert the
+multibyte string
+.I src
+to a wide-character string starting at
+.IR dest .
+At most
+.I dsize
+wide characters are written to
+.IR dest .
+The sequence of characters in the string
+.I src
+shall begin in the initial shift state.
+The conversion can stop for three reasons:
+.IP \[bu] 3
+An invalid multibyte sequence has been encountered.
+In this case,
+.I (size_t)\ \-1
+is returned.
+.IP \[bu]
+.I dsize
+non-L\[aq]\e0\[aq] wide characters have been stored at
+.IR dest .
+In this case, the number of wide characters written to
+.I dest
+is returned, but the
+shift state at this point is lost.
+.IP \[bu]
+The multibyte string has been completely converted, including the
+terminating null character (\[aq]\e0\[aq]).
+In this case, the number of wide characters written to
+.IR dest ,
+excluding the terminating null wide character, is returned.
+.P
+If
+.I dest
+is NULL,
+.I dsize
+is ignored, and the conversion proceeds as
+above, except that the converted wide characters are not written out to memory,
+and that no length limit exists.
+.P
+In order to avoid the case 2 above, the programmer should make sure
+.I dsize
+is
+greater than or equal to
+.IR "mbstowcs(NULL,src,0)+1" .
+.P
+The programmer must ensure that there is room for at least
+.I dsize
+wide
+characters at
+.IR dest .
+.SH RETURN VALUE
+The number of wide characters that make
+up the converted part of the wide-character string, not including the
+terminating null wide character.
+If an invalid multibyte sequence was
+encountered,
+.I (size_t)\ \-1
+is returned.
+.SH ATTRIBUTES
+For an explanation of the terms used in this section, see
+.BR attributes (7).
+.TS
+allbox;
+lbx lb lb
+l l l.
+Interface Attribute Value
+T{
+.na
+.nh
+.BR mbstowcs ()
+T} Thread safety MT-Safe
+.TE
+.SH VERSIONS
+The function
+.BR mbsrtowcs (3)
+provides a better interface to the same
+functionality.
+.SH STANDARDS
+C11, POSIX.1-2008.
+.SH HISTORY
+POSIX.1-2001, C99.
+.SH NOTES
+The behavior of
+.BR mbstowcs ()
+depends on the
+.B LC_CTYPE
+category of the
+current locale.
+.SH EXAMPLES
+The program below illustrates the use of
+.BR mbstowcs (),
+as well as some of the wide character classification functions.
+An example run is the following:
+.P
+.in +4n
+.EX
+$ ./t_mbstowcs de_DE.UTF\-8 Grüße!
+Length of source string (excluding terminator):
+ 8 bytes
+ 6 multibyte characters
+\&
+Wide character string is: Grüße! (6 characters)
+ G alpha upper
+ r alpha lower
+ ü alpha lower
+ ß alpha lower
+ e alpha lower
+ ! !alpha
+.EE
+.in
+.SS Program source
+\&
+.\" SRC BEGIN (mbstowcs.c)
+.EX
+#include <locale.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <wchar.h>
+#include <wctype.h>
+\&
+int
+main(int argc, char *argv[])
+{
+ size_t mbslen; /* Number of multibyte characters in source */
+ wchar_t *wcs; /* Pointer to converted wide character string */
+\&
+ if (argc < 3) {
+ fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]);
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Apply the specified locale. */
+\&
+ if (setlocale(LC_ALL, argv[1]) == NULL) {
+ perror("setlocale");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Calculate the length required to hold argv[2] converted to
+ a wide character string. */
+\&
+ mbslen = mbstowcs(NULL, argv[2], 0);
+ if (mbslen == (size_t) \-1) {
+ perror("mbstowcs");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Describe the source string to the user. */
+\&
+ printf("Length of source string (excluding terminator):\en");
+ printf(" %zu bytes\en", strlen(argv[2]));
+ printf(" %zu multibyte characters\en\en", mbslen);
+\&
+ /* Allocate wide character string of the desired size. Add 1
+ to allow for terminating null wide character (L\[aq]\e0\[aq]). */
+\&
+ wcs = calloc(mbslen + 1, sizeof(*wcs));
+ if (wcs == NULL) {
+ perror("calloc");
+ exit(EXIT_FAILURE);
+ }
+\&
+ /* Convert the multibyte character string in argv[2] to a
+ wide character string. */
+\&
+ if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) {
+ perror("mbstowcs");
+ exit(EXIT_FAILURE);
+ }
+\&
+ printf("Wide character string is: %ls (%zu characters)\en",
+ wcs, mbslen);
+\&
+ /* Now do some inspection of the classes of the characters in
+ the wide character string. */
+\&
+ for (wchar_t *wp = wcs; *wp != 0; wp++) {
+ printf(" %lc ", (wint_t) *wp);
+\&
+ if (!iswalpha(*wp))
+ printf("!");
+ printf("alpha ");
+\&
+ if (iswalpha(*wp)) {
+ if (iswupper(*wp))
+ printf("upper ");
+\&
+ if (iswlower(*wp))
+ printf("lower ");
+ }
+\&
+ putchar(\[aq]\en\[aq]);
+ }
+\&
+ exit(EXIT_SUCCESS);
+}
+.EE
+.\" SRC END
+.SH SEE ALSO
+.BR mblen (3),
+.BR mbsrtowcs (3),
+.BR mbtowc (3),
+.BR wcstombs (3),
+.BR wctomb (3)