diff options
Diffstat (limited to 'man/man3/mbstowcs.3')
-rw-r--r-- | man/man3/mbstowcs.3 | 237 |
1 files changed, 237 insertions, 0 deletions
diff --git a/man/man3/mbstowcs.3 b/man/man3/mbstowcs.3 new file mode 100644 index 0000000..f1c4ea5 --- /dev/null +++ b/man/man3/mbstowcs.3 @@ -0,0 +1,237 @@ +'\" t +.\" Copyright (c) Bruno Haible <haible@clisp.cons.org> +.\" and Copyright 2014 Michael Kerrisk <mtk.manpages@gmail.com> +.\" +.\" SPDX-License-Identifier: GPL-2.0-or-later +.\" +.\" References consulted: +.\" GNU glibc-2 source code and manual +.\" Dinkumware C library reference http://www.dinkumware.com/ +.\" OpenGroup's Single UNIX specification http://www.UNIX-systems.org/online.html +.\" ISO/IEC 9899:1999 +.\" +.TH mbstowcs 3 2024-05-02 "Linux man-pages (unreleased)" +.SH NAME +mbstowcs \- convert a multibyte string to a wide-character string +.SH LIBRARY +Standard C library +.RI ( libc ", " \-lc ) +.SH SYNOPSIS +.nf +.B #include <stdlib.h> +.P +.BI "size_t mbstowcs(wchar_t " dest "[restrict ." dsize "], \ +const char *restrict " src , +.BI " size_t " dsize ); +.fi +.SH DESCRIPTION +If +.I dest +is not NULL, +convert the +multibyte string +.I src +to a wide-character string starting at +.IR dest . +At most +.I dsize +wide characters are written to +.IR dest . +The sequence of characters in the string +.I src +shall begin in the initial shift state. +The conversion can stop for three reasons: +.IP \[bu] 3 +An invalid multibyte sequence has been encountered. +In this case, +.I (size_t)\ \-1 +is returned. +.IP \[bu] +.I dsize +non-L\[aq]\e0\[aq] wide characters have been stored at +.IR dest . +In this case, the number of wide characters written to +.I dest +is returned, but the +shift state at this point is lost. +.IP \[bu] +The multibyte string has been completely converted, including the +terminating null character (\[aq]\e0\[aq]). +In this case, the number of wide characters written to +.IR dest , +excluding the terminating null wide character, is returned. 
+.P +If +.I dest +is NULL, +.I dsize +is ignored, and the conversion proceeds as +above, except that the converted wide characters are not written out to memory, +and that no length limit exists. +.P +In order to avoid the second case above, the programmer should make sure +.I dsize +is +greater than or equal to +.IR "mbstowcs(NULL,src,0)+1" . +.P +The programmer must ensure that there is room for at least +.I dsize +wide +characters at +.IR dest . +.SH RETURN VALUE +The number of wide characters that make +up the converted part of the wide-character string, not including the +terminating null wide character. +If an invalid multibyte sequence was +encountered, +.I (size_t)\ \-1 +is returned. +.SH ATTRIBUTES +For an explanation of the terms used in this section, see +.BR attributes (7). +.TS +allbox; +lbx lb lb +l l l. +Interface Attribute Value +T{ +.na +.nh +.BR mbstowcs () +T} Thread safety MT-Safe +.TE +.SH VERSIONS +The function +.BR mbsrtowcs (3) +provides a better interface to the same +functionality. +.SH STANDARDS +C11, POSIX.1-2008. +.SH HISTORY +POSIX.1-2001, C99. +.SH NOTES +The behavior of +.BR mbstowcs () +depends on the +.B LC_CTYPE +category of the +current locale. +.SH EXAMPLES +The program below illustrates the use of +.BR mbstowcs (), +as well as some of the wide character classification functions. +An example run is the following: +.P +.in +4n +.EX +$ ./t_mbstowcs de_DE.UTF\-8 Grüße! +Length of source string (excluding terminator): + 8 bytes + 6 multibyte characters +\& +Wide character string is: Grüße! (6 characters) + G alpha upper + r alpha lower + ü alpha lower + ß alpha lower + e alpha lower + ! 
!alpha +.EE +.in +.SS Program source +\& +.\" SRC BEGIN (mbstowcs.c) +.EX +#include <locale.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <wchar.h> +#include <wctype.h> +\& +int +main(int argc, char *argv[]) +{ + size_t mbslen; /* Number of multibyte characters in source */ + wchar_t *wcs; /* Pointer to converted wide character string */ +\& + if (argc < 3) { + fprintf(stderr, "Usage: %s <locale> <string>\en", argv[0]); + exit(EXIT_FAILURE); + } +\& + /* Apply the specified locale. */ +\& + if (setlocale(LC_ALL, argv[1]) == NULL) { + perror("setlocale"); + exit(EXIT_FAILURE); + } +\& + /* Calculate the length required to hold argv[2] converted to + a wide character string. */ +\& + mbslen = mbstowcs(NULL, argv[2], 0); + if (mbslen == (size_t) \-1) { + perror("mbstowcs"); + exit(EXIT_FAILURE); + } +\& + /* Describe the source string to the user. */ +\& + printf("Length of source string (excluding terminator):\en"); + printf(" %zu bytes\en", strlen(argv[2])); + printf(" %zu multibyte characters\en\en", mbslen); +\& + /* Allocate wide character string of the desired size. Add 1 + to allow for terminating null wide character (L\[aq]\e0\[aq]). */ +\& + wcs = calloc(mbslen + 1, sizeof(*wcs)); + if (wcs == NULL) { + perror("calloc"); + exit(EXIT_FAILURE); + } +\& + /* Convert the multibyte character string in argv[2] to a + wide character string. */ +\& + if (mbstowcs(wcs, argv[2], mbslen + 1) == (size_t) \-1) { + perror("mbstowcs"); + exit(EXIT_FAILURE); + } +\& + printf("Wide character string is: %ls (%zu characters)\en", + wcs, mbslen); +\& + /* Now do some inspection of the classes of the characters in + the wide character string. 
*/ +\& + for (wchar_t *wp = wcs; *wp != 0; wp++) { + printf(" %lc ", (wint_t) *wp); +\& + if (!iswalpha(*wp)) + printf("!"); + printf("alpha "); +\& + if (iswalpha(*wp)) { + if (iswupper(*wp)) + printf("upper "); +\& + if (iswlower(*wp)) + printf("lower "); + } +\& + putchar(\[aq]\en\[aq]); + } +\& + exit(EXIT_SUCCESS); +} +.EE +.\" SRC END +.SH SEE ALSO +.BR mblen (3), +.BR mbsrtowcs (3), +.BR mbtowc (3), +.BR wcstombs (3), +.BR wctomb (3) |