diff options
Diffstat (limited to 'libc-top-half/musl/src/multibyte')
21 files changed, 597 insertions, 0 deletions
diff --git a/libc-top-half/musl/src/multibyte/btowc.c b/libc-top-half/musl/src/multibyte/btowc.c new file mode 100644 index 0000000..8acd0a2 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/btowc.c @@ -0,0 +1,10 @@ +#include <stdio.h> +#include <wchar.h> +#include <stdlib.h> +#include "internal.h" + +wint_t btowc(int c) +{ + int b = (unsigned char)c; + return b<128U ? b : (MB_CUR_MAX==1 && c!=EOF) ? CODEUNIT(c) : WEOF; +} diff --git a/libc-top-half/musl/src/multibyte/c16rtomb.c b/libc-top-half/musl/src/multibyte/c16rtomb.c new file mode 100644 index 0000000..39ca375 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/c16rtomb.c @@ -0,0 +1,35 @@ +#include <uchar.h> +#include <errno.h> +#include <wchar.h> + +size_t c16rtomb(char *restrict s, char16_t c16, mbstate_t *restrict ps) +{ + static unsigned internal_state; + if (!ps) ps = (void *)&internal_state; + unsigned *x = (unsigned *)ps; + wchar_t wc; + + if (!s) { + if (*x) goto ilseq; + return 1; + } + + if (!*x && c16 - 0xd800u < 0x400) { + *x = c16 - 0xd7c0 << 10; + return 0; + } + + if (*x) { + if (c16 - 0xdc00u >= 0x400) goto ilseq; + else wc = *x + c16 - 0xdc00; + *x = 0; + } else { + wc = c16; + } + return wcrtomb(s, wc, 0); + +ilseq: + *x = 0; + errno = EILSEQ; + return -1; +} diff --git a/libc-top-half/musl/src/multibyte/c32rtomb.c b/libc-top-half/musl/src/multibyte/c32rtomb.c new file mode 100644 index 0000000..6785132 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/c32rtomb.c @@ -0,0 +1,7 @@ +#include <uchar.h> +#include <wchar.h> + +size_t c32rtomb(char *restrict s, char32_t c32, mbstate_t *restrict ps) +{ + return wcrtomb(s, c32, ps); +} diff --git a/libc-top-half/musl/src/multibyte/internal.c b/libc-top-half/musl/src/multibyte/internal.c new file mode 100644 index 0000000..2f5aaa9 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/internal.c @@ -0,0 +1,26 @@ +#include "internal.h" + +#define C(x) ( x<2 ? -1 : ( R(0x80,0xc0) | x ) ) +#define D(x) C((x+16)) +#define E(x) ( ( x==0 ? R(0xa0,0xc0) : \ + x==0xd ? R(0x80,0xa0) : \ + R(0x80,0xc0) ) \ + | ( R(0x80,0xc0) >> 6 ) \ + | x ) +#define F(x) ( ( x>=5 ? 0 : \ + x==0 ? R(0x90,0xc0) : \ + x==4 ? R(0x80,0x90) : \ + R(0x80,0xc0) ) \ + | ( R(0x80,0xc0) >> 6 ) \ + | ( R(0x80,0xc0) >> 12 ) \ + | x ) + +const uint32_t bittab[] = { + C(0x2),C(0x3),C(0x4),C(0x5),C(0x6),C(0x7), + C(0x8),C(0x9),C(0xa),C(0xb),C(0xc),C(0xd),C(0xe),C(0xf), + D(0x0),D(0x1),D(0x2),D(0x3),D(0x4),D(0x5),D(0x6),D(0x7), + D(0x8),D(0x9),D(0xa),D(0xb),D(0xc),D(0xd),D(0xe),D(0xf), + E(0x0),E(0x1),E(0x2),E(0x3),E(0x4),E(0x5),E(0x6),E(0x7), + E(0x8),E(0x9),E(0xa),E(0xb),E(0xc),E(0xd),E(0xe),E(0xf), + F(0x0),F(0x1),F(0x2),F(0x3),F(0x4) +}; diff --git a/libc-top-half/musl/src/multibyte/internal.h b/libc-top-half/musl/src/multibyte/internal.h new file mode 100644 index 0000000..45bbc6d --- /dev/null +++ b/libc-top-half/musl/src/multibyte/internal.h @@ -0,0 +1,24 @@ +#define bittab __fsmu8 + +#include <stdint.h> +#include <features.h> + +extern hidden const uint32_t bittab[]; + +/* Upper 6 state bits are a negative integer offset to bound-check next byte */ +/* equivalent to: ( (b-0x80) | (b+offset) ) & ~0x3f */ +#define OOB(c,b) (((((b)>>3)-0x10)|(((b)>>3)+((int32_t)(c)>>26))) & ~7) + +/* Interval [a,b). Either a must be 80 or b must be c0, lower 3 bits clear. */ +#define R(a,b) ((uint32_t)((a==0x80 ? 0x40u-b : 0u-a) << 23)) +#define FAILSTATE R(0x80,0x80) + +#define SA 0xc2u +#define SB 0xf4u + +/* Arbitrary encoding for representing code units instead of characters. */ +#define CODEUNIT(c) (0xdfff & (signed char)(c)) +#define IS_CODEUNIT(c) ((unsigned)(c)-0xdf80 < 0x80) + +/* Get inline definition of MB_CUR_MAX. */ +#include "locale_impl.h" diff --git a/libc-top-half/musl/src/multibyte/mblen.c b/libc-top-half/musl/src/multibyte/mblen.c new file mode 100644 index 0000000..a4304bf --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mblen.c @@ -0,0 +1,6 @@ +#include <stdlib.h> + +int mblen(const char *s, size_t n) +{ + return mbtowc(0, s, n); +} diff --git a/libc-top-half/musl/src/multibyte/mbrlen.c b/libc-top-half/musl/src/multibyte/mbrlen.c new file mode 100644 index 0000000..accf4b3 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbrlen.c @@ -0,0 +1,7 @@ +#include <wchar.h> + +size_t mbrlen(const char *restrict s, size_t n, mbstate_t *restrict st) +{ + static unsigned internal; + return mbrtowc(0, s, n, st ? st : (mbstate_t *)&internal); +} diff --git a/libc-top-half/musl/src/multibyte/mbrtoc16.c b/libc-top-half/musl/src/multibyte/mbrtoc16.c new file mode 100644 index 0000000..765ff90 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbrtoc16.c @@ -0,0 +1,30 @@ +#include <uchar.h> +#include <wchar.h> + +size_t mbrtoc16(char16_t *restrict pc16, const char *restrict s, size_t n, mbstate_t *restrict ps) +{ + static unsigned internal_state; + if (!ps) ps = (void *)&internal_state; + unsigned *pending = (unsigned *)ps; + + if (!s) return mbrtoc16(0, "", 1, ps); + + /* mbrtowc states for partial UTF-8 characters have the high bit set; + * we use nonzero states without high bit for pending surrogates. */ + if ((int)*pending > 0) { + if (pc16) *pc16 = *pending; + *pending = 0; + return -3; + } + + wchar_t wc; + size_t ret = mbrtowc(&wc, s, n, ps); + if (ret <= 4) { + if (wc >= 0x10000) { + *pending = (wc & 0x3ff) + 0xdc00; + wc = 0xd7c0 + (wc >> 10); + } + if (pc16) *pc16 = wc; + } + return ret; +} diff --git a/libc-top-half/musl/src/multibyte/mbrtoc32.c b/libc-top-half/musl/src/multibyte/mbrtoc32.c new file mode 100644 index 0000000..9b6b236 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbrtoc32.c @@ -0,0 +1,13 @@ +#include <uchar.h> +#include <wchar.h> + +size_t mbrtoc32(char32_t *restrict pc32, const char *restrict s, size_t n, mbstate_t *restrict ps) +{ + static unsigned internal_state; + if (!ps) ps = (void *)&internal_state; + if (!s) return mbrtoc32(0, "", 1, ps); + wchar_t wc; + size_t ret = mbrtowc(&wc, s, n, ps); + if (ret <= 4 && pc32) *pc32 = wc; + return ret; +} diff --git a/libc-top-half/musl/src/multibyte/mbrtowc.c b/libc-top-half/musl/src/multibyte/mbrtowc.c new file mode 100644 index 0000000..c94819e --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbrtowc.c @@ -0,0 +1,51 @@ +#include <stdlib.h> +#include <wchar.h> +#include <errno.h> +#include "internal.h" + +size_t mbrtowc(wchar_t *restrict wc, const char *restrict src, size_t n, mbstate_t *restrict st) +{ + static unsigned internal_state; + unsigned c; + const unsigned char *s = (const void *)src; + const unsigned N = n; + wchar_t dummy; + + if (!st) st = (void *)&internal_state; + c = *(unsigned *)st; + + if (!s) { + if (c) goto ilseq; + return 0; + } else if (!wc) wc = &dummy; + + if (!n) return -2; + if (!c) { + if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1; + if (*s-SA > SB-SA) goto ilseq; + c = bittab[*s++-SA]; n--; + } + + if (n) { + if (OOB(c,*s)) goto ilseq; +loop: + c = c<<6 | *s++-0x80; n--; + if (!(c&(1U<<31))) { + *(unsigned *)st = 0; + *wc = c; + return N-n; + } + if (n) { + if (*s-0x80u >= 0x40) goto ilseq; + goto loop; + } + } + + *(unsigned *)st = c; + return -2; +ilseq: + *(unsigned *)st = 0; + errno = EILSEQ; + return -1; +} diff --git a/libc-top-half/musl/src/multibyte/mbsinit.c b/libc-top-half/musl/src/multibyte/mbsinit.c new file mode 100644 index 0000000..c608194 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbsinit.c @@ -0,0 +1,6 @@ +#include <wchar.h> + +int mbsinit(const mbstate_t *st) +{ + return !st || !*(unsigned *)st; +} diff --git a/libc-top-half/musl/src/multibyte/mbsnrtowcs.c b/libc-top-half/musl/src/multibyte/mbsnrtowcs.c new file mode 100644 index 0000000..931192e --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbsnrtowcs.c @@ -0,0 +1,55 @@ +#include <wchar.h> + +size_t mbsnrtowcs(wchar_t *restrict wcs, const char **restrict src, size_t n, size_t wn, mbstate_t *restrict st) +{ + size_t l, cnt=0, n2; + wchar_t *ws, wbuf[256]; + const char *s = *src; + const char *tmp_s; + + if (!wcs) ws = wbuf, wn = sizeof wbuf / sizeof *wbuf; + else ws = wcs; + + /* making sure output buffer size is at most n/4 will ensure + * that mbsrtowcs never reads more than n input bytes. thus + * we can use mbsrtowcs as long as it's practical.. */ + + while ( s && wn && ( (n2=n/4)>=wn || n2>32 ) ) { + if (n2>=wn) n2=wn; + tmp_s = s; + l = mbsrtowcs(ws, &s, n2, st); + if (!(l+1)) { + cnt = l; + wn = 0; + break; + } + if (ws != wbuf) { + ws += l; + wn -= l; + } + n = s ? n - (s - tmp_s) : 0; + cnt += l; + } + if (s) while (wn && n) { + l = mbrtowc(ws, s, n, st); + if (l+2<=2) { + if (!(l+1)) { + cnt = l; + break; + } + if (!l) { + s = 0; + break; + } + /* have to roll back partial character */ + *(unsigned *)st = 0; + break; + } + s += l; n -= l; + /* safe - this loop runs fewer than sizeof(wbuf)/8 times */ + ws++; wn--; + cnt++; + } + if (wcs) *src = s; + return cnt; +} diff --git a/libc-top-half/musl/src/multibyte/mbsrtowcs.c b/libc-top-half/musl/src/multibyte/mbsrtowcs.c new file mode 100644 index 0000000..9b2f2df --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbsrtowcs.c @@ -0,0 +1,120 @@ +#include <stdint.h> +#include <wchar.h> +#include <errno.h> +#include <string.h> +#include <stdlib.h> +#include "internal.h" + +size_t mbsrtowcs(wchar_t *restrict ws, const char **restrict src, size_t wn, mbstate_t *restrict st) +{ + const unsigned char *s = (const void *)*src; + size_t wn0 = wn; + unsigned c = 0; + + if (st && (c = *(unsigned *)st)) { + if (ws) { + *(unsigned *)st = 0; + goto resume; + } else { + goto resume0; + } + } + + if (MB_CUR_MAX==1) { + if (!ws) return strlen((const char *)s); + for (;;) { + if (!wn) { + *src = (const void *)s; + return wn0; + } + if (!*s) break; + c = *s++; + *ws++ = CODEUNIT(c); + wn--; + } + *ws = 0; + *src = 0; + return wn0-wn; + } + + if (!ws) for (;;) { +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) w32; + if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { + while (!(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) { + s += 4; + wn -= 4; + } + } +#endif + if (*s-1u < 0x7f) { + s++; + wn--; + continue; + } + if (*s-SA > SB-SA) break; + c = bittab[*s++-SA]; +resume0: + if (OOB(c,*s)) { s--; break; } + s++; + if (c&(1U<<25)) { + if (*s-0x80u >= 0x40) { s-=2; break; } + s++; + if (c&(1U<<19)) { + if (*s-0x80u >= 0x40) { s-=3; break; } + s++; + } + } + wn--; + c = 0; + } else for (;;) { + if (!wn) { + *src = (const void *)s; + return wn0; + } +#ifdef __GNUC__ + typedef uint32_t __attribute__((__may_alias__)) w32; + if (*s-1u < 0x7f && (uintptr_t)s%4 == 0) { + while (wn>=5 && !(( *(w32*)s | *(w32*)s-0x01010101) & 0x80808080)) { + *ws++ = *s++; + *ws++ = *s++; + *ws++ = *s++; + *ws++ = *s++; + wn -= 4; + } + } +#endif + if (*s-1u < 0x7f) { + *ws++ = *s++; + wn--; + continue; + } + if (*s-SA > SB-SA) break; + c = bittab[*s++-SA]; +resume: + if (OOB(c,*s)) { s--; break; } + c = (c<<6) | *s++-0x80; + if (c&(1U<<31)) { + if (*s-0x80u >= 0x40) { s-=2; break; } + c = (c<<6) | *s++-0x80; + if (c&(1U<<31)) { + if (*s-0x80u >= 0x40) { s-=3; break; } + c = (c<<6) | *s++-0x80; + } + } + *ws++ = c; + wn--; + c = 0; + } + + if (!c && !*s) { + if (ws) { + *ws = 0; + *src = 0; + } + return wn0-wn; + } + errno = EILSEQ; + if (ws) *src = (const void *)s; + return -1; +} diff --git a/libc-top-half/musl/src/multibyte/mbstowcs.c b/libc-top-half/musl/src/multibyte/mbstowcs.c new file mode 100644 index 0000000..dc0d459 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbstowcs.c @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <wchar.h> + +size_t mbstowcs(wchar_t *restrict ws, const char *restrict s, size_t wn) +{ + return mbsrtowcs(ws, (void*)&s, wn, 0); +} diff --git a/libc-top-half/musl/src/multibyte/mbtowc.c b/libc-top-half/musl/src/multibyte/mbtowc.c new file mode 100644 index 0000000..c191bb0 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/mbtowc.c @@ -0,0 +1,47 @@ +#include <stdlib.h> +#include <wchar.h> +#include <errno.h> +#include "internal.h" + +int mbtowc(wchar_t *restrict wc, const char *restrict src, size_t n) +{ + unsigned c; + const unsigned char *s = (const void *)src; + wchar_t dummy; + + if (!s) return 0; + if (!n) goto ilseq; + if (!wc) wc = &dummy; + + if (*s < 0x80) return !!(*wc = *s); + if (MB_CUR_MAX==1) return (*wc = CODEUNIT(*s)), 1; + if (*s-SA > SB-SA) goto ilseq; + c = bittab[*s++-SA]; + + /* Avoid excessive checks against n: If shifting the state n-1 + * times does not clear the high bit, then the value of n is + * insufficient to read a character */ + if (n<4 && ((c<<(6*n-6)) & (1U<<31))) goto ilseq; + + if (OOB(c,*s)) goto ilseq; + c = c<<6 | *s++-0x80; + if (!(c&(1U<<31))) { + *wc = c; + return 2; + } + + if (*s-0x80u >= 0x40) goto ilseq; + c = c<<6 | *s++-0x80; + if (!(c&(1U<<31))) { + *wc = c; + return 3; + } + + if (*s-0x80u >= 0x40) goto ilseq; + *wc = c<<6 | *s++-0x80; + return 4; + +ilseq: + errno = EILSEQ; + return -1; +} diff --git a/libc-top-half/musl/src/multibyte/wcrtomb.c b/libc-top-half/musl/src/multibyte/wcrtomb.c new file mode 100644 index 0000000..8e34926 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/wcrtomb.c @@ -0,0 +1,37 @@ +#include <stdlib.h> +#include <wchar.h> +#include <errno.h> +#include "internal.h" + +size_t wcrtomb(char *restrict s, wchar_t wc, mbstate_t *restrict st) +{ + if (!s) return 1; + if ((unsigned)wc < 0x80) { + *s = wc; + return 1; + } else if (MB_CUR_MAX == 1) { + if (!IS_CODEUNIT(wc)) { + errno = EILSEQ; + return -1; + } + *s = wc; + return 1; + } else if ((unsigned)wc < 0x800) { + *s++ = 0xc0 | (wc>>6); + *s = 0x80 | (wc&0x3f); + return 2; + } else if ((unsigned)wc < 0xd800 || (unsigned)wc-0xe000 < 0x2000) { + *s++ = 0xe0 | (wc>>12); + *s++ = 0x80 | ((wc>>6)&0x3f); + *s = 0x80 | (wc&0x3f); + return 3; + } else if ((unsigned)wc-0x10000 < 0x100000) { + *s++ = 0xf0 | (wc>>18); + *s++ = 0x80 | ((wc>>12)&0x3f); + *s++ = 0x80 | ((wc>>6)&0x3f); + *s = 0x80 | (wc&0x3f); + return 4; + } + errno = EILSEQ; + return -1; +} diff --git a/libc-top-half/musl/src/multibyte/wcsnrtombs.c b/libc-top-half/musl/src/multibyte/wcsnrtombs.c new file mode 100644 index 0000000..95e25e7 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/wcsnrtombs.c @@ -0,0 +1,35 @@ +#include <wchar.h> +#include <limits.h> +#include <string.h> + +size_t wcsnrtombs(char *restrict dst, const wchar_t **restrict wcs, size_t wn, size_t n, mbstate_t *restrict st) +{ + const wchar_t *ws = *wcs; + size_t cnt = 0; + if (!dst) n=0; + while (ws && wn) { + char tmp[MB_LEN_MAX]; + size_t l = wcrtomb(n<MB_LEN_MAX ? tmp : dst, *ws, 0); + if (l==-1) { + cnt = -1; + break; + } + if (dst) { + if (n<MB_LEN_MAX) { + if (l>n) break; + memcpy(dst, tmp, l); + } + dst += l; + n -= l; + } + if (!*ws) { + ws = 0; + break; + } + ws++; + wn--; + cnt += l; + } + if (dst) *wcs = ws; + return cnt; +} diff --git a/libc-top-half/musl/src/multibyte/wcsrtombs.c b/libc-top-half/musl/src/multibyte/wcsrtombs.c new file mode 100644 index 0000000..b5713ae --- /dev/null +++ b/libc-top-half/musl/src/multibyte/wcsrtombs.c @@ -0,0 +1,55 @@ +#include <wchar.h> + +size_t wcsrtombs(char *restrict s, const wchar_t **restrict ws, size_t n, mbstate_t *restrict st) +{ + const wchar_t *ws2; + char buf[4]; + size_t N = n, l; + if (!s) { + for (n=0, ws2=*ws; *ws2; ws2++) { + if (*ws2 >= 0x80u) { + l = wcrtomb(buf, *ws2, 0); + if (!(l+1)) return -1; + n += l; + } else n++; + } + return n; + } + while (n>=4) { + if (**ws-1u >= 0x7fu) { + if (!**ws) { + *s = 0; + *ws = 0; + return N-n; + } + l = wcrtomb(s, **ws, 0); + if (!(l+1)) return -1; + s += l; + n -= l; + } else { + *s++ = **ws; + n--; + } + (*ws)++; + } + while (n) { + if (**ws-1u >= 0x7fu) { + if (!**ws) { + *s = 0; + *ws = 0; + return N-n; + } + l = wcrtomb(buf, **ws, 0); + if (!(l+1)) return -1; + if (l>n) return N-n; + wcrtomb(s, **ws, 0); + s += l; + n -= l; + } else { + *s++ = **ws; + n--; + } + (*ws)++; + } + return N; +} diff --git a/libc-top-half/musl/src/multibyte/wcstombs.c b/libc-top-half/musl/src/multibyte/wcstombs.c new file mode 100644 index 0000000..ab15287 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/wcstombs.c @@ -0,0 +1,7 @@ +#include <stdlib.h> +#include <wchar.h> + +size_t wcstombs(char *restrict s, const wchar_t *restrict ws, size_t n) +{ + return wcsrtombs(s, &(const wchar_t *){ws}, n, 0); +} diff --git a/libc-top-half/musl/src/multibyte/wctob.c b/libc-top-half/musl/src/multibyte/wctob.c new file mode 100644 index 0000000..b484a3f --- /dev/null +++ b/libc-top-half/musl/src/multibyte/wctob.c @@ -0,0 +1,11 @@ +#include <wchar.h> +#include <stdio.h> +#include <stdlib.h> +#include "internal.h" + +int wctob(wint_t c) +{ + if (c < 128U) return c; + if (MB_CUR_MAX==1 && IS_CODEUNIT(c)) return (unsigned char)c; + return EOF; +} diff --git a/libc-top-half/musl/src/multibyte/wctomb.c b/libc-top-half/musl/src/multibyte/wctomb.c new file mode 100644 index 0000000..bad41c5 --- /dev/null +++ b/libc-top-half/musl/src/multibyte/wctomb.c @@ -0,0 +1,8 @@ +#include <stdlib.h> +#include <wchar.h> + +int wctomb(char *s, wchar_t wc) +{ + if (!s) return 0; + return wcrtomb(s, wc, 0); +} |