diff options
Diffstat (limited to 'lib/mbiter.h')
-rw-r--r-- | lib/mbiter.h | 89 |
1 files changed, 69 insertions, 20 deletions
diff --git a/lib/mbiter.h b/lib/mbiter.h index 7b41870..cb7950d 100644 --- a/lib/mbiter.h +++ b/lib/mbiter.h @@ -1,5 +1,5 @@ /* Iterating through multibyte strings: macros for multi-byte encodings. - Copyright (C) 2001, 2005, 2007, 2009-2023 Free Software Foundation, Inc. + Copyright (C) 2001, 2005, 2007, 2009-2024 Free Software Foundation, Inc. This file is free software: you can redistribute it and/or modify it under the terms of the GNU Lesser General Public License as @@ -82,7 +82,8 @@ #ifndef _MBITER_H #define _MBITER_H 1 -/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE. */ +/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE, + _GL_ATTRIBUTE_ALWAYS_INLINE. */ #if !_GL_CONFIG_H_INCLUDED #error "Please include config.h first." #endif @@ -90,27 +91,35 @@ #include <assert.h> #include <stddef.h> #include <string.h> +#include <uchar.h> #include <wchar.h> #include "mbchar.h" _GL_INLINE_HEADER_BEGIN #ifndef MBITER_INLINE -# define MBITER_INLINE _GL_INLINE +# define MBITER_INLINE _GL_INLINE _GL_ATTRIBUTE_ALWAYS_INLINE #endif struct mbiter_multi { const char *limit; /* pointer to end of string */ + #if !GNULIB_MBRTOC32_REGULAR bool in_shift; /* true if next byte may not be interpreted as ASCII */ + /* If GNULIB_MBRTOC32_REGULAR, it is always false, + so optimize it away. */ + #endif mbstate_t state; /* if in_shift: current shift state */ + /* If GNULIB_MBRTOC32_REGULAR, it is in an initial state + before and after every mbiter_multi_next invocation. + */ bool next_done; /* true if mbi_avail has already filled the following */ struct mbchar cur; /* the current character: - const char *cur.ptr pointer to current character + const char *cur.ptr pointer to current character The following are only valid after mbi_avail. - size_t cur.bytes number of bytes of current character - bool cur.wc_valid true if wc is a valid wide character - wchar_t cur.wc if wc_valid: the current character + size_t cur.bytes number of bytes of current character + bool cur.wc_valid true if wc is a valid 32-bit wide character + char32_t cur.wc if wc_valid: the current character */ }; @@ -119,14 +128,19 @@ mbiter_multi_next (struct mbiter_multi *iter) { if (iter->next_done) return; + #if !GNULIB_MBRTOC32_REGULAR if (iter->in_shift) goto with_shift; + #endif /* Handle most ASCII characters quickly, without calling mbrtowc(). */ if (is_basic (*iter->cur.ptr)) { - /* These characters are part of the basic character set. ISO C 99 - guarantees that their wide character code is identical to their - char code. */ + /* These characters are part of the POSIX portable character set. + For most of them, namely those in the ISO C basic character set, + ISO C 99 guarantees that their wide character code is identical to + their char code. For the few other ones, this is the case as well, + in all locale encodings that are in use. The 32-bit wide character + code is the same as well. */ iter->cur.bytes = 1; iter->cur.wc = *iter->cur.ptr; iter->cur.wc_valid = true; @@ -134,25 +148,34 @@ mbiter_multi_next (struct mbiter_multi *iter) else { assert (mbsinit (&iter->state)); + #if !GNULIB_MBRTOC32_REGULAR iter->in_shift = true; with_shift: - iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr, - iter->limit - iter->cur.ptr, &iter->state); + #endif + iter->cur.bytes = mbrtoc32 (&iter->cur.wc, iter->cur.ptr, + iter->limit - iter->cur.ptr, &iter->state); if (iter->cur.bytes == (size_t) -1) { /* An invalid multibyte sequence was encountered. */ iter->cur.bytes = 1; iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not very important; the string is bogus anyway. */ + /* Allow the next invocation to continue from a sane state. */ + #if !GNULIB_MBRTOC32_REGULAR + iter->in_shift = false; + #endif + mbszero (&iter->state); } else if (iter->cur.bytes == (size_t) -2) { /* An incomplete multibyte character at the end. */ iter->cur.bytes = iter->limit - iter->cur.ptr; iter->cur.wc_valid = false; - /* Whether to set iter->in_shift = false and reset iter->state - or not is not important; the string end is reached anyway. */ + #if !GNULIB_MBRTOC32_REGULAR + /* Cause the next mbi_avail invocation to return false. */ + iter->in_shift = false; + #endif + /* Whether to reset iter->state or not is not important; the + string end is reached anyway. */ } else { @@ -163,12 +186,20 @@ mbiter_multi_next (struct mbiter_multi *iter) assert (*iter->cur.ptr == '\0'); assert (iter->cur.wc == 0); } + #if !GNULIB_MBRTOC32_REGULAR + else if (iter->cur.bytes == (size_t) -3) + /* The previous multibyte sequence produced an additional 32-bit + wide character. */ + iter->cur.bytes = 0; + #endif iter->cur.wc_valid = true; - /* When in the initial state, we can go back treating ASCII + /* When in an initial state, we can go back treating ASCII characters more quickly. */ + #if !GNULIB_MBRTOC32_REGULAR if (mbsinit (&iter->state)) iter->in_shift = false; + #endif } } iter->next_done = true; @@ -185,22 +216,40 @@ MBITER_INLINE void mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter) { new_iter->limit = old_iter->limit; + #if !GNULIB_MBRTOC32_REGULAR if ((new_iter->in_shift = old_iter->in_shift)) memcpy (&new_iter->state, &old_iter->state, sizeof (mbstate_t)); else - memset (&new_iter->state, 0, sizeof (mbstate_t)); + #endif + mbszero (&new_iter->state); new_iter->next_done = old_iter->next_done; mb_copy (&new_iter->cur, &old_iter->cur); } /* Iteration macros. */ typedef struct mbiter_multi mbi_iterator_t; +#if !GNULIB_MBRTOC32_REGULAR #define mbi_init(iter, startptr, length) \ ((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \ - (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \ + (iter).in_shift = false, mbszero (&(iter).state), \ (iter).next_done = false) +#else +/* Optimized: no in_shift. */ +#define mbi_init(iter, startptr, length) \ + ((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \ + mbszero (&(iter).state), \ + (iter).next_done = false) +#endif +#if !GNULIB_MBRTOC32_REGULAR +#define mbi_avail(iter) \ + (((iter).cur.ptr < (iter).limit || (iter).in_shift) \ + && (mbiter_multi_next (&(iter)), true)) +#else +/* Optimized: no in_shift. */ #define mbi_avail(iter) \ - ((iter).cur.ptr < (iter).limit && (mbiter_multi_next (&(iter)), true)) + ((iter).cur.ptr < (iter).limit \ + && (mbiter_multi_next (&(iter)), true)) +#endif #define mbi_advance(iter) \ ((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false) |