summary | refs | log | tree | commit | diff | stats
path: root/lib/mbiter.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/mbiter.h')
-rw-r--r-- lib/mbiter.h | 89
1 files changed, 69 insertions, 20 deletions
diff --git a/lib/mbiter.h b/lib/mbiter.h
index 7b41870..cb7950d 100644
--- a/lib/mbiter.h
+++ b/lib/mbiter.h
@@ -1,5 +1,5 @@
/* Iterating through multibyte strings: macros for multi-byte encodings.
- Copyright (C) 2001, 2005, 2007, 2009-2023 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2005, 2007, 2009-2024 Free Software Foundation, Inc.
This file is free software: you can redistribute it and/or modify
it under the terms of the GNU Lesser General Public License as
@@ -82,7 +82,8 @@
#ifndef _MBITER_H
#define _MBITER_H 1
-/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE. */
+/* This file uses _GL_INLINE_HEADER_BEGIN, _GL_INLINE,
+ _GL_ATTRIBUTE_ALWAYS_INLINE. */
#if !_GL_CONFIG_H_INCLUDED
#error "Please include config.h first."
#endif
@@ -90,27 +91,35 @@
#include <assert.h>
#include <stddef.h>
#include <string.h>
+#include <uchar.h>
#include <wchar.h>
#include "mbchar.h"
_GL_INLINE_HEADER_BEGIN
#ifndef MBITER_INLINE
-# define MBITER_INLINE _GL_INLINE
+# define MBITER_INLINE _GL_INLINE _GL_ATTRIBUTE_ALWAYS_INLINE
#endif
/* State of one iteration over a multibyte string that ends at 'limit'.
   The fields are set up by mbi_init, the current character is decoded
   by mbiter_multi_next (invoked through mbi_avail), and mbi_advance
   steps cur.ptr forward by cur.bytes.
   When GNULIB_MBRTOC32_REGULAR is true, the in_shift member is not
   compiled in.  */
struct mbiter_multi
{
const char *limit; /* pointer to end of string */
+ #if !GNULIB_MBRTOC32_REGULAR
bool in_shift; /* true if next byte may not be interpreted as ASCII */
+ /* If GNULIB_MBRTOC32_REGULAR, it is always false,
+ so optimize it away. */
+ #endif
mbstate_t state; /* if in_shift: current shift state */
+ /* If GNULIB_MBRTOC32_REGULAR, it is in an initial state
+ before and after every mbiter_multi_next invocation.
+ */
bool next_done; /* true if mbi_avail has already filled the following */
struct mbchar cur; /* the current character:
- const char *cur.ptr pointer to current character
+ const char *cur.ptr pointer to current character
The following are only valid after mbi_avail.
- size_t cur.bytes number of bytes of current character
- bool cur.wc_valid true if wc is a valid wide character
- wchar_t cur.wc if wc_valid: the current character
+ size_t cur.bytes number of bytes of current character
+ bool cur.wc_valid true if wc is a valid 32-bit wide character
+ char32_t cur.wc if wc_valid: the current character
*/
};
@@ -119,14 +128,19 @@ mbiter_multi_next (struct mbiter_multi *iter)
{
if (iter->next_done)
return;
+ #if !GNULIB_MBRTOC32_REGULAR
if (iter->in_shift)
goto with_shift;
+ #endif
/* Handle most ASCII characters quickly, without calling the multibyte conversion function (mbrtoc32, formerly mbrtowc). */
if (is_basic (*iter->cur.ptr))
{
- /* These characters are part of the basic character set. ISO C 99
- guarantees that their wide character code is identical to their
- char code. */
+ /* These characters are part of the POSIX portable character set.
+ For most of them, namely those in the ISO C basic character set,
+ ISO C 99 guarantees that their wide character code is identical to
+ their char code. For the few other ones, this is the case as well,
+ in all locale encodings that are in use. The 32-bit wide character
+ code is the same as well. */
iter->cur.bytes = 1;
iter->cur.wc = *iter->cur.ptr;
iter->cur.wc_valid = true;
@@ -134,25 +148,34 @@ mbiter_multi_next (struct mbiter_multi *iter)
else
{
assert (mbsinit (&iter->state));
+ #if !GNULIB_MBRTOC32_REGULAR
iter->in_shift = true;
with_shift:
- iter->cur.bytes = mbrtowc (&iter->cur.wc, iter->cur.ptr,
- iter->limit - iter->cur.ptr, &iter->state);
+ #endif
+ iter->cur.bytes = mbrtoc32 (&iter->cur.wc, iter->cur.ptr,
+ iter->limit - iter->cur.ptr, &iter->state);
if (iter->cur.bytes == (size_t) -1)
{
/* An invalid multibyte sequence was encountered. */
iter->cur.bytes = 1;
iter->cur.wc_valid = false;
- /* Whether to set iter->in_shift = false and reset iter->state
- or not is not very important; the string is bogus anyway. */
+ /* Allow the next invocation to continue from a sane state. */
+ #if !GNULIB_MBRTOC32_REGULAR
+ iter->in_shift = false;
+ #endif
+ mbszero (&iter->state);
}
else if (iter->cur.bytes == (size_t) -2)
{
/* An incomplete multibyte character at the end. */
iter->cur.bytes = iter->limit - iter->cur.ptr;
iter->cur.wc_valid = false;
- /* Whether to set iter->in_shift = false and reset iter->state
- or not is not important; the string end is reached anyway. */
+ #if !GNULIB_MBRTOC32_REGULAR
+ /* Cause the next mbi_avail invocation to return false. */
+ iter->in_shift = false;
+ #endif
+ /* Whether to reset iter->state or not is not important; the
+ string end is reached anyway. */
}
else
{
@@ -163,12 +186,20 @@ mbiter_multi_next (struct mbiter_multi *iter)
assert (*iter->cur.ptr == '\0');
assert (iter->cur.wc == 0);
}
+ #if !GNULIB_MBRTOC32_REGULAR
+ else if (iter->cur.bytes == (size_t) -3)
+ /* The previous multibyte sequence produced an additional 32-bit
+ wide character. */
+ iter->cur.bytes = 0;
+ #endif
iter->cur.wc_valid = true;
- /* When in the initial state, we can go back treating ASCII
+ /* When in an initial state, we can go back treating ASCII
characters more quickly. */
+ #if !GNULIB_MBRTOC32_REGULAR
if (mbsinit (&iter->state))
iter->in_shift = false;
+ #endif
}
}
iter->next_done = true;
@@ -185,22 +216,40 @@ MBITER_INLINE void
mbiter_multi_copy (struct mbiter_multi *new_iter, const struct mbiter_multi *old_iter)
{
/* Copy the iterator *old_iter into *new_iter, member by member.  */
new_iter->limit = old_iter->limit;
/* The conversion state is copied only while old_iter->in_shift is set.
   Otherwise, and always when GNULIB_MBRTOC32_REGULAR is true (no in_shift
   member), the new state is simply zeroed with mbszero.  */
+ #if !GNULIB_MBRTOC32_REGULAR
if ((new_iter->in_shift = old_iter->in_shift))
memcpy (&new_iter->state, &old_iter->state, sizeof (mbstate_t));
else
- memset (&new_iter->state, 0, sizeof (mbstate_t));
+ #endif
+ mbszero (&new_iter->state);
new_iter->next_done = old_iter->next_done;
/* The current character is duplicated through mb_copy.  */
mb_copy (&new_iter->cur, &old_iter->cur);
}
/* Iteration macros. */
/* Iterator type on which the mbi_* macros operate.  */
typedef struct mbiter_multi mbi_iterator_t;
/* mbi_init (iter, startptr, length): prepare ITER for iterating over the
   LENGTH bytes that begin at STARTPTR.  Sets cur.ptr and limit, zeroes the
   conversion state with mbszero, and clears next_done.  The first variant
   additionally clears in_shift; the second variant is used when
   GNULIB_MBRTOC32_REGULAR is true and that member does not exist.
   Note that STARTPTR and LENGTH are each evaluated once, ITER several
   times.  */
+#if !GNULIB_MBRTOC32_REGULAR
#define mbi_init(iter, startptr, length) \
((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \
- (iter).in_shift = false, memset (&(iter).state, '\0', sizeof (mbstate_t)), \
+ (iter).in_shift = false, mbszero (&(iter).state), \
(iter).next_done = false)
+#else
+/* Optimized: no in_shift. */
+#define mbi_init(iter, startptr, length) \
+ ((iter).cur.ptr = (startptr), (iter).limit = (iter).cur.ptr + (length), \
+ mbszero (&(iter).state), \
+ (iter).next_done = false)
+#endif
/* mbi_avail (iter): evaluates to true while ITER still has something to
   deliver, and in that case calls mbiter_multi_next so that the current
   character is decoded.  In the first variant the test also succeeds when
   cur.ptr has reached limit but in_shift is still set; the second variant
   (GNULIB_MBRTOC32_REGULAR) only compares cur.ptr against limit.  */
+#if !GNULIB_MBRTOC32_REGULAR
+#define mbi_avail(iter) \
+ (((iter).cur.ptr < (iter).limit || (iter).in_shift) \
+ && (mbiter_multi_next (&(iter)), true))
+#else
+/* Optimized: no in_shift. */
#define mbi_avail(iter) \
- ((iter).cur.ptr < (iter).limit && (mbiter_multi_next (&(iter)), true))
+ ((iter).cur.ptr < (iter).limit \
+ && (mbiter_multi_next (&(iter)), true))
+#endif
/* mbi_advance (iter): move cur.ptr forward by cur.bytes and clear next_done,
   so that the following mbi_avail decodes the next character.  */
#define mbi_advance(iter) \
((iter).cur.ptr += (iter).cur.bytes, (iter).next_done = false)