/* Test of c32width() function.
   Copyright (C) 2007-2025 Free Software Foundation, Inc.

   This program is free software: you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation, either version 3 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */

/* Written by Bruno Haible <bruno@clisp.org>, 2007.  */

#include <config.h>

#include <uchar.h>

/* SIGNATURE_CHECK comes from signature.h; it is handed the return type and
   parameter list that c32width is expected to be declared with.  */
#include "signature.h"
SIGNATURE_CHECK (c32width, int, (char32_t));

#include <locale.h>
#include <string.h>

#include "c-ctype.h"
#include "localcharset.h"
#include "macros.h"

int
|
|
main ()
|
|
{
|
|
char32_t wc;
|
|
|
|
#if !GNULIB_WCHAR_SINGLE_LOCALE
|
|
# ifdef C_CTYPE_ASCII
|
|
/* Test width of ASCII characters. */
|
|
for (wc = 0x20; wc < 0x7F; wc++)
|
|
ASSERT (c32width (wc) == 1);
|
|
# endif
|
|
#endif
|
|
|
|
/* Switch to an UTF-8 locale. */
|
|
if (setlocale (LC_ALL, "fr_FR.UTF-8") != NULL
|
|
/* Check whether it's really an UTF-8 locale.
|
|
On OpenBSD 4.0, the setlocale call succeeds only for the LC_CTYPE
|
|
category and therefore returns "C/fr_FR.UTF-8/C/C/C/C", but the
|
|
LC_CTYPE category is effectively set to an ASCII LC_CTYPE category;
|
|
in particular, locale_charset() returns "ASCII". */
|
|
&& strcmp (locale_charset (), "UTF-8") == 0)
|
|
{
|
|
/* Test width of ASCII characters. */
|
|
for (wc = 0x20; wc < 0x7F; wc++)
|
|
ASSERT (c32width (wc) == 1);
|
|
|
|
/* Test width of some non-spacing characters. */
|
|
ASSERT (c32width (0x0301) == 0);
|
|
ASSERT (c32width (0x05B0) == 0);
|
|
|
|
/* Test width of some format control characters. */
|
|
ASSERT (c32width (0x200E) <= 0);
|
|
ASSERT (c32width (0x2060) <= 0);
|
|
ASSERT (c32width (0xE0001) <= 0);
|
|
ASSERT (c32width (0xE0044) <= 0);
|
|
|
|
/* Test width of some zero width characters. */
|
|
/* While it is desirable that U+200B, U+200C, U+200D have width 0,
|
|
because this makes wcswidth work better on strings that contain these
|
|
characters, it is acceptable if an implementation treats these
|
|
characters like control characters. */
|
|
ASSERT (c32width (0x200B) <= 0);
|
|
ASSERT (c32width (0xFEFF) <= 0);
|
|
|
|
/* Test width of some math symbols.
|
|
U+2202 is marked as having ambiguous width (A) in EastAsianWidth.txt
|
|
(see <https://www.unicode.org/Public/12.0.0/ucd/EastAsianWidth.txt>).
|
|
The Unicode Standard Annex 11
|
|
<https://www.unicode.org/reports/tr11/tr11-36.html>
|
|
says
|
|
"Ambiguous characters behave like wide or narrow characters
|
|
depending on the context (language tag, script identification,
|
|
associated font, source of data, or explicit markup; all can
|
|
provide the context). If the context cannot be established
|
|
reliably, they should be treated as narrow characters by default."
|
|
For c32width(), the only available context information is the locale.
|
|
"fr_FR.UTF-8" is a Western locale, not an East Asian locale, therefore
|
|
U+2202 should be treated like a narrow character. */
|
|
ASSERT (c32width (0x2202) == 1);
|
|
|
|
/* Test width of some CJK characters. */
|
|
ASSERT (c32width (0x3000) == 2);
|
|
ASSERT (c32width (0xB250) == 2);
|
|
ASSERT (c32width (0xFF1A) == 2);
|
|
#if !((defined __FreeBSD__ && __FreeBSD__ < 13 && !defined __GLIBC__) || defined __sun)
|
|
ASSERT (c32width (0x20369) == 2);
|
|
ASSERT (c32width (0x2F876) == 2);
|
|
#endif
|
|
}
|
|
|
|
return test_exit_status;
|
|
}
|