summaryrefslogtreecommitdiffstats
path: root/lib/iconv_open.c
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 16:58:41 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 16:58:41 +0000
commite1908ae95dd4c9d19ee4dfabfc8bf8a7f85943fe (patch)
treef5cc731bedcac0fb7fe14d952e4581e749f8bb87 /lib/iconv_open.c
parentInitial commit. (diff)
downloadcoreutils-upstream.tar.xz
coreutils-upstream.zip
Adding upstream version 9.4.upstream/9.4upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'lib/iconv_open.c')
-rw-r--r--lib/iconv_open.c173
1 files changed, 173 insertions, 0 deletions
diff --git a/lib/iconv_open.c b/lib/iconv_open.c
new file mode 100644
index 0000000..be70de2
--- /dev/null
+++ b/lib/iconv_open.c
@@ -0,0 +1,173 @@
+/* Character set conversion.
+ Copyright (C) 2007, 2009-2023 Free Software Foundation, Inc.
+
+ This file is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Lesser General Public License as
+ published by the Free Software Foundation; either version 2.1 of the
+ License, or (at your option) any later version.
+
+ This file is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>. */
+
+#include <config.h>
+
+/* Specification. */
+#include <iconv.h>
+
+#include <errno.h>
+#include <string.h>
+#include "c-ctype.h"
+#include "c-strcase.h"
+
+#define SIZEOF(a) (sizeof(a) / sizeof(a[0]))
+
+/* Namespace cleanliness. */
+#define mapping_lookup rpl_iconv_open_mapping_lookup
+
+/* The macro ICONV_FLAVOR is defined to one of these or undefined. */
+
+#define ICONV_FLAVOR_AIX "iconv_open-aix.h"
+#define ICONV_FLAVOR_HPUX "iconv_open-hpux.h"
+#define ICONV_FLAVOR_IRIX "iconv_open-irix.h"
+#define ICONV_FLAVOR_OSF "iconv_open-osf.h"
+#define ICONV_FLAVOR_SOLARIS "iconv_open-solaris.h"
+#define ICONV_FLAVOR_ZOS "iconv_open-zos.h"
+
+#ifdef ICONV_FLAVOR
+# include ICONV_FLAVOR
+#endif
+
+iconv_t
+rpl_iconv_open (const char *tocode, const char *fromcode)
+#undef iconv_open
+{
+ char fromcode_upper[32];
+ char tocode_upper[32];
+ char *fromcode_upper_end;
+ char *tocode_upper_end;
+
+#if REPLACE_ICONV_UTF
+ /* Special handling of conversion between UTF-8 and UTF-{16,32}{BE,LE}.
+ Do this here, before calling the real iconv_open(), because OSF/1 5.1
+ iconv() to these encoding inserts a BOM, which is wrong.
+ We do not need to handle conversion between arbitrary encodings and
+ UTF-{16,32}{BE,LE}, because the 'striconveh' module implements two-step
+ conversion through UTF-8.
+ The _ICONV_* constants are chosen to be disjoint from any iconv_t
+ returned by the system's iconv_open() functions. Recall that iconv_t
+ is a scalar type. */
+ if (c_toupper (fromcode[0]) == 'U'
+ && c_toupper (fromcode[1]) == 'T'
+ && c_toupper (fromcode[2]) == 'F'
+ && fromcode[3] == '-')
+ {
+ if (c_toupper (tocode[0]) == 'U'
+ && c_toupper (tocode[1]) == 'T'
+ && c_toupper (tocode[2]) == 'F'
+ && tocode[3] == '-')
+ {
+ if (strcmp (fromcode + 4, "8") == 0)
+ {
+ if (c_strcasecmp (tocode + 4, "16BE") == 0)
+ return _ICONV_UTF8_UTF16BE;
+ if (c_strcasecmp (tocode + 4, "16LE") == 0)
+ return _ICONV_UTF8_UTF16LE;
+ if (c_strcasecmp (tocode + 4, "32BE") == 0)
+ return _ICONV_UTF8_UTF32BE;
+ if (c_strcasecmp (tocode + 4, "32LE") == 0)
+ return _ICONV_UTF8_UTF32LE;
+ }
+ else if (strcmp (tocode + 4, "8") == 0)
+ {
+ if (c_strcasecmp (fromcode + 4, "16BE") == 0)
+ return _ICONV_UTF16BE_UTF8;
+ if (c_strcasecmp (fromcode + 4, "16LE") == 0)
+ return _ICONV_UTF16LE_UTF8;
+ if (c_strcasecmp (fromcode + 4, "32BE") == 0)
+ return _ICONV_UTF32BE_UTF8;
+ if (c_strcasecmp (fromcode + 4, "32LE") == 0)
+ return _ICONV_UTF32LE_UTF8;
+ }
+ }
+ }
+#endif
+
+ /* Do *not* add special support for 8-bit encodings like ASCII or ISO-8859-1
+ here. This would lead to programs that work in some locales (such as the
+ "C" or "en_US" locales) but do not work in East Asian locales. It is
+ better if programmers make their programs depend on GNU libiconv (except
+ on glibc systems), e.g. by using the AM_ICONV macro and documenting the
+ dependency in an INSTALL or DEPENDENCIES file. */
+
+ /* Try with the original names first.
+ This covers the case when fromcode or tocode is a lowercase encoding name
+ that is understood by the system's iconv_open but not listed in our
+ mappings table. */
+ {
+ iconv_t cd = iconv_open (tocode, fromcode);
+ if (cd != (iconv_t)(-1))
+ return cd;
+ }
+
+ /* Convert the encodings to upper case, because
+ 1. in the arguments of iconv_open() on AIX, HP-UX, and OSF/1 the case
+ matters,
+ 2. it makes searching in the table faster. */
+ {
+ const char *p = fromcode;
+ char *q = fromcode_upper;
+ while ((*q = c_toupper (*p)) != '\0')
+ {
+ p++;
+ q++;
+ if (q == &fromcode_upper[SIZEOF (fromcode_upper)])
+ {
+ errno = EINVAL;
+ return (iconv_t)(-1);
+ }
+ }
+ fromcode_upper_end = q;
+ }
+
+ {
+ const char *p = tocode;
+ char *q = tocode_upper;
+ while ((*q = c_toupper (*p)) != '\0')
+ {
+ p++;
+ q++;
+ if (q == &tocode_upper[SIZEOF (tocode_upper)])
+ {
+ errno = EINVAL;
+ return (iconv_t)(-1);
+ }
+ }
+ tocode_upper_end = q;
+ }
+
+#ifdef ICONV_FLAVOR
+ /* Apply the mappings. */
+ {
+ const struct mapping *m =
+ mapping_lookup (fromcode_upper, fromcode_upper_end - fromcode_upper);
+
+ fromcode = (m != NULL ? m->vendor_name : fromcode_upper);
+ }
+ {
+ const struct mapping *m =
+ mapping_lookup (tocode_upper, tocode_upper_end - tocode_upper);
+
+ tocode = (m != NULL ? m->vendor_name : tocode_upper);
+ }
+#else
+ fromcode = fromcode_upper;
+ tocode = tocode_upper;
+#endif
+
+ return iconv_open (tocode, fromcode);
+}