summaryrefslogtreecommitdiffstats
path: root/simple_recode.c
diff options
context:
space:
mode:
Diffstat (limited to 'simple_recode.c')
-rw-r--r--simple_recode.c182
1 files changed, 182 insertions, 0 deletions
diff --git a/simple_recode.c b/simple_recode.c
new file mode 100644
index 0000000..d4b0072
--- /dev/null
+++ b/simple_recode.c
@@ -0,0 +1,182 @@
+/*
+ * Copyright (C) by Marco d'Itri <md@linux.it>.
+ *
+ * simple_recode was inspired by a similar function found in Simon
+ * Josefsson's libidn.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include <stdlib.h>
+#include <errno.h>
+#include <stdio.h>
+#include <string.h>
+#include <iconv.h>
+#include <langinfo.h>
+
+#include "utils.h"
+
+#include "simple_recode.h"
+
+/*
+ * Global variables
+ *
+ * simple_recode_iconv_handle is the conversion descriptor used by
+ * recode_fputs(), which opens it on first use while it still compares
+ * equal to NULL; simple_recode_iconv_close() closes it and resets it
+ * to NULL.
+ *
+ * simple_recode_input_charset is the name of the charset of the strings
+ * passed to recode_fputs(), which hands it to iconv_open(3) as the source
+ * encoding. While it is NULL, recode_fputs() writes its input unchanged.
+ */
+iconv_t simple_recode_iconv_handle;
+const char *simple_recode_input_charset;
+
+/*
+ * These values should be tuned to an acceptable compromise between memory
+ * usage and calling iconv(3) as few times as possible.
+ *
+ * SIMPLE_RECODE_BUFFER_SIZE_1 is the size in bytes of the first output
+ * buffer allocated by simple_recode() for short inputs; longer inputs get
+ * their own length plus one plus (length >> SIMPLE_RECODE_BUFFER_INCREMENT).
+ * SIMPLE_RECODE_BUFFER_SIZE_2 is the size the buffer is enlarged to when it
+ * turns out to be too small and is still smaller than this value.
+ * SIMPLE_RECODE_BUFFER_INCREMENT is a right-shift count: a buffer that is
+ * already at least SIMPLE_RECODE_BUFFER_SIZE_2 bytes long grows by
+ * (size >> SIMPLE_RECODE_BUFFER_INCREMENT), i.e. by one half when it is 1.
+ */
+#define SIMPLE_RECODE_BUFFER_SIZE_1 256
+#define SIMPLE_RECODE_BUFFER_SIZE_2 1024
+#define SIMPLE_RECODE_BUFFER_INCREMENT 1
+
+/*
+ * Enlarge the output buffer used by simple_recode().
+ *
+ * A buffer smaller than SIMPLE_RECODE_BUFFER_SIZE_2 is enlarged to that
+ * size, a bigger one grows by (size >> SIMPLE_RECODE_BUFFER_INCREMENT).
+ * On success 0 is returned and *result, *outp, *outbuf_size and
+ * *outbytes_remaining describe the new buffer, in which one byte is still
+ * kept in reserve for the terminating NUL.
+ * On failure -1 is returned, errno is set and nothing is modified: the old
+ * buffer is still owned by the caller.
+ */
+static int simple_recode_grow(char **result, char **outp,
+        size_t *outbuf_size, size_t *outbytes_remaining)
+{
+    size_t used = (size_t) (*outp - *result);
+    size_t newsize;
+    char *new_result;
+
+    if (*outbuf_size < SIMPLE_RECODE_BUFFER_SIZE_2)
+        newsize = SIMPLE_RECODE_BUFFER_SIZE_2;
+    else
+        newsize = *outbuf_size
+            + (*outbuf_size >> SIMPLE_RECODE_BUFFER_INCREMENT);
+
+    /* check if the newsize variable has overflowed */
+    if (newsize <= *outbuf_size) {
+        errno = ENOMEM;
+        return -1;
+    }
+
+    /* keep the old pointer until realloc(3) is known to have succeeded */
+    new_result = realloc(*result, newsize);
+    if (!new_result)
+        return -1;
+
+    /* update the position in the new output stream */
+    *result = new_result;
+    *outbuf_size = newsize;
+    *outp = new_result + used;
+    *outbytes_remaining = newsize - used - 1;
+
+    return 0;
+}
+
+/*
+ * Convert a NUL-terminated string according to the provided iconv(3)
+ * handle. The returned string is allocated using malloc(3) and needs to be
+ * deallocated by the caller.
+ * Incomplete, invalid and impossible to recode sequences are copied as-is,
+ * one byte at a time (or replaced by a '?' for each byte if
+ * SIMPLE_RECODE_SKIP_INVALID_SEQUENCES is defined).
+ * On failure, NULL is returned and errno is set.
+ */
+char *simple_recode(const iconv_t handle, const char *str)
+{
+    /* iconv(3) wants a pointer to non-const input */
+    char *inp = (char *) str;
+    char *outp, *result;
+    size_t inbytes_remaining, outbytes_remaining, outbuf_size;
+    int saved_errno;
+
+    inbytes_remaining = strlen(inp);
+    if (inbytes_remaining + 1 <= SIMPLE_RECODE_BUFFER_SIZE_1
+            - (SIMPLE_RECODE_BUFFER_SIZE_1 >> SIMPLE_RECODE_BUFFER_INCREMENT))
+        outbuf_size = SIMPLE_RECODE_BUFFER_SIZE_1;
+    else
+        outbuf_size = inbytes_remaining + 1
+            + (inbytes_remaining >> SIMPLE_RECODE_BUFFER_INCREMENT);
+
+    outp = result = malloc(outbuf_size);
+    if (!result)
+        return NULL;
+    /* one byte is always kept in reserve for the terminating NUL */
+    outbytes_remaining = outbuf_size - 1;
+
+    do {
+        size_t err = iconv(handle, &inp, &inbytes_remaining, &outp,
+                &outbytes_remaining);
+
+        if (err != (size_t) -1)
+            break; /* success */
+
+        switch (errno) {
+        case EINVAL: /* incomplete multibyte sequence */
+        case EILSEQ: /* invalid multibyte sequence */
+            /*
+             * iconv(3) may stop on bad input just when the output buffer
+             * is full: make room first, or the byte stored below would
+             * use the slot reserved for the NUL and outbytes_remaining
+             * would wrap around.
+             */
+            if (outbytes_remaining == 0
+                    && simple_recode_grow(&result, &outp, &outbuf_size,
+                        &outbytes_remaining) < 0)
+                goto fail;
+#ifdef SIMPLE_RECODE_SKIP_INVALID_SEQUENCES
+            /* recover from invalid input by replacing it with a '?' */
+            inp++;
+            *outp++ = '?'; /* use U+FFFD for unicode output? how? */
+#else
+            /* garbage in, garbage out */
+            *outp++ = *inp++;
+#endif
+            inbytes_remaining--;
+            outbytes_remaining--;
+            continue;
+
+        case E2BIG: /* the output buffer is too small */
+            if (simple_recode_grow(&result, &outp, &outbuf_size,
+                        &outbytes_remaining) < 0)
+                goto fail;
+            continue;
+
+        default:
+            goto fail;
+        }
+    } while (inbytes_remaining > 0);
+
+    *outp = '\0';
+
+    return result;
+
+fail:
+    /* free(3) must not hide the error which is reported to the caller */
+    saved_errno = errno;
+    free(result);
+    errno = saved_errno;
+    return NULL;
+}
+
+/*
+ * fputs(3) replacement which recodes s before writing it to stream.
+ *
+ * The source charset is taken from the global variable
+ * simple_recode_input_charset and the destination charset is the one of
+ * the current locale, so setlocale(3) is assumed to have been called
+ * already. When simple_recode_input_charset is NULL the string is passed
+ * straight to fputs(3).
+ *
+ * The conversion descriptor is created on the first call which needs it
+ * and is kept in the global variable simple_recode_iconv_handle; if
+ * appropriate it should be released with iconv_close(3).
+ * A failure of iconv_open(3) or of the conversion is reported by calling
+ * err_sys().
+ *
+ * Returns the value returned by fputs(3).
+ */
+int recode_fputs(const char *s, FILE *stream)
+{
+    char *recoded;
+    int status;
+
+    /* no conversion is needed */
+    if (!simple_recode_input_charset)
+        return fputs(s, stream);
+
+    /* create the conversion descriptor the first time it is needed */
+    if (!simple_recode_iconv_handle) {
+        const char *locale_charset = nl_langinfo(CODESET);
+
+        simple_recode_iconv_handle = iconv_open(locale_charset,
+                simple_recode_input_charset);
+        if (simple_recode_iconv_handle == (iconv_t) -1)
+            err_sys("iconv_open");
+    }
+
+    recoded = simple_recode(simple_recode_iconv_handle, s);
+    if (recoded == NULL)
+        err_sys("iconv");
+
+    status = fputs(recoded, stream);
+    free(recoded);
+
+    return status;
+}
+
+/*
+ * Close the conversion descriptor stored in simple_recode_iconv_handle, if
+ * there is one, and reset both simple_recode_iconv_handle and
+ * simple_recode_input_charset to NULL.
+ */
+void simple_recode_iconv_close(void)
+{
+    if (simple_recode_iconv_handle != NULL) {
+        iconv_close(simple_recode_iconv_handle);
+        simple_recode_iconv_handle = NULL;
+        simple_recode_input_charset = NULL;
+    }
+}
+