summaryrefslogtreecommitdiffstats
path: root/lib/encode.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--lib/encode.c79
1 files changed, 79 insertions, 0 deletions
diff --git a/lib/encode.c b/lib/encode.c
new file mode 100644
index 0000000..10b5971
--- /dev/null
+++ b/lib/encode.c
@@ -0,0 +1,79 @@
+/*
+ * Based on code from libblkid,
+ *
+ * Copyright (C) 2008 Kay Sievers <kay.sievers@vrfy.org>
+ * Copyright (C) 2009 Karel Zak <kzak@redhat.com>
+ * Copyright (C) 2020 Pali Rohár <pali.rohar@gmail.com>
+ *
+ * This file may be redistributed under the terms of the
+ * GNU Lesser General Public License.
+ */
+#include "c.h"
+#include "encode.h"
+
+size_t ul_encode_to_utf8(int enc, unsigned char *dest, size_t len,
+ const unsigned char *src, size_t count)
+{
+ size_t i, j;
+ uint32_t c;
+ uint16_t c2;
+
+ for (j = i = 0; i < count; i++) {
+ if (enc == UL_ENCODE_UTF16LE) {
+ if (i+2 > count)
+ break;
+ c = (src[i+1] << 8) | src[i];
+ i++;
+ } else if (enc == UL_ENCODE_UTF16BE) {
+ if (i+2 > count)
+ break;
+ c = (src[i] << 8) | src[i+1];
+ i++;
+ } else if (enc == UL_ENCODE_LATIN1) {
+ c = src[i];
+ } else {
+ return 0;
+ }
+ if ((enc == UL_ENCODE_UTF16LE || enc == UL_ENCODE_UTF16BE) &&
+ c >= 0xD800 && c <= 0xDBFF && i+2 < count) {
+ if (enc == UL_ENCODE_UTF16LE)
+ c2 = (src[i+2] << 8) | src[i+1];
+ else
+ c2 = (src[i+1] << 8) | src[i+2];
+ if (c2 >= 0xDC00 && c2 <= 0xDFFF) {
+ c = 0x10000 + ((c - 0xD800) << 10) + (c2 - 0xDC00);
+ i += 2;
+ }
+ }
+ if (c == 0) {
+ dest[j] = '\0';
+ break;
+ }
+
+ if (c < 0x80) {
+ if (j+1 >= len)
+ break;
+ dest[j++] = (uint8_t) c;
+ } else if (c < 0x800) {
+ if (j+2 >= len)
+ break;
+ dest[j++] = (uint8_t) (0xc0 | (c >> 6));
+ dest[j++] = (uint8_t) (0x80 | (c & 0x3f));
+ } else if (c < 0x10000) {
+ if (j+3 >= len)
+ break;
+ dest[j++] = (uint8_t) (0xe0 | (c >> 12));
+ dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+ dest[j++] = (uint8_t) (0x80 | (c & 0x3f));
+ } else {
+ if (j+4 >= len)
+ break;
+ dest[j++] = (uint8_t) (0xf0 | (c >> 18));
+ dest[j++] = (uint8_t) (0x80 | ((c >> 12) & 0x3f));
+ dest[j++] = (uint8_t) (0x80 | ((c >> 6) & 0x3f));
+ dest[j++] = (uint8_t) (0x80 | (c & 0x3f));
+ }
+ }
+ dest[j] = '\0';
+ return j;
+}