summary refs log tree commit diff stats
path: root/src/lib/str-sanitize.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/lib/str-sanitize.c')
-rw-r--r-- src/lib/str-sanitize.c 165
1 files changed, 165 insertions, 0 deletions
diff --git a/src/lib/str-sanitize.c b/src/lib/str-sanitize.c
new file mode 100644
index 0000000..859640a
--- /dev/null
+++ b/src/lib/str-sanitize.c
@@ -0,0 +1,165 @@
+/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */
+
+#include "lib.h"
+#include "unichar.h"
+#include "str.h"
+#include "str-sanitize.h"
+
+/* Return how many leading bytes of src need no sanitizing.
+
+   Scanning covers at most max_bytes bytes and stops at the terminating NUL,
+   at the first sequence for which uni_utf8_get_char_n() returns <= 0, or at
+   the first character whose leading byte is below 32. The returned offset
+   is always <= max_bytes. */
+static size_t str_sanitize_skip_start(const char *src, size_t max_bytes)
+{
+	unichar_t chr;
+	size_t pos = 0;
+
+	while (pos < max_bytes && src[pos] != '\0') {
+		int char_len = uni_utf8_get_char_n(src + pos, max_bytes - pos,
+						   &chr);
+
+		/* anything undecodable or below 32 ends the clean prefix */
+		if (char_len <= 0 || (unsigned char)src[pos] < 32)
+			break;
+		pos += char_len;
+	}
+	i_assert(pos <= max_bytes);
+	return pos;
+}
+
+
+/* Return the byte offset just past the leading part of src that needs no
+   sanitizing, looking at no more than max_chars characters.
+
+   Scanning stops at the terminating NUL, once max_chars characters have
+   been accepted, at the first sequence for which uni_utf8_get_char()
+   returns <= 0, or at the first character whose leading byte is below 32. */
+static size_t
+str_sanitize_skip_start_utf8(const char *src, uintmax_t max_chars)
+{
+	unichar_t chr;
+	uintmax_t count = 0;
+	size_t pos = 0;
+
+	while (count < max_chars && src[pos] != '\0') {
+		int char_len = uni_utf8_get_char(src + pos, &chr);
+
+		/* anything undecodable or below 32 ends the clean prefix */
+		if (char_len <= 0 || (unsigned char)src[pos] < 32)
+			break;
+		pos += char_len;
+		count++;
+	}
+	i_assert(count <= max_chars);
+	return pos;
+}
+
+/* Shorten the part of dest that follows byte offset initial_pos.
+
+   Nothing happens when dest holds no data past initial_pos. Otherwise the
+   tail is cut to the length uni_utf8_data_truncate() returns for a limit of
+   one byte less than the current tail length.
+   NOTE(review): presumably that helper backs up to a UTF-8 character
+   boundary, so one whole character is removed - confirm in unichar.h.
+
+   initial_pos is a size_t, matching str_len() and the caller's offset; the
+   former unsigned int parameter silently narrowed large offsets. */
+static void str_sanitize_truncate_char(string_t *dest, size_t initial_pos)
+{
+	const unsigned char *data = str_data(dest);
+	size_t len = str_len(dest);
+
+	i_assert(len >= initial_pos);
+	if (len == initial_pos)
+		return;
+
+	/* only look at the bytes appended after initial_pos */
+	data += initial_pos;
+	len -= initial_pos;
+	str_truncate(dest, initial_pos +
+		     uni_utf8_data_truncate(data, len, len-1));
+}
+
+/* Append a sanitized copy of src to dest, reading at most max_bytes bytes.
+
+   Every byte that starts an invalid UTF-8 sequence, and every character
+   whose leading byte is below 32, is written as '?'. Everything else is
+   copied unchanged. If src was not consumed up to its terminating NUL, the
+   appended text is cut down to at most max_bytes-3 bytes (to nothing when
+   max_bytes < 3) and "..." is added after it. */
+void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes)
+{
+	const size_t start_pos = str_len(dest);
+	unichar_t chr;
+	size_t pos = 0;
+
+	while (pos < max_bytes && src[pos] != '\0') {
+		int char_len = uni_utf8_get_char_n(src + pos, max_bytes - pos,
+						   &chr);
+
+		if (char_len == 0) {
+			/* input ended too early */
+			break;
+		}
+		if (char_len < 0) {
+			/* invalid UTF-8: replace just this one byte */
+			str_append_c(dest, '?');
+			pos++;
+		} else if ((unsigned char)src[pos] < 32) {
+			str_append_c(dest, '?');
+			pos += char_len;
+		} else {
+			str_append_data(dest, src + pos, char_len);
+			pos += char_len;
+		}
+	}
+
+	if (src[pos] == '\0')
+		return;
+
+	/* part of the input was left unread: make room for the marker */
+	if (max_bytes < 3) {
+		str_truncate(dest, start_pos);
+	} else {
+		const size_t limit = max_bytes - 3;
+
+		while (str_len(dest) - start_pos > limit)
+			str_sanitize_truncate_char(dest, start_pos);
+	}
+	str_append(dest, "...");
+}
+
+/* Append a sanitized copy of src to dest, emitting at most max_cps code
+   points (max_cps must be > 0).
+
+   Every byte that starts an invalid UTF-8 sequence, and every character
+   whose leading byte is below 32, is written as
+   UNICODE_REPLACEMENT_CHAR_UTF8. Everything else is copied unchanged. Each
+   emitted replacement counts towards max_cps, so invalid input cannot push
+   the output past the limit.
+
+   If src was not consumed up to its terminating NUL,
+   UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8 is appended:
+    - when the limit was reached, the last emitted code point is replaced
+      by the ellipsis, so the total stays at max_cps;
+    - when src ends in an incomplete sequence, nothing already emitted is
+      removed and the ellipsis is simply added.
+   Data that was in dest before the call is never touched. */
+void str_sanitize_append_utf8(string_t *dest, const char *src,
+			      uintmax_t max_cps)
+{
+	/* start at the current end so truncation can never reach into
+	   data that was in dest before this call */
+	size_t last_pos = str_len(dest);
+	unichar_t chr;
+	uintmax_t c;
+	size_t i;
+
+	i_assert(max_cps > 0);
+
+	for (i = 0, c = 0; c < max_cps && src[i] != '\0'; ) {
+		int len = uni_utf8_get_char(src+i, &chr);
+
+		/* remember where this code point starts before deciding
+		   what to do with it; on the early-end break below this
+		   makes the later truncation a no-op */
+		last_pos = str_len(dest);
+		if (len == 0)
+			break; /* input ended too early */
+
+		if (len < 0) {
+			/* invalid UTF-8: replace just this one byte */
+			str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
+			i++;
+		} else {
+			if ((unsigned char)src[i] < 32)
+				str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8);
+			else
+				str_append_data(dest, src+i, len);
+			i += len;
+		}
+		/* one code point was written in every case above */
+		c++;
+	}
+
+	if (src[i] != '\0') {
+		str_truncate(dest, last_pos);
+		str_append(dest, UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8);
+	}
+}
+
+/* Return a sanitized version of src limited by max_bytes.
+
+   NULL is passed through as NULL. When the clean prefix found by
+   str_sanitize_skip_start() reaches the terminating NUL, src itself is
+   returned. Otherwise the result of str_sanitize_append() is returned from
+   a string created with t_str_new(). */
+const char *str_sanitize(const char *src, size_t max_bytes)
+{
+	string_t *out;
+	size_t clean_len;
+
+	if (src == NULL)
+		return NULL;
+
+	clean_len = str_sanitize_skip_start(src, max_bytes);
+	if (src[clean_len] == '\0') {
+		/* nothing to replace or cut: hand back the caller's string */
+		return src;
+	}
+
+	out = t_str_new(I_MIN(max_bytes, 256));
+	str_sanitize_append(out, src, max_bytes);
+	return str_c(out);
+}
+
+/* Return a sanitized version of src limited to max_cps code points.
+
+   NULL is passed through as NULL. When the clean prefix found by
+   str_sanitize_skip_start_utf8() reaches the terminating NUL, src itself is
+   returned. Otherwise the result of str_sanitize_append_utf8() is returned
+   from a string created with t_str_new(). */
+const char *str_sanitize_utf8(const char *src, uintmax_t max_cps)
+{
+	string_t *out;
+	size_t clean_len;
+
+	if (src == NULL)
+		return NULL;
+
+	clean_len = str_sanitize_skip_start_utf8(src, max_cps);
+	if (src[clean_len] == '\0') {
+		/* nothing to replace or cut: hand back the caller's string */
+		return src;
+	}
+
+	out = t_str_new(I_MIN(max_cps, 256));
+	str_sanitize_append_utf8(out, src, max_cps);
+	return str_c(out);
+}
+