diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-28 09:51:24 +0000 |
commit | f7548d6d28c313cf80e6f3ef89aed16a19815df1 (patch) | |
tree | a3f6f2a3f247293bee59ecd28e8cd8ceb6ca064a /src/lib/str-sanitize.c | |
parent | Initial commit. (diff) | |
download | dovecot-upstream.tar.xz dovecot-upstream.zip |
Adding upstream version 1:2.3.19.1+dfsg1. (tags: upstream/1%2.3.19.1+dfsg1, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to '')
-rw-r--r-- | src/lib/str-sanitize.c | 165 |
1 files changed, 165 insertions, 0 deletions
diff --git a/src/lib/str-sanitize.c b/src/lib/str-sanitize.c new file mode 100644 index 0000000..859640a --- /dev/null +++ b/src/lib/str-sanitize.c @@ -0,0 +1,165 @@ +/* Copyright (c) 2004-2018 Dovecot authors, see the included COPYING file */ + +#include "lib.h" +#include "unichar.h" +#include "str.h" +#include "str-sanitize.h" + +static size_t str_sanitize_skip_start(const char *src, size_t max_bytes) +{ + unichar_t chr; + size_t i; + + for (i = 0; i < max_bytes && src[i] != '\0'; ) { + int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr); + if (len <= 0) + break; + if ((unsigned char)src[i] < 32) + break; + i += len; + } + i_assert(i <= max_bytes); + return i; +} + + +static size_t +str_sanitize_skip_start_utf8(const char *src, uintmax_t max_chars) +{ + unichar_t chr; + uintmax_t c; + size_t i; + + for (i = 0, c = 0; c < max_chars && src[i] != '\0'; ) { + int len = uni_utf8_get_char(src+i, &chr); + if (len <= 0) + break; + if ((unsigned char)src[i] < 32) + break; + c++; + i += len; + } + i_assert(c <= max_chars); + return i; +} + +static void str_sanitize_truncate_char(string_t *dest, unsigned int initial_pos) +{ + const unsigned char *data = str_data(dest); + size_t len = str_len(dest); + + i_assert(len >= initial_pos); + if (len == initial_pos) + return; + + data += initial_pos; + len -= initial_pos; + str_truncate(dest, initial_pos + + uni_utf8_data_truncate(data, len, len-1)); +} + +void str_sanitize_append(string_t *dest, const char *src, size_t max_bytes) +{ + size_t initial_pos = str_len(dest); + unichar_t chr; + size_t i; + + for (i = 0; i < max_bytes && src[i] != '\0'; ) { + int len = uni_utf8_get_char_n(src+i, max_bytes-i, &chr); + if (len == 0) + break; /* input ended too early */ + + if (len < 0) { + /* invalid UTF-8 */ + str_append_c(dest, '?'); + i++; + continue; + } + if ((unsigned char)src[i] < 32) + str_append_c(dest, '?'); + else + str_append_data(dest, src+i, len); + i += len; + } + + if (src[i] != '\0') { + if (max_bytes < 3) + 
str_truncate(dest, initial_pos); + else { + while (str_len(dest) - initial_pos > max_bytes-3) + str_sanitize_truncate_char(dest, initial_pos); + } + str_append(dest, "..."); + } +} + +void str_sanitize_append_utf8(string_t *dest, const char *src, + uintmax_t max_cps) +{ + size_t last_pos = 0; + unichar_t chr; + uintmax_t c; + size_t i; + + i_assert(max_cps > 0); + + for (i = 0, c = 0; c < max_cps && src[i] != '\0'; ) { + int len = uni_utf8_get_char(src+i, &chr); + if (len == 0) + break; /* input ended too early */ + + last_pos = str_len(dest); + if (len < 0) { + /* invalid UTF-8 */ + str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8); + i++; + continue; + } + if ((unsigned char)src[i] < 32) + str_append(dest, UNICODE_REPLACEMENT_CHAR_UTF8); + else + str_append_data(dest, src+i, len); + i += len; + c++; + } + + if (src[i] != '\0') { + str_truncate(dest, last_pos); + str_append(dest, UNICODE_HORIZONTAL_ELLIPSIS_CHAR_UTF8); + } +} + +const char *str_sanitize(const char *src, size_t max_bytes) +{ + string_t *str; + size_t i; + + if (src == NULL) + return NULL; + + i = str_sanitize_skip_start(src, max_bytes); + if (src[i] == '\0') + return src; + + str = t_str_new(I_MIN(max_bytes, 256)); + str_sanitize_append(str, src, max_bytes); + return str_c(str); +} + +const char *str_sanitize_utf8(const char *src, uintmax_t max_cps) +{ + string_t *str; + size_t i; + + if (src == NULL) + return NULL; + + i = str_sanitize_skip_start_utf8(src, max_cps); + if (src[i] == '\0') + return src; + + str = t_str_new(I_MIN(max_cps, 256)); + str_sanitize_append_utf8(str, src, max_cps); + return str_c(str); +} + |