diff options
Diffstat (limited to 'fluent-bit/src/flb_unescape.c')
-rw-r--r-- | fluent-bit/src/flb_unescape.c | 328 |
1 files changed, 328 insertions, 0 deletions
diff --git a/fluent-bit/src/flb_unescape.c b/fluent-bit/src/flb_unescape.c new file mode 100644 index 000000000..44f575b41 --- /dev/null +++ b/fluent-bit/src/flb_unescape.c @@ -0,0 +1,328 @@ +/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ + +/* Fluent Bit + * ========== + * Copyright (C) 2015-2022 The Fluent Bit Authors + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <fluent-bit/flb_compat.h> +#include <fluent-bit/flb_info.h> +#include <fluent-bit/flb_log.h> + +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +static int octal_digit(char c) +{ + return (c >= '0' && c <= '7'); +} + +static int hex_digit(char c) +{ + return ((c >= '0' && c <= '9') || + (c >= 'A' && c <= 'F') || + (c >= 'a' && c <= 'f')); +} + +static int u8_wc_toutf8(char *dest, uint32_t ch) +{ + if (ch < 0x80) { + dest[0] = (char)ch; + return 1; + } + if (ch < 0x800) { + dest[0] = (ch>>6) | 0xC0; + dest[1] = (ch & 0x3F) | 0x80; + return 2; + } + if (ch < 0x10000) { + dest[0] = (ch>>12) | 0xE0; + dest[1] = ((ch>>6) & 0x3F) | 0x80; + dest[2] = (ch & 0x3F) | 0x80; + return 3; + } + if (ch < 0x110000) { + dest[0] = (ch>>18) | 0xF0; + dest[1] = ((ch>>12) & 0x3F) | 0x80; + dest[2] = ((ch>>6) & 0x3F) | 0x80; + dest[3] = (ch & 0x3F) | 0x80; + return 4; + } + return 0; +} + +/* assumes that src points to the character after a backslash + returns number of input characters processed */ +static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest) +{ + uint32_t ch; + char digs[9]="\0\0\0\0\0\0\0\0"; + int dno=0, i=1; + + ch = (uint32_t)str[0]; /* take literal character */ + + if (str[0] == 'n') + ch = L'\n'; + else if (str[0] == 't') + ch = L'\t'; + else if (str[0] == 'r') + ch = L'\r'; + else if (str[0] == 'b') + ch = L'\b'; + else if (str[0] == 'f') + ch = L'\f'; + else if (str[0] == 'v') + ch = L'\v'; + else if (str[0] == 'a') + ch = L'\a'; + else if (octal_digit(str[0])) { + i = 0; + do { + digs[dno++] = str[i++]; + } while (i < size && octal_digit(str[i]) && dno < 3); + ch = strtol(digs, NULL, 8); + } + else if (str[0] == 'x') { + while (i < size && hex_digit(str[i]) && dno < 2) { + digs[dno++] = str[i++]; + } + if (dno > 0) { + ch = strtol(digs, NULL, 16); + } + } + else if (str[0] == 'u') { + while (i < size && hex_digit(str[i]) && dno < 4) { + digs[dno++] = str[i++]; + } + if (dno > 0) { + ch = strtol(digs, NULL, 16); + } + } + else if (str[0] == 'U') { + while (i < size && hex_digit(str[i]) && dno < 8) { + digs[dno++] = str[i++]; + } + if (dno > 0) { + ch = strtol(digs, NULL, 16); + } + } + *dest = ch; + + return i; +} + +int flb_unescape_string_utf8(const char *in_buf, int sz, char *out_buf) +{ + uint32_t ch; + char temp[4]; + const char *end; + const char *next; + int size; + + + int count_out = 0; + int count_in = 0; + int esc_in = 0; + int esc_out = 0; + + end = in_buf + sz; + while (in_buf < end && *in_buf && count_in < sz) { + next = in_buf + 1; + if (next < end && *in_buf == '\\') { + esc_in = 2; + switch (*next) { + case '"': + ch = '"'; + break; + case '\'': + ch = '\''; + break; + case '\\': + ch = '\\'; + break; + case '/': + ch = '/'; + break; + case 'n': + ch = '\n'; + break; + case 'b': + ch = '\b'; + break; + case 't': + ch = '\t'; + break; + case 'f': + ch = '\f'; + break; + case 'r': + ch = '\r'; + break; + default: + size = end - next; + if (size > 0) { + esc_in = u8_read_escape_sequence(next, size, &ch) + 1; + } + else { + /* because char is unsigned char by default on arm, so we need to do a explicit conversion */ + ch = (uint32_t) (signed char) *in_buf; + esc_in = 1; + } + } + } + else { + /* explicit convert char to signed char */ + ch = (uint32_t) (signed char) *in_buf; + esc_in = 1; + } + + in_buf += esc_in; + count_in += esc_in; + + esc_out = u8_wc_toutf8(temp, ch); + if (esc_out > sz-count_out) { + flb_error("Crossing over string boundary"); + break; + } + + if (esc_out == 0) { + out_buf[count_out] = ch; + esc_out = 1; + } + else if (esc_out == 1) { + out_buf[count_out] = (char) temp[0]; + } + else { + memcpy(&out_buf[count_out], temp, esc_out); + } + count_out += esc_out; + } + if (count_in < sz) { + flb_error("Not at boundary but still NULL terminating : %d - '%s'", sz, in_buf); + } + out_buf[count_out] = '\0'; + return count_out; +} + +int flb_unescape_string(const char *buf, int buf_len, char **unesc_buf) +{ + int i = 0; + int j = 0; + char *p; + char n; + + p = *unesc_buf; + while (i < buf_len) { + if (buf[i] == '\\') { + if (i + 1 < buf_len) { + n = buf[i + 1]; + if (n == 'n') { + p[j++] = '\n'; + i++; + } + else if (n == 'a') { + p[j++] = '\a'; + i++; + } + else if (n == 'b') { + p[j++] = '\b'; + i++; + } + else if (n == 't') { + p[j++] = '\t'; + i++; + } + else if (n == 'v') { + p[j++] = '\v'; + i++; + } + else if (n == 'f') { + p[j++] = '\f'; + i++; + } + else if (n == 'r') { + p[j++] = '\r'; + i++; + } + else if (n == '\\') { + p[j++] = '\\'; + i++; + } + i++; + continue; + } + else { + i++; + } + } + p[j++] = buf[i++]; + } + p[j] = '\0'; + return j; +} + + +/* mysql unquote */ +int flb_mysql_unquote_string(char *buf, int buf_len, char **unesc_buf) +{ + int i = 0; + int j = 0; + char *p; + char n; + + p = *unesc_buf; + while (i < buf_len) { + if ((n = buf[i++]) != '\\') { + p[j++] = n; + } else if(i >= buf_len) { + p[j++] = n; + } else { + n = buf[i++]; + switch(n) { + case 'n': + p[j++] = '\n'; + break; + case 'r': + p[j++] = '\r'; + break; + case 't': + p[j++] = '\t'; + break; + case '\\': + p[j++] = '\\'; + break; + case '\'': + p[j++] = '\''; + break; + case '\"': + p[j++] = '\"'; + break; + case '0': + p[j++] = 0; + break; + case 'Z': + p[j++] = 0x1a; + break; + default: + p[j++] = '\\'; + p[j++] = n; + break; + } + } + } + p[j] = '\0'; + return j; +} |