summary | refs | log | tree | commit | diff | stats
path: root/fluent-bit/src/flb_unescape.c
diff options
context:
space:
mode:
author: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 02:57:58 +0000
committer: Daniel Baumann <daniel.baumann@progress-linux.org> 2024-04-19 02:57:58 +0000
commit: be1c7e50e1e8809ea56f2c9d472eccd8ffd73a97 (patch)
tree: 9754ff1ca740f6346cf8483ec915d4054bc5da2d /fluent-bit/src/flb_unescape.c
parent: Initial commit. (diff)
download: netdata-upstream.tar.xz
netdata-upstream.zip
Adding upstream version 1.44.3. (tags: upstream/1.44.3, upstream)
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'fluent-bit/src/flb_unescape.c')
-rw-r--r-- fluent-bit/src/flb_unescape.c 328
1 files changed, 328 insertions, 0 deletions
diff --git a/fluent-bit/src/flb_unescape.c b/fluent-bit/src/flb_unescape.c
new file mode 100644
index 00000000..44f575b4
--- /dev/null
+++ b/fluent-bit/src/flb_unescape.c
@@ -0,0 +1,328 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ * ==========
+ * Copyright (C) 2015-2022 The Fluent Bit Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <fluent-bit/flb_compat.h>
+#include <fluent-bit/flb_info.h>
+#include <fluent-bit/flb_log.h>
+
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+/* Return 1 when c is one of the characters '0'..'7', otherwise 0. */
+static int octal_digit(char c)
+{
+    if (c < '0' || c > '7') {
+        return 0;
+    }
+    return 1;
+}
+
+/*
+ * Return 1 when c is a hexadecimal digit ('0'..'9', 'A'..'F' or 'a'..'f'),
+ * otherwise 0.
+ */
+static int hex_digit(char c)
+{
+    if (c >= '0' && c <= '9') {
+        return 1;
+    }
+    if (c >= 'A' && c <= 'F') {
+        return 1;
+    }
+    if (c >= 'a' && c <= 'f') {
+        return 1;
+    }
+    return 0;
+}
+
+/*
+ * Encode the value ch as a UTF-8 byte sequence into dest.
+ *
+ * Between 1 and 4 bytes are written (no NUL terminator is appended) and the
+ * number of bytes written is returned. Values of 0x110000 and above are not
+ * encoded: dest is left untouched and 0 is returned.
+ */
+static int u8_wc_toutf8(char *dest, uint32_t ch)
+{
+    /* out of the encodable range */
+    if (ch >= 0x110000) {
+        return 0;
+    }
+
+    /* four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+    if (ch >= 0x10000) {
+        dest[0] = (char) ((ch >> 18) | 0xF0);
+        dest[1] = (char) (((ch >> 12) & 0x3F) | 0x80);
+        dest[2] = (char) (((ch >> 6) & 0x3F) | 0x80);
+        dest[3] = (char) ((ch & 0x3F) | 0x80);
+        return 4;
+    }
+
+    /* three bytes: 1110xxxx 10xxxxxx 10xxxxxx */
+    if (ch >= 0x800) {
+        dest[0] = (char) ((ch >> 12) | 0xE0);
+        dest[1] = (char) (((ch >> 6) & 0x3F) | 0x80);
+        dest[2] = (char) ((ch & 0x3F) | 0x80);
+        return 3;
+    }
+
+    /* two bytes: 110xxxxx 10xxxxxx */
+    if (ch >= 0x80) {
+        dest[0] = (char) ((ch >> 6) | 0xC0);
+        dest[1] = (char) ((ch & 0x3F) | 0x80);
+        return 2;
+    }
+
+    /* plain 7-bit value */
+    dest[0] = (char) ch;
+    return 1;
+}
+
+/*
+ * Decode a single escape sequence.
+ *
+ * str must point to the character right after a backslash and size is the
+ * number of readable bytes starting at str. size must be at least 1 because
+ * str[0] is read unconditionally.
+ *
+ * Recognized forms:
+ *   n t r b f v a   the matching control character
+ *   1-3 octal digits
+ *   x + up to 2 hex digits
+ *   u + up to 4 hex digits
+ *   U + up to 8 hex digits
+ * Anything else (including x/u/U with no hex digit after it) yields the
+ * character itself as a literal.
+ *
+ * The decoded value is stored in *dest. Returns the number of input
+ * characters consumed starting at str (always >= 1).
+ */
+static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest)
+{
+    uint32_t ch;
+    char digs[9] = "\0\0\0\0\0\0\0\0";
+    int dno = 0;
+    int i = 1;
+    int max_hex = 0;
+
+    /*
+     * Take the literal character by default. Convert through signed char,
+     * as the caller does for unescaped bytes, so that a byte with the high
+     * bit set becomes a value >= 0x110000 on every platform (plain char is
+     * unsigned on some targets, e.g. ARM) and is therefore copied through
+     * as a raw byte instead of being re-encoded.
+     */
+    ch = (uint32_t) (signed char) str[0];
+
+    switch (str[0]) {
+    case 'n':
+        ch = '\n';
+        break;
+    case 't':
+        ch = '\t';
+        break;
+    case 'r':
+        ch = '\r';
+        break;
+    case 'b':
+        ch = '\b';
+        break;
+    case 'f':
+        ch = '\f';
+        break;
+    case 'v':
+        ch = '\v';
+        break;
+    case 'a':
+        ch = '\a';
+        break;
+    case 'x':
+        max_hex = 2;
+        break;
+    case 'u':
+        max_hex = 4;
+        break;
+    case 'U':
+        max_hex = 8;
+        break;
+    default:
+        if (octal_digit(str[0])) {
+            /* the first digit is str[0] itself, so start over at index 0 */
+            i = 0;
+            do {
+                digs[dno++] = str[i++];
+            } while (i < size && octal_digit(str[i]) && dno < 3);
+            ch = (uint32_t) strtoul(digs, NULL, 8);
+        }
+        break;
+    }
+
+    if (max_hex > 0) {
+        while (i < size && hex_digit(str[i]) && dno < max_hex) {
+            digs[dno++] = str[i++];
+        }
+        if (dno > 0) {
+            /*
+             * strtoul rather than strtol: eight hex digits can exceed
+             * LONG_MAX where long is 32 bits wide, which would make strtol
+             * saturate instead of returning the parsed value.
+             */
+            ch = (uint32_t) strtoul(digs, NULL, 16);
+        }
+    }
+
+    *dest = ch;
+
+    return i;
+}
+
+/*
+ * Unescape in_buf into out_buf, encoding decoded code points as UTF-8.
+ *
+ * At most sz bytes are read from in_buf; processing also stops at the first
+ * NUL byte found in the input. The output is always NUL terminated, so
+ * out_buf needs room for the unescaped bytes plus one terminator (the
+ * unescaped data itself is never allowed to exceed sz bytes).
+ *
+ * Handled escapes: \" \' \\ \/ \n \b \t \f \r directly, everything else is
+ * delegated to u8_read_escape_sequence(). A \uXXXX high surrogate that is
+ * immediately followed by a \uXXXX low surrogate is combined into a single
+ * code point so the pair produces one valid 4-byte UTF-8 sequence. A lone
+ * backslash at the very end of the input is copied as is.
+ *
+ * Returns the number of bytes written to out_buf, not counting the
+ * terminating NUL.
+ */
+int flb_unescape_string_utf8(const char *in_buf, int sz, char *out_buf)
+{
+    uint32_t ch;
+    uint32_t low;
+    char temp[4];
+    const char *end;
+    const char *next;
+    const char *pair;
+
+    int count_out = 0;
+    int count_in = 0;
+    int esc_in = 0;
+    int esc_out = 0;
+
+    end = in_buf + sz;
+    while (in_buf < end && *in_buf && count_in < sz) {
+        next = in_buf + 1;
+        if (next < end && *in_buf == '\\') {
+            esc_in = 2;
+            switch (*next) {
+            case '"':
+                ch = '"';
+                break;
+            case '\'':
+                ch = '\'';
+                break;
+            case '\\':
+                ch = '\\';
+                break;
+            case '/':
+                ch = '/';
+                break;
+            case 'n':
+                ch = '\n';
+                break;
+            case 'b':
+                ch = '\b';
+                break;
+            case 't':
+                ch = '\t';
+                break;
+            case 'f':
+                ch = '\f';
+                break;
+            case 'r':
+                ch = '\r';
+                break;
+            default:
+                /* next < end was checked above: at least one byte to read */
+                esc_in = u8_read_escape_sequence(next, (int) (end - next),
+                                                 &ch) + 1;
+
+                /*
+                 * UTF-16 surrogate pair written as two \uXXXX escapes:
+                 * merge both halves into one code point. Encoding each
+                 * half on its own would produce invalid UTF-8. An
+                 * unpaired surrogate is left untouched.
+                 */
+                pair = in_buf + esc_in;
+                if (*next == 'u' && esc_in == 6 &&
+                    ch >= 0xD800 && ch <= 0xDBFF &&
+                    end - pair >= 6 &&
+                    pair[0] == '\\' && pair[1] == 'u' &&
+                    u8_read_escape_sequence(pair + 1,
+                                            (int) (end - pair - 1),
+                                            &low) == 5 &&
+                    low >= 0xDC00 && low <= 0xDFFF) {
+                    ch = 0x10000 + ((ch - 0xD800) << 10) + (low - 0xDC00);
+                    esc_in += 6;
+                }
+                break;
+            }
+        }
+        else {
+            /*
+             * Plain byte (or a lone trailing backslash). char is unsigned
+             * by default on some targets (e.g. ARM), so convert explicitly
+             * through signed char: bytes with the high bit set then become
+             * values u8_wc_toutf8() refuses to encode and are copied raw.
+             */
+            ch = (uint32_t) (signed char) *in_buf;
+            esc_in = 1;
+        }
+
+        in_buf += esc_in;
+        count_in += esc_in;
+
+        esc_out = u8_wc_toutf8(temp, ch);
+        if (esc_out > sz-count_out) {
+            flb_error("Crossing over string boundary");
+            break;
+        }
+
+        if (esc_out == 0) {
+            /* not encodable: pass the low byte through unchanged */
+            out_buf[count_out] = ch;
+            esc_out = 1;
+        }
+        else if (esc_out == 1) {
+            out_buf[count_out] = (char) temp[0];
+        }
+        else {
+            memcpy(&out_buf[count_out], temp, esc_out);
+        }
+        count_out += esc_out;
+    }
+    if (count_in < sz) {
+        flb_error("Not at boundary but still NULL terminating : %d - '%s'", sz, in_buf);
+    }
+    out_buf[count_out] = '\0';
+    return count_out;
+}
+
+/*
+ * Unescape buf (buf_len bytes) into the buffer pointed to by *unesc_buf.
+ *
+ * The sequences \n \a \b \t \v \f \r and \\ are replaced by the character
+ * they stand for. For any other escaped character the backslash is dropped
+ * and the character itself is kept. A lone backslash at the very end of the
+ * input is dropped.
+ *
+ * The output is NUL terminated and never longer than the input, so the
+ * destination needs room for buf_len + 1 bytes.
+ *
+ * Returns the number of bytes written, not counting the terminating NUL.
+ */
+int flb_unescape_string(const char *buf, int buf_len, char **unesc_buf)
+{
+    int i = 0;
+    int j = 0;
+    char *p;
+    char n;
+
+    p = *unesc_buf;
+    while (i < buf_len) {
+        if (buf[i] != '\\') {
+            p[j++] = buf[i++];
+            continue;
+        }
+
+        if (i + 1 >= buf_len) {
+            /*
+             * Lone trailing backslash: there is no character left to
+             * unescape, so stop here instead of reading past the input.
+             */
+            break;
+        }
+
+        n = buf[i + 1];
+        switch (n) {
+        case 'n':
+            p[j++] = '\n';
+            i += 2;
+            break;
+        case 'a':
+            p[j++] = '\a';
+            i += 2;
+            break;
+        case 'b':
+            p[j++] = '\b';
+            i += 2;
+            break;
+        case 't':
+            p[j++] = '\t';
+            i += 2;
+            break;
+        case 'v':
+            p[j++] = '\v';
+            i += 2;
+            break;
+        case 'f':
+            p[j++] = '\f';
+            i += 2;
+            break;
+        case 'r':
+            p[j++] = '\r';
+            i += 2;
+            break;
+        case '\\':
+            p[j++] = '\\';
+            i += 2;
+            break;
+        default:
+            /*
+             * Unknown escape: skip only the backslash, the character after
+             * it is copied by the next iteration.
+             */
+            i++;
+            break;
+        }
+    }
+    p[j] = '\0';
+    return j;
+}
+
+
+/*
+ * MySQL-style unquoting of buf (buf_len bytes) into *unesc_buf.
+ *
+ * \n \r \t \\ \' \" are replaced by the character they denote, \0 becomes
+ * a NUL byte and \Z becomes 0x1a. Any other escaped character is copied
+ * together with its backslash, and a backslash that is the last input byte
+ * is copied unchanged.
+ *
+ * The output is NUL terminated; the return value is the number of bytes
+ * written before that terminator.
+ */
+int flb_mysql_unquote_string(char *buf, int buf_len, char **unesc_buf)
+{
+    char *out = *unesc_buf;
+    int rd = 0;
+    int wr = 0;
+    char cur;
+
+    while (rd < buf_len) {
+        cur = buf[rd++];
+
+        /* ordinary byte, or a backslash with nothing after it */
+        if (cur != '\\' || rd >= buf_len) {
+            out[wr++] = cur;
+            continue;
+        }
+
+        cur = buf[rd++];
+        if (cur == 'n') {
+            out[wr++] = '\n';
+        }
+        else if (cur == 'r') {
+            out[wr++] = '\r';
+        }
+        else if (cur == 't') {
+            out[wr++] = '\t';
+        }
+        else if (cur == '\\' || cur == '\'' || cur == '\"') {
+            /* these three stand for themselves */
+            out[wr++] = cur;
+        }
+        else if (cur == '0') {
+            out[wr++] = 0;
+        }
+        else if (cur == 'Z') {
+            out[wr++] = 0x1a;
+        }
+        else {
+            /* not a known escape: keep the backslash and the character */
+            out[wr++] = '\\';
+            out[wr++] = cur;
+        }
+    }
+
+    out[wr] = '\0';
+    return wr;
+}