/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */ /* Fluent Bit * ========== * Copyright (C) 2015-2022 The Fluent Bit Authors * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ #include #include #include #include #include #include static int octal_digit(char c) { return (c >= '0' && c <= '7'); } static int hex_digit(char c) { return ((c >= '0' && c <= '9') || (c >= 'A' && c <= 'F') || (c >= 'a' && c <= 'f')); } static int u8_wc_toutf8(char *dest, uint32_t ch) { if (ch < 0x80) { dest[0] = (char)ch; return 1; } if (ch < 0x800) { dest[0] = (ch>>6) | 0xC0; dest[1] = (ch & 0x3F) | 0x80; return 2; } if (ch < 0x10000) { dest[0] = (ch>>12) | 0xE0; dest[1] = ((ch>>6) & 0x3F) | 0x80; dest[2] = (ch & 0x3F) | 0x80; return 3; } if (ch < 0x110000) { dest[0] = (ch>>18) | 0xF0; dest[1] = ((ch>>12) & 0x3F) | 0x80; dest[2] = ((ch>>6) & 0x3F) | 0x80; dest[3] = (ch & 0x3F) | 0x80; return 4; } return 0; } /* assumes that src points to the character after a backslash returns number of input characters processed */ static int u8_read_escape_sequence(const char *str, int size, uint32_t *dest) { uint32_t ch; char digs[9]="\0\0\0\0\0\0\0\0"; int dno=0, i=1; ch = (uint32_t)str[0]; /* take literal character */ if (str[0] == 'n') ch = L'\n'; else if (str[0] == 't') ch = L'\t'; else if (str[0] == 'r') ch = L'\r'; else if (str[0] == 'b') ch = L'\b'; else if (str[0] == 'f') ch = L'\f'; else if (str[0] == 'v') ch = L'\v'; else if (str[0] == 'a') ch = L'\a'; else if (octal_digit(str[0])) { i = 0; do { digs[dno++] = str[i++]; } while (i < size && octal_digit(str[i]) && dno < 3); ch = strtol(digs, NULL, 8); } else if (str[0] == 'x') { while (i < size && hex_digit(str[i]) && dno < 2) { digs[dno++] = str[i++]; } if (dno > 0) { ch = strtol(digs, NULL, 16); } } else if (str[0] == 'u') { while (i < size && hex_digit(str[i]) && dno < 4) { digs[dno++] = str[i++]; } if (dno > 0) { ch = strtol(digs, NULL, 16); } } else if (str[0] == 'U') { while (i < size && hex_digit(str[i]) && dno < 8) { digs[dno++] = str[i++]; } if (dno > 0) { ch = strtol(digs, NULL, 16); } } *dest = ch; return i; } int flb_unescape_string_utf8(const char *in_buf, int sz, char *out_buf) { uint32_t ch; char temp[4]; const char *end; const char *next; int size; int count_out = 0; int count_in = 0; int esc_in = 0; int esc_out = 0; end = in_buf + sz; while (in_buf < end && *in_buf && count_in < sz) { next = in_buf + 1; if (next < end && *in_buf == '\\') { esc_in = 2; switch (*next) { case '"': ch = '"'; break; case '\'': ch = '\''; break; case '\\': ch = '\\'; break; case '/': ch = '/'; break; case 'n': ch = '\n'; break; case 'b': ch = '\b'; break; case 't': ch = '\t'; break; case 'f': ch = '\f'; break; case 'r': ch = '\r'; break; default: size = end - next; if (size > 0) { esc_in = u8_read_escape_sequence(next, size, &ch) + 1; } else { /* because char is unsigned char by default on arm, so we need to do a explicit conversion */ ch = (uint32_t) (signed char) *in_buf; esc_in = 1; } } } else { /* explicit convert char to signed char */ ch = (uint32_t) (signed char) *in_buf; esc_in = 1; } in_buf += esc_in; count_in += esc_in; esc_out = u8_wc_toutf8(temp, ch); if (esc_out > sz-count_out) { flb_error("Crossing over string boundary"); break; } if (esc_out == 0) { out_buf[count_out] = ch; esc_out = 1; } else if (esc_out == 1) { out_buf[count_out] = (char) temp[0]; } else { memcpy(&out_buf[count_out], temp, esc_out); } count_out += esc_out; } if (count_in < sz) { flb_error("Not at boundary but still NULL terminating : %d - '%s'", sz, in_buf); } out_buf[count_out] = '\0'; return count_out; } int flb_unescape_string(const char *buf, int buf_len, char **unesc_buf) { int i = 0; int j = 0; char *p; char n; p = *unesc_buf; while (i < buf_len) { if (buf[i] == '\\') { if (i + 1 < buf_len) { n = buf[i + 1]; if (n == 'n') { p[j++] = '\n'; i++; } else if (n == 'a') { p[j++] = '\a'; i++; } else if (n == 'b') { p[j++] = '\b'; i++; } else if (n == 't') { p[j++] = '\t'; i++; } else if (n == 'v') { p[j++] = '\v'; i++; } else if (n == 'f') { p[j++] = '\f'; i++; } else if (n == 'r') { p[j++] = '\r'; i++; } else if (n == '\\') { p[j++] = '\\'; i++; } i++; continue; } else { i++; } } p[j++] = buf[i++]; } p[j] = '\0'; return j; } /* mysql unquote */ int flb_mysql_unquote_string(char *buf, int buf_len, char **unesc_buf) { int i = 0; int j = 0; char *p; char n; p = *unesc_buf; while (i < buf_len) { if ((n = buf[i++]) != '\\') { p[j++] = n; } else if(i >= buf_len) { p[j++] = n; } else { n = buf[i++]; switch(n) { case 'n': p[j++] = '\n'; break; case 'r': p[j++] = '\r'; break; case 't': p[j++] = '\t'; break; case '\\': p[j++] = '\\'; break; case '\'': p[j++] = '\''; break; case '\"': p[j++] = '\"'; break; case '0': p[j++] = 0; break; case 'Z': p[j++] = 0x1a; break; default: p[j++] = '\\'; p[j++] = n; break; } } } p[j] = '\0'; return j; }