summaryrefslogtreecommitdiffstats
path: root/fluent-bit/src/flb_utils.c
diff options
context:
space:
mode:
Diffstat (limited to 'fluent-bit/src/flb_utils.c')
-rw-r--r--fluent-bit/src/flb_utils.c1433
1 files changed, 1433 insertions, 0 deletions
diff --git a/fluent-bit/src/flb_utils.c b/fluent-bit/src/flb_utils.c
new file mode 100644
index 000000000..c2b2f58a6
--- /dev/null
+++ b/fluent-bit/src/flb_utils.c
@@ -0,0 +1,1433 @@
+/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
+
+/* Fluent Bit
+ * ==========
+ * Copyright (C) 2015-2022 The Fluent Bit Authors
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <time.h>
+#include <ctype.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <msgpack.h>
+
+#include <monkey/mk_core.h>
+#include <fluent-bit/flb_macros.h>
+#include <fluent-bit/flb_config.h>
+#include <fluent-bit/flb_error.h>
+#include <fluent-bit/flb_input.h>
+#include <fluent-bit/flb_output.h>
+#include <fluent-bit/flb_utils.h>
+#include <fluent-bit/flb_utf8.h>
+
+#ifdef FLB_HAVE_AWS_ERROR_REPORTER
+#include <fluent-bit/aws/flb_aws_error_reporter.h>
+
+extern struct flb_aws_error_reporter *error_reporter;
+#endif
+
+#ifdef FLB_HAVE_OPENSSL
+#include <openssl/rand.h>
+#endif
+
+/*
+ * The following block descriptor describes the private use unicode character range
+ * used for denoting invalid utf-8 fragments. Invalid fragment 0xCE would become
+ * utf-8 codepoint U+E0CE if FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR is set to
+ * E0 since U+E0CE = U+<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR><HEX_FRAGMENT>
+ */
+#define FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR 0xE0
+
+void flb_utils_error(int err)
+{
+ char *msg = NULL;
+
+ switch (err) {
+ case FLB_ERR_CFG_FILE:
+ msg = "could not open configuration file";
+ break;
+ case FLB_ERR_CFG_FILE_FORMAT:
+ msg = "configuration file contains format errors";
+ break;
+ case FLB_ERR_CFG_FILE_STOP:
+ msg = "configuration file contains errors";
+ break;
+ case FLB_ERR_CFG_FLUSH:
+ msg = "invalid flush value";
+ break;
+ case FLB_ERR_CFG_FLUSH_CREATE:
+ msg = "could not create timer for flushing";
+ break;
+ case FLB_ERR_CFG_FLUSH_REGISTER:
+ msg = "could not register timer for flushing";
+ break;
+ case FLB_ERR_INPUT_INVALID:
+ msg = "invalid input type";
+ break;
+ case FLB_ERR_INPUT_UNDEF:
+ msg = "no input(s) have been defined";
+ break;
+ case FLB_ERR_INPUT_UNSUP:
+ msg = "unsupported Input";
+ break;
+ case FLB_ERR_OUTPUT_UNDEF:
+ msg = "you must specify an output target";
+ break;
+ case FLB_ERR_OUTPUT_INVALID:
+ msg = "invalid output target";
+ break;
+ case FLB_ERR_OUTPUT_UNIQ:
+ msg = "just one output type is supported";
+ break;
+ case FLB_ERR_FILTER_INVALID:
+ msg = "invalid filter plugin";
+ break;
+ case FLB_ERR_CFG_PARSER_FILE:
+ msg = "could not open parser configuration file";
+ break;
+ case FLB_ERR_JSON_INVAL:
+ msg = "invalid JSON string";
+ break;
+ case FLB_ERR_JSON_PART:
+ msg = "truncated JSON string";
+ break;
+ case FLB_ERR_CORO_STACK_SIZE:
+ msg = "invalid coroutine stack size";
+ break;
+ case FLB_ERR_CFG_PLUGIN_FILE:
+ msg = "plugins_file not found";
+ break;
+ case FLB_ERR_RELOADING_IN_PROGRESS:
+ msg = "reloading in progress";
+ break;
+ default:
+ flb_error("(error message is not defined. err=%d)", err);
+ }
+
+ if (!msg) {
+ fprintf(stderr,
+ "%sError%s: undefined. Aborting",
+ ANSI_BOLD ANSI_RED, ANSI_RESET);
+ #ifdef FLB_HAVE_AWS_ERROR_REPORTER
+ if (is_error_reporting_enabled()) {
+ flb_aws_error_reporter_write(error_reporter, "Error: undefined. Aborting\n");
+ }
+ #endif
+
+ }
+ else {
+ flb_error("%s, aborting.", msg);
+ #ifdef FLB_HAVE_AWS_ERROR_REPORTER
+ if (is_error_reporting_enabled()) {
+ flb_aws_error_reporter_write(error_reporter, msg);
+ }
+ #endif
+ }
+
+ if (err <= FLB_ERR_FILTER_INVALID) {
+ exit(EXIT_FAILURE);
+ }
+}
+
+/* Custom error */
+void flb_utils_error_c(const char *msg)
+{
+ fprintf(stderr,
+ "%sError%s: %s. Aborting\n\n",
+ ANSI_BOLD ANSI_RED, ANSI_RESET, msg);
+ exit(EXIT_FAILURE);
+}
+
+void flb_utils_warn_c(const char *msg)
+{
+ fprintf(stderr,
+ "%sWarning%s: %s",
+ ANSI_BOLD ANSI_YELLOW, ANSI_RESET, msg);
+}
+
+#ifdef FLB_HAVE_FORK
+/* Run current process in background mode */
+int flb_utils_set_daemon(struct flb_config *config)
+{
+ pid_t pid;
+
+ if ((pid = fork()) < 0){
+ flb_error("Failed creating to switch to daemon mode (fork failed)");
+ exit(EXIT_FAILURE);
+ }
+
+ if (pid > 0) { /* parent */
+ exit(EXIT_SUCCESS);
+ }
+
+ /* set files mask */
+ umask(0);
+
+ /* Create new session */
+ setsid();
+
+ if (chdir("/") < 0) { /* make sure we can unmount the inherited filesystem */
+ flb_error("Unable to unmount the inherited filesystem");
+ exit(EXIT_FAILURE);
+ }
+
+ /* Our last STDOUT messages */
+ flb_info("switching to background mode (PID=%ld)", (long) getpid());
+
+ fclose(stderr);
+ fclose(stdout);
+
+ return 0;
+}
+#endif
+
+void flb_utils_print_setup(struct flb_config *config)
+{
+ struct mk_list *head;
+ struct mk_list *head_tmp;
+ struct flb_input_plugin *plugin;
+ struct flb_input_collector *collector;
+ struct flb_input_instance *in;
+ struct flb_filter_instance *f;
+ struct flb_output_instance *out;
+
+ flb_info("Configuration:");
+
+ /* general */
+ flb_info(" flush time | %f seconds", config->flush);
+ flb_info(" grace | %i seconds", config->grace);
+ flb_info(" daemon | %i", config->daemon);
+
+ /* Inputs */
+ flb_info("___________");
+ flb_info(" inputs:");
+ mk_list_foreach(head, &config->inputs) {
+ in = mk_list_entry(head, struct flb_input_instance, _head);
+ flb_info(" %s", in->p->name);
+ }
+
+ /* Filters */
+ flb_info("___________");
+ flb_info(" filters:");
+ mk_list_foreach(head, &config->filters) {
+ f = mk_list_entry(head, struct flb_filter_instance, _head);
+ flb_info(" %s", f->name);
+ }
+
+ /* Outputs */
+ flb_info("___________");
+ flb_info(" outputs:");
+ mk_list_foreach(head, &config->outputs) {
+ out = mk_list_entry(head, struct flb_output_instance, _head);
+ flb_info(" %s", out->name);
+ }
+
+ /* Collectors */
+ flb_info("___________");
+ flb_info(" collectors:");
+ mk_list_foreach(head, &config->inputs) {
+ in = mk_list_entry(head, struct flb_input_instance, _head);
+ mk_list_foreach(head_tmp, &in->collectors) {
+ collector = mk_list_entry(head_tmp, struct flb_input_collector, _head);
+ plugin = collector->instance->p;
+
+ if (collector->seconds > 0) {
+ flb_info("[%s %lus,%luns] ",
+ plugin->name,
+ collector->seconds,
+ collector->nanoseconds);
+ }
+ else {
+ flb_info(" [%s] ", plugin->name);
+ }
+ }
+ }
+}
+
+/*
+ * quoted_string_len returns the length of a quoted string, not including the quotes.
+ */
+static int quoted_string_len(const char *str)
+{
+ int len = 0;
+ char quote = *str++; /* Consume the quote character. */
+
+ while (quote != 0) {
+ char c = *str++;
+ switch (c) {
+ case '\0':
+ /* Error: string ends before end-quote was seen. */
+ return -1;
+ case '\\':
+ /* Skip escaped quote or \\. */
+ if (*str == quote || *str == '\\') {
+ str++;
+ }
+ break;
+ case '\'':
+ case '"':
+ /* End-quote seen: stop iterating. */
+ if (c == quote) {
+ quote = 0;
+ }
+ break;
+ default:
+ break;
+ }
+ len++;
+ }
+
+ /* Go back one character to ignore end-quote */
+ len--;
+
+ return len;
+}
+
+/*
+ * next_token returns the next token in the string 'str' delimited by 'separator'.
+ * 'out' is set to the beginning of the token.
+ * 'out_len' is set to the length of the token.
+ * 'parse_quotes' is set to FLB_TRUE when quotes shall be considered when tokenizing the 'str'.
+ * The function returns offset to next token in the string.
+ */
+static int next_token(const char *str, int separator, char **out, int *out_len, int parse_quotes) {
+ const char *token_in = str;
+ char *token_out;
+ int next_separator = 0;
+ int quote = 0; /* Parser state: 0 not inside quoted string, or '"' or '\'' when inside quoted string. */
+ int len = 0;
+ int i;
+
+ /* Skip leading separators. */
+ while (*token_in == separator) {
+ token_in++;
+ }
+
+ /* Should quotes be parsed? Or is token quoted? If not, copy until separator or the end of string. */
+ if (parse_quotes == FLB_FALSE || (*token_in != '"' && *token_in != '\'')) {
+ len = (int)strlen(token_in);
+ next_separator = mk_string_char_search(token_in, separator, len);
+ if (next_separator > 0) {
+ len = next_separator;
+ }
+ *out_len = len;
+ *out = mk_string_copy_substr(token_in, 0, len);
+ if (*out == NULL) {
+ return -1;
+ }
+
+ return (int)(token_in - str) + len;
+ }
+
+ /* Token is quoted. */
+
+ len = quoted_string_len(token_in);
+ if (len < 0) {
+ return -1;
+ }
+
+ /* Consume the quote character. */
+ quote = *token_in++;
+
+ token_out = flb_malloc(len + 1);
+ if (!token_out) {
+ return -1;
+ }
+
+ /* Copy the token */
+ for (i = 0; i < len; i++) {
+ /* Handle escapes when inside quoted token:
+ * \" -> "
+ * \' -> '
+ * \\ -> \
+ */
+ if (*token_in == '\\' && (token_in[1] == quote || token_in[1] == '\\')) {
+ token_in++;
+ }
+ token_out[i] = *token_in++;
+ }
+ token_out[i] = '\0';
+
+ *out = token_out;
+ *out_len = len;
+
+ return (int)(token_in - str);
+}
+
+
+static struct mk_list *split(const char *line, int separator, int max_split, int quoted)
+{
+ int i = 0;
+ int count = 0;
+ int val_len;
+ int len;
+ int end;
+ char *val;
+ struct mk_list *list;
+ struct flb_split_entry *new;
+
+ if (!line) {
+ return NULL;
+ }
+
+ list = flb_malloc(sizeof(struct mk_list));
+ if (!list) {
+ flb_errno();
+ return NULL;
+ }
+ mk_list_init(list);
+
+ len = strlen(line);
+ while (i < len) {
+ end = next_token(line + i, separator, &val, &val_len, quoted);
+ if (end == -1) {
+ flb_error("Parsing failed: %s", line);
+ flb_utils_split_free(list);
+ return NULL;
+ }
+
+ /* Update last position */
+ i += end;
+
+ /* Create new entry */
+ new = flb_malloc(sizeof(struct flb_split_entry));
+ if (!new) {
+ flb_errno();
+ flb_free(val);
+ flb_utils_split_free(list);
+ return NULL;
+ }
+ new->value = val;
+ new->len = val_len;
+ new->last_pos = i;
+ mk_list_add(&new->_head, list);
+ count++;
+
+ /* Update index for next loop */
+ i++;
+
+ /*
+ * If the counter exceeded the maximum specified and there
+ * are still remaining bytes, append those bytes in a new
+ * and last entry.
+ */
+ if (count >= max_split && max_split > 0 && i < len) {
+ new = flb_malloc(sizeof(struct flb_split_entry));
+ if (!new) {
+ flb_errno();
+ flb_utils_split_free(list);
+ return NULL;
+ }
+ new->value = mk_string_copy_substr(line, i, len);
+ new->len = len - i;
+ mk_list_add(&new->_head, list);
+ break;
+ }
+ }
+
+ return list;
+}
+
+struct mk_list *flb_utils_split_quoted(const char *line, int separator, int max_split)
+{
+ return split(line, separator, max_split, FLB_TRUE);
+}
+
+struct mk_list *flb_utils_split(const char *line, int separator, int max_split)
+{
+ return split(line, separator, max_split, FLB_FALSE);
+}
+
+
+void flb_utils_split_free_entry(struct flb_split_entry *entry)
+{
+ mk_list_del(&entry->_head);
+ flb_free(entry->value);
+ flb_free(entry);
+}
+
+void flb_utils_split_free(struct mk_list *list)
+{
+ struct mk_list *tmp;
+ struct mk_list *head;
+ struct flb_split_entry *entry;
+
+ mk_list_foreach_safe(head, tmp, list) {
+ entry = mk_list_entry(head, struct flb_split_entry, _head);
+ flb_utils_split_free_entry(entry);
+ }
+
+ flb_free(list);
+}
+
+/* When a timer expires, it needs some handling */
+int flb_utils_timer_consume(flb_pipefd_t fd)
+{
+ int ret;
+ uint64_t val;
+
+ ret = flb_pipe_r(fd, &val, sizeof(val));
+ if (ret == -1) {
+ flb_errno();
+ return -1;
+ }
+
+#ifdef __linux__
+ /* A timer on linux must return an unisgned 64 bit number */
+ if (ret == 0) {
+ return -1;
+ }
+#endif
+
+ return 0;
+}
+
+int flb_utils_pipe_byte_consume(flb_pipefd_t fd)
+{
+ int ret;
+ uint64_t val;
+
+ ret = flb_pipe_r(fd, &val, sizeof(val));
+ if (ret == -1) {
+ flb_errno();
+ return -1;
+ }
+
+ return 0;
+}
+
+int64_t flb_utils_size_to_bytes(const char *size)
+{
+ int i;
+ int len;
+ int plen = 0;
+ int64_t val;
+ char c;
+ char tmp[3] = {0};
+ int64_t KB = 1000;
+ int64_t MB = 1000 * KB;
+ int64_t GB = 1000 * MB;
+
+ if (!size) {
+ return -1;
+ }
+
+ if (strcasecmp(size, "false") == 0) {
+ return 0;
+ }
+
+ len = strlen(size);
+ val = atoll(size);
+
+ if (len == 0) {
+ return -1;
+ }
+
+ for (i = len - 1; i > 0; i--) {
+ if (isdigit(size[i])) {
+ break;
+ }
+ else {
+ plen++;
+ }
+ }
+
+ if (plen == 0) {
+ return val;
+ }
+ else if (plen > 2) {
+ return -1;
+ }
+
+ for (i = 0; i < plen; i++) {
+ c = size[(len - plen) + i];
+ tmp[i] = toupper(c);
+ }
+
+ if (plen == 2) {
+ if (tmp[1] != 'B') {
+ return -1;
+ }
+ }
+
+ if (tmp[0] == 'K') {
+ /* set upper bound (2**64/KB)/2 to avoid overflows */
+ if (val >= 9223372036854775 || val <= -9223372036854774)
+ {
+ return -1;
+ }
+ return (val * KB);
+ }
+ else if (tmp[0] == 'M') {
+ /* set upper bound (2**64/MB)/2 to avoid overflows */
+ if (val >= 9223372036854 || val <= -9223372036853) {
+ return -1;
+ }
+ return (val * MB);
+ }
+ else if (tmp[0] == 'G') {
+ /* set upper bound (2**64/GB)/2 to avoid overflows */
+ if (val >= 9223372036 || val <= -9223372035) {
+ return -1;
+ }
+ return (val * GB);
+ }
+ else {
+ return -1;
+ }
+
+ return val;
+}
+
+int64_t flb_utils_hex2int(char *hex, int len)
+{
+ int i = 0;
+ int64_t res = 0;
+ char c;
+
+ while ((c = *hex++) && i < len) {
+ /* Ensure no overflow */
+ if (res >= (int64_t)((INT64_MAX/0x10) - 0xff)) {
+ return -1;
+ }
+
+ res *= 0x10;
+
+ if (c >= 'a' && c <= 'f') {
+ res += (c - 0x57);
+ }
+ else if (c >= 'A' && c <= 'F') {
+ res += (c - 0x37);
+ }
+ else if (c >= '0' && c <= '9') {
+ res += (c - 0x30);
+ }
+ else {
+ return -1;
+ }
+ i++;
+ }
+
+ if (res < 0) {
+ return -1;
+ }
+
+ return res;
+}
+
+int flb_utils_time_to_seconds(const char *time)
+{
+ int len;
+ size_t val;
+
+ len = strlen(time);
+ if (len == 0) {
+ return 0;
+ }
+ val = atoi(time);
+
+ /* String time to seconds */
+ if (time[len - 1] == 'D' || time[len - 1] == 'd') {
+ val *= 86400;
+ }
+ if (time[len - 1] == 'H' || time[len - 1] == 'h') {
+ val *= 3600;
+ }
+ else if (time[len - 1] == 'M' || time[len - 1] == 'm') {
+ val *= 60;
+ }
+
+ return val;
+}
+
+int flb_utils_bool(const char *val)
+{
+ if (strcasecmp(val, "true") == 0 ||
+ strcasecmp(val, "on") == 0 ||
+ strcasecmp(val, "yes") == 0) {
+ return FLB_TRUE;
+ }
+ else if (strcasecmp(val, "false") == 0 ||
+ strcasecmp(val, "off") == 0 ||
+ strcasecmp(val, "no") == 0) {
+ return FLB_FALSE;
+ }
+
+ return -1;
+}
+
+/* Convert a 'string' time seconds.nanoseconds to int and long values */
+int flb_utils_time_split(const char *time, int *sec, long *nsec)
+{
+ char *p;
+ char *end;
+ long val = 0;
+
+ errno = 0;
+ val = strtol(time, &end, 10);
+ if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
+ || (errno != 0 && val == 0)) {
+ flb_errno();
+ return -1;
+ }
+ if (end == time) {
+ return -1;
+ }
+ *sec = (int) val;
+
+ /* Try to find subseconds */
+ *nsec = 0;
+ p = strchr(time, '.');
+ if (p) {
+ p += 1;
+ val = strtol(p, &end, 10);
+ if ((errno == ERANGE && (val == LONG_MAX || val == LONG_MIN))
+ || (errno != 0 && val == 0)) {
+ flb_errno();
+ return -1;
+ }
+ if (end == p) {
+ return -1;
+ }
+ *nsec = val;
+ }
+
+ return 0;
+}
+
+void flb_utils_bytes_to_human_readable_size(size_t bytes,
+ char *out_buf, size_t size)
+{
+ unsigned long i;
+ unsigned long u = 1024;
+ static const char *__units[] = {
+ "b", "K", "M", "G",
+ "T", "P", "E", "Z", "Y", NULL
+ };
+
+ for (i = 0; __units[i] != NULL; i++) {
+ if ((bytes / u) == 0) {
+ break;
+ }
+ u *= 1024;
+ }
+ if (!i) {
+ snprintf(out_buf, size, "%lu%s", (long unsigned int) bytes, __units[0]);
+ }
+ else {
+ float fsize = (float) ((double) bytes / (u / 1024));
+ snprintf(out_buf, size, "%.1f%s", fsize, __units[i]);
+ }
+}
+
+
+static inline void encoded_to_buf(char *out, const char *in, int len)
+{
+ int i;
+ char *p = out;
+
+ for (i = 0; i < len; i++) {
+ *p++ = in[i];
+ }
+}
+
+/*
+ * Write string pointed by 'str' to the destination buffer 'buf'. It's make sure
+ * to escape sepecial characters and convert utf-8 byte characters to string
+ * representation.
+ */
+int flb_utils_write_str(char *buf, int *off, size_t size,
+ const char *str, size_t str_len)
+{
+ int i;
+ int b;
+ int ret;
+ int written = 0;
+ int required;
+ int len;
+ int hex_bytes;
+ int is_valid;
+ int utf_sequence_number;
+ int utf_sequence_length;
+ uint32_t codepoint;
+ uint32_t state = 0;
+ char tmp[16];
+ size_t available;
+ uint32_t c;
+ char *p;
+ uint8_t *s;
+
+ available = (size - *off);
+ required = str_len;
+ if (available <= required) {
+ return FLB_FALSE;
+ }
+
+ p = buf + *off;
+ for (i = 0; i < str_len; i++) {
+ if ((available - written) < 2) {
+ return FLB_FALSE;
+ }
+
+ c = (uint32_t) str[i];
+ if (c == '\"') {
+ *p++ = '\\';
+ *p++ = '\"';
+ }
+ else if (c == '\\') {
+ *p++ = '\\';
+ *p++ = '\\';
+ }
+ else if (c == '\n') {
+ *p++ = '\\';
+ *p++ = 'n';
+ }
+ else if (c == '\r') {
+ *p++ = '\\';
+ *p++ = 'r';
+ }
+ else if (c == '\t') {
+ *p++ = '\\';
+ *p++ = 't';
+ }
+ else if (c == '\b') {
+ *p++ = '\\';
+ *p++ = 'b';
+ }
+ else if (c == '\f') {
+ *p++ = '\\';
+ *p++ = 'f';
+ }
+ else if (c < 32 || c == 0x7f) {
+ if ((available - written) < 6) {
+ return FLB_FALSE;
+ }
+ len = snprintf(tmp, sizeof(tmp) - 1, "\\u%.4hhx", (unsigned char) c);
+ if ((available - written) < len) {
+ return FLB_FALSE;
+ }
+ encoded_to_buf(p, tmp, len);
+ p += len;
+ }
+ else if (c >= 0x80 && c <= 0xFFFF) {
+ hex_bytes = flb_utf8_len(str + i);
+ if (available - written < 6) {
+ return FLB_FALSE;
+ }
+
+ if (i + hex_bytes > str_len) {
+ break; /* skip truncated UTF-8 */
+ }
+
+ state = FLB_UTF8_ACCEPT;
+ codepoint = 0;
+
+ for (b = 0; b < hex_bytes; b++) {
+ s = (unsigned char *) str + i + b;
+ ret = flb_utf8_decode(&state, &codepoint, *s);
+ if (ret == 0) {
+ break;
+ }
+ }
+
+ if (state != FLB_UTF8_ACCEPT) {
+ /* Invalid UTF-8 hex, just skip utf-8 bytes */
+ flb_warn("[pack] invalid UTF-8 bytes found, skipping bytes");
+ }
+ else {
+ len = snprintf(tmp, sizeof(tmp) - 1, "\\u%.4x", codepoint);
+ if ((available - written) < len) {
+ return FLB_FALSE;
+ }
+ encoded_to_buf(p, tmp, len);
+ p += len;
+ }
+ i += (hex_bytes - 1);
+ }
+ else if (c > 0xFFFF) {
+ utf_sequence_length = flb_utf8_len(str + i);
+
+ if (i + utf_sequence_length > str_len) {
+ break; /* skip truncated UTF-8 */
+ }
+
+ is_valid = FLB_TRUE;
+ for (utf_sequence_number = 0; utf_sequence_number < utf_sequence_length;
+ utf_sequence_number++) {
+ /* Leading characters must start with bits 11 */
+ if (utf_sequence_number == 0 && ((str[i] & 0xC0) != 0xC0)) {
+ /* Invalid unicode character. replace */
+ flb_debug("[pack] unexpected UTF-8 leading byte, "
+ "substituting character with replacement character");
+ tmp[utf_sequence_number] = str[i];
+ ++i; /* Consume invalid leading byte */
+ utf_sequence_length = utf_sequence_number + 1;
+ is_valid = FLB_FALSE;
+ break;
+ }
+ /* Trailing characters must start with bits 10 */
+ else if (utf_sequence_number > 0 && ((str[i] & 0xC0) != 0x80)) {
+ /* Invalid unicode character. replace */
+ flb_debug("[pack] unexpected UTF-8 continuation byte, "
+ "substituting character with replacement character");
+ /* This byte, i, is the start of the next unicode character */
+ utf_sequence_length = utf_sequence_number;
+ is_valid = FLB_FALSE;
+ break;
+ }
+
+ tmp[utf_sequence_number] = str[i];
+ ++i;
+ }
+ --i;
+
+ if (is_valid) {
+ if (available - written < utf_sequence_length) {
+ return FLB_FALSE;
+ }
+
+ encoded_to_buf(p, tmp, utf_sequence_length);
+ p += utf_sequence_length;
+ }
+ else {
+ if (available - written < utf_sequence_length * 3) {
+ return FLB_FALSE;
+ }
+
+ /*
+ * Utf-8 sequence is invalid. Map fragments to private use area
+ * codepoints in range:
+ * 0x<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR>00 to
+ * 0x<FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR>FF
+ */
+ for (b = 0; b < utf_sequence_length; ++b) {
+ /*
+ * Utf-8 private block invalid hex mapping. Format unicode charpoint
+ * in the following format:
+ *
+ * +--------+--------+--------+
+ * |1110PPPP|10PPPPHH|10HHHHHH|
+ * +--------+--------+--------+
+ *
+ * Where:
+ * P is FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR bits (1 byte)
+ * H is Utf-8 fragment hex bits (1 byte)
+ * 1 is bit 1
+ * 0 is bit 0
+ */
+
+ /* unicode codepoint start */
+ *p = 0xE0;
+
+ /* print unicode private block header first 4 bits */
+ *p |= FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR >> 4;
+ ++p;
+
+ /* unicode codepoint middle */
+ *p = 0x80;
+
+ /* print end of unicode private block header last 4 bits */
+ *p |= ((FLB_UTILS_FRAGMENT_PRIVATE_BLOCK_DESCRIPTOR << 2) & 0x3f);
+
+ /* print hex fragment first 2 bits */
+ *p |= (tmp[b] >> 6) & 0x03;
+ ++p;
+
+ /* unicode codepoint middle */
+ *p = 0x80;
+
+ /* print hex fragment last 6 bits */
+ *p |= tmp[b] & 0x3f;
+ ++p;
+ }
+ }
+ }
+ else {
+ *p++ = c;
+ }
+ written = (p - (buf + *off));
+ }
+
+ *off += written;
+
+ return FLB_TRUE;
+}
+
+
+int flb_utils_write_str_buf(const char *str, size_t str_len, char **out, size_t *out_size)
+{
+ int ret;
+ int off;
+ char *tmp;
+ char *buf;
+ size_t s;
+
+ s = str_len + 1;
+ buf = flb_malloc(s);
+ if (!buf) {
+ flb_errno();
+ return -1;
+ }
+
+ while (1) {
+ off = 0;
+ ret = flb_utils_write_str(buf, &off, s, str, str_len);
+ if (ret == FLB_FALSE) {
+ s += 256;
+ tmp = flb_realloc(buf, s);
+ if (!tmp) {
+ flb_errno();
+ flb_free(buf);
+ return -1;
+ }
+ buf = tmp;
+ }
+ else {
+ /* done */
+ break;
+ }
+ }
+
+ *out = buf;
+ *out_size = off;
+ return 0;
+}
+
+static char *flb_copy_host(const char *string, int pos_init, int pos_end)
+{
+ if (string[pos_init] == '[') { /* IPv6 */
+ if (string[pos_end-1] != ']')
+ return NULL;
+
+ return mk_string_copy_substr(string, pos_init + 1, pos_end - 1);
+ }
+ else
+ return mk_string_copy_substr(string, pos_init, pos_end);
+}
+
+int flb_utils_url_split(const char *in_url, char **out_protocol,
+ char **out_host, char **out_port, char **out_uri)
+{
+ char *protocol = NULL;
+ char *host = NULL;
+ char *port = NULL;
+ char *uri = NULL;
+ char *p;
+ char *tmp;
+ char *sep;
+
+ /* Protocol */
+ p = strstr(in_url, "://");
+ if (!p) {
+ return -1;
+ }
+ if (p == in_url) {
+ return -1;
+ }
+
+ protocol = mk_string_copy_substr(in_url, 0, p - in_url);
+ if (!protocol) {
+ flb_errno();
+ return -1;
+ }
+
+ /* Advance position after protocol */
+ p += 3;
+
+ /* Check for first '/' */
+ sep = strchr(p, '/');
+ tmp = strchr(p, ':');
+
+ /* Validate port separator is found before the first slash */
+ if (sep && tmp) {
+ if (tmp > sep) {
+ tmp = NULL;
+ }
+ }
+
+ if (tmp) {
+ host = flb_copy_host(p, 0, tmp - p);
+ if (!host) {
+ flb_errno();
+ goto error;
+ }
+ p = tmp + 1;
+
+ /* Look for an optional URI */
+ tmp = strchr(p, '/');
+ if (tmp) {
+ port = mk_string_copy_substr(p, 0, tmp - p);
+ uri = flb_strdup(tmp);
+ }
+ else {
+ port = flb_strdup(p);
+ uri = flb_strdup("/");
+ }
+ }
+ else {
+ tmp = strchr(p, '/');
+ if (tmp) {
+ host = flb_copy_host(p, 0, tmp - p);
+ uri = flb_strdup(tmp);
+ }
+ else {
+ host = flb_copy_host(p, 0, strlen(p));
+ uri = flb_strdup("/");
+ }
+ }
+
+ if (!port) {
+ if (strcmp(protocol, "http") == 0) {
+ port = flb_strdup("80");
+ }
+ else if (strcmp(protocol, "https") == 0) {
+ port = flb_strdup("443");
+ }
+ }
+
+ *out_protocol = protocol;
+ *out_host = host;
+ *out_port = port;
+ *out_uri = uri;
+
+ return 0;
+
+ error:
+ if (protocol) {
+ flb_free(protocol);
+ }
+
+ return -1;
+}
+
+
+/*
+ * flb_utils_proxy_url_split parses a proxy's information from a http_proxy URL.
+ * The URL is in the form like `http://username:password@myproxy.com:8080`.
+ * Note: currently only HTTP is supported.
+ */
+int flb_utils_proxy_url_split(const char *in_url, char **out_protocol,
+ char **out_username, char **out_password,
+ char **out_host, char **out_port)
+{
+ char *protocol = NULL;
+ char *username = NULL;
+ char *password = NULL;
+ char *host = NULL;
+ char *port = NULL;
+ char *proto_sep;
+ char *at_sep;
+ char *tmp;
+
+ /* Parse protocol */
+ proto_sep = strstr(in_url, "://");
+ if (!proto_sep) {
+ return -1;
+ }
+ if (proto_sep == in_url) {
+ return -1;
+ }
+
+ protocol = mk_string_copy_substr(in_url, 0, proto_sep - in_url);
+ if (!protocol) {
+ flb_errno();
+ return -1;
+ }
+ /* Only HTTP proxy is supported for now. */
+ if (strcmp(protocol, "http") != 0) {
+ flb_free(protocol);
+ return -1;
+ }
+
+ /* Advance position after protocol */
+ proto_sep += 3;
+
+ /* Seperate `username:password` and `host:port` */
+ at_sep = strrchr(proto_sep, '@');
+ if (at_sep) {
+ /* Parse username:passwrod part. */
+ tmp = strchr(proto_sep, ':');
+ if (!tmp) {
+ flb_free(protocol);
+ return -1;
+ }
+ username = mk_string_copy_substr(proto_sep, 0, tmp - proto_sep);
+ tmp += 1;
+ password = mk_string_copy_substr(tmp, 0, at_sep - tmp);
+
+ /* Parse host:port part. */
+ at_sep += 1;
+ tmp = strchr(at_sep, ':');
+ if (tmp) {
+ host = flb_copy_host(at_sep, 0, tmp - at_sep);
+ tmp += 1;
+ port = strdup(tmp);
+ }
+ else {
+ host = flb_copy_host(at_sep, 0, strlen(at_sep));
+ port = flb_strdup("80");
+ }
+ }
+ else {
+ /* Parse host:port part. */
+ tmp = strchr(proto_sep, ':');
+ if (tmp) {
+ host = flb_copy_host(proto_sep, 0, tmp - proto_sep);
+ tmp += 1;
+ port = strdup(tmp);
+ }
+ else {
+ host = flb_copy_host(proto_sep, 0, strlen(proto_sep));
+ port = flb_strdup("80");
+ }
+ }
+
+ *out_protocol = protocol;
+ *out_host = host;
+ *out_port = port;
+ if (username) {
+ *out_username = username;
+ }
+ if (password) {
+ *out_password = password;
+ }
+
+ return 0;
+}
+
+
+char *flb_utils_get_os_name()
+{
+#ifdef _WIN64
+ return "win64";
+#elif _WIN32
+ return "win32";
+#elif __APPLE__ || __MACH__
+ return "macos";
+#elif __linux__
+ return "linux";
+#elif __FreeBSD__
+ return "freebsd";
+#elif __unix || __unix__
+ return "unix";
+#else
+ return "other";
+#endif
+}
+
+#ifdef FLB_HAVE_OPENSSL
+int flb_utils_uuid_v4_gen(char *buf)
+{
+ int ret;
+ union {
+ struct {
+ uint32_t time_low;
+ uint16_t time_mid;
+ uint16_t time_hi_and_version;
+ uint8_t clk_seq_hi_res;
+ uint8_t clk_seq_low;
+ uint8_t node[6];
+ };
+ uint8_t __rnd[16];
+ } uuid;
+
+ ret = RAND_bytes(uuid.__rnd, sizeof(uuid));
+
+ uuid.clk_seq_hi_res = (uint8_t) ((uuid.clk_seq_hi_res & 0x3F) | 0x80);
+ uuid.time_hi_and_version = (uint16_t) ((uuid.time_hi_and_version & 0x0FFF) | 0x4000);
+
+ snprintf(buf, 38, "%08x-%04x-%04x-%02x%02x-%02x%02x%02x%02x%02x%02x",
+ uuid.time_low, uuid.time_mid, uuid.time_hi_and_version,
+ uuid.clk_seq_hi_res, uuid.clk_seq_low,
+ uuid.node[0], uuid.node[1], uuid.node[2],
+ uuid.node[3], uuid.node[4], uuid.node[5]);
+
+ if (ret == 1) {
+ return 0;
+ }
+
+ return -1;
+}
+#else
+int flb_utils_uuid_v4_gen(char *buf)
+{
+ snprintf(buf, 38, "ddad00f1-3806-46ab-88d1-277a8c863cd6");
+ return 0;
+}
+#endif
+
+int flb_utils_read_file(char *path, char **out_buf, size_t *out_size)
+{
+ int fd;
+ int ret;
+ size_t bytes;
+ struct stat st;
+ flb_sds_t buf;
+ FILE *fp;
+
+ fp = fopen(path, "rb");
+ if (!fp) {
+ return -1;
+ }
+ fd = fileno(fp);
+
+ ret = fstat(fd, &st);
+ if (ret == -1) {
+ flb_errno();
+ fclose(fp);
+ return -1;
+ }
+
+ buf = flb_calloc(1, st.st_size + 1);
+ if (!buf) {
+ flb_errno();
+ fclose(fp);
+ return -1;
+ }
+
+ bytes = fread(buf, st.st_size, 1, fp);
+ if (bytes < 1) {
+ if (ferror(fp)) {
+ flb_errno();
+ }
+ flb_free(buf);
+ fclose(fp);
+ return -1;
+ }
+ fclose(fp);
+
+ *out_buf = buf;
+ *out_size = st.st_size;
+ return 0;
+}
+
+static int machine_id_read_and_sanitize(char *path,
+ char **out_buf, size_t *out_size)
+{
+ int ret;
+ size_t s;
+ char *p;
+ char *buf;
+ size_t bytes;
+
+ ret = flb_utils_read_file(path, &buf, &bytes);
+ if (ret != 0) {
+ return -1;
+ }
+
+ p = buf + bytes - 1;
+ while (*p == ' ' || *p == '\n') {
+ p--;
+ }
+
+ /* set new size */
+ s = p - buf + 1;
+
+ buf[s] = '\0';
+ *out_size = s;
+ *out_buf = buf;
+
+ return 0;
+}
+
+int flb_utils_get_machine_id(char **out_id, size_t *out_size)
+{
+ int ret;
+ char *id;
+ size_t bytes;
+ char *uuid;
+
+#ifdef __linux__
+ char *dbus_var = "/var/lib/dbus/machine-id";
+ char *dbus_etc = "/etc/machine-id";
+
+ /* dbus */
+ if (access(dbus_var, F_OK) == 0) { /* check if the file exists first */
+ ret = machine_id_read_and_sanitize(dbus_var, &id, &bytes);
+ if (ret == 0) {
+ *out_id = id;
+ *out_size = bytes;
+ return 0;
+ }
+ }
+
+ /* etc */
+ if (access(dbus_etc, F_OK) == 0) { /* check if the file exists first */
+ ret = machine_id_read_and_sanitize(dbus_etc, &id, &bytes);
+ if (ret == 0) {
+ *out_id = id;
+ *out_size = bytes;
+ return 0;
+ }
+ }
+#elif defined(__FreeBSD__) || defined(__NetBSD__) || \
+ defined(__OpenBSD__) || defined(__DragonFly__)
+
+ char *hostid = "/etc/hostid";
+
+ /* hostid */
+ ret = machine_id_read_and_sanitize(hostid, &id, &bytes);
+ if (ret == 0) {
+ *out_id = id;
+ *out_size = bytes;
+ return 0;
+ }
+#endif
+
+ /* generate a random uuid */
+ uuid = flb_malloc(38);
+ if (!uuid) {
+ flb_errno();
+ return -1;
+ }
+ ret = flb_utils_uuid_v4_gen(uuid);
+ if (ret == 0) {
+ *out_id = uuid;
+ *out_size = strlen(uuid);
+ return 0;
+ }
+
+ return -1;
+}
+
+void flb_utils_set_plugin_string_property(const char *name,
+ flb_sds_t *field_storage,
+ flb_sds_t new_value)
+{
+ if (field_storage == NULL) {
+ flb_error("[utils] invalid field storage pointer for property '%s'",
+ name);
+
+ return;
+ }
+
+ if (*field_storage != NULL) {
+ flb_warn("[utils] property '%s' is already specified with value '%s'."
+ " Overwriting with '%s'",
+ name,
+ *field_storage,
+ new_value);
+
+ flb_sds_destroy(*field_storage);
+
+ *field_storage = NULL;
+ }
+
+ *field_storage = new_value;
+}