diff options
Diffstat (limited to 'collectors/log2journal/log2journal-pcre2.c')
-rw-r--r-- | collectors/log2journal/log2journal-pcre2.c | 139 |
1 files changed, 139 insertions, 0 deletions
diff --git a/collectors/log2journal/log2journal-pcre2.c b/collectors/log2journal/log2journal-pcre2.c new file mode 100644 index 000000000..185e69108 --- /dev/null +++ b/collectors/log2journal/log2journal-pcre2.c @@ -0,0 +1,139 @@ +// SPDX-License-Identifier: GPL-3.0-or-later + +#include "log2journal.h" + +#define PCRE2_ERROR_LINE_MAX 1024 +#define PCRE2_KEY_MAX 1024 + +struct pcre2_state { + LOG_JOB *jb; + + const char *line; + uint32_t pos; + uint32_t key_start; + + pcre2_code *re; + pcre2_match_data *match_data; + + char key[PCRE2_KEY_MAX]; + char msg[PCRE2_ERROR_LINE_MAX]; +}; + +static inline void copy_and_convert_key(PCRE2_STATE *pcre2, const char *key) { + char *d = &pcre2->key[pcre2->key_start]; + size_t remaining = sizeof(pcre2->key) - pcre2->key_start; + + while(remaining >= 2 && *key) { + *d = journal_key_characters_map[(unsigned) (*key)]; + remaining--; + key++; + d++; + } + + *d = '\0'; +} + +static inline void jb_traverse_pcre2_named_groups_and_send_keys(PCRE2_STATE *pcre2, pcre2_code *re, pcre2_match_data *match_data, char *line) { + PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data); + uint32_t names_count; + pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &names_count); + + if (names_count > 0) { + PCRE2_SPTR name_table; + pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &name_table); + uint32_t name_entry_size; + pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size); + + const unsigned char *table_ptr = name_table; + for (uint32_t i = 0; i < names_count; i++) { + int n = (table_ptr[0] << 8) | table_ptr[1]; + const char *group_name = (const char *)(table_ptr + 2); + + PCRE2_SIZE start_offset = ovector[2 * n]; + PCRE2_SIZE end_offset = ovector[2 * n + 1]; + PCRE2_SIZE group_length = end_offset - start_offset; + + copy_and_convert_key(pcre2, group_name); + log_job_send_extracted_key_value(pcre2->jb, pcre2->key, line + start_offset, group_length); + + table_ptr += name_entry_size; + } + } +} + +void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos) { + int l; + + if(pos >= 0) + l = snprintf(msg, msg_len, "PCRE2 error %d at pos %d on: ", rc, pos); + else + l = snprintf(msg, msg_len, "PCRE2 error %d on: ", rc); + + pcre2_get_error_message(rc, (PCRE2_UCHAR *)&msg[l], msg_len - l); +} + +static void pcre2_error_message(PCRE2_STATE *pcre2, int rc, int pos) { + pcre2_get_error_in_buffer(pcre2->msg, sizeof(pcre2->msg), rc, pos); +} + +bool pcre2_has_error(PCRE2_STATE *pcre2) { + return !pcre2->re || pcre2->msg[0]; +} + +PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb) { + PCRE2_STATE *pcre2 = mallocz(sizeof(PCRE2_STATE)); + memset(pcre2, 0, sizeof(PCRE2_STATE)); + pcre2->jb = jb; + + if(jb->prefix) + pcre2->key_start = copy_to_buffer(pcre2->key, sizeof(pcre2->key), pcre2->jb->prefix, strlen(pcre2->jb->prefix)); + + int rc; + PCRE2_SIZE pos; + pcre2->re = pcre2_compile((PCRE2_SPTR)jb->pattern, PCRE2_ZERO_TERMINATED, 0, &rc, &pos, NULL); + if (!pcre2->re) { + pcre2_error_message(pcre2, rc, pos); + return pcre2; + } + + pcre2->match_data = pcre2_match_data_create_from_pattern(pcre2->re, NULL); + + return pcre2; +} + +void pcre2_parser_destroy(PCRE2_STATE *pcre2) { + if(pcre2) + freez(pcre2); +} + +const char *pcre2_parser_error(PCRE2_STATE *pcre2) { + return pcre2->msg; +} + +bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len) { + pcre2->line = txt; + pcre2->pos = 0; + pcre2->msg[0] = '\0'; + + if(!len) + len = strlen(txt); + + int rc = pcre2_match(pcre2->re, (PCRE2_SPTR)pcre2->line, len, 0, 0, pcre2->match_data, NULL); + if(rc < 0) { + pcre2_error_message(pcre2, rc, -1); + return false; + } + + jb_traverse_pcre2_named_groups_and_send_keys(pcre2, pcre2->re, pcre2->match_data, (char *)pcre2->line); + + return true; +} + +void pcre2_test(void) { + LOG_JOB jb = { .prefix = "NIGNX_" }; + PCRE2_STATE *pcre2 = pcre2_parser_create(&jb); + + pcre2_parse_document(pcre2, "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}", 0); + + pcre2_parser_destroy(pcre2); +} |