1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
// SPDX-License-Identifier: GPL-3.0-or-later
#include "log2journal.h"
// Size in bytes of the buffer that holds the formatted PCRE2 error text
// (the `msg` member of struct pcre2_state).
#define PCRE2_ERROR_LINE_MAX 1024
// Size in bytes of the buffer in which output keys are assembled
// (the `key` member of struct pcre2_state).
#define PCRE2_KEY_MAX 1024
// State of one PCRE2-based parser: the compiled pattern, its match data,
// and the scratch buffers used while turning named groups into key/value pairs.
struct pcre2_state {
// job this parser was created for; its prefix and pattern configure the parser
// and extracted key/value pairs are sent to it
LOG_JOB *jb;
// text passed to the most recent pcre2_parse_document() call (not owned here)
const char *line;
// reset to 0 at the start of every pcre2_parse_document() call;
// not otherwise read in this part of the file
uint32_t pos;
// offset in `key` where the group name is written; set from the return value
// of copy_to_buffer() when the job has a prefix, 0 otherwise
uint32_t key_start;
// compiled pattern; NULL when compilation failed
pcre2_code *re;
// match data block created from `re`
pcre2_match_data *match_data;
// output key buffer: the job prefix (if any) followed by the converted group name
char key[PCRE2_KEY_MAX];
// last error message; an empty string means no error is recorded
char msg[PCRE2_ERROR_LINE_MAX];
};
/*
 * Write the converted form of `key` into pcre2->key, starting at offset
 * pcre2->key_start (the bytes before that offset are left as they are),
 * and NUL-terminate the result.
 *
 * Every source byte is translated through journal_key_characters_map.
 * Copying stops at the end of `key`, or as soon as only the terminator
 * still fits, so an over-long name is silently truncated.
 *
 * @param pcre2  parser state owning the key buffer
 * @param key    NUL-terminated name to convert and append
 */
static inline void copy_and_convert_key(PCRE2_STATE *pcre2, const char *key) {
    // No room even for the terminator: leave the buffer untouched instead of
    // writing past its end (the subtraction below would also wrap around).
    if(pcre2->key_start >= sizeof(pcre2->key))
        return;

    char *d = &pcre2->key[pcre2->key_start];
    size_t remaining = sizeof(pcre2->key) - pcre2->key_start;

    // keep one byte in reserve for the terminator
    while(remaining >= 2 && *key) {
        // Index through unsigned char: where plain char is signed, a byte
        // >= 0x80 is negative and would otherwise turn into a huge,
        // out-of-range index.
        // NOTE(review): assumes the map has an entry for every byte value (256) - confirm.
        *d = journal_key_characters_map[(unsigned char)*key];
        remaining--;
        key++;
        d++;
    }

    *d = '\0';
}
/*
 * Walk the name table of the compiled pattern `re` and, for every named
 * capture group, send one key/value pair with
 * log_job_send_extracted_key_value(): the key is the group name converted by
 * copy_and_convert_key() (it ends up in pcre2->key), the value is the slice
 * of `line` that the group captured according to `match_data`.
 *
 * A group that did not take part in the match is sent with an empty value.
 * Nothing is sent when the pattern has no named groups or when the name
 * table cannot be queried.
 *
 * @param pcre2       parser state (key buffer and target job)
 * @param re          compiled pattern whose name table is walked
 * @param match_data  match data filled by a successful match of `re` on `line`
 * @param line        the subject text that was matched
 */
static inline void jb_traverse_pcre2_named_groups_and_send_keys(PCRE2_STATE *pcre2, pcre2_code *re, pcre2_match_data *match_data, char *line) {
    // pcre2_pattern_info() returns 0 on success; on failure the output
    // variable is not reliable, so bail out instead of reading it.
    uint32_t names_count = 0;
    if(pcre2_pattern_info(re, PCRE2_INFO_NAMECOUNT, &names_count) != 0 || names_count == 0)
        return;

    PCRE2_SPTR name_table = NULL;
    if(pcre2_pattern_info(re, PCRE2_INFO_NAMETABLE, &name_table) != 0 || !name_table)
        return;

    uint32_t name_entry_size = 0;
    if(pcre2_pattern_info(re, PCRE2_INFO_NAMEENTRYSIZE, &name_entry_size) != 0 || name_entry_size == 0)
        return;

    PCRE2_SIZE *ovector = pcre2_get_ovector_pointer(match_data);

    // Value PCRE2 stores in both offsets of a group that did not participate
    // in the match (this is what the PCRE2_UNSET macro expands to).
    const PCRE2_SIZE unset = ~(PCRE2_SIZE)0;

    const unsigned char *table_ptr = name_table;
    for(uint32_t i = 0; i < names_count; i++, table_ptr += name_entry_size) {
        // Each entry: group number in two bytes (most significant first),
        // followed by the NUL-terminated group name.
        int n = (table_ptr[0] << 8) | table_ptr[1];
        const char *group_name = (const char *)(table_ptr + 2);

        PCRE2_SIZE start_offset = ovector[2 * n];
        PCRE2_SIZE end_offset = ovector[2 * n + 1];

        // Default to an empty value anchored at a valid address; only form
        // `line + start_offset` when the offsets describe a real slice, so
        // an unset group never yields an out-of-range pointer.
        char *value = line;
        PCRE2_SIZE group_length = 0;
        if(start_offset != unset && end_offset != unset && start_offset <= end_offset) {
            value = line + start_offset;
            group_length = end_offset - start_offset;
        }

        copy_and_convert_key(pcre2, group_name);
        log_job_send_extracted_key_value(pcre2->jb, pcre2->key, value, group_length);
    }
}
/*
 * Format a PCRE2 error into `msg`: a prefix with the numeric code (and the
 * position, when `pos` is not negative) followed by the textual description
 * obtained from pcre2_get_error_message().
 *
 * The result is always NUL-terminated and never exceeds `msg_len` bytes;
 * text that does not fit is truncated.
 *
 * @param msg      destination buffer
 * @param msg_len  size of `msg` in bytes; nothing is written when it is 0
 * @param rc       PCRE2 error code
 * @param pos      offset the error refers to, or a negative value for "none"
 */
void pcre2_get_error_in_buffer(char *msg, size_t msg_len, int rc, int pos) {
    if(!msg || !msg_len)
        return;

    int l;
    if(pos >= 0)
        l = snprintf(msg, msg_len, "PCRE2 error %d at pos %d on: ", rc, pos);
    else
        l = snprintf(msg, msg_len, "PCRE2 error %d on: ", rc);

    // snprintf() returns the length it WANTED to write, which may be negative
    // (error) or >= msg_len (truncation); neither is a valid index into msg.
    size_t used = 0;
    if(l < 0)
        msg[0] = '\0';              // no usable prefix; start the description at offset 0
    else if((size_t)l >= msg_len)
        return;                     // prefix alone filled the buffer (already terminated)
    else
        used = (size_t)l;

    pcre2_get_error_message(rc, (PCRE2_UCHAR *)&msg[used], msg_len - used);
}
/*
 * Record a PCRE2 error in the parser state: formats error code `rc` (and
 * offset `pos`) into the state's own message buffer via
 * pcre2_get_error_in_buffer().
 */
static void pcre2_error_message(PCRE2_STATE *pcre2, int rc, int pos) {
    char *buffer = pcre2->msg;
    size_t buffer_size = sizeof(pcre2->msg);

    pcre2_get_error_in_buffer(buffer, buffer_size, rc, pos);
}
/*
 * Tell whether the parser is in an error state.
 *
 * @return true when there is no compiled pattern or when an error message
 *         is recorded; false otherwise.
 */
bool pcre2_has_error(PCRE2_STATE *pcre2) {
    // without a compiled pattern the parser is unusable
    if(!pcre2->re)
        return true;

    // otherwise any non-empty message means an error was recorded
    return pcre2->msg[0] != '\0';
}
/*
 * Allocate a parser state for `jb` and compile the job's pattern.
 *
 * When the job has a prefix it is copied to the start of the key buffer and
 * key_start is set from the return value of copy_to_buffer(), so converted
 * group names are written after it.
 *
 * A state object is returned in every case. On failure (pattern does not
 * compile, or match data cannot be created) an error message is recorded in
 * it; callers should check pcre2_has_error() and read pcre2_parser_error().
 *
 * @param jb  job providing the prefix and pattern
 * @return    the new parser state, to be released with pcre2_parser_destroy()
 */
PCRE2_STATE *pcre2_parser_create(LOG_JOB *jb) {
    // NOTE(review): the result is used unchecked, as before - presumably
    // mallocz() does not return on allocation failure; confirm.
    PCRE2_STATE *pcre2 = mallocz(sizeof(*pcre2));
    memset(pcre2, 0, sizeof(*pcre2));
    pcre2->jb = jb;

    if(jb->prefix)
        pcre2->key_start = copy_to_buffer(pcre2->key, sizeof(pcre2->key), pcre2->jb->prefix, strlen(pcre2->jb->prefix));

    int rc = 0;
    PCRE2_SIZE pos = 0;
    pcre2->re = pcre2_compile((PCRE2_SPTR)jb->pattern, PCRE2_ZERO_TERMINATED, 0, &rc, &pos, NULL);
    if (!pcre2->re) {
        // pos is an offset into the pattern; narrow it explicitly for the message
        pcre2_error_message(pcre2, rc, (int)pos);
        return pcre2;
    }

    pcre2->match_data = pcre2_match_data_create_from_pattern(pcre2->re, NULL);
    if (!pcre2->match_data) {
        // Without match data no match can run; record it so that
        // pcre2_has_error() does not report a healthy parser.
        snprintf(pcre2->msg, sizeof(pcre2->msg), "PCRE2 failed to allocate match data");
    }

    return pcre2;
}
/*
 * Release a parser state created by pcre2_parser_create(), including the
 * match data block and the compiled pattern it owns.
 *
 * @param pcre2  state to destroy; NULL is accepted and ignored
 */
void pcre2_parser_destroy(PCRE2_STATE *pcre2) {
    if(!pcre2)
        return;

    // Both PCRE2 free functions do nothing when handed NULL, so a state
    // whose compilation failed is handled without extra checks.
    pcre2_match_data_free(pcre2->match_data);
    pcre2_code_free(pcre2->re);

    freez(pcre2);
}
/*
 * Return the parser's current error message.
 *
 * The returned pointer refers to the buffer inside `pcre2`; it is an empty
 * string when no error is recorded.
 */
const char *pcre2_parser_error(PCRE2_STATE *pcre2) {
    const char *message = &pcre2->msg[0];
    return message;
}
/*
 * Match the parser's compiled pattern against `txt` and, on success, send
 * one key/value pair per named capture group to the job.
 *
 * @param pcre2  parser state
 * @param txt    text to match
 * @param len    length of `txt` in bytes; 0 means "use strlen(txt)"
 * @return       true when the pattern matched; false otherwise, with the
 *               PCRE2 error recorded in the parser's message buffer
 */
bool pcre2_parse_document(PCRE2_STATE *pcre2, const char *txt, size_t len) {
    // reset the per-document state, dropping any previously recorded error
    pcre2->msg[0] = '\0';
    pcre2->pos = 0;
    pcre2->line = txt;

    size_t subject_length = len ? len : strlen(txt);

    int match_rc = pcre2_match(pcre2->re, (PCRE2_SPTR)pcre2->line, subject_length, 0, 0, pcre2->match_data, NULL);
    if(match_rc >= 0) {
        jb_traverse_pcre2_named_groups_and_send_keys(pcre2, pcre2->re, pcre2->match_data, (char *)pcre2->line);
        return true;
    }

    // negative return codes are PCRE2 errors (no position available here)
    pcre2_error_message(pcre2, match_rc, -1);
    return false;
}
/*
 * Smoke test: builds a parser for a job that only has a prefix, feeds it a
 * single document and destroys the parser again. No result is checked.
 *
 * NOTE(review): the job sets no pattern, yet pcre2_parser_create() hands
 * jb->pattern to pcre2_compile(); the input also looks like JSON and the
 * prefix "NIGNX_" looks like a typo of "NGINX_" - confirm what is intended.
 */
void pcre2_test(void) {
    LOG_JOB job = { .prefix = "NIGNX_" };
    const char *document = "{\"value\":\"\\u\\u039A\\u03B1\\u03BB\\u03B7\\u03BC\\u03AD\\u03C1\\u03B1\"}";

    PCRE2_STATE *parser = pcre2_parser_create(&job);
    pcre2_parse_document(parser, document, 0);
    pcre2_parser_destroy(parser);
}
|