summaryrefslogtreecommitdiffstats
path: root/modules/policy/lua-aho-corasick/ac_fast.cxx
blob: 9dbc2e6688348dea88ff55303d5fdfbc448846ff (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
#include <algorithm>    // for std::sort
#include "ac_slow.hpp"
#include "ac_fast.hpp"

uint32
AC_Converter::Calc_State_Sz(const ACS_State* s) const {
    AC_State dummy;
    uint32 sz = offsetof(AC_State, input_vect);
    sz += s->Get_GotoNum() * sizeof(dummy.input_vect[0]);

    if (sz < sizeof(AC_State))
        sz = sizeof(AC_State);

    uint32 align = __alignof__(dummy);
    sz = (sz + align - 1) & ~(align - 1);
    return sz;
}

AC_Buffer*
AC_Converter::Alloc_Buffer() {
    const vector<ACS_State*>& all_states = _acs.Get_All_States();
    const ACS_State* root_state = _acs.Get_Root_State();
    uint32 root_fanout = root_state->Get_GotoNum();

    // Step 1: Calculate the buffer size
    AC_Ofst root_goto_ofst, states_ofst_ofst, first_state_ofst;

    // part 1 :  buffer header
    uint32 sz = root_goto_ofst = sizeof(AC_Buffer);

    // part 2: Root-node's goto function
    if (likely(root_fanout != 255))
        sz += 256;
    else
        root_goto_ofst = 0;

    // part 3: mapping of state's relative position.
    unsigned align = __alignof__(AC_Ofst);
    sz = (sz + align - 1) & ~(align - 1);
    states_ofst_ofst = sz;

    sz += sizeof(AC_Ofst) * all_states.size();

    // part 4: state's contents
    align = __alignof__(AC_State);
    sz = (sz + align - 1) & ~(align - 1);
    first_state_ofst = sz;

    uint32 state_sz = 0;
    for (vector<ACS_State*>::const_iterator i = all_states.begin(),
            e = all_states.end(); i != e; i++) {
        state_sz += Calc_State_Sz(*i);
    }
    state_sz -= Calc_State_Sz(root_state);

    sz += state_sz;

    // Step 2: Allocate buffer, and populate header.
    AC_Buffer* buf = _buf_alloc.alloc(sz);

    buf->hdr.magic_num = AC_MAGIC_NUM;
    buf->hdr.impl_variant = IMPL_FAST_VARIANT;
    buf->buf_len = sz;
    buf->root_goto_ofst = root_goto_ofst;
    buf->states_ofst_ofst = states_ofst_ofst;
    buf->first_state_ofst = first_state_ofst;
    buf->root_goto_num = root_fanout;
    buf->state_num = _acs.Get_State_Num();
    return buf;
}

void
AC_Converter::Populate_Root_Goto_Func(AC_Buffer* buf,
                                      GotoVect& goto_vect) {
    unsigned char *buf_base = (unsigned char*)(buf);
    InputTy* root_gotos = (InputTy*)(buf_base + buf->root_goto_ofst);
    const ACS_State* root_state = _acs.Get_Root_State();

    root_state->Get_Sorted_Gotos(goto_vect);

    // Renumber the ID of root-node's immediate kids.
    uint32 new_id = 1;
    bool full_fantout = (goto_vect.size() == 255);
    if (likely(!full_fantout))
        bzero(root_gotos, 256*sizeof(InputTy));

    for (GotoVect::iterator i = goto_vect.begin(), e = goto_vect.end();
            i != e; i++, new_id++) {
        InputTy c = i->first;
        ACS_State* s = i->second;
        _id_map[s->Get_ID()] = new_id;

        if (likely(!full_fantout))
            root_gotos[c] = new_id;
    }
}

AC_Buffer*
AC_Converter::Convert() {
    // Step 1: Some preparation stuff.
    GotoVect gotovect;

    _id_map.clear();
    _ofst_map.clear();
    _id_map.resize(_acs.Get_Next_Node_Id());
    _ofst_map.resize(_acs.Get_Next_Node_Id());

    // Step 2: allocate buffer to accommodate the entire AC graph.
    AC_Buffer* buf = Alloc_Buffer();
    unsigned char* buf_base = (unsigned char*)buf;

    // Step 3: Root node need special care.
    Populate_Root_Goto_Func(buf, gotovect);
    buf->root_goto_num = gotovect.size();
    _id_map[_acs.Get_Root_State()->Get_ID()] = 0;

    // Step 4: Converting the remaining states by BFSing the graph.
    // First of all, enter root's immediate kids to the working list.
    vector<const ACS_State*> wl;
    State_ID id = 1;
    for (GotoVect::iterator i = gotovect.begin(), e = gotovect.end();
            i != e; i++, id++) {
        ACS_State* s = i->second;
        wl.push_back(s);
        _id_map[s->Get_ID()] = id;
    }

    AC_Ofst* state_ofst_vect = (AC_Ofst*)(buf_base + buf->states_ofst_ofst);
    AC_Ofst ofst = buf->first_state_ofst;
    for (uint32 idx = 0; idx < wl.size(); idx++) {
        const ACS_State* old_s = wl[idx];
        AC_State* new_s = (AC_State*)(buf_base + ofst);

        // This property should hold as we:
        //  - States are appended to worklist in the BFS order.
        //  - sibiling states are appended to worklist in the order of their
        //    corresponding input.
        //
        State_ID state_id = idx + 1;
        ASSERT(_id_map[old_s->Get_ID()] == state_id);

        state_ofst_vect[state_id] = ofst;

        new_s->first_kid = wl.size() + 1;
        new_s->depth = old_s->Get_Depth();
        new_s->is_term = old_s->is_Terminal() ?
                         old_s->get_Pattern_Idx() + 1 : 0;

        uint32 gotonum = old_s->Get_GotoNum();
        new_s->goto_num = gotonum;

        // Populate the "input" field
        old_s->Get_Sorted_Gotos(gotovect);
        uint32 input_idx = 0;
        uint32 id = wl.size() + 1;
        InputTy* input_vect = new_s->input_vect;
        for (GotoVect::iterator i = gotovect.begin(), e = gotovect.end();
             i != e; i++, id++, input_idx++) {
            input_vect[input_idx] = i->first;

            ACS_State* kid = i->second;
            _id_map[kid->Get_ID()] = id;
            wl.push_back(kid);
        }

        _ofst_map[old_s->Get_ID()] = ofst;
        ofst += Calc_State_Sz(old_s);
    }

    // This assertion might be useful to catch buffer overflow
    ASSERT(ofst == buf->buf_len);

    // Populate the fail-link field.
    for (vector<const ACS_State*>::iterator i = wl.begin(), e = wl.end();
            i != e; i++) {
        const ACS_State* slow_s = *i;
        State_ID fast_s_id = _id_map[slow_s->Get_ID()];
        AC_State* fast_s = (AC_State*)(buf_base + state_ofst_vect[fast_s_id]);
        if (const ACS_State* fl = slow_s->Get_FailLink()) {
            State_ID id = _id_map[fl->Get_ID()];
            fast_s->fail_link = id;
        } else
            fast_s->fail_link = 0;
    }
#ifdef DEBUG
    //dump_buffer(buf, stderr);
#endif
    return buf;
}

static inline AC_State*
Get_State_Addr(unsigned char* buf_base, AC_Ofst* StateOfstVect, uint32 state_id) {
    ASSERT(state_id != 0 && "root node is handled in speical way");
    ASSERT(state_id < ((AC_Buffer*)buf_base)->state_num);
    return (AC_State*)(buf_base + StateOfstVect[state_id]);
}

// The performance of the binary search is critical to this work.
//
// Here we provide two versions of binary-search functions.
// The non-pristine version seems to consistently out-perform "pristine" one on
// bunch of benchmarks we tested.  With the benchmark under tests/testinput/
//
//   The speedup is following on my laptop (core i7, ubuntu):
//
//   benchmark       was                is
//  ----------------------------------------
//  image.bin       2.3s               2.0s
//  test.tar        6.7s               5.7s
//
//  NOTE: As of I write this comment, we only measure the performance on about
// 10+ benchmarks. It's still too early to say which one works better.
//
#if !defined(BS_MULTI_VER)
static bool __attribute__((always_inline)) inline
Binary_Search_Input(InputTy* input_vect, int vect_len, InputTy input, int& idx) {
    if (vect_len <= 8) {
        for (int i = 0; i < vect_len; i++) {
            if (input_vect[i] == input) {
                idx = i;
                return true;
            }
        }
        return false;
    }

    // The "low" and "high" must be signed integers, as they could become -1.
    // Also since they are signed integer, "(low + high)/2" is sightly more
    // expensive than (low+high)>>1 or ((unsigned)(low + high))/2.
    //
    int low = 0, high = vect_len - 1;
    while (low <= high) {
        int mid = (low + high) >> 1;
        InputTy mid_c = input_vect[mid];

        if (input < mid_c)
            high = mid - 1;
        else if (input > mid_c)
            low = mid + 1;
        else {
            idx = mid;
            return true;
        }
    }
    return false;
}

#else

/* Let us call this version "pristine" version. */
static inline bool
Binary_Search_Input(InputTy* input_vect, int vect_len, InputTy input, int& idx) {
    int low = 0, high = vect_len - 1;
    while (low <= high) {
        int mid = (low + high) >> 1;
        InputTy mid_c = input_vect[mid];

        if (input < mid_c)
            high = mid - 1;
        else if (input > mid_c)
            low = mid + 1;
        else {
            idx = mid;
            return true;
        }
    }
    return false;
}
#endif

typedef enum {
    // Look for the first match. e.g. pattern set = {"ab", "abc", "def"},
    // subject string "ababcdef". The first match would be "ab" at the
    // beginning of the subject string.
    MV_FIRST_MATCH,

    // Look for the left-most longest match. Follow above example; there are
    // two longest matches, "abc" and "def", and the left-most longest match
    // is "abc".
    MV_LEFT_LONGEST,

    // Similar to the left-most longest match, except that it returns the
    // *right* most longest match. Follow above example, the match would
    // be "def". NYI.
    MV_RIGHT_LONGEST,

    // Return all patterns that match that given subject string. NYI.
    MV_ALL_MATCHES,
} MATCH_VARIANT;

/* The Match_Tmpl is the template for vairants MV_FIRST_MATCH, MV_LEFT_LONGEST,
 * MV_RIGHT_LONGEST (If we really really need MV_RIGHT_LONGEST variant, we are
 * better off implementing it in a seprate function).
 *
 * The Match_Tmpl supports three variants at once "symbolically", once it's
 * instanced to a particular variants, all the code irrelevant to the variants
 * will be statically removed. So don't worry about the code like
 * "if (variant == MV_XXXX)"; they will not incur any penalty.
 *
 * The drawback of using template is increased code size. Unfortunately, there
 * is no silver bullet.
 */
template<MATCH_VARIANT variant> static ac_result_t
Match_Tmpl(AC_Buffer* buf, const char* str, uint32 len) {
    unsigned char* buf_base = (unsigned char*)(buf);
    unsigned char* root_goto = buf_base + buf->root_goto_ofst;
    AC_Ofst* states_ofst_vect = (AC_Ofst* )(buf_base + buf->states_ofst_ofst);

    AC_State* state = 0;
    uint32 idx = 0;

    // Skip leading chars that are not valid input of root-nodes.
    if (likely(buf->root_goto_num != 255)) {
        while(idx < len) {
            unsigned char c = str[idx++];
            if (unsigned char kid_id = root_goto[c]) {
                state = Get_State_Addr(buf_base, states_ofst_vect, kid_id);
                break;
            }
        }
    } else {
        idx = 1;
        state = Get_State_Addr(buf_base, states_ofst_vect, *str);
    }

    ac_result_t r = {-1, -1};
    if (likely(state != 0)) {
        if (unlikely(state->is_term)) {
            /* Dictionary may have string of length 1 */
            r.match_begin = idx - state->depth;
            r.match_end = idx - 1;
            r.pattern_idx = state->is_term - 1;

            if (variant == MV_FIRST_MATCH) {
                return r;
            }
        }
    }

    while (idx < len) {
        unsigned char c = str[idx];
        int res;
        bool found;
        found = Binary_Search_Input(state->input_vect, state->goto_num, c, res);
        if (found) {
            // The "t = goto(c, current_state)" is valid, advance to state "t".
            uint32 kid = state->first_kid + res;
            state = Get_State_Addr(buf_base, states_ofst_vect, kid);
            idx++;
        } else {
            // Follow the fail-link.
            State_ID fl = state->fail_link;
            if (fl == 0) {
                // fail-link is root-node, which implies the root-node dosen't
                // have 255 valid transitions (otherwise, the fail-link should
                // points to "goto(root, c)"), so we don't need speical handling
                // as we did before this while-loop is entered.
                //
                while(idx < len) {
                    InputTy c = str[idx++];
                    if (unsigned char kid_id = root_goto[c]) {
                        state =
                            Get_State_Addr(buf_base, states_ofst_vect, kid_id);
                        break;
                    }
                }
            } else {
                state = Get_State_Addr(buf_base, states_ofst_vect, fl);
            }
        }

        // Check to see if the state is terminal state?
        if (state->is_term) {
            if (variant == MV_FIRST_MATCH) {
                ac_result_t r;
                r.match_begin = idx - state->depth;
                r.match_end = idx - 1;
                r.pattern_idx = state->is_term - 1;
                return r;
            }

            if (variant == MV_LEFT_LONGEST) {
                int match_begin = idx - state->depth;
                int match_end = idx - 1;

                if (r.match_begin == -1 ||
                    match_end - match_begin > r.match_end - r.match_begin) {
                    r.match_begin = match_begin;
                    r.match_end = match_end;
                    r.pattern_idx = state->is_term - 1;
                }
                continue;
            }

            ASSERT(false && "NYI");
        }
    }

    return r;
}

ac_result_t
Match(AC_Buffer* buf, const char* str, uint32 len) {
    return Match_Tmpl<MV_FIRST_MATCH>(buf, str, len);
}

ac_result_t
Match_Longest_L(AC_Buffer* buf, const char* str, uint32 len) {
    return Match_Tmpl<MV_LEFT_LONGEST>(buf, str, len);
}

#ifdef DEBUG
void
AC_Converter::dump_buffer(AC_Buffer* buf, FILE* f) {
    vector<AC_Ofst> state_ofst;
    state_ofst.resize(_id_map.size());

    fprintf(f, "Id maps between old/slow and new/fast graphs\n");
    int old_id = 0;
    for (vector<uint32>::iterator i = _id_map.begin(), e = _id_map.end();
         i != e; i++, old_id++) {
        State_ID new_id = *i;
        if (new_id != 0) {
            fprintf(f, "%d -> %d, ", old_id, new_id);
        }
    }
    fprintf(f, "\n");

    int idx = 0;
    for (vector<uint32>::iterator i = _id_map.begin(), e = _id_map.end();
         i != e; i++, idx++) {
        uint32 id = *i;
        if (id == 0) continue;
        state_ofst[id] = _ofst_map[idx];
    }

    unsigned char* buf_base = (unsigned char*)buf;

    // dump root goto-function.
    fprintf(f, "root, fanout:%d goto {", buf->root_goto_num);
    if (buf->root_goto_num != 255) {
        unsigned char* root_goto = buf_base + buf->root_goto_ofst;
        for (uint32 i = 0; i < 255; i++) {
            if (root_goto[i] != 0)
                fprintf(f, "%c->S:%d, ", (unsigned char)i, root_goto[i]);
        }
    } else {
        fprintf(f, "full fanout\n");
    }
    fprintf(f, "}\n");

    // dump remaining states.
    AC_Ofst* state_ofst_vect = (AC_Ofst*)(buf_base + buf->states_ofst_ofst);
    for (uint32 i = 1, e = buf->state_num; i < e; i++) {
        AC_Ofst ofst = state_ofst_vect[i];
        ASSERT(ofst == state_ofst[i]);
        fprintf(f, "S:%d, ofst:%d, goto={", i, ofst);

        AC_State* s = (AC_State*)(buf_base + ofst);
        State_ID kid = s->first_kid;
        for (uint32 k = 0, ke = s->goto_num; k < ke; k++, kid++)
            fprintf(f, "%c->S:%d, ", s->input_vect[k], kid);

        fprintf(f, "}, fail-link = S:%d, %s\n", s->fail_link,
                s->is_term ? "terminal" : "");
    }
}
#endif