summaryrefslogtreecommitdiffstats
path: root/modules/policy/lua-aho-corasick/tests/ac_test_simple.cxx
blob: fa2d7fd3e4e7149485909f2d293d59bc628cf72b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
#include <stdio.h>
#include <string.h>
#include <vector>
#include <string>

#include "ac.h"
#include "ac_util.hpp"
#include "test_base.hpp"

using namespace std;

namespace {
typedef struct {
    const char* str;
    const char* match;
} StrPair;

typedef enum {
    MV_FIRST_MATCH = 0,
    MV_LEFT_LONGEST = 1,
} MatchVariant;

typedef struct {
    const char* name;
    const char** dict;
    StrPair* strpairs;
    int dict_len;
    int strpair_num;
    MatchVariant match_variant;
} TestingCase;

class Tests {
public:
    Tests(const char* name,
          const char* dict[], int dict_len,
          StrPair strpairs[], int strpair_num,
          MatchVariant mv = MV_FIRST_MATCH) {
        if (!_tests)
            _tests = new vector<TestingCase>;

        TestingCase tc;
        tc.name = name;
        tc.dict = dict;
        tc.strpairs = strpairs;
        tc.dict_len = dict_len;
        tc.strpair_num = strpair_num;
        tc.match_variant = mv;
        _tests->push_back(tc);
    }

    static vector<TestingCase>* Get_Tests() { return _tests; }
    static void Erase_Tests() { delete _tests; _tests = 0; }

private:
    static vector<TestingCase> *_tests;
};

class LeftLongestTests : public Tests {
public:
    LeftLongestTests (const char* name, const char* dict[], int dict_len,
                      StrPair strpairs[], int strpair_num):
        Tests(name, dict, dict_len, strpairs, strpair_num, MV_LEFT_LONGEST) {
    }
};

vector<TestingCase>* Tests::_tests = 0;

class ACTestSimple: public ACTestBase {
public:
    ACTestSimple(const char* banner) : ACTestBase(banner) {}
    virtual bool Run();

private:
    void PrintSummary(int total, int fail)  {
        fprintf(stdout, "Test count : %d, fail: %d\n", total, fail);
        fflush(stdout);
    }
};
}

bool
ACTestSimple::Run() {
    int total = 0;
    int fail = 0;

    vector<TestingCase> *tests = Tests::Get_Tests();
    if (!tests) {
        PrintSummary(0, 0);
        return true;
    }

    for (vector<TestingCase>::iterator i = tests->begin(), e = tests->end();
            i != e; i++) {
        TestingCase& t = *i;
        int dict_len = t.dict_len;
        unsigned int* strlen_v = new unsigned int[dict_len];

        fprintf(stdout, ">Testing %s\nDictionary:[ ", t.name);
        for (int i = 0, need_break=0; i < dict_len; i++) {
            const char* s = t.dict[i];
            fprintf(stdout, "%s, ", s);
            strlen_v[i] = strlen(s);
            if (need_break++ == 16) {
                fputs("\n  ", stdout);
                need_break = 0;
            }
        }
        fputs("]\n", stdout);

        /* Create the dictionary */
        ac_t* ac = ac_create(t.dict, strlen_v, dict_len);
        delete[] strlen_v;

        for (int ii = 0, ee = t.strpair_num; ii < ee; ii++, total++) {
            const StrPair& sp = t.strpairs[ii];
            const char *str = sp.str; // the string to be matched
            const char *match = sp.match;

            fprintf(stdout, "[%3d] Testing '%s' : ", total, str);

            int len = strlen(str);
            ac_result_t r;
            if (t.match_variant == MV_FIRST_MATCH)
                r = ac_match(ac, str, len);
            else if (t.match_variant == MV_LEFT_LONGEST)
                r = ac_match_longest_l(ac, str, len);
            else {
                ASSERT(false && "Unknown variant");
            }

            int m_b = r.match_begin;
            int m_e = r.match_end;

            // The return value per se is insane.
            if (m_b > m_e ||
                ((m_b < 0 || m_e < 0) && (m_b != -1 || m_e != -1))) {
                fprintf(stdout, "Insane return value (%d, %d)\n", m_b, m_e);
                fail ++;
                continue;
            }

            // If the string is not supposed to match the dictionary.
            if (!match) {
                if (m_b != -1 || m_e != -1) {
                    fail ++;
                    fprintf(stdout, "Not Supposed to match (%d, %d) \n",
                            m_b, m_e);
                } else
                    fputs("Pass\n", stdout);
                continue;
            }

            // The string or its substring is match the dict.
            if (m_b >= len || m_b >= len) {
                fail ++;
                fprintf(stdout,
                        "Return value >= the length of the string (%d, %d)\n",
                        m_b, m_e);
                continue;
            } else {
                int mlen = strlen(match);
                if ((mlen != m_e - m_b + 1) ||
                    strncmp(str + m_b, match, mlen)) {
                    fail ++;
                    fprintf(stdout, "Fail\n");
                } else
                    fprintf(stdout, "Pass\n");
            }
        }
        fputs("\n", stdout);
        ac_free(ac);
    }

    PrintSummary(total, fail);
    return fail == 0;
}

bool
Run_AC_Simple_Test() {
    ACTestSimple t("AC Simple test");
    t.PrintBanner();
    return t.Run();
}

//////////////////////////////////////////////////////////////////////////////
//
//    Testing cases for first-match variant (i.e. test ac_match())
//
//////////////////////////////////////////////////////////////////////////////
//

/* test 1*/
const char *dict1[] = {"he", "she", "his", "her"};
StrPair strpair1[] = {
    {"he", "he"}, {"she", "she"}, {"his", "his"},
    {"hers", "he"}, {"ahe", "he"}, {"shhe", "he"},
    {"shis2", "his"}, {"ahhe", "he"}
};
Tests test1("test 1",
            dict1, sizeof(dict1)/sizeof(dict1[0]),
            strpair1, sizeof(strpair1)/sizeof(strpair1[0]));

/* test 2*/
const char *dict2[] = {"poto", "poto"}; /* duplicated strings*/
StrPair strpair2[] = {{"The pot had a handle", 0}};
Tests test2("test 2", dict2, 2, strpair2, 1);

/* test 3*/
const char *dict3[] = {"The"};
StrPair strpair3[] = {{"The pot had a handle", "The"}};
Tests test3("test 3", dict3, 1, strpair3, 1);

/* test 4*/
const char *dict4[] = {"pot"};
StrPair strpair4[] = {{"The pot had a handle", "pot"}};
Tests test4("test 4", dict4, 1, strpair4, 1);

/* test 5*/
const char *dict5[] = {"pot "};
StrPair strpair5[] = {{"The pot had a handle", "pot "}};
Tests test5("test 5", dict5, 1, strpair5, 1);

/* test 6*/
const char *dict6[] = {"ot h"};
StrPair strpair6[] = {{"The pot had a handle", "ot h"}};
Tests test6("test 6", dict6, 1, strpair6, 1);

/* test 7*/
const char *dict7[] = {"andle"};
StrPair strpair7[] = {{"The pot had a handle", "andle"}};
Tests test7("test 7", dict7, 1, strpair7, 1);

const char *dict8[] = {"aaab"};
StrPair strpair8[] = {{"aaaaaaab", "aaab"}};
Tests test8("test 8", dict8, 1, strpair8, 1);

const char *dict9[] = {"haha", "z"};
StrPair strpair9[] = {{"aaaaz", "z"}, {"z", "z"}};
Tests test9("test 9", dict9, 2, strpair9, 2);

/* test the case when input string dosen't contain even a single char
 * of the pattern in dictionary.
 */
const char *dict10[] = {"abc"};
StrPair strpair10[] = {{"cde", 0}};
Tests test10("test 10", dict10, 1, strpair10, 1);


//////////////////////////////////////////////////////////////////////////////
//
//    Testing cases for first longest match variant (i.e.
// test ac_match_longest_l())
//
//////////////////////////////////////////////////////////////////////////////
//

// This was actually first motivation for left-longest-match
const char *dict100[] = {"Mozilla", "Mozilla Mobile"};
StrPair strpair100[] = {{"User Agent containing string Mozilla Mobile", "Mozilla Mobile"}};
LeftLongestTests test100("l_test 100", dict100, 2, strpair100, 1);

// Dict with single char is tricky
const char *dict101[] = {"a", "abc"};
StrPair strpair101[] = {{"abcdef", "abc"}};
LeftLongestTests test101("l_test 101", dict101, 2, strpair101, 1);

// Testing case with partially overlapping patterns. The purpose is to
// check if the fail-link leading from terminal state is correct.
//
// The fail-link leading from terminal-state does not matter in
// match-first-occurrence variant, as it stop when a terminal is hit.
//
const char *dict102[] = {"abc", "bcdef"};
StrPair strpair102[] = {{"abcdef", "bcdef"}};
LeftLongestTests test102("l_test 102", dict102, 2, strpair102, 1);